File Coverage

blib/lib/Perl/Tidy/Formatter.pm
Criterion Covered Total %
statement 8648 10183 84.9
branch 4095 5552 73.7
condition 3245 4802 67.5
subroutine 347 383 90.6
pod 0 284 0.0
total 16335 21204 77.0


line stmt bran cond sub pod time code
1             ####################################################################
2             #
3             # The Perl::Tidy::Formatter package adds indentation, whitespace, and
4             # line breaks to the token stream
5             #
6             #####################################################################
7              
8             # Index...
9             # CODE SECTION 1: Preliminary code, global definitions and sub new
10             # sub new
11             # CODE SECTION 2: Some Basic Utilities
12             # CODE SECTION 3: Check and process options
13             # sub check_options
14             # CODE SECTION 4: Receive lines from the tokenizer
15             # sub write_line
16             # CODE SECTION 5: Pre-process the entire file
17             # sub finish_formatting
18             # CODE SECTION 6: Process line-by-line
19             # sub process_all_lines
20             # CODE SECTION 7: Process lines of code
21             # process_line_of_CODE
22             # CODE SECTION 8: Utilities for setting breakpoints
23             # sub set_forced_breakpoint
24             # CODE SECTION 9: Process batches of code
25             # sub grind_batch_of_CODE
26             # CODE SECTION 10: Code to break long statements
27             # sub break_long_lines
28             # CODE SECTION 11: Code to break long lists
29             # sub break_lists
30             # CODE SECTION 12: Code for setting indentation
31             # CODE SECTION 13: Preparing batch of lines for vertical alignment
32             # sub convey_batch_to_vertical_aligner
33             # CODE SECTION 14: Code for creating closing side comments
34             # sub add_closing_side_comment
35             # CODE SECTION 15: Summarize
36             # sub wrapup
37              
38             #######################################################################
39             # CODE SECTION 1: Preliminary code and global definitions up to sub new
40             #######################################################################
41              
42             package Perl::Tidy::Formatter;
43 38     38   311 use strict;
  38         76  
  38         1356  
44 38     38   183 use warnings;
  38         71  
  38         1103  
45              
46             # DEVEL_MODE gets switched on during automated testing for extra checking
47 38     38   178 use constant DEVEL_MODE => 0;
  38         75  
  38         2184  
48 38     38   225 use constant EMPTY_STRING => q{};
  38         100  
  38         1903  
49 38     38   231 use constant SPACE => q{ };
  38         71  
  38         2099  
50              
51             { #<<< A non-indenting brace to contain all lexical variables
52              
53 38     38   286 use Carp;
  38         120  
  38         2742  
54 38     38   318 use English qw( -no_match_vars );
  38         121  
  38         301  
55 38     38   14150 use List::Util qw( min max ); # min, max are in Perl 5.8
  38         84  
  38         41529  
56             our $VERSION = '20230701';
57              
58             # The Tokenizer will be loaded with the Formatter
59             ##use Perl::Tidy::Tokenizer; # for is_keyword()
60              
61             sub AUTOLOAD {
62              
63             # Catch any undefined sub calls so that we are sure to get
64             # some diagnostic information. This sub should never be called
65             # except for a programming error.
66 0     0   0 our $AUTOLOAD;
67 0 0       0 return if ( $AUTOLOAD =~ /\bDESTROY$/ );
68 0         0 my ( $pkg, $fname, $lno ) = caller();
69 0         0 my $my_package = __PACKAGE__;
70 0         0 print STDERR <<EOM;
71             ======================================================================
72             Error detected in package '$my_package', version $VERSION
73             Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
74             Called from package: '$pkg'
75             Called from File '$fname' at line '$lno'
76             This error is probably due to a recent programming change
77             ======================================================================
78             EOM
79 0         0 exit 1;
80             } ## end sub AUTOLOAD
81              
82             sub DESTROY {
83 555     555   1486 my $self = shift;
84 555         2732 $self->_decrement_count();
85 555         38090 return;
86             }
87              
88             sub Die {
89 0     0 0 0 my ($msg) = @_;
90 0         0 Perl::Tidy::Die($msg);
91 0         0 croak "unexpected return from Perl::Tidy::Die";
92             }
93              
94             sub Warn {
95 0     0 0 0 my ($msg) = @_;
96 0         0 Perl::Tidy::Warn($msg);
97 0         0 return;
98             }
99              
100             sub Fault {
101 0     0 0 0 my ($msg) = @_;
102              
103             # This routine is called for errors that really should not occur
104             # except if there has been a bug introduced by a recent program change.
105             # Please add comments at calls to Fault to explain why the call
106             # should not occur, and where to look to fix it.
107 0         0 my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
108 0         0 my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
109 0         0 my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
110 0         0 my $pkg = __PACKAGE__;
111              
112 0         0 my $input_stream_name = get_input_stream_name();
113              
114 0         0 Die(<<EOM);
115             ==============================================================================
116             While operating on input stream with name: '$input_stream_name'
117             A fault was detected at line $line0 of sub '$subroutine1'
118             in file '$filename1'
119             which was called from line $line1 of sub '$subroutine2'
120             Message: '$msg'
121             This is probably an error introduced by a recent programming change.
122             $pkg reports VERSION='$VERSION'.
123             ==============================================================================
124             EOM
125              
126             # We shouldn't get here, but this return is to keep Perl-Critic from
127             # complaining.
128 0         0 return;
129             } ## end sub Fault
130              
131             sub Fault_Warn {
132 0     0 0 0 my ($msg) = @_;
133              
134             # This is the same as Fault except that it calls Warn instead of Die
135             # and returns.
136 0         0 my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
137 0         0 my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
138 0         0 my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
139 0         0 my $input_stream_name = get_input_stream_name();
140              
141 0         0 Warn(<<EOM);
142             ==============================================================================
143             While operating on input stream with name: '$input_stream_name'
144             A fault was detected at line $line0 of sub '$subroutine1'
145             in file '$filename1'
146             which was called from line $line1 of sub '$subroutine2'
147             Message: '$msg'
148             This is probably an error introduced by a recent programming change.
149             Perl::Tidy::Formatter.pm reports VERSION='$VERSION'.
150             ==============================================================================
151             EOM
152              
153 0         0 return;
154             } ## end sub Fault_Warn
155              
156             sub Exit {
157 0     0 0 0 my ($msg) = @_;
158 0         0 Perl::Tidy::Exit($msg);
159 0         0 croak "unexpected return from Perl::Tidy::Exit";
160             }
161              
162             # Global variables ...
163             my (
164              
165             #-----------------------------------------------------------------
166             # Section 1: Global variables which are either always constant or
167             # are constant after being configured by user-supplied
168             # parameters. They remain constant as a file is being processed.
169             # The INITIALIZER comment tells the sub responsible for initializing
170             # each variable. Failure to initialize or re-initialize a global
171             # variable can cause bugs which are hard to locate.
172             #-----------------------------------------------------------------
173              
174             # INITIALIZER: sub check_options
175             $rOpts,
176              
177             # short-cut option variables
178             # INITIALIZER: sub initialize_global_option_vars
179             $rOpts_add_newlines,
180             $rOpts_add_whitespace,
181             $rOpts_add_trailing_commas,
182             $rOpts_blank_lines_after_opening_block,
183             $rOpts_block_brace_tightness,
184             $rOpts_block_brace_vertical_tightness,
185             $rOpts_brace_follower_vertical_tightness,
186             $rOpts_break_after_labels,
187             $rOpts_break_at_old_attribute_breakpoints,
188             $rOpts_break_at_old_comma_breakpoints,
189             $rOpts_break_at_old_keyword_breakpoints,
190             $rOpts_break_at_old_logical_breakpoints,
191             $rOpts_break_at_old_semicolon_breakpoints,
192             $rOpts_break_at_old_ternary_breakpoints,
193             $rOpts_break_open_compact_parens,
194             $rOpts_closing_side_comments,
195             $rOpts_closing_side_comment_else_flag,
196             $rOpts_closing_side_comment_maximum_text,
197             $rOpts_comma_arrow_breakpoints,
198             $rOpts_continuation_indentation,
199             $rOpts_cuddled_paren_brace,
200             $rOpts_delete_closing_side_comments,
201             $rOpts_delete_old_whitespace,
202             $rOpts_delete_side_comments,
203             $rOpts_delete_trailing_commas,
204             $rOpts_delete_weld_interfering_commas,
205             $rOpts_extended_continuation_indentation,
206             $rOpts_format_skipping,
207             $rOpts_freeze_whitespace,
208             $rOpts_function_paren_vertical_alignment,
209             $rOpts_fuzzy_line_length,
210             $rOpts_ignore_old_breakpoints,
211             $rOpts_ignore_side_comment_lengths,
212             $rOpts_ignore_perlcritic_comments,
213             $rOpts_indent_closing_brace,
214             $rOpts_indent_columns,
215             $rOpts_indent_only,
216             $rOpts_keep_interior_semicolons,
217             $rOpts_line_up_parentheses,
218             $rOpts_logical_padding,
219             $rOpts_maximum_consecutive_blank_lines,
220             $rOpts_maximum_fields_per_table,
221             $rOpts_maximum_line_length,
222             $rOpts_one_line_block_semicolons,
223             $rOpts_opening_brace_always_on_right,
224             $rOpts_outdent_keywords,
225             $rOpts_outdent_labels,
226             $rOpts_outdent_long_comments,
227             $rOpts_outdent_long_quotes,
228             $rOpts_outdent_static_block_comments,
229             $rOpts_recombine,
230             $rOpts_short_concatenation_item_length,
231             $rOpts_space_prototype_paren,
232             $rOpts_stack_closing_block_brace,
233             $rOpts_static_block_comments,
234             $rOpts_tee_block_comments,
235             $rOpts_tee_pod,
236             $rOpts_tee_side_comments,
237             $rOpts_variable_maximum_line_length,
238             $rOpts_valign_code,
239             $rOpts_valign_side_comments,
240             $rOpts_valign_if_unless,
241             $rOpts_whitespace_cycle,
242             $rOpts_extended_block_tightness,
243             $rOpts_extended_line_up_parentheses,
244              
245             # Static hashes
246             # INITIALIZER: BEGIN block
247             %is_assignment,
248             %is_non_list_type,
249             %is_if_unless_and_or_last_next_redo_return,
250             %is_if_elsif_else_unless_while_until_for_foreach,
251             %is_if_unless_while_until_for_foreach,
252             %is_last_next_redo_return,
253             %is_if_unless,
254             %is_if_elsif,
255             %is_if_unless_elsif,
256             %is_if_unless_elsif_else,
257             %is_elsif_else,
258             %is_and_or,
259             %is_chain_operator,
260             %is_block_without_semicolon,
261             %ok_to_add_semicolon_for_block_type,
262             %is_opening_type,
263             %is_closing_type,
264             %is_opening_token,
265             %is_closing_token,
266             %is_ternary,
267             %is_equal_or_fat_comma,
268             %is_counted_type,
269             %is_opening_sequence_token,
270             %is_closing_sequence_token,
271             %matching_token,
272             %is_container_label_type,
273             %is_die_confess_croak_warn,
274             %is_my_our_local,
275             %is_soft_keep_break_type,
276             %is_indirect_object_taker,
277             @all_operators,
278             %is_do_follower,
279             %is_anon_sub_brace_follower,
280             %is_anon_sub_1_brace_follower,
281             %is_other_brace_follower,
282              
283             # INITIALIZER: sub check_options
284             $controlled_comma_style,
285             %keep_break_before_type,
286             %keep_break_after_type,
287             %outdent_keyword,
288             %keyword_paren_inner_tightness,
289             %container_indentation_options,
290             %tightness,
291             %line_up_parentheses_control_hash,
292             $line_up_parentheses_control_is_lxpl,
293              
294             # These can be modified by grep-alias-list
295             # INITIALIZER: sub initialize_grep_and_friends
296             %is_sort_map_grep,
297             %is_sort_map_grep_eval,
298             %is_sort_map_grep_eval_do,
299             %is_block_with_ci,
300             %is_keyword_returning_list,
301             %block_type_map, # initialized in BEGIN, but may be changed
302             %want_one_line_block, # may be changed in prepare_cuddled_block_types
303              
304             # INITIALIZER: sub prepare_cuddled_block_types
305             $rcuddled_block_types,
306              
307             # INITIALIZER: sub initialize_whitespace_hashes
308             %binary_ws_rules,
309             %want_left_space,
310             %want_right_space,
311              
312             # INITIALIZER: sub initialize_bond_strength_hashes
313             %right_bond_strength,
314             %left_bond_strength,
315              
316             # INITIALIZER: sub initialize_token_break_preferences
317             %want_break_before,
318             %break_before_container_types,
319              
320             # INITIALIZER: sub initialize_space_after_keyword
321             %space_after_keyword,
322              
323             # INITIALIZER: sub initialize_extended_block_tightness_list
324             %extended_block_tightness_list,
325              
326             # INITIALIZED BY initialize_global_option_vars
327             %opening_vertical_tightness,
328             %closing_vertical_tightness,
329             %closing_token_indentation,
330             $some_closing_token_indentation,
331             %opening_token_right,
332             %stack_opening_token,
333             %stack_closing_token,
334              
335             # INITIALIZER: sub initialize_weld_nested_exclusion_rules
336             %weld_nested_exclusion_rules,
337              
338             # INITIALIZER: sub initialize_weld_fat_comma_rules
339             %weld_fat_comma_rules,
340              
341             # INITIALIZER: sub initialize_trailing_comma_rules
342             %trailing_comma_rules,
343              
344             # regex patterns for text identification.
345             # Most can be configured by user parameters.
346             # Most are initialized in a sub make_**_pattern during configuration.
347              
348             # INITIALIZER: sub make_sub_matching_pattern
349             $SUB_PATTERN,
350             $ASUB_PATTERN,
351              
352             # INITIALIZER: make_static_block_comment_pattern
353             $static_block_comment_pattern,
354              
355             # INITIALIZER: sub make_static_side_comment_pattern
356             $static_side_comment_pattern,
357              
358             # INITIALIZER: make_format_skipping_pattern
359             $format_skipping_pattern_begin,
360             $format_skipping_pattern_end,
361              
362             # INITIALIZER: sub make_non_indenting_brace_pattern
363             $non_indenting_brace_pattern,
364              
365             # INITIALIZER: sub make_bl_pattern
366             $bl_exclusion_pattern,
367              
368             # INITIALIZER: make_bl_pattern
369             $bl_pattern,
370              
371             # INITIALIZER: sub make_bli_pattern
372             $bli_exclusion_pattern,
373              
374             # INITIALIZER: sub make_bli_pattern
375             $bli_pattern,
376              
377             # INITIALIZER: sub make_block_brace_vertical_tightness_pattern
378             $block_brace_vertical_tightness_pattern,
379              
380             # INITIALIZER: sub make_blank_line_pattern
381             $blank_lines_after_opening_block_pattern,
382             $blank_lines_before_closing_block_pattern,
383              
384             # INITIALIZER: sub make_keyword_group_list_pattern
385             $keyword_group_list_pattern,
386             $keyword_group_list_comment_pattern,
387              
388             # INITIALIZER: sub make_closing_side_comment_prefix
389             $closing_side_comment_prefix_pattern,
390              
391             # INITIALIZER: sub make_closing_side_comment_list_pattern
392             $closing_side_comment_list_pattern,
393              
394             # Table to efficiently find indentation and max line length
395             # from level.
396             # INITIALIZER: sub initialize_line_length_vars
397             @maximum_line_length_at_level,
398             @maximum_text_length_at_level,
399             $stress_level_alpha,
400             $stress_level_beta,
401             $high_stress_level,
402              
403             # Total number of sequence items in a weld, for quick checks
404             # INITIALIZER: weld_containers
405             $total_weld_count,
406              
407             #--------------------------------------------------------
408             # Section 2: Work arrays for the current batch of tokens.
409             #--------------------------------------------------------
410              
411             # These are re-initialized for each batch of code
412             # INITIALIZER: sub initialize_batch_variables
413             $max_index_to_go,
414             @block_type_to_go,
415             @type_sequence_to_go,
416             @forced_breakpoint_to_go,
417             @token_lengths_to_go,
418             @summed_lengths_to_go,
419             @levels_to_go,
420             @leading_spaces_to_go,
421             @reduced_spaces_to_go,
422             @mate_index_to_go,
423             @ci_levels_to_go,
424             @nesting_depth_to_go,
425             @nobreak_to_go,
426             @old_breakpoint_to_go,
427             @tokens_to_go,
428             @K_to_go,
429             @types_to_go,
430             @inext_to_go,
431             @parent_seqno_to_go,
432              
433             # forced breakpoint variables associated with each batch of code
434             $forced_breakpoint_count,
435             $forced_breakpoint_undo_count,
436             $index_max_forced_break,
437             );
438              
439 0         0 BEGIN {
440              
441             # Index names for token variables.
442             # Do not combine with other BEGIN blocks (c101).
443 38     38   12029 my $i = 0;
444             use constant {
445 38         4687 _CI_LEVEL_ => $i++,
446             _CUMULATIVE_LENGTH_ => $i++,
447             _LINE_INDEX_ => $i++,
448             _KNEXT_SEQ_ITEM_ => $i++,
449             _LEVEL_ => $i++,
450             _TOKEN_ => $i++,
451             _TOKEN_LENGTH_ => $i++,
452             _TYPE_ => $i++,
453             _TYPE_SEQUENCE_ => $i++,
454              
455             # Number of token variables; must be last in list:
456             _NVARS => $i++,
457 38     38   344 };
  38         111  
458             } ## end BEGIN
459              
460 0         0 BEGIN {
461              
462             # Index names for $self variables.
463             # Do not combine with other BEGIN blocks (c101).
464 38     38   2919 my $i = 0;
465             use constant {
466 38         30796 _rlines_ => $i++,
467             _rLL_ => $i++,
468             _Klimit_ => $i++,
469             _rdepth_of_opening_seqno_ => $i++,
470             _rSS_ => $i++,
471             _Iss_opening_ => $i++,
472             _Iss_closing_ => $i++,
473             _rblock_type_of_seqno_ => $i++,
474             _ris_asub_block_ => $i++,
475             _ris_sub_block_ => $i++,
476             _K_opening_container_ => $i++,
477             _K_closing_container_ => $i++,
478             _K_opening_ternary_ => $i++,
479             _K_closing_ternary_ => $i++,
480             _K_first_seq_item_ => $i++,
481             _rtype_count_by_seqno_ => $i++,
482             _ris_function_call_paren_ => $i++,
483             _rlec_count_by_seqno_ => $i++,
484             _ris_broken_container_ => $i++,
485             _ris_permanently_broken_ => $i++,
486             _rblank_and_comment_count_ => $i++,
487             _rhas_list_ => $i++,
488             _rhas_broken_list_ => $i++,
489             _rhas_broken_list_with_lec_ => $i++,
490             _rfirst_comma_line_index_ => $i++,
491             _rhas_code_block_ => $i++,
492             _rhas_broken_code_block_ => $i++,
493             _rhas_ternary_ => $i++,
494             _ris_excluded_lp_container_ => $i++,
495             _rlp_object_by_seqno_ => $i++,
496             _rwant_reduced_ci_ => $i++,
497             _rno_xci_by_seqno_ => $i++,
498             _rbrace_left_ => $i++,
499             _ris_bli_container_ => $i++,
500             _rparent_of_seqno_ => $i++,
501             _rchildren_of_seqno_ => $i++,
502             _ris_list_by_seqno_ => $i++,
503             _ris_cuddled_closing_brace_ => $i++,
504             _rbreak_container_ => $i++,
505             _rshort_nested_ => $i++,
506             _length_function_ => $i++,
507             _is_encoded_data_ => $i++,
508             _fh_tee_ => $i++,
509             _sink_object_ => $i++,
510             _file_writer_object_ => $i++,
511             _vertical_aligner_object_ => $i++,
512             _logger_object_ => $i++,
513             _radjusted_levels_ => $i++,
514             _this_batch_ => $i++,
515              
516             _ris_special_identifier_token_ => $i++,
517             _last_output_short_opening_token_ => $i++,
518              
519             _last_line_leading_type_ => $i++,
520             _last_line_leading_level_ => $i++,
521              
522             _added_semicolon_count_ => $i++,
523             _first_added_semicolon_at_ => $i++,
524             _last_added_semicolon_at_ => $i++,
525              
526             _deleted_semicolon_count_ => $i++,
527             _first_deleted_semicolon_at_ => $i++,
528             _last_deleted_semicolon_at_ => $i++,
529              
530             _embedded_tab_count_ => $i++,
531             _first_embedded_tab_at_ => $i++,
532             _last_embedded_tab_at_ => $i++,
533              
534             _first_tabbing_disagreement_ => $i++,
535             _last_tabbing_disagreement_ => $i++,
536             _tabbing_disagreement_count_ => $i++,
537             _in_tabbing_disagreement_ => $i++,
538             _first_brace_tabbing_disagreement_ => $i++,
539             _in_brace_tabbing_disagreement_ => $i++,
540              
541             _saw_VERSION_in_this_file_ => $i++,
542             _saw_END_or_DATA_ => $i++,
543              
544             _rK_weld_left_ => $i++,
545             _rK_weld_right_ => $i++,
546             _rweld_len_right_at_K_ => $i++,
547              
548             _rspecial_side_comment_type_ => $i++,
549              
550             _rseqno_controlling_my_ci_ => $i++,
551             _ris_seqno_controlling_ci_ => $i++,
552             _save_logfile_ => $i++,
553             _maximum_level_ => $i++,
554             _maximum_level_at_line_ => $i++,
555             _maximum_BLOCK_level_ => $i++,
556             _maximum_BLOCK_level_at_line_ => $i++,
557              
558             _rKrange_code_without_comments_ => $i++,
559             _rbreak_before_Kfirst_ => $i++,
560             _rbreak_after_Klast_ => $i++,
561             _converged_ => $i++,
562              
563             _rstarting_multiline_qw_seqno_by_K_ => $i++,
564             _rending_multiline_qw_seqno_by_K_ => $i++,
565             _rKrange_multiline_qw_by_seqno_ => $i++,
566             _rmultiline_qw_has_extra_level_ => $i++,
567              
568             _rcollapsed_length_by_seqno_ => $i++,
569             _rbreak_before_container_by_seqno_ => $i++,
570             _roverride_cab3_ => $i++,
571             _ris_assigned_structure_ => $i++,
572             _ris_short_broken_eval_block_ => $i++,
573             _ris_bare_trailing_comma_by_seqno_ => $i++,
574              
575             _rseqno_non_indenting_brace_by_ix_ => $i++,
576             _rmax_vertical_tightness_ => $i++,
577              
578             _no_vertical_tightness_flags_ => $i++,
579              
580             _LAST_SELF_INDEX_ => $i - 1,
581 38     38   298 };
  38         88  
582             } ## end BEGIN
583              
584 0         0 BEGIN {
585              
586             # Index names for batch variables.
587             # Do not combine with other BEGIN blocks (c101).
588             # These are stored in _this_batch_, which is a sub-array of $self.
589 38     38   1400 my $i = 0;
590             use constant {
591 38         5041 _starting_in_quote_ => $i++,
592             _ending_in_quote_ => $i++,
593             _is_static_block_comment_ => $i++,
594             _ri_first_ => $i++,
595             _ri_last_ => $i++,
596             _do_not_pad_ => $i++,
597             _peak_batch_size_ => $i++,
598             _batch_count_ => $i++,
599             _rix_seqno_controlling_ci_ => $i++,
600             _batch_CODE_type_ => $i++,
601             _ri_starting_one_line_block_ => $i++,
602             _runmatched_opening_indexes_ => $i++,
603             _lp_object_count_this_batch_ => $i++,
604 38     38   291 };
  38         93  
605             } ## end BEGIN
606              
607             BEGIN {
608              
609             # Sequence number assigned to the root of sequence tree.
610             # The minimum of the actual sequences numbers is 4, so we can use 1
611 38     38   252 use constant SEQ_ROOT => 1;
  38         105  
  38         2175  
612              
613             # Codes for insertion and deletion of blanks
614 38     38   242 use constant DELETE => 0;
  38         87  
  38         2031  
615 38     38   238 use constant STABLE => 1;
  38         74  
  38         1925  
616 38     38   236 use constant INSERT => 2;
  38         81  
  38         1958  
617              
618             # whitespace codes
619 38     38   242 use constant WS_YES => 1;
  38         96  
  38         1999  
620 38     38   241 use constant WS_OPTIONAL => 0;
  38         79  
  38         1959  
621 38     38   228 use constant WS_NO => -1;
  38         111  
  38         2106  
622              
623             # Token bond strengths.
624 38     38   273 use constant NO_BREAK => 10_000;
  38         91  
  38         2507  
625 38     38   287 use constant VERY_STRONG => 100;
  38         90  
  38         2309  
626 38     38   247 use constant STRONG => 2.1;
  38         81  
  38         1940  
627 38     38   232 use constant NOMINAL => 1.1;
  38         104  
  38         1944  
628 38     38   250 use constant WEAK => 0.8;
  38         78  
  38         2120  
629 38     38   234 use constant VERY_WEAK => 0.55;
  38         112  
  38         2256  
630              
631             # values for testing indexes in output array
632 38     38   267 use constant UNDEFINED_INDEX => -1;
  38         75  
  38         2044  
633              
634             # Maximum number of little messages; probably need not be changed.
635 38     38   249 use constant MAX_NAG_MESSAGES => 6;
  38         69  
  38         2065  
636              
637             # This is the decimal range of printable characters in ASCII. It is used to
638             # make quick preliminary checks before resorting to using a regex.
639 38     38   241 use constant ORD_PRINTABLE_MIN => 33;
  38         109  
  38         2091  
640 38     38   276 use constant ORD_PRINTABLE_MAX => 126;
  38         103  
  38         35337  
641              
642             # Initialize constant hashes ...
643 38     38   163 my @q;
644              
645 38         194 @q = qw(
646             = **= += *= &= <<= &&=
647             -= /= |= >>= ||= //=
648             .= %= ^=
649             x=
650             );
651 38         567 @is_assignment{@q} = (1) x scalar(@q);
652              
653             # a hash needed by break_lists for efficiency:
654 38         217 push @q, qw{ ; < > ~ f };
655 38         653 @is_non_list_type{@q} = (1) x scalar(@q);
656              
657 38         213 @q = qw(is if unless and or err last next redo return);
658 38         475 @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
659              
660             # These block types may have text between the keyword and opening
661             # curly. Note: 'else' does not, but must be included to allow trailing
662             # if/elsif text to be appended.
663             # patch for SWITCH/CASE: added 'case' and 'when'
664 38         172 @q = qw(if elsif else unless while until for foreach case when catch);
665 38         1620 @is_if_elsif_else_unless_while_until_for_foreach{@q} =
666             (1) x scalar(@q);
667              
668 38         201 @q = qw(if unless while until for foreach);
669 38         152 @is_if_unless_while_until_for_foreach{@q} =
670             (1) x scalar(@q);
671              
672 38         96 @q = qw(last next redo return);
673 38         111 @is_last_next_redo_return{@q} = (1) x scalar(@q);
674              
675             # Map related block names into a common name to allow vertical alignment
676             # used by sub make_alignment_patterns. Note: this is normally unchanged,
677             # but it contains 'grep' and can be re-initialized in
678             # sub initialize_grep_and_friends in a testing mode.
679 38         239 %block_type_map = (
680             'unless' => 'if',
681             'else' => 'if',
682             'elsif' => 'if',
683             'when' => 'if',
684             'default' => 'if',
685             'case' => 'if',
686             'sort' => 'map',
687             'grep' => 'map',
688             );
689              
690 38         86 @q = qw(if unless);
691 38         97 @is_if_unless{@q} = (1) x scalar(@q);
692              
693 38         77 @q = qw(if elsif);
694 38         83 @is_if_elsif{@q} = (1) x scalar(@q);
695              
696 38         80 @q = qw(if unless elsif);
697 38         100 @is_if_unless_elsif{@q} = (1) x scalar(@q);
698              
699 38         84 @q = qw(if unless elsif else);
700 38         90 @is_if_unless_elsif_else{@q} = (1) x scalar(@q);
701              
702 38         94 @q = qw(elsif else);
703 38         87 @is_elsif_else{@q} = (1) x scalar(@q);
704              
705 38         78 @q = qw(and or err);
706 38         98 @is_and_or{@q} = (1) x scalar(@q);
707              
708             # Identify certain operators which often occur in chains.
709             # Note: the minus (-) causes a side effect of padding of the first line in
710             # something like this (by sub set_logical_padding):
711             # Checkbutton => 'Transmission checked',
712             # -variable => \$TRANS
713             # This usually improves appearance so it seems ok.
714 38         102 @q = qw(&& || and or : ? . + - * /);
715 38         366 @is_chain_operator{@q} = (1) x scalar(@q);
716              
717             # Operators that the user can request break before or after.
718             # Note that some are keywords
719 38         290 @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
720             = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
721             . : ? && || and or err xor
722             );
723              
724             # We can remove semicolons after blocks preceded by these keywords
725 38         508 @q =
726             qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
727             unless while until for foreach given when default);
728 38         400 @is_block_without_semicolon{@q} = (1) x scalar(@q);
729              
730             # We will allow semicolons to be added within these block types
731             # as well as sub and package blocks.
732             # NOTES:
733             # 1. Note that these keywords are omitted:
734             # switch case given when default sort map grep
735             # 2. It is also ok to add for sub and package blocks and a labeled block
736             # 3. But not okay for other perltidy types including:
737             # { } ; G t
738             # 4. Test files: blktype.t, blktype1.t, semicolon.t
739 38         251 @q =
740             qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
741             unless do while until eval for foreach );
742 38         337 @ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q);
743              
744             # 'L' is token for opening { at hash key
745 38         116 @q = qw< L { ( [ >;
746 38         189 @is_opening_type{@q} = (1) x scalar(@q);
747              
748             # 'R' is token for closing } at hash key
749 38         143 @q = qw< R } ) ] >;
750 38         253 @is_closing_type{@q} = (1) x scalar(@q);
751              
752 38         120 @q = qw< { ( [ >;
753 38         231 @is_opening_token{@q} = (1) x scalar(@q);
754              
755 38         147 @q = qw< } ) ] >;
756 38         187 @is_closing_token{@q} = (1) x scalar(@q);
757              
758 38         104 @q = qw( ? : );
759 38         142 @is_ternary{@q} = (1) x scalar(@q);
760              
761 38         99 @q = qw< { ( [ ? >;
762 38         137 @is_opening_sequence_token{@q} = (1) x scalar(@q);
763              
764 38         93 @q = qw< } ) ] : >;
765 38         98 @is_closing_sequence_token{@q} = (1) x scalar(@q);
766              
767 38         181 %matching_token = (
768             '{' => '}',
769             '(' => ')',
770             '[' => ']',
771             '?' => ':',
772              
773             '}' => '{',
774             ')' => '(',
775             ']' => '[',
776             ':' => '?',
777             );
778              
779             # a hash needed by sub break_lists for labeling containers
780 38         102 @q = qw( k => && || ? : . );
781 38         157 @is_container_label_type{@q} = (1) x scalar(@q);
782              
783 38         98 @q = qw( die confess croak warn );
784 38         142 @is_die_confess_croak_warn{@q} = (1) x scalar(@q);
785              
786 38         109 @q = qw( my our local );
787 38         140 @is_my_our_local{@q} = (1) x scalar(@q);
788              
789             # Braces -bbht etc must follow these. Note: experimentation with
790             # including a simple comma shows that it adds little and can lead
791             # to poor formatting in complex lists.
792 38         78 @q = qw( = => );
793 38         108 @is_equal_or_fat_comma{@q} = (1) x scalar(@q);
794              
795 38         101 @q = qw( => ; h f );
796 38         96 push @q, ',';
797 38         132 @is_counted_type{@q} = (1) x scalar(@q);
798              
799             # Tokens where --keep-old-break-xxx flags make soft breaks instead
800             # of hard breaks. See b1433 and b1436.
801             # NOTE: $type is used as the hash key for now; if other container tokens
802             # are added it might be necessary to use a token/type mixture.
803 38         127 @q = qw# -> ? : && || + - / * #;
804 38         206 @is_soft_keep_break_type{@q} = (1) x scalar(@q);
805              
806             # these functions allow an identifier in the indirect object slot
807 38         160 @q = qw( print printf sort exec system say);
808 38         249 @is_indirect_object_taker{@q} = (1) x scalar(@q);
809              
810             # Define here tokens which may follow the closing brace of a do statement
811             # on the same line, as in:
812             # } while ( $something);
813 38         200 my @dof = qw(until while unless if ; : );
814 38         136 push @dof, ',';
815 38         242 @is_do_follower{@dof} = (1) x scalar(@dof);
816              
817             # what can follow a multi-line anonymous sub definition closing curly:
818 38         148 my @asf = qw# ; : => or and && || ~~ !~~ ) #;
819 38         101 push @asf, ',';
820 38         214 @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
821              
822             # what can follow a one-line anonymous sub closing curly:
823             # one-line anonymous subs also have ']' here...
824             # see tk3.t and PP.pm
825 38         152 my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #;
826 38         80 push @asf1, ',';
827 38         188 @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
828              
829             # What can follow a closing curly of a block
830             # which is not an if/elsif/else/do/sort/map/grep/eval/sub
831             # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
832 38         122 my @obf = qw# ; : => or and && || ) #;
833 38         97 push @obf, ',';
834 38         75763 @is_other_brace_follower{@obf} = (1) x scalar(@obf);
835              
836             } ## end BEGIN
837              
838             { ## begin closure to count instances
839              
840             # methods to count instances
841             my $_count = 0;
842 555     555   2565 sub _increment_count { return ++$_count }
843 555     555   1205 sub _decrement_count { return --$_count }
844             } ## end closure to count instances
845              
846             sub new {
847              
848 555     555 0 3363 my ( $class, @args ) = @_;
849              
850             # we are given an object with a write_line() method to take lines
851             my %defaults = (
852             sink_object => undef,
853             diagnostics_object => undef,
854             logger_object => undef,
855 0     0   0 length_function => sub { return length( $_[0] ) },
856 555         6430 is_encoded_data => EMPTY_STRING,
857             fh_tee => undef,
858             );
859 555         4350 my %args = ( %defaults, @args );
860              
861 555         2277 my $length_function = $args{length_function};
862 555         1577 my $is_encoded_data = $args{is_encoded_data};
863 555         1377 my $fh_tee = $args{fh_tee};
864 555         1414 my $logger_object = $args{logger_object};
865 555         1227 my $diagnostics_object = $args{diagnostics_object};
866              
867             # we create another object with a get_line() and peek_ahead() method
868 555         1749 my $sink_object = $args{sink_object};
869 555         4028 my $file_writer_object =
870             Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
871              
872             # initialize closure variables...
873 555         2911 set_logger_object($logger_object);
874 555         2311 set_diagnostics_object($diagnostics_object);
875 555         2653 initialize_lp_vars();
876 555         2955 initialize_csc_vars();
877 555         2987 initialize_break_lists();
878 555         2840 initialize_undo_ci();
879 555         2885 initialize_process_line_of_CODE();
880 555         2902 initialize_grind_batch_of_CODE();
881 555         2740 initialize_get_final_indentation();
882 555         2481 initialize_postponed_breakpoint();
883 555         2311 initialize_batch_variables();
884 555         2828 initialize_write_line();
885              
886 555         4933 my $vertical_aligner_object = Perl::Tidy::VerticalAligner->new(
887             rOpts => $rOpts,
888             file_writer_object => $file_writer_object,
889             logger_object => $logger_object,
890             diagnostics_object => $diagnostics_object,
891             length_function => $length_function,
892             );
893              
894 555         3063 write_logfile_entry("\nStarting tokenization pass...\n");
895              
896 555 100       3480 if ( $rOpts->{'entab-leading-whitespace'} ) {
    50          
897 2         14 write_logfile_entry(
898             "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
899             );
900             }
901             elsif ( $rOpts->{'tabs'} ) {
902 0         0 write_logfile_entry("Indentation will be with a tab character\n");
903             }
904             else {
905 553         2891 write_logfile_entry(
906             "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
907             }
908              
909             # Initialize the $self array reference.
910             # To add an item, first add a constant index in the BEGIN block above.
911 555         2461 my $self = [];
912              
913             # Basic data structures...
914 555         1892 $self->[_rlines_] = []; # = ref to array of lines of the file
915              
916             # 'rLL' = reference to the continuous liner array of all tokens in a file.
917             # 'LL' stands for 'Linked List'. Using a linked list was a disaster, but
918             # 'LL' stuck because it is easy to type. The 'rLL' array is updated
919             # by sub 'respace_tokens' during reformatting. The indexes in 'rLL' begin
920             # with '$K' by convention.
921 555         1650 $self->[_rLL_] = [];
922 555         1496 $self->[_Klimit_] = undef; # = maximum K index for rLL.
923              
924             # Indexes into the rLL list
925 555         1661 $self->[_K_opening_container_] = {};
926 555         1646 $self->[_K_closing_container_] = {};
927 555         1547 $self->[_K_opening_ternary_] = {};
928 555         1504 $self->[_K_closing_ternary_] = {};
929 555         1482 $self->[_K_first_seq_item_] = undef; # K of first token with a sequence #
930              
931             # 'rSS' is the 'Signed Sequence' list, a continuous list of all sequence
932             # numbers with + or - indicating opening or closing. This list represents
933             # the entire container tree and is invariant under reformatting. It can be
934             # used to quickly travel through the tree. Indexes in the rSS array begin
935             # with '$I' by convention. The 'Iss' arrays give the indexes in this list
936             # of opening and closing sequence numbers.
937 555         1445 $self->[_rSS_] = [];
938 555         1822 $self->[_Iss_opening_] = [];
939 555         1406 $self->[_Iss_closing_] = [];
940              
941             # Arrays to help traverse the tree
942 555         1360 $self->[_rdepth_of_opening_seqno_] = [];
943 555         1325 $self->[_rblock_type_of_seqno_] = {};
944 555         1464 $self->[_ris_asub_block_] = {};
945 555         1419 $self->[_ris_sub_block_] = {};
946              
947             # Mostly list characteristics and processing flags
948 555         1368 $self->[_rtype_count_by_seqno_] = {};
949 555         1465 $self->[_ris_function_call_paren_] = {};
950 555         1576 $self->[_rlec_count_by_seqno_] = {};
951 555         1457 $self->[_ris_broken_container_] = {};
952 555         1509 $self->[_ris_permanently_broken_] = {};
953 555         1545 $self->[_rblank_and_comment_count_] = {};
954 555         1811 $self->[_rhas_list_] = {};
955 555         1444 $self->[_rhas_broken_list_] = {};
956 555         1531 $self->[_rhas_broken_list_with_lec_] = {};
957 555         1512 $self->[_rfirst_comma_line_index_] = {};
958 555         1525 $self->[_rhas_code_block_] = {};
959 555         1691 $self->[_rhas_broken_code_block_] = {};
960 555         1518 $self->[_rhas_ternary_] = {};
961 555         1528 $self->[_ris_excluded_lp_container_] = {};
962 555         1519 $self->[_rlp_object_by_seqno_] = {};
963 555         1482 $self->[_rwant_reduced_ci_] = {};
964 555         1366 $self->[_rno_xci_by_seqno_] = {};
965 555         1637 $self->[_rbrace_left_] = {};
966 555         1351 $self->[_ris_bli_container_] = {};
967 555         1338 $self->[_rparent_of_seqno_] = {};
968 555         1451 $self->[_rchildren_of_seqno_] = {};
969 555         1391 $self->[_ris_list_by_seqno_] = {};
970 555         1416 $self->[_ris_cuddled_closing_brace_] = {};
971              
972 555         1460 $self->[_rbreak_container_] = {}; # prevent one-line blocks
973 555         1662 $self->[_rshort_nested_] = {}; # blocks not forced open
974 555         1248 $self->[_length_function_] = $length_function;
975 555         1378 $self->[_is_encoded_data_] = $is_encoded_data;
976              
977             # Some objects...
978 555         1326 $self->[_fh_tee_] = $fh_tee;
979 555         1203 $self->[_sink_object_] = $sink_object;
980 555         1199 $self->[_file_writer_object_] = $file_writer_object;
981 555         1260 $self->[_vertical_aligner_object_] = $vertical_aligner_object;
982 555         1252 $self->[_logger_object_] = $logger_object;
983              
984             # Reference to the batch being processed
985 555         1559 $self->[_this_batch_] = [];
986              
987             # Memory of processed text...
988 555         1363 $self->[_ris_special_identifier_token_] = {};
989 555         1446 $self->[_last_line_leading_level_] = 0;
990 555         1574 $self->[_last_line_leading_type_] = '#';
991 555         1339 $self->[_last_output_short_opening_token_] = 0;
992 555         1250 $self->[_added_semicolon_count_] = 0;
993 555         1269 $self->[_first_added_semicolon_at_] = 0;
994 555         1313 $self->[_last_added_semicolon_at_] = 0;
995 555         1332 $self->[_deleted_semicolon_count_] = 0;
996 555         1413 $self->[_first_deleted_semicolon_at_] = 0;
997 555         1551 $self->[_last_deleted_semicolon_at_] = 0;
998 555         1215 $self->[_embedded_tab_count_] = 0;
999 555         1346 $self->[_first_embedded_tab_at_] = 0;
1000 555         1374 $self->[_last_embedded_tab_at_] = 0;
1001 555         1702 $self->[_first_tabbing_disagreement_] = 0;
1002 555         1256 $self->[_last_tabbing_disagreement_] = 0;
1003 555         1310 $self->[_tabbing_disagreement_count_] = 0;
1004 555         1130 $self->[_in_tabbing_disagreement_] = 0;
1005 555         1567 $self->[_saw_VERSION_in_this_file_] = !$rOpts->{'pass-version-line'};
1006 555         1625 $self->[_saw_END_or_DATA_] = 0;
1007 555         1396 $self->[_first_brace_tabbing_disagreement_] = undef;
1008 555         1300 $self->[_in_brace_tabbing_disagreement_] = undef;
1009              
1010             # Hashes related to container welding...
1011 555         1427 $self->[_radjusted_levels_] = [];
1012              
1013             # Weld data structures
1014 555         1735 $self->[_rK_weld_left_] = {};
1015 555         1528 $self->[_rK_weld_right_] = {};
1016 555         1448 $self->[_rweld_len_right_at_K_] = {};
1017              
1018             # -xci stuff
1019 555         1551 $self->[_rseqno_controlling_my_ci_] = {};
1020 555         1500 $self->[_ris_seqno_controlling_ci_] = {};
1021              
1022 555         1491 $self->[_rspecial_side_comment_type_] = {};
1023 555         1318 $self->[_maximum_level_] = 0;
1024 555         1293 $self->[_maximum_level_at_line_] = 0;
1025 555         1213 $self->[_maximum_BLOCK_level_] = 0;
1026 555         1249 $self->[_maximum_BLOCK_level_at_line_] = 0;
1027              
1028 555         1354 $self->[_rKrange_code_without_comments_] = [];
1029 555         1293 $self->[_rbreak_before_Kfirst_] = {};
1030 555         1764 $self->[_rbreak_after_Klast_] = {};
1031 555         1787 $self->[_converged_] = 0;
1032              
1033             # qw stuff
1034 555         1577 $self->[_rstarting_multiline_qw_seqno_by_K_] = {};
1035 555         1641 $self->[_rending_multiline_qw_seqno_by_K_] = {};
1036 555         1757 $self->[_rKrange_multiline_qw_by_seqno_] = {};
1037 555         1628 $self->[_rmultiline_qw_has_extra_level_] = {};
1038              
1039 555         1315 $self->[_rcollapsed_length_by_seqno_] = {};
1040 555         1420 $self->[_rbreak_before_container_by_seqno_] = {};
1041 555         1394 $self->[_roverride_cab3_] = {};
1042 555         1283 $self->[_ris_assigned_structure_] = {};
1043 555         1339 $self->[_ris_short_broken_eval_block_] = {};
1044 555         1433 $self->[_ris_bare_trailing_comma_by_seqno_] = {};
1045              
1046 555         1311 $self->[_rseqno_non_indenting_brace_by_ix_] = {};
1047 555         1368 $self->[_rmax_vertical_tightness_] = {};
1048              
1049 555         1311 $self->[_no_vertical_tightness_flags_] = 0;
1050              
1051             # This flag will be updated later by a call to get_save_logfile()
1052 555         1637 $self->[_save_logfile_] = defined($logger_object);
1053              
1054             # Be sure all variables in $self have been initialized above. To find the
1055             # correspondence of index numbers and array names, copy a list to a file
1056             # and use the unix 'nl' command to number lines 1..
1057 555         1026 if (DEVEL_MODE) {
1058             my @non_existant;
1059             foreach ( 0 .. _LAST_SELF_INDEX_ ) {
1060             if ( !exists( $self->[$_] ) ) {
1061             push @non_existant, $_;
1062             }
1063             }
1064             if (@non_existant) {
1065             Fault("These indexes in self not initialized: (@non_existant)\n");
1066             }
1067             }
1068              
1069 555         1525 bless $self, $class;
1070              
1071             # Safety check..this is not a class yet
1072 555 50       2546 if ( _increment_count() > 1 ) {
1073 0         0 confess
1074             "Attempt to create more than 1 object in $class, which is not a true class yet\n";
1075             }
1076 555         5725 return $self;
1077             } ## end sub new
1078              
1079             ######################################
1080             # CODE SECTION 2: Some Basic Utilities
1081             ######################################
1082              
1083             sub check_rLL {
1084              
1085             # Verify that the rLL array has not been auto-vivified
1086 0     0 0 0 my ( $self, $msg ) = @_;
1087 0         0 my $rLL = $self->[_rLL_];
1088 0         0 my $Klimit = $self->[_Klimit_];
1089 0         0 my $num = @{$rLL};
  0         0  
1090 0 0 0     0 if ( ( defined($Klimit) && $Klimit != $num - 1 )
      0        
      0        
1091             || ( !defined($Klimit) && $num > 0 ) )
1092             {
1093              
1094             # This fault can occur if the array has been accessed for an index
1095             # greater than $Klimit, which is the last token index. Just accessing
1096             # the array above index $Klimit, not setting a value, can cause @rLL to
1097             # increase beyond $Klimit. If this occurs, the problem can be located
1098             # by making calls to this routine at different locations in
1099             # sub 'finish_formatting'.
1100 0 0       0 $Klimit = 'undef' if ( !defined($Klimit) );
1101 0 0       0 $msg = EMPTY_STRING unless $msg;
1102 0         0 Fault("$msg ERROR: rLL has num=$num but Klimit='$Klimit'\n");
1103             }
1104 0         0 return;
1105             } ## end sub check_rLL
1106              
1107             sub check_keys {
1108 0     0 0 0 my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
1109              
1110             # Check the keys of a hash:
1111             # $rtest = ref to hash to test
1112             # $rvalid = ref to hash with valid keys
1113              
1114             # $msg = a message to write in case of error
1115             # $exact_match defines the type of check:
1116             # = false: test hash must not have unknown key
1117             # = true: test hash must have exactly same keys as known hash
1118             my @unknown_keys =
1119 0         0 grep { !exists $rvalid->{$_} } keys %{$rtest};
  0         0  
  0         0  
1120             my @missing_keys =
1121 0         0 grep { !exists $rtest->{$_} } keys %{$rvalid};
  0         0  
  0         0  
1122 0         0 my $error = @unknown_keys;
1123 0 0 0     0 if ($exact_match) { $error ||= @missing_keys }
  0         0  
1124 0 0       0 if ($error) {
1125 0         0 local $LIST_SEPARATOR = ')(';
1126 0         0 my @expected_keys = sort keys %{$rvalid};
  0         0  
1127 0         0 @unknown_keys = sort @unknown_keys;
1128 0         0 Fault(<<EOM);
1129             ------------------------------------------------------------------------
1130             Program error detected checking hash keys
1131             Message is: '$msg'
1132             Expected keys: (@expected_keys)
1133             Unknown key(s): (@unknown_keys)
1134             Missing key(s): (@missing_keys)
1135             ------------------------------------------------------------------------
1136             EOM
1137             }
1138 0         0 return;
1139             } ## end sub check_keys
1140              
1141             sub check_token_array {
1142 0     0 0 0 my $self = shift;
1143              
1144             # Check for errors in the array of tokens. This is only called
1145             # when the DEVEL_MODE flag is set, so this Fault will only occur
1146             # during code development.
1147 0         0 my $rLL = $self->[_rLL_];
1148 0         0 foreach my $KK ( 0 .. @{$rLL} - 1 ) {
  0         0  
1149 0         0 my $nvars = @{ $rLL->[$KK] };
  0         0  
1150 0 0       0 if ( $nvars != _NVARS ) {
1151 0         0 my $NVARS = _NVARS;
1152 0         0 my $type = $rLL->[$KK]->[_TYPE_];
1153 0 0       0 $type = '*' unless defined($type);
1154              
1155             # The number of variables per token node is _NVARS and was set when
1156             # the array indexes were generated. So if the number of variables
1157             # is different we have done something wrong, like not store all of
1158             # them in sub 'write_line' when they were received from the
1159             # tokenizer.
1160 0         0 Fault(
1161             "number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
1162             );
1163             }
1164 0         0 foreach my $var ( _TOKEN_, _TYPE_ ) {
1165 0 0       0 if ( !defined( $rLL->[$KK]->[$var] ) ) {
1166 0         0 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
1167              
1168             # This is a simple check that each token has some basic
1169             # variables. In other words, that there are no holes in the
1170             # array of tokens. Sub 'write_line' pushes tokens into the
1171             # $rLL array, so this should guarantee no gaps.
1172 0         0 Fault("Undefined variable $var for K=$KK, line=$iline\n");
1173             }
1174             }
1175             }
1176 0         0 return;
1177             } ## end sub check_token_array
1178              
1179             { ## begin closure check_line_hashes
1180              
1181             # This code checks that no auto-vivification occurs in the 'line' hash
1182              
1183             my %valid_line_hash;
1184              
1185             BEGIN {
1186              
1187             # These keys are defined for each line in the formatter
1188             # Each line must have exactly these quantities
1189 38     38   295 my @valid_line_keys = qw(
1190             _curly_brace_depth
1191             _ending_in_quote
1192             _guessed_indentation_level
1193             _line_number
1194             _line_text
1195             _line_type
1196             _paren_depth
1197             _quote_character
1198             _rK_range
1199             _square_bracket_depth
1200             _starting_in_quote
1201             _ended_in_blank_token
1202             _code_type
1203              
1204             _ci_level_0
1205             _level_0
1206             _nesting_blocks_0
1207             _nesting_tokens_0
1208             );
1209              
1210 38         110514 @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
1211             } ## end BEGIN
1212              
1213             sub check_line_hashes {
1214 0     0 0 0 my $self = shift;
1215 0         0 my $rlines = $self->[_rlines_];
1216 0         0 foreach my $rline ( @{$rlines} ) {
  0         0  
1217 0         0 my $iline = $rline->{_line_number};
1218 0         0 my $line_type = $rline->{_line_type};
1219 0         0 check_keys( $rline, \%valid_line_hash,
1220             "Checkpoint: line number =$iline, line_type=$line_type", 1 );
1221             }
1222 0         0 return;
1223             } ## end sub check_line_hashes
1224             } ## end closure check_line_hashes
1225              
1226             { ## begin closure for logger routines
1227             my $logger_object;
1228              
1229             # Called once per file to initialize the logger object
1230             sub set_logger_object {
1231 555     555 0 1509 $logger_object = shift;
1232 555         1197 return;
1233             }
1234              
1235             sub get_logger_object {
1236 0     0 0 0 return $logger_object;
1237             }
1238              
1239             sub get_input_stream_name {
1240 0     0 0 0 my $input_stream_name = EMPTY_STRING;
1241 0 0       0 if ($logger_object) {
1242 0         0 $input_stream_name = $logger_object->get_input_stream_name();
1243             }
1244 0         0 return $input_stream_name;
1245             } ## end sub get_input_stream_name
1246              
1247             # interface to Perl::Tidy::Logger routines
1248             sub warning {
1249 0     0 0 0 my ( $msg, $msg_line_number ) = @_;
1250 0 0       0 if ($logger_object) {
1251 0         0 $logger_object->warning( $msg, $msg_line_number );
1252             }
1253 0         0 return;
1254             }
1255              
1256             sub complain {
1257 0     0 0 0 my ( $msg, $msg_line_number ) = @_;
1258 0 0       0 if ($logger_object) {
1259 0         0 $logger_object->complain( $msg, $msg_line_number );
1260             }
1261 0         0 return;
1262             } ## end sub complain
1263              
1264             sub write_logfile_entry {
1265 2994     2994 0 7807 my @msg = @_;
1266 2994 100       7455 if ($logger_object) {
1267 2984         8610 $logger_object->write_logfile_entry(@msg);
1268             }
1269 2994         6313 return;
1270             } ## end sub write_logfile_entry
1271              
1272             sub get_saw_brace_error {
1273 555 100   555 0 1944 if ($logger_object) {
1274 553         2451 return $logger_object->get_saw_brace_error();
1275             }
1276 2         17 return;
1277             } ## end sub get_saw_brace_error
1278              
1279             sub we_are_at_the_last_line {
1280 555 100   555 0 2093 if ($logger_object) {
1281 553         4050 $logger_object->we_are_at_the_last_line();
1282             }
1283 555         1070 return;
1284             } ## end sub we_are_at_the_last_line
1285              
1286             } ## end closure for logger routines
1287              
1288             { ## begin closure for diagnostics routines
1289             my $diagnostics_object;
1290              
1291             # Called once per file to initialize the diagnostics object
1292             sub set_diagnostics_object {
1293 555     555 0 1316 $diagnostics_object = shift;
1294 555         1108 return;
1295             }
1296              
1297             # Available for debugging but not currently used:
1298             sub write_diagnostics {
1299 0     0 0 0 my ( $msg, $line_number ) = @_;
1300 0 0       0 if ($diagnostics_object) {
1301 0         0 $diagnostics_object->write_diagnostics( $msg, $line_number );
1302             }
1303 0         0 return;
1304             } ## end sub write_diagnostics
1305             } ## end closure for diagnostics routines
1306              
1307             sub get_convergence_check {
1308 5     5 0 15 my ($self) = @_;
1309 5         33 return $self->[_converged_];
1310             }
1311              
1312             sub get_output_line_number {
1313 43     43 0 140 my ($self) = @_;
1314 43         80 my $vao = $self->[_vertical_aligner_object_];
1315 43         192 return $vao->get_output_line_number();
1316             }
1317              
1318             sub want_blank_line {
1319 20     20 0 53 my $self = shift;
1320 20         70 $self->flush();
1321 20         76 my $file_writer_object = $self->[_file_writer_object_];
1322 20         127 $file_writer_object->want_blank_line();
1323 20         49 return;
1324             } ## end sub want_blank_line
1325              
1326             sub write_unindented_line {
1327 255     255 0 548 my ( $self, $line ) = @_;
1328 255         705 $self->flush();
1329 255         442 my $file_writer_object = $self->[_file_writer_object_];
1330 255         842 $file_writer_object->write_line($line);
1331 255         502 return;
1332             } ## end sub write_unindented_line
1333              
1334             sub consecutive_nonblank_lines {
1335 1     1 0 3 my ($self) = @_;
1336 1         3 my $file_writer_object = $self->[_file_writer_object_];
1337 1         3 my $vao = $self->[_vertical_aligner_object_];
1338 1         7 return $file_writer_object->get_consecutive_nonblank_lines() +
1339             $vao->get_cached_line_count();
1340             } ## end sub consecutive_nonblank_lines
1341              
1342             sub split_words {
1343              
1344             # given a string containing words separated by whitespace,
1345             # return the list of words
1346 7213     7213 0 14267 my ($str) = @_;
1347 7213 100       25978 return unless $str;
1348 2265         8378 $str =~ s/\s+$//;
1349 2265         4951 $str =~ s/^\s+//;
1350 2265         10838 return split( /\s+/, $str );
1351             } ## end sub split_words
1352              
1353             ###########################################
1354             # CODE SECTION 3: Check and process options
1355             ###########################################
1356              
1357             sub check_options {
1358              
1359             # This routine is called to check the user-supplied run parameters
1360             # and to configure the control hashes to them.
1361 554     554 0 1650 $rOpts = shift;
1362              
1363 554         1516 $controlled_comma_style = 0;
1364              
1365 554         3214 initialize_whitespace_hashes();
1366 554         3842 initialize_bond_strength_hashes();
1367              
1368             # This function must be called early to get hashes with grep initialized
1369 554         3274 initialize_grep_and_friends();
1370              
1371             # Make needed regex patterns for matching text.
1372             # NOTE: sub_matching_patterns must be made first because later patterns use
1373             # them; see RT #133130.
1374 554         3667 make_sub_matching_pattern(); # must be first pattern made
1375 554         2765 make_static_block_comment_pattern();
1376 554         2770 make_static_side_comment_pattern();
1377 554         2507 make_closing_side_comment_prefix();
1378 554         2864 make_closing_side_comment_list_pattern();
1379 554         2537 $format_skipping_pattern_begin =
1380             make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
1381 554         2084 $format_skipping_pattern_end =
1382             make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
1383 554         4295 make_non_indenting_brace_pattern();
1384              
1385             # If closing side comments ARE selected, then we can safely
1386             # delete old closing side comments unless closing side comment
1387             # warnings are requested. This is a good idea because it will
1388             # eliminate any old csc's which fall below the line count threshold.
1389             # We cannot do this if warnings are turned on, though, because we
1390             # might delete some text which has been added. So that must
1391             # be handled when comments are created. And we cannot do this
1392             # with -io because -csc will be skipped altogether.
1393 554 100       3489 if ( $rOpts->{'closing-side-comments'} ) {
    50          
1394 4 50 33     38 if ( !$rOpts->{'closing-side-comment-warnings'}
1395             && !$rOpts->{'indent-only'} )
1396             {
1397 4         16 $rOpts->{'delete-closing-side-comments'} = 1;
1398             }
1399             }
1400              
1401             # If closing side comments ARE NOT selected, but warnings ARE
1402             # selected and we ARE DELETING csc's, then we will pretend to be
1403             # adding with a huge interval. This will force the comments to be
1404             # generated for comparison with the old comments, but not added.
1405             elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
1406 0 0       0 if ( $rOpts->{'delete-closing-side-comments'} ) {
1407 0         0 $rOpts->{'delete-closing-side-comments'} = 0;
1408 0         0 $rOpts->{'closing-side-comments'} = 1;
1409 0         0 $rOpts->{'closing-side-comment-interval'} = 100_000_000;
1410             }
1411             }
1412              
1413 554         2947 make_bli_pattern();
1414              
1415 554         3552 make_bl_pattern();
1416              
1417 554         3424 make_block_brace_vertical_tightness_pattern();
1418              
1419 554         2351 make_blank_line_pattern();
1420              
1421 554         2535 make_keyword_group_list_pattern();
1422              
1423 554         2725 prepare_cuddled_block_types();
1424              
1425 554 50       2536 if ( $rOpts->{'dump-cuddled-block-list'} ) {
1426 0         0 dump_cuddled_block_list(*STDOUT);
1427 0         0 Exit(0);
1428             }
1429              
1430             # -xlp implies -lp
1431 554 100       2412 if ( $rOpts->{'extended-line-up-parentheses'} ) {
1432 3   100     19 $rOpts->{'line-up-parentheses'} ||= 1;
1433             }
1434              
1435 554 100       2343 if ( $rOpts->{'line-up-parentheses'} ) {
1436              
1437 30 50 33     350 if ( $rOpts->{'indent-only'}
      33        
1438             || !$rOpts->{'add-newlines'}
1439             || !$rOpts->{'delete-old-newlines'} )
1440             {
1441 0         0 Warn(<<EOM);
1442             -----------------------------------------------------------------------
1443             Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
1444            
1445             The -lp indentation logic requires that perltidy be able to coordinate
1446             arbitrarily large numbers of line breakpoints. This isn't possible
1447             with these flags.
1448             -----------------------------------------------------------------------
1449             EOM
1450 0         0 $rOpts->{'line-up-parentheses'} = 0;
1451 0         0 $rOpts->{'extended-line-up-parentheses'} = 0;
1452             }
1453              
1454 30 50       167 if ( $rOpts->{'whitespace-cycle'} ) {
1455 0         0 Warn(<<EOM);
1456             Conflict: -wc cannot currently be used with the -lp option; ignoring -wc
1457             EOM
1458 0         0 $rOpts->{'whitespace-cycle'} = 0;
1459             }
1460             }
1461              
1462             # At present, tabs are not compatible with the line-up-parentheses style
1463             # (it would be possible to entab the total leading whitespace
1464             # just prior to writing the line, if desired).
1465 554 50 66     2354 if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
1466 0         0 Warn(<<EOM);
1467             Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
1468             EOM
1469 0         0 $rOpts->{'tabs'} = 0;
1470             }
1471              
1472             # Likewise, tabs are not compatible with outdenting..
1473 554 50 66     2179 if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
1474 0         0 Warn(<<EOM);
1475             Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
1476             EOM
1477 0         0 $rOpts->{'tabs'} = 0;
1478             }
1479              
1480 554 50 66     4346 if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
1481 0         0 Warn(<<EOM);
1482             Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
1483             EOM
1484 0         0 $rOpts->{'tabs'} = 0;
1485             }
1486              
1487 554 100       2189 if ( !$rOpts->{'space-for-semicolon'} ) {
1488 13         48 $want_left_space{'f'} = -1;
1489             }
1490              
1491 554 100       2205 if ( $rOpts->{'space-terminal-semicolon'} ) {
1492 2         8 $want_left_space{';'} = 1;
1493             }
1494              
1495             # We should put an upper bound on any -sil=n value. Otherwise enormous
1496             # files could be created by mistake.
1497 554         2257 for ( $rOpts->{'starting-indentation-level'} ) {
1498 554 50 33     3034 if ( $_ && $_ > 100 ) {
1499 0         0 Warn(<<EOM);
1500             The value --starting-indentation-level=$_ is very large; a mistake? resetting to 0;
1501             EOM
1502 0         0 $_ = 0;
1503             }
1504             }
1505              
1506             # Require -msp > 0 to avoid future parsing problems (issue c147)
1507 554         1956 for ( $rOpts->{'minimum-space-to-comment'} ) {
1508 554 50 33     3912 if ( !$_ || $_ <= 0 ) { $_ = 1 }
  0         0  
1509             }
1510              
1511             # implement outdenting preferences for keywords
1512 554         2586 %outdent_keyword = ();
1513 554         2782 my @okw = split_words( $rOpts->{'outdent-keyword-list'} );
1514 554 100       3174 unless (@okw) {
1515 553         2801 @okw = qw(next last redo goto return); # defaults
1516             }
1517              
1518             # FUTURE: if not a keyword, assume that it is an identifier
1519 554         1775 foreach (@okw) {
1520 2766 50       7546 if ( Perl::Tidy::Tokenizer::is_keyword($_) ) {
1521 2766         6937 $outdent_keyword{$_} = 1;
1522             }
1523             else {
1524 0         0 Warn("ignoring '$_' in -okwl list; not a perl keyword");
1525             }
1526             }
1527              
1528             # setup hash for -kpit option
1529 554         2148 %keyword_paren_inner_tightness = ();
1530 554         2057 my $kpit_value = $rOpts->{'keyword-paren-inner-tightness'};
1531 554 100 66     3765 if ( defined($kpit_value) && $kpit_value != 1 ) {
1532             my @kpit =
1533 2         10 split_words( $rOpts->{'keyword-paren-inner-tightness-list'} );
1534 2 100       13 unless (@kpit) {
1535 1         5 @kpit = qw(if elsif unless while until for foreach); # defaults
1536             }
1537              
1538             # we will allow keywords and user-defined identifiers
1539 2         10 foreach (@kpit) {
1540 9         20 $keyword_paren_inner_tightness{$_} = $kpit_value;
1541             }
1542             }
1543              
1544             # implement user whitespace preferences
1545 554 100       2392 if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) {
1546 5         28 @want_left_space{@q} = (1) x scalar(@q);
1547             }
1548              
1549 554 100       2626 if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) {
1550 5         64 @want_right_space{@q} = (1) x scalar(@q);
1551             }
1552              
1553 554 100       2405 if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) {
1554 6         41 @want_left_space{@q} = (-1) x scalar(@q);
1555             }
1556              
1557 554 100       2421 if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) {
1558 7         40 @want_right_space{@q} = (-1) x scalar(@q);
1559             }
1560 554 50       2500 if ( $rOpts->{'dump-want-left-space'} ) {
1561 0         0 dump_want_left_space(*STDOUT);
1562 0         0 Exit(0);
1563             }
1564              
1565 554 50       2234 if ( $rOpts->{'dump-want-right-space'} ) {
1566 0         0 dump_want_right_space(*STDOUT);
1567 0         0 Exit(0);
1568             }
1569              
1570 554         3048 initialize_space_after_keyword();
1571              
1572 554         2785 initialize_extended_block_tightness_list();
1573              
1574 554         3046 initialize_token_break_preferences();
1575              
1576             #--------------------------------------------------------------
1577             # The combination -lp -iob -vmll -bbx=2 can be unstable (b1266)
1578             #--------------------------------------------------------------
1579             # The -vmll and -lp parameters do not really work well together.
1580             # To avoid instabilities, we will change any -bbx=2 to -bbx=1 (stable).
1581             # NOTE: we could make this more precise by looking at any exclusion
1582             # flags for -lp, and allowing -bbx=2 for excluded types.
1583 554 0 66     2424 if ( $rOpts->{'variable-maximum-line-length'}
      33        
1584             && $rOpts->{'ignore-old-breakpoints'}
1585             && $rOpts->{'line-up-parentheses'} )
1586             {
1587 0         0 my @changed;
1588 0         0 foreach my $key ( keys %break_before_container_types ) {
1589 0 0       0 if ( $break_before_container_types{$key} == 2 ) {
1590 0         0 $break_before_container_types{$key} = 1;
1591 0         0 push @changed, $key;
1592             }
1593             }
1594 0 0       0 if (@changed) {
1595              
1596             # we could write a warning here
1597             }
1598             }
1599              
1600             #-----------------------------------------------------------
1601             # The combination -lp -vmll can be unstable if -ci<2 (b1267)
1602             #-----------------------------------------------------------
1603             # The -vmll and -lp parameters do not really work well together.
1604             # This is a very crude fix for an unusual parameter combination.
1605 554 50 66     2232 if ( $rOpts->{'variable-maximum-line-length'}
      33        
1606             && $rOpts->{'line-up-parentheses'}
1607             && $rOpts->{'continuation-indentation'} < 2 )
1608             {
1609 0         0 $rOpts->{'continuation-indentation'} = 2;
1610             ##Warn("Increased -ci=n to n=2 for stability with -lp and -vmll\n");
1611             }
1612              
1613             #-----------------------------------------------------------
1614             # The combination -lp -vmll -atc -dtc can be unstable
1615             #-----------------------------------------------------------
1616             # This fixes b1386 b1387 b1388 which had -wtc='b'
1617             # Updated to to include any -wtc to fix b1426
1618 554 0 66     2055 if ( $rOpts->{'variable-maximum-line-length'}
      33        
      0        
      0        
1619             && $rOpts->{'line-up-parentheses'}
1620             && $rOpts->{'add-trailing-commas'}
1621             && $rOpts->{'delete-trailing-commas'}
1622             && $rOpts->{'want-trailing-commas'} )
1623             {
1624 0         0 $rOpts->{'delete-trailing-commas'} = 0;
1625             ## Issuing a warning message causes trouble with test cases, and this combo is
1626             ## so rare that it is unlikely to not occur in practice. So skip warning.
1627             ## Warn(
1628             ##"The combination -vmll -lp -atc -dtc can be unstable; turning off -dtc\n"
1629             ## );
1630             }
1631              
1632 554         1512 %container_indentation_options = ();
1633 554         3430 foreach my $pair (
1634             [ 'break-before-hash-brace-and-indent', '{' ],
1635             [ 'break-before-square-bracket-and-indent', '[' ],
1636             [ 'break-before-paren-and-indent', '(' ],
1637             )
1638             {
1639 1662         2685 my ( $key, $tok ) = @{$pair};
  1662         3737  
1640 1662         3784 my $opt = $rOpts->{$key};
1641 1662 50 66     7554 if ( defined($opt) && $opt > 0 && $break_before_container_types{$tok} )
      66        
1642             {
1643              
1644             # (1) -lp is not compatible with opt=2, silently set to opt=0
1645             # (2) opt=0 and 2 give same result if -i=-ci; but opt=0 is faster
1646             # (3) set opt=0 if -i < -ci (can be unstable, case b1355)
1647 5 100       17 if ( $opt == 2 ) {
1648 3 100 66     18 if (
1649             $rOpts->{'line-up-parentheses'}
1650             || ( $rOpts->{'indent-columns'} <=
1651             $rOpts->{'continuation-indentation'} )
1652             )
1653             {
1654 1         3 $opt = 0;
1655             }
1656             }
1657 5         13 $container_indentation_options{$tok} = $opt;
1658             }
1659             }
1660              
1661 554         3045 $right_bond_strength{'{'} = WEAK;
1662 554         1959 $left_bond_strength{'{'} = VERY_STRONG;
1663              
1664             # make -l=0 equal to -l=infinite
1665 554 100       2461 if ( !$rOpts->{'maximum-line-length'} ) {
1666 4         15 $rOpts->{'maximum-line-length'} = 1_000_000;
1667             }
1668              
1669             # make -lbl=0 equal to -lbl=infinite
1670 554 50       2365 if ( !$rOpts->{'long-block-line-count'} ) {
1671 0         0 $rOpts->{'long-block-line-count'} = 1_000_000;
1672             }
1673              
1674             # hashes used to simplify setting whitespace
1675             %tightness = (
1676             '{' => $rOpts->{'brace-tightness'},
1677             '}' => $rOpts->{'brace-tightness'},
1678             '(' => $rOpts->{'paren-tightness'},
1679             ')' => $rOpts->{'paren-tightness'},
1680             '[' => $rOpts->{'square-bracket-tightness'},
1681 554         4799 ']' => $rOpts->{'square-bracket-tightness'},
1682             );
1683              
1684 554 100       2322 if ( $rOpts->{'ignore-old-breakpoints'} ) {
1685              
1686 2         5 my @conflicts;
1687 2 50       12 if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
1688 0         0 $rOpts->{'break-at-old-method-breakpoints'} = 0;
1689 0         0 push @conflicts, '--break-at-old-method-breakpoints (-bom)';
1690             }
1691 2 50       10 if ( $rOpts->{'break-at-old-comma-breakpoints'} ) {
1692 0         0 $rOpts->{'break-at-old-comma-breakpoints'} = 0;
1693 0         0 push @conflicts, '--break-at-old-comma-breakpoints (-boc)';
1694             }
1695 2 50       9 if ( $rOpts->{'break-at-old-semicolon-breakpoints'} ) {
1696 0         0 $rOpts->{'break-at-old-semicolon-breakpoints'} = 0;
1697 0         0 push @conflicts, '--break-at-old-semicolon-breakpoints (-bos)';
1698             }
1699 2 50       9 if ( $rOpts->{'keep-old-breakpoints-before'} ) {
1700 0         0 $rOpts->{'keep-old-breakpoints-before'} = EMPTY_STRING;
1701 0         0 push @conflicts, '--keep-old-breakpoints-before (-kbb)';
1702             }
1703 2 50       7 if ( $rOpts->{'keep-old-breakpoints-after'} ) {
1704 0         0 $rOpts->{'keep-old-breakpoints-after'} = EMPTY_STRING;
1705 0         0 push @conflicts, '--keep-old-breakpoints-after (-kba)';
1706             }
1707              
1708 2 50       8 if (@conflicts) {
1709 0         0 my $msg = join( "\n ",
1710             " Conflict: These conflicts with --ignore-old-breakponts (-iob) will be turned off:",
1711             @conflicts )
1712             . "\n";
1713 0         0 Warn($msg);
1714             }
1715              
1716             # Note: These additional parameters are made inactive by -iob.
1717             # They are silently turned off here because they are on by default.
1718             # We would generate unexpected warnings if we issued a warning.
1719 2         6 $rOpts->{'break-at-old-keyword-breakpoints'} = 0;
1720 2         8 $rOpts->{'break-at-old-logical-breakpoints'} = 0;
1721 2         4 $rOpts->{'break-at-old-ternary-breakpoints'} = 0;
1722 2         6 $rOpts->{'break-at-old-attribute-breakpoints'} = 0;
1723             }
1724              
1725 554         1602 %keep_break_before_type = ();
1726 554         3666 initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-before'},
1727             'kbb', \%keep_break_before_type );
1728              
1729 554         1645 %keep_break_after_type = ();
1730 554         2629 initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-after'},
1731             'kba', \%keep_break_after_type );
1732              
1733             # Modify %keep_break_before and %keep_break_after to avoid conflicts
1734             # with %want_break_before; fixes b1436.
1735             # This became necessary after breaks for some tokens were converted
1736             # from hard to soft (see b1433).
1737             # We could do this for all tokens, but to minimize changes to existing
1738             # code we currently only do this for the soft break tokens.
1739 554         2765 foreach my $key ( keys %keep_break_before_type ) {
1740 2 50 66     16 if ( defined( $want_break_before{$key} )
      66        
1741             && !$want_break_before{$key}
1742             && $is_soft_keep_break_type{$key} )
1743             {
1744 0         0 $keep_break_after_type{$key} = $keep_break_before_type{$key};
1745 0         0 delete $keep_break_before_type{$key};
1746             }
1747             }
1748 554         2102 foreach my $key ( keys %keep_break_after_type ) {
1749 1 0 33     5 if ( defined( $want_break_before{$key} )
      0        
1750             && $want_break_before{$key}
1751             && $is_soft_keep_break_type{$key} )
1752             {
1753 0         0 $keep_break_before_type{$key} = $keep_break_after_type{$key};
1754 0         0 delete $keep_break_after_type{$key};
1755             }
1756             }
1757              
1758 554   66     3607 $controlled_comma_style ||= $keep_break_before_type{','};
1759 554   66     3494 $controlled_comma_style ||= $keep_break_after_type{','};
1760              
1761 554         2637 initialize_global_option_vars();
1762              
1763 554         2721 initialize_line_length_vars(); # after 'initialize_global_option_vars'
1764              
1765 554         2727 initialize_trailing_comma_rules(); # after 'initialize_line_length_vars'
1766              
1767 554         2972 initialize_weld_nested_exclusion_rules();
1768              
1769 554         2543 initialize_weld_fat_comma_rules();
1770              
1771 554         1322 %line_up_parentheses_control_hash = ();
1772 554         1338 $line_up_parentheses_control_is_lxpl = 1;
1773 554         1438 my $lpxl = $rOpts->{'line-up-parentheses-exclusion-list'};
1774 554         1269 my $lpil = $rOpts->{'line-up-parentheses-inclusion-list'};
1775 554 50 66     2285 if ( $lpxl && $lpil ) {
1776 0         0 Warn( <<EOM );
1777             You entered values for both -lpxl=s and -lpil=s; the -lpil list will be ignored
1778             EOM
1779             }
1780 554 100       2736 if ($lpxl) {
    100          
1781 3         11 $line_up_parentheses_control_is_lxpl = 1;
1782             initialize_line_up_parentheses_control_hash(
1783 3         18 $rOpts->{'line-up-parentheses-exclusion-list'}, 'lpxl' );
1784             }
1785             elsif ($lpil) {
1786 1         3 $line_up_parentheses_control_is_lxpl = 0;
1787             initialize_line_up_parentheses_control_hash(
1788 1         5 $rOpts->{'line-up-parentheses-inclusion-list'}, 'lpil' );
1789             }
1790              
1791 554         2588 return;
1792             } ## end sub check_options
1793              
1794 38     38   375 use constant ALIGN_GREP_ALIASES => 0;
  38         87  
  38         101357  
1795              
1796             sub initialize_grep_and_friends {
1797              
1798             # Initialize or re-initialize hashes with 'grep' and grep aliases. This
1799             # must be done after each set of options because new grep aliases may be
1800             # used.
1801              
1802             # re-initialize the hashes ... this is critical!
1803 554     554 0 2995 %is_sort_map_grep = ();
1804              
1805 554         2351 my @q = qw(sort map grep);
1806 554         2641 @is_sort_map_grep{@q} = (1) x scalar(@q);
1807              
1808 554         1815 my $olbxl = $rOpts->{'one-line-block-exclusion-list'};
1809 554         1300 my %is_olb_exclusion_word;
1810 554 100       2478 if ( defined($olbxl) ) {
1811 2         13 my @list = split_words($olbxl);
1812 2 50       11 if (@list) {
1813 2         13 @is_olb_exclusion_word{@list} = (1) x scalar(@list);
1814             }
1815             }
1816              
1817             # Make the list of block types which may be re-formed into one line.
1818             # They will be modified with the grep-alias-list below and
1819             # by sub 'prepare_cuddled_block_types'.
1820             # Note that it is essential to always re-initialize the hash here:
1821 554         2651 %want_one_line_block = ();
1822 554 100       2165 if ( !$is_olb_exclusion_word{'*'} ) {
1823 553         2250 foreach (qw(sort map grep eval)) {
1824 2212 100       4930 if ( !$is_olb_exclusion_word{$_} ) { $want_one_line_block{$_} = 1 }
  2211         5035  
1825             }
1826             }
1827              
1828             # Note that any 'grep-alias-list' string has been preprocessed to be a
1829             # trimmed, space-separated list.
1830 554         2031 my $str = $rOpts->{'grep-alias-list'};
1831 554         5339 my @grep_aliases = split /\s+/, $str;
1832              
1833 554 50       2846 if (@grep_aliases) {
1834              
1835 554         3307 @{is_sort_map_grep}{@grep_aliases} = (1) x scalar(@grep_aliases);
1836              
1837 554 100       2769 if ( $want_one_line_block{'grep'} ) {
1838 553         2818 @{want_one_line_block}{@grep_aliases} = (1) x scalar(@grep_aliases);
1839             }
1840             }
1841              
1842             ##@q = qw(sort map grep eval);
1843 554         4924 %is_sort_map_grep_eval = %is_sort_map_grep;
1844 554         2118 $is_sort_map_grep_eval{'eval'} = 1;
1845              
1846             ##@q = qw(sort map grep eval do);
1847 554         4344 %is_sort_map_grep_eval_do = %is_sort_map_grep_eval;
1848 554         2147 $is_sort_map_grep_eval_do{'do'} = 1;
1849              
1850             # These block types can take ci. This is used by the -xci option.
1851             # Note that the 'sub' in this list is an anonymous sub. To be more correct
1852             # we could remove sub and use ASUB pattern to also handle a
1853             # prototype/signature. But that would slow things down and would probably
1854             # never be useful.
1855             ##@q = qw( do sub eval sort map grep );
1856 554         4378 %is_block_with_ci = %is_sort_map_grep_eval_do;
1857 554         1916 $is_block_with_ci{'sub'} = 1;
1858              
1859 554         2963 %is_keyword_returning_list = ();
1860 554         2757 @q = qw(
1861             grep
1862             keys
1863             map
1864             reverse
1865             sort
1866             split
1867             );
1868 554         2159 push @q, @grep_aliases;
1869 554         4152 @is_keyword_returning_list{@q} = (1) x scalar(@q);
1870              
1871             # This code enables vertical alignment of grep aliases for testing. It has
1872             # not been found to be beneficial, so it is off by default. But it is
1873             # useful for precise testing of the grep alias coding.
1874 554         1216 if (ALIGN_GREP_ALIASES) {
1875             %block_type_map = (
1876             'unless' => 'if',
1877             'else' => 'if',
1878             'elsif' => 'if',
1879             'when' => 'if',
1880             'default' => 'if',
1881             'case' => 'if',
1882             'sort' => 'map',
1883             'grep' => 'map',
1884             );
1885             foreach (@q) {
1886             $block_type_map{$_} = 'map' unless ( $_ eq 'map' );
1887             }
1888             }
1889 554         2341 return;
1890             } ## end sub initialize_grep_and_friends
1891              
1892             sub initialize_weld_nested_exclusion_rules {
1893 554     554 0 1672 %weld_nested_exclusion_rules = ();
1894              
1895 554         1446 my $opt_name = 'weld-nested-exclusion-list';
1896 554         1571 my $str = $rOpts->{$opt_name};
1897 554 100       1895 return unless ($str);
1898 4         20 $str =~ s/^\s+//;
1899 4         18 $str =~ s/\s+$//;
1900 4 50       21 return unless ($str);
1901              
1902             # There are four container tokens.
1903 4         32 my %token_keys = (
1904             '(' => '(',
1905             '[' => '[',
1906             '{' => '{',
1907             'q' => 'q',
1908             );
1909              
1910             # We are parsing an exclusion list for nested welds. The list is a string
1911             # with spaces separating any number of items. Each item consists of three
1912             # pieces of information:
1913             # <optional position> <optional type> <type of container>
1914             # < ^ or . > < k or K > < ( [ { >
1915              
1916             # The last character is the required container type and must be one of:
1917             # ( = paren
1918             # [ = square bracket
1919             # { = brace
1920              
1921             # An optional leading position indicator:
1922             # ^ means the leading token position in the weld
1923             # . means a secondary token position in the weld
1924             # no position indicator means all positions match
1925              
1926             # An optional alphanumeric character between the position and container
1927             # token selects to which the rule applies:
1928             # k = any keyword
1929             # K = any non-keyword
1930             # f = function call
1931             # F = not a function call
1932             # w = function or keyword
1933             # W = not a function or keyword
1934             # no letter means any preceding type matches
1935              
1936             # Examples:
1937             # ^( - the weld must not start with a paren
1938             # .( - the second and later tokens may not be parens
1939             # ( - no parens in weld
1940             # ^K( - exclude a leading paren not preceded by a keyword
1941             # .k( - exclude a secondary paren preceded by a keyword
1942             # [ { - exclude all brackets and braces
1943              
1944 4         23 my @items = split /\s+/, $str;
1945 4         16 my $msg1;
1946             my $msg2;
1947 4         14 foreach my $item (@items) {
1948 9         18 my $item_save = $item;
1949 9         24 my $tok = chop($item);
1950 9         17 my $key = $token_keys{$tok};
1951 9 50       25 if ( !defined($key) ) {
1952 0         0 $msg1 .= " '$item_save'";
1953 0         0 next;
1954             }
1955 9 100       32 if ( !defined( $weld_nested_exclusion_rules{$key} ) ) {
1956 8         19 $weld_nested_exclusion_rules{$key} = [];
1957             }
1958 9         20 my $rflags = $weld_nested_exclusion_rules{$key};
1959              
1960             # A 'q' means do not weld quotes
1961 9 100       27 if ( $tok eq 'q' ) {
1962 1         5 $rflags->[0] = '*';
1963 1         3 $rflags->[1] = '*';
1964 1         3 next;
1965             }
1966              
1967 8         17 my $pos = '*';
1968 8         13 my $select = '*';
1969 8 100       22 if ($item) {
1970 5 50       28 if ( $item =~ /^([\^\.])?([kKfFwW])?$/ ) {
1971 5 50       29 $pos = $1 if ($1);
1972 5 100       29 $select = $2 if ($2);
1973             }
1974             else {
1975 0         0 $msg1 .= " '$item_save'";
1976 0         0 next;
1977             }
1978             }
1979              
1980 8         14 my $err;
1981 8 100 100     62 if ( $pos eq '^' || $pos eq '*' ) {
1982 6 50 33     24 if ( defined( $rflags->[0] ) && $rflags->[0] ne $select ) {
1983 0         0 $err = 1;
1984             }
1985 6         12 $rflags->[0] = $select;
1986             }
1987 8 100 100     38 if ( $pos eq '.' || $pos eq '*' ) {
1988 5 50 33     25 if ( defined( $rflags->[1] ) && $rflags->[1] ne $select ) {
1989 0         0 $err = 1;
1990             }
1991 5         9 $rflags->[1] = $select;
1992             }
1993 8 50       37 if ($err) { $msg2 .= " '$item_save'"; }
  0         0  
1994             }
1995 4 50       23 if ($msg1) {
1996 0         0 Warn(<<EOM);
1997             Unexpecting symbol(s) encountered in --$opt_name will be ignored:
1998             $msg1
1999             EOM
2000             }
2001 4 50       16 if ($msg2) {
2002 0         0 Warn(<<EOM);
2003             Multiple specifications were encountered in the --weld-nested-exclusion-list for:
2004             $msg2
2005             Only the last will be used.
2006             EOM
2007             }
2008 4         15 return;
2009             } ## end sub initialize_weld_nested_exclusion_rules
2010              
2011             sub initialize_weld_fat_comma_rules {
2012              
2013             # Initialize a hash controlling which opening token types can be
2014             # welded around a fat comma
2015 554     554 0 1469 %weld_fat_comma_rules = ();
2016              
2017             # The -wfc flag turns on welding of '=>' after an opening paren
2018 554 100       2180 if ( $rOpts->{'weld-fat-comma'} ) { $weld_fat_comma_rules{'('} = 1 }
  1         4  
2019              
2020             # This could be generalized in the future by introducing a parameter
2021             # -weld-fat-comma-after=str (-wfca=str), where str contains any of:
2022             # * { [ (
2023             # to indicate which opening parens may weld to a subsequent '=>'
2024              
2025             # The flag -wfc would then be equivalent to -wfca='('
2026              
2027             # This has not been done because it is not yet clear how useful
2028             # this generalization would be.
2029 554         1173 return;
2030             } ## end sub initialize_weld_fat_comma_rules
2031              
2032             sub initialize_line_up_parentheses_control_hash {
2033 4     4 0 16 my ( $str, $opt_name ) = @_;
2034 4 50       19 return unless ($str);
2035 4         25 $str =~ s/^\s+//;
2036 4         17 $str =~ s/\s+$//;
2037 4 50       21 return unless ($str);
2038              
2039             # The format is space separated items, where each item must consist of a
2040             # string with a token type preceded by an optional text token and followed
2041             # by an integer:
2042             # For example:
2043             # W(1
2044             # = (flag1)(key)(flag2), where
2045             # flag1 = 'W'
2046             # key = '('
2047             # flag2 = '1'
2048              
2049 4         22 my @items = split /\s+/, $str;
2050 4         13 my $msg1;
2051             my $msg2;
2052 4         16 foreach my $item (@items) {
2053 10         16 my $item_save = $item;
2054 10         19 my ( $flag1, $key, $flag2 );
2055 10 50       56 if ( $item =~ /^([^\(\]\{]*)?([\(\{\[])(\d)?$/ ) {
2056 10 100       37 $flag1 = $1 if $1;
2057 10 50       28 $key = $2 if $2;
2058 10 100       35 $flag2 = $3 if $3;
2059             }
2060             else {
2061 0         0 $msg1 .= " '$item_save'";
2062 0         0 next;
2063             }
2064              
2065 10 50       25 if ( !defined($key) ) {
2066 0         0 $msg1 .= " '$item_save'";
2067 0         0 next;
2068             }
2069              
2070             # Check for valid flag1
2071 10 100       42 if ( !defined($flag1) ) { $flag1 = '*' }
  7 50       13  
2072             elsif ( $flag1 !~ /^[kKfFwW\*]$/ ) {
2073 0         0 $msg1 .= " '$item_save'";
2074 0         0 next;
2075             }
2076              
2077             # Check for valid flag2
2078             # 0 or blank: ignore container contents
2079             # 1 all containers with sublists match
2080             # 2 all containers with sublists, code blocks or ternary operators match
2081             # ... this could be extended in the future
2082 10 100       39 if ( !defined($flag2) ) { $flag2 = 0 }
  7 50       12  
2083             elsif ( $flag2 !~ /^[012]$/ ) {
2084 0         0 $msg1 .= " '$item_save'";
2085 0         0 next;
2086             }
2087              
2088 10 50       29 if ( !defined( $line_up_parentheses_control_hash{$key} ) ) {
2089 10         27 $line_up_parentheses_control_hash{$key} = [ $flag1, $flag2 ];
2090 10         31 next;
2091             }
2092              
2093             # check for multiple conflicting specifications
2094 0         0 my $rflags = $line_up_parentheses_control_hash{$key};
2095 0         0 my $err;
2096 0 0 0     0 if ( defined( $rflags->[0] ) && $rflags->[0] ne $flag1 ) {
2097 0         0 $err = 1;
2098 0         0 $rflags->[0] = $flag1;
2099             }
2100 0 0 0     0 if ( defined( $rflags->[1] ) && $rflags->[1] ne $flag2 ) {
2101 0         0 $err = 1;
2102 0         0 $rflags->[1] = $flag2;
2103             }
2104 0 0       0 $msg2 .= " '$item_save'" if ($err);
2105 0         0 next;
2106             }
2107 4 50       34 if ($msg1) {
2108 0         0 Warn(<<EOM);
2109             Unexpecting symbol(s) encountered in --$opt_name will be ignored:
2110             $msg1
2111             EOM
2112             }
2113 4 50       52 if ($msg2) {
2114 0         0 Warn(<<EOM);
2115             Multiple specifications were encountered in the $opt_name at:
2116             $msg2
2117             Only the last will be used.
2118             EOM
2119             }
2120              
2121             # Speedup: we can turn off -lp if it is not actually used
2122 4 100       22 if ($line_up_parentheses_control_is_lxpl) {
2123 3         8 my $all_off = 1;
2124 3         10 foreach my $key (qw# ( { [ #) {
2125 5         9 my $rflags = $line_up_parentheses_control_hash{$key};
2126 5 50       17 if ( defined($rflags) ) {
2127 5         9 my ( $flag1, $flag2 ) = @{$rflags};
  5         13  
2128 5 100 66     30 if ( $flag1 && $flag1 ne '*' ) { $all_off = 0; last }
  2         7  
  2         6  
2129 3 50       10 if ($flag2) { $all_off = 0; last }
  0         0  
  0         0  
2130             }
2131             }
2132 3 100       39 if ($all_off) {
2133 1         4 $rOpts->{'line-up-parentheses'} = EMPTY_STRING;
2134             }
2135             }
2136              
2137 4         15 return;
2138             } ## end sub initialize_line_up_parentheses_control_hash
2139              
2140             sub initialize_space_after_keyword {
2141              
2142             # default keywords for which space is introduced before an opening paren
2143             # (at present, including them messes up vertical alignment)
2144 554     554 0 5191 my @sak = qw(my local our and or xor err eq ne if else elsif until
2145             unless while for foreach return switch case given when catch);
2146 554         1761 %space_after_keyword = map { $_ => 1 } @sak;
  12742         30304  
2147              
2148             # first remove any or all of these if desired
2149 554 100       3159 if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
2150              
2151             # -nsak='*' selects all the above keywords
2152 1 50 33     9 if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
  0         0  
2153 1         7 @space_after_keyword{@q} = (0) x scalar(@q);
2154             }
2155              
2156             # then allow user to add to these defaults
2157 554 100       2594 if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
2158 1         8 @space_after_keyword{@q} = (1) x scalar(@q);
2159             }
2160              
2161 554         2020 return;
2162             } ## end sub initialize_space_after_keyword
2163              
2164             sub initialize_extended_block_tightness_list {
2165              
2166             # Setup the control hash for --extended-block-tightness
2167              
2168             # keywords taking indirect objects:
2169 554     554 0 3602 my @k_list = keys %is_indirect_object_taker;
2170              
2171             # type symbols which may precede an opening block brace
2172 554         2436 my @t_list = qw($ @ % & *);
2173 554         1524 push @t_list, '$#';
2174              
2175 554         2085 my @all = ( @k_list, @t_list );
2176              
2177             # We will build the selection in %hash
2178             # By default the option is 'on' for keywords only (-xbtl='k')
2179 554         1348 my %hash;
2180 554         3783 @hash{@k_list} = (1) x scalar(@k_list);
2181 554         3227 @hash{@t_list} = (0) x scalar(@t_list);
2182              
2183             # This can be overridden with -xbtl="..."
2184 554         1730 my $long_name = 'extended-block-tightness-list';
2185 554 100       2420 if ( $rOpts->{$long_name} ) {
2186 2         8 my @words = split_words( $rOpts->{$long_name} );
2187 2         6 my @unknown;
2188              
2189             # Turn everything off
2190 2         16 @hash{@all} = (0) x scalar(@all);
2191              
2192             # Then turn on selections
2193 2         6 foreach my $word (@words) {
2194              
2195             # 'print' etc turns on a specific word or symbol
2196 4 100       40 if ( defined( $hash{$word} ) ) { $hash{$word} = 1; }
  2 50       4  
    100          
    50          
2197              
2198             # 'k' turns on all keywords
2199             elsif ( $word eq 'k' ) {
2200 0         0 @hash{@k_list} = (1) x scalar(@k_list);
2201             }
2202              
2203             # 't' turns on all symbols
2204             elsif ( $word eq 't' ) {
2205 1         6 @hash{@t_list} = (1) x scalar(@t_list);
2206             }
2207              
2208             # 'kt' same as 'k' and 't' for convenience
2209             elsif ( $word eq 'kt' ) {
2210 1         6 @hash{@all} = (1) x scalar(@all);
2211             }
2212              
2213             # Anything else is an error
2214 0         0 else { push @unknown, $word }
2215             }
2216 2 50       15 if (@unknown) {
2217 0         0 my $num = @unknown;
2218 0         0 local $LIST_SEPARATOR = SPACE;
2219 0         0 Warn(<<EOM);
2220             $num unrecognized keyword(s) were input with --$long_name :
2221             @unknown
2222             EOM
2223             }
2224             }
2225              
2226             # Transfer the result to the global hash
2227 554         4557 %extended_block_tightness_list = %hash;
2228              
2229 554         2855 return;
2230             } ## end sub initialize_extended_block_tightness_list
2231              
2232             sub initialize_token_break_preferences {
2233              
2234             # implement user break preferences
2235             my $break_after = sub {
2236 556     556   2244 my @toks = @_;
2237 556         2029 foreach my $tok (@toks) {
2238 124 100       259 if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
  2         9  
2239 124 50       213 if ( $tok eq ',' ) { $controlled_comma_style = 1 }
  0         0  
2240 124         184 my $lbs = $left_bond_strength{$tok};
2241 124         173 my $rbs = $right_bond_strength{$tok};
2242 124 100 33     491 if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
      66        
2243 22         72 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
2244             ( $lbs, $rbs );
2245             }
2246             }
2247 556         1231 return;
2248 554     554 0 4823 };
2249              
2250             my $break_before = sub {
2251 555     555   1917 my @toks = @_;
2252 555         1778 foreach my $tok (@toks) {
2253 370 50       636 if ( $tok eq ',' ) { $controlled_comma_style = 1 }
  0         0  
2254 370         607 my $lbs = $left_bond_strength{$tok};
2255 370         1643 my $rbs = $right_bond_strength{$tok};
2256 370 100 33     1375 if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
      66        
2257 361         753 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
2258             ( $lbs, $rbs );
2259             }
2260             }
2261 555         1137 return;
2262 554         3021 };
2263              
2264 554 100       2521 $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
2265             $break_before->(@all_operators)
2266 554 100       2186 if ( $rOpts->{'break-before-all-operators'} );
2267              
2268 554         2392 $break_after->( split_words( $rOpts->{'want-break-after'} ) );
2269 554         2543 $break_before->( split_words( $rOpts->{'want-break-before'} ) );
2270              
2271             # make note if breaks are before certain key types
2272 554         7292 %want_break_before = ();
2273 554         2094 foreach my $tok ( @all_operators, ',' ) {
2274             $want_break_before{$tok} =
2275 23822         54214 $left_bond_strength{$tok} < $right_bond_strength{$tok};
2276             }
2277              
2278             # Coordinate ?/: breaks, which must be similar
2279             # The small strength 0.01 which is added is 1% of the strength of one
2280             # indentation level and seems to work okay.
2281 554 100       3191 if ( !$want_break_before{':'} ) {
2282 2         17 $want_break_before{'?'} = $want_break_before{':'};
2283 2         8 $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
2284 2         7 $left_bond_strength{'?'} = NO_BREAK;
2285             }
2286              
2287             # Only make a hash entry for the next parameters if values are defined.
2288             # That allows a quick check to be made later.
2289 554         1804 %break_before_container_types = ();
2290 554         1863 for ( $rOpts->{'break-before-hash-brace'} ) {
2291 554 100 66     2743 $break_before_container_types{'{'} = $_ if $_ && $_ > 0;
2292             }
2293 554         1764 for ( $rOpts->{'break-before-square-bracket'} ) {
2294 554 50 33     2413 $break_before_container_types{'['} = $_ if $_ && $_ > 0;
2295             }
2296 554         1711 for ( $rOpts->{'break-before-paren'} ) {
2297 554 100 66     2531 $break_before_container_types{'('} = $_ if $_ && $_ > 0;
2298             }
2299 554         7334 return;
2300             } ## end sub initialize_token_break_preferences
2301              
2302 38     38   396 use constant DEBUG_KB => 0;
  38         100  
  38         55351  
2303              
2304             sub initialize_keep_old_breakpoints {
2305 1108     1108 0 3518 my ( $str, $short_name, $rkeep_break_hash ) = @_;
2306 1108 100       3135 return unless $str;
2307              
2308 2         5 my %flags = ();
2309 2         6 my @list = split_words($str);
2310 2         5 if ( DEBUG_KB && @list ) {
2311             local $LIST_SEPARATOR = SPACE;
2312             print <<EOM;
2313             DEBUG_KB entering for '$short_name' with str=$str\n";
2314             list is: @list;
2315             EOM
2316             }
2317              
2318             # Ignore kbb='(' and '[' and '{': can cause unstable math formatting
2319             # (issues b1346, b1347, b1348) and likewise ignore kba=')' and ']' and '}'
2320             # Also always ignore ? and : (b1440 and b1433-b1439)
2321 2 100       11 if ( $short_name eq 'kbb' ) {
    50          
2322 1         4 @list = grep { !m/[\(\[\{\?\:]/ } @list;
  2         11  
2323             }
2324             elsif ( $short_name eq 'kba' ) {
2325 1         2 @list = grep { !m/[\)\]\}\?\:]/ } @list;
  1         4  
2326             }
2327              
2328             # pull out any any leading container code, like f( or *{
2329             # For example: 'f(' becomes flags hash entry '(' => 'f'
2330 2         6 foreach my $item (@list) {
2331 3 50       10 if ( $item =~ /^( [ \w\* ] )( [ \{\(\[\}\)\] ] )$/x ) {
2332 0         0 $item = $2;
2333 0         0 $flags{$2} = $1;
2334             }
2335             }
2336              
2337 2         4 my @unknown_types;
2338 2         4 foreach my $type (@list) {
2339 3 50       13 if ( !Perl::Tidy::Tokenizer::is_valid_token_type($type) ) {
2340 0         0 push @unknown_types, $type;
2341             }
2342             }
2343              
2344 2 50       6 if (@unknown_types) {
2345 0         0 my $num = @unknown_types;
2346 0         0 local $LIST_SEPARATOR = SPACE;
2347 0         0 Warn(<<EOM);
2348             $num unrecognized token types were input with --$short_name :
2349             @unknown_types
2350             EOM
2351             }
2352              
2353 2         4 @{$rkeep_break_hash}{@list} = (1) x scalar(@list);
  2         5  
2354              
2355 2         7 foreach my $key ( keys %flags ) {
2356 0         0 my $flag = $flags{$key};
2357              
2358 0 0 0     0 if ( length($flag) != 1 ) {
    0 0        
    0 0        
      0        
2359 0         0 Warn(<<EOM);
2360             Multiple entries given for '$key' in '$short_name'
2361             EOM
2362             }
2363             elsif ( ( $key eq '(' || $key eq ')' ) && $flag !~ /^[kKfFwW\*]$/ ) {
2364 0         0 Warn(<<EOM);
2365             Unknown flag '$flag' given for '$key' in '$short_name'
2366             EOM
2367             }
2368             elsif ( ( $key eq '}' || $key eq '}' ) && $flag !~ /^[bB\*]$/ ) {
2369 0         0 Warn(<<EOM);
2370             Unknown flag '$flag' given for '$key' in '$short_name'
2371             EOM
2372             }
2373              
2374 0         0 $rkeep_break_hash->{$key} = $flag;
2375             }
2376              
2377 2         4 if ( DEBUG_KB && @list ) {
2378             my @tmp = %flags;
2379             local $LIST_SEPARATOR = SPACE;
2380             print <<EOM;
2381              
2382             DEBUG_KB -$short_name flag: $str
2383             final keys: @list
2384             special flags: @tmp
2385             EOM
2386              
2387             }
2388              
2389 2         5 return;
2390              
2391             } ## end sub initialize_keep_old_breakpoints
2392              
2393             sub initialize_global_option_vars {
2394              
2395             #------------------------------------------------------------
2396             # Make global vars for frequently used options for efficiency
2397             #------------------------------------------------------------
2398              
2399 554     554 0 1598 $rOpts_add_newlines = $rOpts->{'add-newlines'};
2400 554         1493 $rOpts_add_trailing_commas = $rOpts->{'add-trailing-commas'};
2401 554         1332 $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
2402             $rOpts_blank_lines_after_opening_block =
2403 554         1370 $rOpts->{'blank-lines-after-opening-block'};
2404 554         1500 $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
2405             $rOpts_block_brace_vertical_tightness =
2406 554         1399 $rOpts->{'block-brace-vertical-tightness'};
2407             $rOpts_brace_follower_vertical_tightness =
2408 554         1498 $rOpts->{'brace-follower-vertical-tightness'};
2409 554         1472 $rOpts_break_after_labels = $rOpts->{'break-after-labels'};
2410             $rOpts_break_at_old_attribute_breakpoints =
2411 554         1528 $rOpts->{'break-at-old-attribute-breakpoints'};
2412             $rOpts_break_at_old_comma_breakpoints =
2413 554         1439 $rOpts->{'break-at-old-comma-breakpoints'};
2414             $rOpts_break_at_old_keyword_breakpoints =
2415 554         1362 $rOpts->{'break-at-old-keyword-breakpoints'};
2416             $rOpts_break_at_old_logical_breakpoints =
2417 554         1412 $rOpts->{'break-at-old-logical-breakpoints'};
2418             $rOpts_break_at_old_semicolon_breakpoints =
2419 554         1388 $rOpts->{'break-at-old-semicolon-breakpoints'};
2420             $rOpts_break_at_old_ternary_breakpoints =
2421 554         1438 $rOpts->{'break-at-old-ternary-breakpoints'};
2422 554         1630 $rOpts_break_open_compact_parens = $rOpts->{'break-open-compact-parens'};
2423 554         1381 $rOpts_closing_side_comments = $rOpts->{'closing-side-comments'};
2424             $rOpts_closing_side_comment_else_flag =
2425 554         1421 $rOpts->{'closing-side-comment-else-flag'};
2426             $rOpts_closing_side_comment_maximum_text =
2427 554         1293 $rOpts->{'closing-side-comment-maximum-text'};
2428 554         1271 $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
2429 554         1216 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
2430 554         1337 $rOpts_cuddled_paren_brace = $rOpts->{'cuddled-paren-brace'};
2431             $rOpts_delete_closing_side_comments =
2432 554         1276 $rOpts->{'delete-closing-side-comments'};
2433 554         1346 $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
2434             $rOpts_extended_continuation_indentation =
2435 554         1287 $rOpts->{'extended-continuation-indentation'};
2436 554         1333 $rOpts_delete_side_comments = $rOpts->{'delete-side-comments'};
2437 554         1204 $rOpts_delete_trailing_commas = $rOpts->{'delete-trailing-commas'};
2438             $rOpts_delete_weld_interfering_commas =
2439 554         1182 $rOpts->{'delete-weld-interfering-commas'};
2440 554         1316 $rOpts_format_skipping = $rOpts->{'format-skipping'};
2441 554         1454 $rOpts_freeze_whitespace = $rOpts->{'freeze-whitespace'};
2442             $rOpts_function_paren_vertical_alignment =
2443 554         1218 $rOpts->{'function-paren-vertical-alignment'};
2444 554         1426 $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
2445 554         1253 $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
2446             $rOpts_ignore_side_comment_lengths =
2447 554         1282 $rOpts->{'ignore-side-comment-lengths'};
2448 554         1436 $rOpts_ignore_perlcritic_comments = $rOpts->{'ignore-perlcritic-comments'};
2449 554         1304 $rOpts_indent_closing_brace = $rOpts->{'indent-closing-brace'};
2450 554         1212 $rOpts_indent_columns = $rOpts->{'indent-columns'};
2451 554         1208 $rOpts_indent_only = $rOpts->{'indent-only'};
2452 554         1216 $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
2453 554         1290 $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
2454 554         1274 $rOpts_extended_block_tightness = $rOpts->{'extended-block-tightness'};
2455             $rOpts_extended_line_up_parentheses =
2456 554         1314 $rOpts->{'extended-line-up-parentheses'};
2457 554         1262 $rOpts_logical_padding = $rOpts->{'logical-padding'};
2458             $rOpts_maximum_consecutive_blank_lines =
2459 554         1283 $rOpts->{'maximum-consecutive-blank-lines'};
2460 554         1367 $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
2461 554         1282 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
2462 554         1230 $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
2463             $rOpts_opening_brace_always_on_right =
2464 554         1312 $rOpts->{'opening-brace-always-on-right'};
2465 554         1255 $rOpts_outdent_keywords = $rOpts->{'outdent-keywords'};
2466 554         1203 $rOpts_outdent_labels = $rOpts->{'outdent-labels'};
2467 554         1203 $rOpts_outdent_long_comments = $rOpts->{'outdent-long-comments'};
2468 554         1307 $rOpts_outdent_long_quotes = $rOpts->{'outdent-long-quotes'};
2469             $rOpts_outdent_static_block_comments =
2470 554         1273 $rOpts->{'outdent-static-block-comments'};
2471 554         1234 $rOpts_recombine = $rOpts->{'recombine'};
2472             $rOpts_short_concatenation_item_length =
2473 554         1282 $rOpts->{'short-concatenation-item-length'};
2474 554         1272 $rOpts_space_prototype_paren = $rOpts->{'space-prototype-paren'};
2475 554         1194 $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
2476 554         1346 $rOpts_static_block_comments = $rOpts->{'static-block-comments'};
2477 554         1254 $rOpts_tee_block_comments = $rOpts->{'tee-block-comments'};
2478 554         1148 $rOpts_tee_pod = $rOpts->{'tee-pod'};
2479 554         1300 $rOpts_tee_side_comments = $rOpts->{'tee-side-comments'};
2480 554         1357 $rOpts_valign_code = $rOpts->{'valign-code'};
2481 554         1200 $rOpts_valign_side_comments = $rOpts->{'valign-side-comments'};
2482 554         1143 $rOpts_valign_if_unless = $rOpts->{'valign-if-unless'};
2483             $rOpts_variable_maximum_line_length =
2484 554         1236 $rOpts->{'variable-maximum-line-length'};
2485              
2486             # Note that both opening and closing tokens can access the opening
2487             # and closing flags of their container types.
2488             %opening_vertical_tightness = (
2489             '(' => $rOpts->{'paren-vertical-tightness'},
2490             '{' => $rOpts->{'brace-vertical-tightness'},
2491             '[' => $rOpts->{'square-bracket-vertical-tightness'},
2492             ')' => $rOpts->{'paren-vertical-tightness'},
2493             '}' => $rOpts->{'brace-vertical-tightness'},
2494 554         4514 ']' => $rOpts->{'square-bracket-vertical-tightness'},
2495             );
2496              
2497             %closing_vertical_tightness = (
2498             '(' => $rOpts->{'paren-vertical-tightness-closing'},
2499             '{' => $rOpts->{'brace-vertical-tightness-closing'},
2500             '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
2501             ')' => $rOpts->{'paren-vertical-tightness-closing'},
2502             '}' => $rOpts->{'brace-vertical-tightness-closing'},
2503 554         4345 ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
2504             );
2505              
2506             # assume flag for '>' same as ')' for closing qw quotes
2507             %closing_token_indentation = (
2508             ')' => $rOpts->{'closing-paren-indentation'},
2509             '}' => $rOpts->{'closing-brace-indentation'},
2510             ']' => $rOpts->{'closing-square-bracket-indentation'},
2511 554         3333 '>' => $rOpts->{'closing-paren-indentation'},
2512             );
2513              
2514             # flag indicating if any closing tokens are indented
2515             $some_closing_token_indentation =
2516             $rOpts->{'closing-paren-indentation'}
2517             || $rOpts->{'closing-brace-indentation'}
2518             || $rOpts->{'closing-square-bracket-indentation'}
2519 554   66     5752 || $rOpts->{'indent-closing-brace'};
2520              
2521             %opening_token_right = (
2522             '(' => $rOpts->{'opening-paren-right'},
2523             '{' => $rOpts->{'opening-hash-brace-right'},
2524 554         2720 '[' => $rOpts->{'opening-square-bracket-right'},
2525             );
2526              
2527             %stack_opening_token = (
2528             '(' => $rOpts->{'stack-opening-paren'},
2529             '{' => $rOpts->{'stack-opening-hash-brace'},
2530 554         2379 '[' => $rOpts->{'stack-opening-square-bracket'},
2531             );
2532              
2533             %stack_closing_token = (
2534             ')' => $rOpts->{'stack-closing-paren'},
2535             '}' => $rOpts->{'stack-closing-hash-brace'},
2536 554         2220 ']' => $rOpts->{'stack-closing-square-bracket'},
2537             );
2538 554         1343 return;
2539             } ## end sub initialize_global_option_vars
2540              
2541             sub initialize_line_length_vars {
2542              
2543             # Create a table of maximum line length vs level for later efficient use.
2544             # We will make the tables very long to be sure it will not be exceeded.
2545             # But we have to choose a fixed length. A check will be made at the start
2546             # of sub 'finish_formatting' to be sure it is not exceeded. Note, some of
2547             # my standard test problems have indentation levels of about 150, so this
2548             # should be fairly large. If the choice of a maximum level ever becomes
2549             # an issue then these table values could be returned in a sub with a simple
2550             # memoization scheme.
2551              
2552             # Also create a table of the maximum spaces available for text due to the
2553             # level only. If a line has continuation indentation, then that space must
2554             # be subtracted from the table value. This table is used for preliminary
2555             # estimates in welding, extended_ci, BBX, and marking short blocks.
2556 38     38   356 use constant LEVEL_TABLE_MAX => 1000;
  38         107  
  38         66025  
2557              
2558             # The basic scheme:
2559 554     554 0 1980 foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
2560 554554         688138 my $indent = $level * $rOpts_indent_columns;
2561 554554         750944 $maximum_line_length_at_level[$level] = $rOpts_maximum_line_length;
2562 554554         803787 $maximum_text_length_at_level[$level] =
2563             $rOpts_maximum_line_length - $indent;
2564             }
2565              
2566             # Correct the maximum_text_length table if the -wc=n flag is used
2567 554         3183 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
2568 554 100       2298 if ($rOpts_whitespace_cycle) {
2569 2 50       10 if ( $rOpts_whitespace_cycle > 0 ) {
2570 2         11 foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
2571 2002         2928 my $level_mod = $level % $rOpts_whitespace_cycle;
2572 2002         2424 my $indent = $level_mod * $rOpts_indent_columns;
2573 2002         2807 $maximum_text_length_at_level[$level] =
2574             $rOpts_maximum_line_length - $indent;
2575             }
2576             }
2577             else {
2578 0         0 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'} = 0;
2579             }
2580             }
2581              
2582             # Correct the tables if the -vmll flag is used. These values override the
2583             # previous values.
2584 554 100       2098 if ($rOpts_variable_maximum_line_length) {
2585 1         6 foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
2586 1001         1414 $maximum_text_length_at_level[$level] = $rOpts_maximum_line_length;
2587 1001         1342 $maximum_line_length_at_level[$level] =
2588             $rOpts_maximum_line_length + $level * $rOpts_indent_columns;
2589             }
2590             }
2591              
2592             # Define two measures of indentation level, alpha and beta, at which some
2593             # formatting features come under stress and need to start shutting down.
2594             # Some combination of the two will be used to shut down different
2595             # formatting features.
2596             # Put a reasonable upper limit on stress level (say 100) in case the
2597             # whitespace-cycle variable is used.
2598 554         3624 my $stress_level_limit = min( 100, LEVEL_TABLE_MAX );
2599              
2600             # Find stress_level_alpha, targeted at very short maximum line lengths.
2601 554         1427 $stress_level_alpha = $stress_level_limit + 1;
2602 554         1998 foreach my $level_test ( 0 .. $stress_level_limit ) {
2603 10506         14493 my $max_len = $maximum_text_length_at_level[ $level_test + 1 ];
2604 10506         14433 my $excess_inside_space =
2605             $max_len -
2606             $rOpts_continuation_indentation -
2607             $rOpts_indent_columns - 8;
2608 10506 100       19348 if ( $excess_inside_space <= 0 ) {
2609 541         2437 $stress_level_alpha = $level_test;
2610 541         2200 last;
2611             }
2612             }
2613              
2614             # Find stress level beta, a stress level targeted at formatting
2615             # at deep levels near the maximum line length. We start increasing
2616             # from zero and stop at the first level which shows no more space.
2617              
2618             # 'const' is a fixed number of spaces for a typical variable.
2619             # Cases b1197-b1204 work ok with const=12 but not with const=8
2620 554         1740 my $const = 16;
2621 554         2309 my $denom = max( 1, $rOpts_indent_columns );
2622 554         1486 $stress_level_beta = 0;
2623 554         1851 foreach my $level ( 0 .. $stress_level_limit ) {
2624 8895         16483 my $remaining_cycles = max(
2625             0,
2626             (
2627             $maximum_text_length_at_level[$level] -
2628             $rOpts_continuation_indentation - $const
2629             ) / $denom
2630             );
2631 8895 100       16777 last if ( $remaining_cycles <= 3 ); # 2 does not work
2632 8354         12048 $stress_level_beta = $level;
2633             }
2634              
2635             # This is a combined level which works well for turning off formatting
2636             # features in most cases:
2637 554         3627 $high_stress_level = min( $stress_level_alpha, $stress_level_beta + 2 );
2638              
2639 554         1521 return;
2640             } ## end sub initialize_line_length_vars
2641              
2642             sub initialize_trailing_comma_rules {
2643              
2644             # Setup control hash for trailing commas
2645              
2646             # -wtc=s defines desired trailing comma policy:
2647             #
2648             # =" " stable
2649             # [ both -atc and -dtc ignored ]
2650             # =0 : none
2651             # [requires -dtc; -atc ignored]
2652             # =1 or * : all
2653             # [requires -atc; -dtc ignored]
2654             # =m : multiline lists require trailing comma
2655             # if -atc set => will add missing multiline trailing commas
2656             # if -dtc set => will delete trailing single line commas
2657             # =b or 'bare' (multiline) lists require trailing comma
2658             # if -atc set => will add missing bare trailing commas
2659             # if -dtc set => will delete non-bare trailing commas
2660             # =h or 'hash': single column stable bare lists require trailing comma
2661             # if -atc set will add these
2662             # if -dtc set will delete other trailing commas
2663              
2664             #-------------------------------------------------------------------
2665             # This routine must be called after the alpha and beta stress levels
2666             # have been defined in sub 'initialize_line_length_vars'.
2667             #-------------------------------------------------------------------
2668              
2669 554     554 0 1778 %trailing_comma_rules = ();
2670              
2671 554         3320 my $rvalid_flags = [qw(0 1 * m b h i)];
2672              
2673 554         1771 my $option = $rOpts->{'want-trailing-commas'};
2674              
2675 554 100       2025 if ($option) {
2676 6         31 $option =~ s/^\s+//;
2677 6         32 $option =~ s/\s+$//;
2678             }
2679              
2680             # We need to use length() here because '0' is a possible option
2681 554 100 66     2451 if ( defined($option) && length($option) ) {
2682 7         16 my $error_message;
2683             my %rule_hash;
2684 7         18 my @q = @{$rvalid_flags};
  7         31  
2685 7         28 my %is_valid_flag;
2686 7         55 @is_valid_flag{@q} = (1) x scalar(@q);
2687              
2688             # handle single character control, such as -wtc='b'
2689 7 50       54 if ( length($option) == 1 ) {
2690 7         25 foreach (qw< ) ] } >) {
2691 21         69 $rule_hash{$_} = [ $option, EMPTY_STRING ];
2692             }
2693             }
2694              
2695             # handle multi-character control(s), such as -wtc='[m' or -wtc='k(m'
2696             else {
2697 0         0 my @parts = split /\s+/, $option;
2698 0         0 foreach my $part (@parts) {
2699 0 0 0     0 if ( length($part) >= 2 && length($part) <= 3 ) {
2700 0         0 my $val = substr( $part, -1, 1 );
2701 0         0 my $key_o = substr( $part, -2, 1 );
2702 0 0       0 if ( $is_opening_token{$key_o} ) {
2703 0         0 my $paren_flag = EMPTY_STRING;
2704 0 0       0 if ( length($part) == 3 ) {
2705 0         0 $paren_flag = substr( $part, 0, 1 );
2706             }
2707 0         0 my $key = $matching_token{$key_o};
2708 0         0 $rule_hash{$key} = [ $val, $paren_flag ];
2709             }
2710             else {
2711 0         0 $error_message .= "Unrecognized term: '$part'\n";
2712             }
2713             }
2714             else {
2715 0         0 $error_message .= "Unrecognized term: '$part'\n";
2716             }
2717             }
2718             }
2719              
2720             # check for valid control characters
2721 7 50       44 if ( !$error_message ) {
2722 7         36 foreach my $key ( keys %rule_hash ) {
2723 21         35 my $item = $rule_hash{$key};
2724 21         36 my ( $val, $paren_flag ) = @{$item};
  21         38  
2725 21 50 66     97 if ( $val && !$is_valid_flag{$val} ) {
2726 0         0 my $valid_str = join( SPACE, @{$rvalid_flags} );
  0         0  
2727 0         0 $error_message .=
2728             "Unexpected value '$val'; must be one of: $valid_str\n";
2729 0         0 last;
2730             }
2731 21 50       60 if ($paren_flag) {
2732 0 0       0 if ( $paren_flag !~ /^[kKfFwW]$/ ) {
2733 0         0 $error_message .=
2734             "Unexpected paren flag '$paren_flag'; must be one of: k K f F w W\n";
2735 0         0 last;
2736             }
2737 0 0       0 if ( $key ne ')' ) {
2738 0         0 $error_message .=
2739             "paren flag '$paren_flag' is only allowed before a '('\n";
2740 0         0 last;
2741             }
2742             }
2743             }
2744             }
2745              
2746 7 50       63 if ($error_message) {
2747 0         0 Warn(<<EOM);
2748             Error parsing --want-trailing-commas='$option':
2749             $error_message
2750             EOM
2751             }
2752              
2753             # Set the control hash if no errors
2754             else {
2755 7         42 %trailing_comma_rules = %rule_hash;
2756             }
2757             }
2758              
2759             # Both adding and deleting commas can lead to instability in extreme cases
2760 554 100 100     2314 if ( $rOpts_add_trailing_commas && $rOpts_delete_trailing_commas ) {
2761              
2762             # If the possible instability is significant, then we can turn off
2763             # -dtc as a defensive measure to prevent it.
2764              
2765             # We must turn off -dtc for very small values of --whitespace-cycle
2766             # to avoid instability. A minimum value of -wc=3 fixes b1393, but a
2767             # value of 4 is used here for safety. This parameter is seldom used,
2768             # and much larger than this when used, so the cutoff value is not
2769             # critical.
2770 4 50 33     23 if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle <= 4 ) {
2771 0         0 $rOpts_delete_trailing_commas = 0;
2772             }
2773             }
2774              
2775 554         1692 return;
2776             } ## end sub initialize_trailing_comma_rules
2777              
2778             sub initialize_whitespace_hashes {
2779              
2780             # This is called once before formatting begins to initialize these global
2781             # hashes, which control the use of whitespace around tokens:
2782             #
2783             # %binary_ws_rules
2784             # %want_left_space
2785             # %want_right_space
2786             # %space_after_keyword
2787             #
2788             # Many token types are identical to the tokens themselves.
2789             # See the tokenizer for a complete list. Here are some special types:
2790             # k = perl keyword
2791             # f = semicolon in for statement
2792             # m = unary minus
2793             # p = unary plus
2794             # Note that :: is excluded since it should be contained in an identifier
2795             # Note that '->' is excluded because it never gets space
2796             # parentheses and brackets are excluded since they are handled specially
2797             # curly braces are included but may be overridden by logic, such as
2798             # newline logic.
2799              
2800             # NEW_TOKENS: create a whitespace rule here. This can be as
2801             # simple as adding your new letter to @spaces_both_sides, for
2802             # example.
2803              
2804 554     554 0 9230 my @spaces_both_sides = qw#
2805             + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
2806             .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
2807             &&= ||= //= <=> A k f w F n C Y U G v
2808             #;
2809              
2810 554         3010 my @spaces_left_side = qw<
2811             t ! ~ m p { \ h pp mm Z j
2812             >;
2813 554         1527 push( @spaces_left_side, '#' ); # avoids warning message
2814              
2815 554         2465 my @spaces_right_side = qw<
2816             ; } ) ] R J ++ -- **=
2817             >;
2818 554         1431 push( @spaces_right_side, ',' ); # avoids warning message
2819              
2820 554         13435 %want_left_space = ();
2821 554         7723 %want_right_space = ();
2822 554         9230 %binary_ws_rules = ();
2823              
2824             # Note that we setting defaults here. Later in processing
2825             # the values of %want_left_space and %want_right_space
2826             # may be overridden by any user settings specified by the
2827             # -wls and -wrs parameters. However the binary_whitespace_rules
2828             # are hardwired and have priority.
2829 554         17739 @want_left_space{@spaces_both_sides} =
2830             (1) x scalar(@spaces_both_sides);
2831 554         7232 @want_right_space{@spaces_both_sides} =
2832             (1) x scalar(@spaces_both_sides);
2833 554         6252 @want_left_space{@spaces_left_side} =
2834             (1) x scalar(@spaces_left_side);
2835 554         3051 @want_right_space{@spaces_left_side} =
2836             (-1) x scalar(@spaces_left_side);
2837 554         4670 @want_left_space{@spaces_right_side} =
2838             (-1) x scalar(@spaces_right_side);
2839 554         2813 @want_right_space{@spaces_right_side} =
2840             (1) x scalar(@spaces_right_side);
2841 554         2228 $want_left_space{'->'} = WS_NO;
2842 554         1556 $want_right_space{'->'} = WS_NO;
2843 554         1436 $want_left_space{'**'} = WS_NO;
2844 554         1488 $want_right_space{'**'} = WS_NO;
2845 554         1596 $want_right_space{'CORE::'} = WS_NO;
2846              
2847             # These binary_ws_rules are hardwired and have priority over the above
2848             # settings. It would be nice to allow adjustment by the user,
2849             # but it would be complicated to specify.
2850             #
2851             # hash type information must stay tightly bound
2852             # as in : ${xxxx}
2853 554         2085 $binary_ws_rules{'i'}{'L'} = WS_NO;
2854 554         1638 $binary_ws_rules{'i'}{'{'} = WS_YES;
2855 554         1671 $binary_ws_rules{'k'}{'{'} = WS_YES;
2856 554         1583 $binary_ws_rules{'U'}{'{'} = WS_YES;
2857 554         1675 $binary_ws_rules{'i'}{'['} = WS_NO;
2858 554         1625 $binary_ws_rules{'R'}{'L'} = WS_NO;
2859 554         1428 $binary_ws_rules{'R'}{'{'} = WS_NO;
2860 554         1469 $binary_ws_rules{'t'}{'L'} = WS_NO;
2861 554         1500 $binary_ws_rules{'t'}{'{'} = WS_NO;
2862 554         1434 $binary_ws_rules{'t'}{'='} = WS_OPTIONAL; # for signatures; fixes b1123
2863 554         1562 $binary_ws_rules{'}'}{'L'} = WS_NO;
2864 554         1478 $binary_ws_rules{'}'}{'{'} = WS_OPTIONAL; # RT#129850; was WS_NO
2865 554         1750 $binary_ws_rules{'$'}{'L'} = WS_NO;
2866 554         1485 $binary_ws_rules{'$'}{'{'} = WS_NO;
2867 554         1687 $binary_ws_rules{'@'}{'L'} = WS_NO;
2868 554         1552 $binary_ws_rules{'@'}{'{'} = WS_NO;
2869 554         1540 $binary_ws_rules{'='}{'L'} = WS_YES;
2870 554         1597 $binary_ws_rules{'J'}{'J'} = WS_YES;
2871              
2872             # the following includes ') {'
2873             # as in : if ( xxx ) { yyy }
2874 554         1582 $binary_ws_rules{']'}{'L'} = WS_NO;
2875 554         1511 $binary_ws_rules{']'}{'{'} = WS_NO;
2876 554         1650 $binary_ws_rules{')'}{'{'} = WS_YES;
2877 554         1524 $binary_ws_rules{')'}{'['} = WS_NO;
2878 554         1623 $binary_ws_rules{']'}{'['} = WS_NO;
2879 554         1454 $binary_ws_rules{']'}{'{'} = WS_NO;
2880 554         1461 $binary_ws_rules{'}'}{'['} = WS_NO;
2881 554         1411 $binary_ws_rules{'R'}{'['} = WS_NO;
2882              
2883 554         1292 $binary_ws_rules{']'}{'++'} = WS_NO;
2884 554         1395 $binary_ws_rules{']'}{'--'} = WS_NO;
2885 554         1378 $binary_ws_rules{')'}{'++'} = WS_NO;
2886 554         1338 $binary_ws_rules{')'}{'--'} = WS_NO;
2887              
2888 554         1326 $binary_ws_rules{'R'}{'++'} = WS_NO;
2889 554         1323 $binary_ws_rules{'R'}{'--'} = WS_NO;
2890              
2891 554         1486 $binary_ws_rules{'i'}{'Q'} = WS_YES;
2892 554         1573 $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
2893              
2894 554         1308 $binary_ws_rules{'i'}{'('} = WS_NO;
2895              
2896 554         1473 $binary_ws_rules{'w'}{'('} = WS_NO;
2897 554         1378 $binary_ws_rules{'w'}{'{'} = WS_YES;
2898 554         3716 return;
2899              
2900             } ## end sub initialize_whitespace_hashes
2901              
2902             { #<<< begin closure set_whitespace_flags
2903              
2904             my %is_special_ws_type;
2905             my %is_wCUG;
2906             my %is_wi;
2907              
2908             BEGIN {
2909              
2910             # The following hash is used to skip over needless if tests.
2911             # Be sure to update it when adding new checks in its block.
2912 38     38   268 my @q = qw(k w C m - Q);
2913 38         152 push @q, '#';
2914 38         264 @is_special_ws_type{@q} = (1) x scalar(@q);
2915              
2916             # These hashes replace slower regex tests
2917 38         129 @q = qw( w C U G );
2918 38         165 @is_wCUG{@q} = (1) x scalar(@q);
2919              
2920 38         96 @q = qw( w i );
2921 38         1036 @is_wi{@q} = (1) x scalar(@q);
2922             } ## end BEGIN
2923              
2924 38     38   289 use constant DEBUG_WHITE => 0;
  38         106  
  38         118025  
2925              
2926             # Hashes to set spaces around container tokens according to their
2927             # sequence numbers. These are set as keywords are examined.
2928             # They are controlled by the -kpit and -kpitl flags.
2929             my %opening_container_inside_ws;
2930             my %closing_container_inside_ws;
2931              
2932             sub set_whitespace_flags {
2933              
2934             # This routine is called once per file to set whitespace flags for that
2935             # file. This routine examines each pair of nonblank tokens and sets a flag
2936             # indicating if white space is needed.
2937             #
2938             # $rwhitespace_flags->[$j] is a flag indicating whether a white space
2939             # BEFORE token $j is needed, with the following values:
2940             #
2941             # WS_NO = -1 do not want a space BEFORE token $j
2942             # WS_OPTIONAL= 0 optional space or $j is a whitespace
2943             # WS_YES = 1 want a space BEFORE token $j
2944             #
2945              
2946 552     552 0 1292 my $self = shift;
2947              
2948 552         1146 my $j_tight_closing_paren = -1;
2949 552         3342 my $rLL = $self->[_rLL_];
2950 552         1154 my $jmax = @{$rLL} - 1;
  552         1673  
2951              
2952 552         1449 %opening_container_inside_ws = ();
2953 552         1229 %closing_container_inside_ws = ();
2954              
2955 552         2815 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
2956              
2957 552         1503 my $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
2958 552         1485 my $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
2959 552         1354 my $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
2960              
2961 552         1229 my $rwhitespace_flags = [];
2962 552         1371 my $ris_function_call_paren = {};
2963              
2964 552 100       3626 return $rwhitespace_flags if ( $jmax < 0 );
2965              
2966 549         2414 my %is_for_foreach = ( 'for' => 1, 'foreach' => 1 );
2967              
2968 549         1551 my $last_token = SPACE;
2969 549         1252 my $last_type = 'b';
2970              
2971 549         1262 my $last_token_dbg = SPACE;
2972 549         1261 my $last_type_dbg = 'b';
2973              
2974 549         1139 my $rtokh_last = [ @{ $rLL->[0] } ];
  549         2436  
2975 549         1614 $rtokh_last->[_TOKEN_] = $last_token;
2976 549         1456 $rtokh_last->[_TYPE_] = $last_type;
2977 549         1238 $rtokh_last->[_TYPE_SEQUENCE_] = EMPTY_STRING;
2978 549         1378 $rtokh_last->[_LINE_INDEX_] = 0;
2979              
2980 549         1189 my $rtokh_last_last = $rtokh_last;
2981              
2982             # This will identify braces to be treated as blocks for the -xbt flag
2983 549         1027 my %block_type_for_tightness;
2984              
2985 549         2393 my ( $ws_1, $ws_2, $ws_3, $ws_4 );
2986              
2987             # main loop over all tokens to define the whitespace flags
2988 549         0 my $last_type_is_opening;
2989 549         0 my ( $token, $type );
2990 549         1158 my $j = -1;
2991 549         1170 foreach my $rtokh ( @{$rLL} ) {
  549         1502  
2992              
2993 51189         62643 $j++;
2994              
2995 51189         85241 $type = $rtokh->[_TYPE_];
2996 51189 100       86395 if ( $type eq 'b' ) {
2997 15270         23276 $rwhitespace_flags->[$j] = WS_OPTIONAL;
2998 15270         23460 next;
2999             }
3000              
3001 35919         54002 $token = $rtokh->[_TOKEN_];
3002              
3003 35919         44427 my $ws;
3004              
3005             #---------------------------------------------------------------
3006             # Whitespace Rules Section 1:
3007             # Handle space on the inside of opening braces.
3008             #---------------------------------------------------------------
3009              
3010             # /^[L\{\(\[]$/
3011 35919 100       57913 if ($last_type_is_opening) {
3012              
3013 4368         7239 $last_type_is_opening = 0;
3014              
3015 4368         7667 my $seqno = $rtokh->[_TYPE_SEQUENCE_];
3016 4368         7543 my $block_type = $rblock_type_of_seqno->{$seqno};
3017 4368         6794 my $last_seqno = $rtokh_last->[_TYPE_SEQUENCE_];
3018             my $last_block_type = $rblock_type_of_seqno->{$last_seqno}
3019 4368   100     12698 || $block_type_for_tightness{$last_seqno};
3020              
3021 4368         6155 $j_tight_closing_paren = -1;
3022              
3023             # let us keep empty matched braces together: () {} []
3024             # except for BLOCKS
3025 4368 100       9857 if ( $token eq $matching_token{$last_token} ) {
3026 223 100       2256 if ($block_type) {
3027 49         114 $ws = WS_YES;
3028             }
3029             else {
3030 174         402 $ws = WS_NO;
3031             }
3032             }
3033             else {
3034              
3035             # we're considering the right of an opening brace
3036             # tightness = 0 means always pad inside with space
3037             # tightness = 1 means pad inside if "complex"
3038             # tightness = 2 means never pad inside with space
3039              
3040 4145         5693 my $tightness;
3041 4145 100 66     10850 if ( $last_block_type && $last_token eq '{' ) {
3042 948         1661 $tightness = $rOpts_block_brace_tightness;
3043             }
3044 3197         5423 else { $tightness = $tightness{$last_token} }
3045              
3046             #=============================================================
3047             # Patch for test problem <<snippets/fabrice_bug.in>>
3048             # We must always avoid spaces around a bare word beginning
3049             # with ^ as in:
3050             # my $before = ${^PREMATCH};
3051             # Because all of the following cause an error in perl:
3052             # my $before = ${ ^PREMATCH };
3053             # my $before = ${ ^PREMATCH};
3054             # my $before = ${^PREMATCH };
3055             # So if brace tightness flag is -bt=0 we must temporarily reset
3056             # to bt=1. Note that here we must set tightness=1 and not 2 so
3057             # that the closing space is also avoided
3058             # (via the $j_tight_closing_paren flag in coding)
3059 4145 100 100     10814 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
  5         14  
3060              
3061             #=============================================================
3062              
3063 4145 100       9391 if ( $tightness <= 0 ) {
    100          
3064 908         1596 $ws = WS_YES;
3065             }
3066             elsif ( $tightness > 1 ) {
3067 198         338 $ws = WS_NO;
3068             }
3069             else {
3070              
3071             # find the index of the closing token
3072             my $j_closing =
3073 3039         5870 $self->[_K_closing_container_]->{$last_seqno};
3074              
3075             # If the closing token is less than five characters ahead
3076             # we must take a closer look
3077 3039 100 66     14087 if ( defined($j_closing)
      66        
3078             && $j_closing - $j < 5
3079             && $rLL->[$j_closing]->[_TYPE_SEQUENCE_] eq
3080             $last_seqno )
3081             {
3082 1187         4389 $ws =
3083             ws_in_container( $j, $j_closing, $rLL, $type, $token,
3084             $last_token );
3085 1187 100       3173 if ( $ws == WS_NO ) {
3086 995         1814 $j_tight_closing_paren = $j_closing;
3087             }
3088             }
3089             else {
3090 1852         3285 $ws = WS_YES;
3091             }
3092             }
3093             }
3094              
3095             # check for special cases which override the above rules
3096 4368 100 66     11080 if ( %opening_container_inside_ws && $last_seqno ) {
3097 23         42 my $ws_override = $opening_container_inside_ws{$last_seqno};
3098 23 100       45 if ($ws_override) { $ws = $ws_override }
  6         14  
3099             }
3100              
3101 4368         6043 $ws_4 = $ws_3 = $ws_2 = $ws_1 = $ws
3102             if DEBUG_WHITE;
3103              
3104             } ## end setting space flag inside opening tokens
3105              
3106             #---------------------------------------------------------------
3107             # Whitespace Rules Section 2:
3108             # Special checks for certain types ...
3109             #---------------------------------------------------------------
3110             # The hash '%is_special_ws_type' significantly speeds up this routine,
3111             # but be sure to update it if a new check is added.
3112             # Currently has types: qw(k w C m - Q #)
3113 35919 100       85436 if ( $is_special_ws_type{$type} ) {
    100          
    100          
3114              
3115 8336 100 100     34495 if ( $type eq 'k' ) {
    100 100        
    100 66        
    100 66        
    100          
3116              
3117             # Keywords 'for', 'foreach' are special cases for -kpit since
3118             # the opening paren does not always immediately follow the
3119             # keyword. So we have to search forward for the paren in this
3120             # case. I have limited the search to 10 tokens ahead, just in
3121             # case somebody has a big file and no opening paren. This
3122             # should be enough for all normal code. Added the level check
3123             # to fix b1236.
3124 2798 50 100     8359 if ( $is_for_foreach{$token}
      66        
      66        
3125             && %keyword_paren_inner_tightness
3126             && defined( $keyword_paren_inner_tightness{$token} )
3127             && $j < $jmax )
3128             {
3129 1         3 my $level = $rLL->[$j]->[_LEVEL_];
3130 1         3 my $jp = $j;
3131             ## NOTE: we might use the KNEXT variable to avoid this loop
3132             ## but profiling shows that little would be saved
3133 1         4 foreach my $inc ( 1 .. 9 ) {
3134 3         6 $jp++;
3135 3 50       7 last if ( $jp > $jmax );
3136 3 50       7 last if ( $rLL->[$jp]->[_LEVEL_] != $level ); # b1236
3137 3 100       9 next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' );
3138 1         13 my $seqno_p = $rLL->[$jp]->[_TYPE_SEQUENCE_];
3139 1         6 set_container_ws_by_keyword( $token, $seqno_p );
3140 1         3 last;
3141             }
3142             }
3143             }
3144              
3145             # handle a comment
3146             elsif ( $type eq '#' ) {
3147              
3148             # newline before block comment ($j==0), and
3149             # space before side comment ($j>0), so ..
3150 1091         1965 $ws = WS_YES;
3151              
3152             #---------------------------------
3153             # Nothing more to do for a comment
3154             #---------------------------------
3155 1091         2141 $rwhitespace_flags->[$j] = $ws;
3156 1091         2087 next;
3157             }
3158              
3159             # retain any space between '-' and bare word
3160             elsif ( $type eq 'w' || $type eq 'C' ) {
3161 1574 100       3697 $ws = WS_OPTIONAL if $last_type eq '-';
3162             }
3163              
3164             # retain any space between '-' and bare word; for example
3165             # avoid space between 'USER' and '-' here: <<snippets/space2.in>>
3166             # $myhash{USER-NAME}='steve';
3167             elsif ( $type eq 'm' || $type eq '-' ) {
3168 389 100       981 $ws = WS_OPTIONAL if ( $last_type eq 'w' );
3169             }
3170              
3171             # space_backslash_quote; RT #123774 <<snippets/rt123774.in>>
3172             # allow a space between a backslash and single or double quote
3173             # to avoid fooling html formatters
3174             elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ )
3175             {
3176 11 100       26 if ($rOpts_space_backslash_quote) {
3177 9 100       27 if ( $rOpts_space_backslash_quote == 1 ) {
    50          
3178 7         11 $ws = WS_OPTIONAL;
3179             }
3180 2         4 elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
3181             else { } # shouldnt happen
3182             }
3183             else {
3184 2         3 $ws = WS_NO;
3185             }
3186             }
3187             } ## end elsif ( $is_special_ws_type{$type} ...
3188              
3189             #---------------------------------------------------------------
3190             # Whitespace Rules Section 3:
3191             # Handle space on inside of closing brace pairs.
3192             #---------------------------------------------------------------
3193              
3194             # /[\}\)\]R]/
3195             elsif ( $is_closing_type{$type} ) {
3196              
3197 4368         9027 my $seqno = $rtokh->[_TYPE_SEQUENCE_];
3198 4368 100       9163 if ( $j == $j_tight_closing_paren ) {
3199              
3200 995         1703 $j_tight_closing_paren = -1;
3201 995         1638 $ws = WS_NO;
3202             }
3203             else {
3204              
3205 3373 100       7372 if ( !defined($ws) ) {
3206              
3207 3150         4674 my $tightness;
3208             my $block_type = $rblock_type_of_seqno->{$seqno}
3209 3150   100     9653 || $block_type_for_tightness{$seqno};
3210              
3211 3150 100 66     8713 if ( $block_type && $token eq '}' ) {
3212 946         1674 $tightness = $rOpts_block_brace_tightness;
3213             }
3214 2204         3928 else { $tightness = $tightness{$token} }
3215              
3216 3150 100       6209 $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
3217             }
3218             }
3219              
3220             # check for special cases which override the above rules
3221 4368 100 66     9129 if ( %closing_container_inside_ws && $seqno ) {
3222 23         35 my $ws_override = $closing_container_inside_ws{$seqno};
3223 23 100       44 if ($ws_override) { $ws = $ws_override }
  6         12  
3224             }
3225              
3226 4368         5753 $ws_4 = $ws_3 = $ws_2 = $ws
3227             if DEBUG_WHITE;
3228             } ## end setting space flag inside closing tokens
3229              
3230             #---------------------------------------------------------------
3231             # Whitespace Rules Section 4:
3232             #---------------------------------------------------------------
3233             # /^[L\{\(\[]$/
3234             elsif ( $is_opening_type{$type} ) {
3235              
3236 4368         7095 $last_type_is_opening = 1;
3237              
3238 4368 100 100     16920 if ( $token eq '(' ) {
    100 100        
3239              
3240 2117         4189 my $seqno = $rtokh->[_TYPE_SEQUENCE_];
3241              
3242             # This will have to be tweaked as tokenization changes.
3243             # We usually want a space at '} (', for example:
3244             # <<snippets/space1.in>>
3245             # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
3246             #
3247             # But not others:
3248             # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
3249             # At present, the above & block is marked as type L/R so this
3250             # case won't go through here.
3251 2117 100 100     18526 if ( $last_type eq '}' && $last_token ne ')' ) { $ws = WS_YES }
  8 100 66     20  
    100 66        
    100 66        
    50 100        
      33        
3252              
3253             # NOTE: some older versions of Perl had occasional problems if
3254             # spaces are introduced between keywords or functions and
3255             # opening parens. So the default is not to do this except is
3256             # certain cases. The current Perl seems to tolerate spaces.
3257              
3258             # Space between keyword and '('
3259             elsif ( $last_type eq 'k' ) {
3260             $ws = WS_NO
3261             unless ( $rOpts_space_keyword_paren
3262 630 100 100     3356 || $space_after_keyword{$last_token} );
3263              
3264             # Set inside space flag if requested
3265 630         1927 set_container_ws_by_keyword( $last_token, $seqno );
3266             }
3267              
3268             # Space between function and '('
3269             # -----------------------------------------------------
3270             # 'w' and 'i' checks for something like:
3271             # myfun( &myfun( ->myfun(
3272             # -----------------------------------------------------
3273              
3274             # Note that at this point an identifier may still have a
3275             # leading arrow, but the arrow will be split off during token
3276             # respacing. After that, the token may become a bare word
3277             # without leading arrow. The point is, it is best to mark
3278             # function call parens right here before that happens.
3279             # Patch: added 'C' to prevent blinker, case b934, i.e. 'pi()'
3280             # NOTE: this would be the place to allow spaces between
3281             # repeated parens, like () () (), as in case c017, but I
3282             # decided that would not be a good idea.
3283              
3284             # Updated to allow detached '->' from tokenizer (issue c140)
3285             elsif (
3286              
3287             # /^[wCUG]$/
3288             $is_wCUG{$last_type}
3289              
3290             || (
3291              
3292             # /^[wi]$/
3293             $is_wi{$last_type}
3294              
3295             && (
3296              
3297             # with prefix '->' or '&'
3298             $last_token =~ /^([\&]|->)/
3299              
3300             # or preceding token '->' (see b1337; c140)
3301             || $rtokh_last_last->[_TYPE_] eq '->'
3302              
3303             # or preceding sub call operator token '&'
3304             || ( $rtokh_last_last->[_TYPE_] eq 't'
3305             && $rtokh_last_last->[_TOKEN_] =~ /^\&\s*$/ )
3306             )
3307             )
3308             )
3309             {
3310 848 100       2386 $ws =
3311             $rOpts_space_function_paren
3312             ? $self->ws_space_function_paren( $j, $rtokh_last_last )
3313             : WS_NO;
3314              
3315 848         2721 set_container_ws_by_keyword( $last_token, $seqno );
3316 848         2325 $ris_function_call_paren->{$seqno} = 1;
3317             }
3318              
3319             # space between something like $i and ( in 'snippets/space2.in'
3320             # for $i ( 0 .. 20 ) {
3321             elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
3322 37         107 $ws = WS_YES;
3323             }
3324              
3325             # allow constant function followed by '()' to retain no space
3326             elsif ($last_type eq 'C'
3327             && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
3328             {
3329 0         0 $ws = WS_NO;
3330             }
3331             }
3332              
3333             # patch for SWITCH/CASE: make space at ']{' optional
3334             # since the '{' might begin a case or when block
3335             elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
3336 2         3 $ws = WS_OPTIONAL;
3337             }
3338              
3339             # keep space between 'sub' and '{' for anonymous sub definition,
3340             # be sure type = 'k' (added for c140)
3341 4368 100       9512 if ( $type eq '{' ) {
3342 3698 100 66     10242 if ( $last_token eq 'sub' && $last_type eq 'k' ) {
3343 161         309 $ws = WS_YES;
3344             }
3345              
3346             # this is needed to avoid no space in '){'
3347 3698 100 100     8467 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
  257         469  
3348              
3349             # avoid any space before the brace or bracket in something like
3350             # @opts{'a','b',...}
3351 3698 50 66     9506 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
3352 0         0 $ws = WS_NO;
3353             }
3354             }
3355              
3356             # The --extended-block-tightness option allows certain braces
3357             # to be treated as blocks just for setting inner whitespace
3358 4368 100 100     9329 if ( $rOpts_extended_block_tightness && $token eq '{' ) {
3359 60         105 my $seqno = $rtokh->[_TYPE_SEQUENCE_];
3360 60 100 100     217 if ( !$rblock_type_of_seqno->{$seqno}
3361             && $extended_block_tightness_list{$last_token} )
3362             {
3363              
3364             # Ok - make this brace a block type for tightness only
3365 32         79 $block_type_for_tightness{$seqno} = $last_token;
3366             }
3367             }
3368             } ## end if ( $is_opening_type{$type} ) {
3369              
3370             # always preserve whatever space was used after a possible
3371             # filehandle (except _) or here doc operator
3372 34828 100 100     113849 if (
      66        
3373             (
3374             ( $last_type eq 'Z' && $last_token ne '_' )
3375             || $last_type eq 'h'
3376             )
3377             && $type ne '#' # no longer required due to early exit for '#' above
3378             )
3379             {
3380             # no space for '$ {' even if '$' is marked as type 'Z', issue c221
3381 103 50 66     738 if ( $last_type eq 'Z' && $last_token eq '$' && $token eq '{' ) {
      33        
3382 0         0 $ws = WS_NO;
3383             }
3384             else {
3385 103         257 $ws = WS_OPTIONAL;
3386             }
3387             }
3388              
3389 34828         42885 $ws_4 = $ws_3 = $ws
3390             if DEBUG_WHITE;
3391              
3392 34828 100       59433 if ( !defined($ws) ) {
3393              
3394             #---------------------------------------------------------------
3395             # Whitespace Rules Section 4:
3396             # Use the binary rule table.
3397             #---------------------------------------------------------------
3398 24694 100       49509 if ( defined( $binary_ws_rules{$last_type}{$type} ) ) {
3399 1210         2460 $ws = $binary_ws_rules{$last_type}{$type};
3400 1210         1897 $ws_4 = $ws if DEBUG_WHITE;
3401             }
3402              
3403             #---------------------------------------------------------------
3404             # Whitespace Rules Section 5:
3405             # Apply default rules not covered above.
3406             #---------------------------------------------------------------
3407              
3408             # If we fall through to here, look at the pre-defined hash tables
3409             # for the two tokens, and:
3410             # if (they are equal) use the common value
3411             # if (either is zero or undef) use the other
3412             # if (either is -1) use it
3413             # That is,
3414             # left vs right
3415             # 1 vs 1 --> 1
3416             # 0 vs 0 --> 0
3417             # -1 vs -1 --> -1
3418             #
3419             # 0 vs -1 --> -1
3420             # 0 vs 1 --> 1
3421             # 1 vs 0 --> 1
3422             # -1 vs 0 --> -1
3423             #
3424             # -1 vs 1 --> -1
3425             # 1 vs -1 --> -1
3426             else {
3427 23484         35859 my $wl = $want_left_space{$type};
3428 23484         34429 my $wr = $want_right_space{$last_type};
3429 23484 100       43022 if ( !defined($wl) ) {
    100          
3430 6365 100       12052 $ws = defined($wr) ? $wr : 0;
3431             }
3432             elsif ( !defined($wr) ) {
3433 5643         8499 $ws = $wl;
3434             }
3435             else {
3436 11476 100 66     34420 $ws =
3437             ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
3438             }
3439             }
3440             }
3441              
3442             # Treat newline as a whitespace. Otherwise, we might combine
3443             # 'Send' and '-recipients' here according to the above rules:
3444             # <<snippets/space3.in>>
3445             # my $msg = new Fax::Send
3446             # -recipients => $to,
3447             # -data => $data;
3448 34828 100 100     62816 if ( !$ws
3449             && $rtokh->[_LINE_INDEX_] != $rtokh_last->[_LINE_INDEX_] )
3450             {
3451 280         540 $ws = WS_YES;
3452             }
3453              
3454 34828         53366 $rwhitespace_flags->[$j] = $ws;
3455              
3456             # remember non-blank, non-comment tokens
3457 34828         46881 $last_token = $token;
3458 34828         43382 $last_type = $type;
3459 34828         43818 $rtokh_last_last = $rtokh_last;
3460 34828         44022 $rtokh_last = $rtokh;
3461              
3462             # Programming note: for some reason, it is very much faster to 'next'
3463             # out of this loop here than to put the DEBUG coding in a block.
3464             # But note that the debug code must then update its own copies
3465             # of $last_token and $last_type.
3466 34828         54722 next if ( !DEBUG_WHITE );
3467              
3468 0         0 my $str = substr( $last_token_dbg, 0, 15 );
3469 0         0 $str .= SPACE x ( 16 - length($str) );
3470 0 0       0 if ( !defined($ws_1) ) { $ws_1 = "*" }
  0         0  
3471 0 0       0 if ( !defined($ws_2) ) { $ws_2 = "*" }
  0         0  
3472 0 0       0 if ( !defined($ws_3) ) { $ws_3 = "*" }
  0         0  
3473 0 0       0 if ( !defined($ws_4) ) { $ws_4 = "*" }
  0         0  
3474 0         0 print STDOUT
3475             "NEW WHITE: i=$j $str $last_type_dbg $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
3476              
3477             # reset for next pass
3478 0         0 $ws_1 = $ws_2 = $ws_3 = $ws_4 = undef;
3479              
3480 0         0 $last_token_dbg = $token;
3481 0         0 $last_type_dbg = $type;
3482              
3483             } ## end main loop
3484              
3485 549 100       3814 if ( $rOpts->{'tight-secret-operators'} ) {
3486 1         6 new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
3487             }
3488 549         1895 $self->[_ris_function_call_paren_] = $ris_function_call_paren;
3489 549         5794 return $rwhitespace_flags;
3490              
3491             } ## end sub set_whitespace_flags
3492              
3493             sub set_container_ws_by_keyword {
3494              
3495 1479     1479 0 3671 my ( $word, $sequence_number ) = @_;
3496 1479 100       3788 return unless (%keyword_paren_inner_tightness);
3497              
3498             # We just saw a keyword (or other function name) followed by an opening
3499             # paren. Now check to see if the following paren should have special
3500             # treatment for its inside space. If so we set a hash value using the
3501             # sequence number as key.
3502 12 50 33     46 if ( $word && $sequence_number ) {
3503 12         24 my $tightness = $keyword_paren_inner_tightness{$word};
3504 12 100 66     43 if ( defined($tightness) && $tightness != 1 ) {
3505 6 50       16 my $ws_flag = $tightness == 0 ? WS_YES : WS_NO;
3506 6         15 $opening_container_inside_ws{$sequence_number} = $ws_flag;
3507 6         11 $closing_container_inside_ws{$sequence_number} = $ws_flag;
3508             }
3509             }
3510 12         17 return;
3511             } ## end sub set_container_ws_by_keyword
3512              
3513             sub ws_in_container {
3514              
3515 1187     1187 0 3507 my ( $j, $j_closing, $rLL, $type, $token, $last_token ) = @_;
3516              
3517             # Given:
3518             # $j = index of token following an opening container token
3519             # $type, $token = the type and token at index $j
3520             # $j_closing = closing token of the container
3521             # $last_token = the opening token of the container
3522             # Return:
3523             # WS_NO if there is just one token in the container (with exceptions)
3524             # WS_YES otherwise
3525              
3526             #------------------------------------
3527             # Look forward for the closing token;
3528             #------------------------------------
3529 1187 50       3142 if ( $j + 1 > $j_closing ) { return WS_NO }
  0         0  
3530              
3531             # Patch to count '-foo' as single token so that
3532             # each of $a{-foo} and $a{foo} and $a{'foo'} do
3533             # not get spaces with default formatting.
3534 1187         1851 my $j_here = $j;
3535 1187 50 66     3270 ++$j_here
      66        
3536             if ( $token eq '-'
3537             && $last_token eq '{'
3538             && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
3539              
3540             # Patch to count a sign separated from a number as a single token, as
3541             # in the following line. Otherwise, it takes two steps to converge:
3542             # deg2rad(- 0.5)
3543 1187 0 66     5231 if ( ( $type eq 'm' || $type eq 'p' )
      66        
      66        
      33        
      33        
3544             && $j < $j_closing + 1
3545             && $rLL->[ $j + 1 ]->[_TYPE_] eq 'b'
3546             && $rLL->[ $j + 2 ]->[_TYPE_] eq 'n'
3547             && $rLL->[ $j + 2 ]->[_TOKEN_] =~ /^\d/ )
3548             {
3549 0         0 $j_here = $j + 2;
3550             }
3551              
3552             # $j_next is where a closing token should be if the container has
3553             # just a "single" token
3554 1187 50       2876 if ( $j_here + 1 > $j_closing ) { return WS_NO }
  0         0  
3555 1187 100       3494 my $j_next =
3556             ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
3557             ? $j_here + 2
3558             : $j_here + 1;
3559              
3560             #-----------------------------------------------------------------
3561             # Now decide: if we get to the closing token we will keep it tight
3562             #-----------------------------------------------------------------
3563 1187 100 100     4568 if (
3564             $j_next == $j_closing
3565              
3566             # OLD PROBLEM: but watch out for this: [ [ ] (misc.t)
3567             # No longer necessary because of the previous check on sequence numbers
3568             ##&& $last_token ne $token
3569              
3570             # double diamond is usually spaced
3571             && $token ne '<<>>'
3572              
3573             )
3574             {
3575 995         2408 return WS_NO;
3576             }
3577              
3578 192         515 return WS_YES;
3579              
3580             } ## end sub ws_in_container
3581              
3582             sub ws_space_function_paren {
3583              
3584 32     32 0 73 my ( $self, $j, $rtokh_last_last ) = @_;
3585              
3586             # Called if --space-function-paren is set to see if it might cause
3587             # a problem. The manual warns the user about potential problems with
3588             # this flag. Here we just try to catch one common problem.
3589              
3590             # Given:
3591             # $j = index of '(' after function name
3592             # Return:
3593             # WS_NO if no space
3594             # WS_YES otherwise
3595              
3596             # This was added to fix for issue c166. Ignore -sfp at a possible indirect
3597             # object location. For example, do not convert this:
3598             # print header() ...
3599             # to this:
3600             # print header () ...
3601             # because in this latter form, header may be taken to be a file handle
3602             # instead of a function call.
3603              
3604             # Start with the normal value for -sfp:
3605 32         52 my $ws = WS_YES;
3606              
3607             # now check to be sure we don't cause a problem:
3608 32         54 my $type_ll = $rtokh_last_last->[_TYPE_];
3609 32         57 my $tok_ll = $rtokh_last_last->[_TOKEN_];
3610              
3611             # NOTE: this is just a minimal check. For example, we might also check
3612             # for something like this:
3613             # print ( header ( ..
3614 32 50 66     96 if ( $type_ll eq 'k' && $is_indirect_object_taker{$tok_ll} ) {
3615 0         0 $ws = WS_NO;
3616             }
3617              
3618 32         66 return $ws;
3619              
3620             } ## end sub ws_space_function_paren
3621              
3622             } ## end closure set_whitespace_flags
3623              
3624             sub dump_want_left_space {
3625 0     0 0 0 my $fh = shift;
3626 0         0 local $LIST_SEPARATOR = "\n";
3627 0         0 $fh->print(<<EOM);
3628             These values are the main control of whitespace to the left of a token type;
3629             They may be altered with the -wls parameter.
3630             For a list of token types, use perltidy --dump-token-types (-dtt)
3631             1 means the token wants a space to its left
3632             -1 means the token does not want a space to its left
3633             ------------------------------------------------------------------------
3634             EOM
3635 0         0 foreach my $key ( sort keys %want_left_space ) {
3636 0         0 $fh->print("$key\t$want_left_space{$key}\n");
3637             }
3638 0         0 return;
3639             } ## end sub dump_want_left_space
3640              
3641             sub dump_want_right_space {
3642 0     0 0 0 my $fh = shift;
3643 0         0 local $LIST_SEPARATOR = "\n";
3644 0         0 $fh->print(<<EOM);
3645             These values are the main control of whitespace to the right of a token type;
3646             They may be altered with the -wrs parameter.
3647             For a list of token types, use perltidy --dump-token-types (-dtt)
3648             1 means the token wants a space to its right
3649             -1 means the token does not want a space to its right
3650             ------------------------------------------------------------------------
3651             EOM
3652 0         0 foreach my $key ( sort keys %want_right_space ) {
3653 0         0 $fh->print("$key\t$want_right_space{$key}\n");
3654             }
3655 0         0 return;
3656             } ## end sub dump_want_right_space
3657              
3658             { ## begin closure is_essential_whitespace
3659              
3660             my %is_sort_grep_map;
3661             my %is_for_foreach;
3662             my %is_digraph;
3663             my %is_trigraph;
3664             my %essential_whitespace_filter_l1;
3665             my %essential_whitespace_filter_r1;
3666             my %essential_whitespace_filter_l2;
3667             my %essential_whitespace_filter_r2;
3668             my %is_type_with_space_before_bareword;
3669             my %is_special_variable_char;
3670              
3671             BEGIN {
3672              
3673 38     38   230 my @q;
3674              
3675             # NOTE: This hash is like the global %is_sort_map_grep, but it ignores
3676             # grep aliases on purpose, since here we are looking parens, not braces
3677 38         182 @q = qw(sort grep map);
3678 38         263 @is_sort_grep_map{@q} = (1) x scalar(@q);
3679              
3680 38         144 @q = qw(for foreach);
3681 38         218 @is_for_foreach{@q} = (1) x scalar(@q);
3682              
3683 38         485 @q = qw(
3684             .. :: << >> ** && || // -> => += -= .= %= &= |= ^= *= <>
3685             <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
3686             );
3687 38         1149 @is_digraph{@q} = (1) x scalar(@q);
3688              
3689 38         180 @q = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~);
3690 38         285 @is_trigraph{@q} = (1) x scalar(@q);
3691              
3692             # These are used as a speedup filters for sub is_essential_whitespace.
3693              
3694             # Filter 1:
3695             # These left side token types USUALLY do not require a space:
3696 38         124 @q = qw( ; { } [ ] L R );
3697 38         94 push @q, ',';
3698 38         74 push @q, ')';
3699 38         90 push @q, '(';
3700 38         212 @essential_whitespace_filter_l1{@q} = (1) x scalar(@q);
3701              
3702             # BUT some might if followed by these right token types
3703 38         122 @q = qw( pp mm << <<= h );
3704 38         162 @essential_whitespace_filter_r1{@q} = (1) x scalar(@q);
3705              
3706             # Filter 2:
3707             # These right side filters usually do not require a space
3708 38         110 @q = qw( ; ] R } );
3709 38         75 push @q, ',';
3710 38         76 push @q, ')';
3711 38         172 @essential_whitespace_filter_r2{@q} = (1) x scalar(@q);
3712              
3713             # BUT some might if followed by these left token types
3714 38         112 @q = qw( h Z );
3715 38         104 @essential_whitespace_filter_l2{@q} = (1) x scalar(@q);
3716              
3717             # Keep a space between certain types and any bareword:
3718             # Q: keep a space between a quote and a bareword to prevent the
3719             # bareword from becoming a quote modifier.
3720             # &: do not remove space between an '&' and a bare word because
3721             # it may turn into a function evaluation, like here
3722             # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
3723             # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
3724 38         79 @q = qw( Q & );
3725 38         102 @is_type_with_space_before_bareword{@q} = (1) x scalar(@q);
3726              
3727             # These are the only characters which can (currently) form special
3728             # variables, like $^W: (issue c066, c068).
3729 38         178 @q =
3730             qw{ ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ };
3731 38         31978 @{is_special_variable_char}{@q} = (1) x scalar(@q);
3732              
3733             } ## end BEGIN
3734              
3735             sub is_essential_whitespace {
3736              
3737             # Essential whitespace means whitespace which cannot be safely deleted
3738             # without risking the introduction of a syntax error.
3739             # We are given three tokens and their types:
3740             # ($tokenl, $typel) is the token to the left of the space in question
3741             # ($tokenr, $typer) is the token to the right of the space in question
3742             # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
3743             #
3744             # Note1: This routine should almost never need to be changed. It is
3745             # for avoiding syntax problems rather than for formatting.
3746              
3747             # Note2: The -mangle option causes large numbers of calls to this
3748             # routine and therefore is a good test. So if a change is made, be sure
3749             # to use nytprof to profile with both old and revised coding using the
3750             # -mangle option and check differences.
3751              
3752 6252     6252 0 14359 my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
3753              
3754             # This is potentially a very slow routine but the following quick
3755             # filters typically catch and handle over 90% of the calls.
3756              
3757             # Filter 1: usually no space required after common types ; , [ ] { } ( )
3758             return
3759             if ( $essential_whitespace_filter_l1{$typel}
3760 6252 100 100     27067 && !$essential_whitespace_filter_r1{$typer} );
3761              
3762             # Filter 2: usually no space before common types ; ,
3763             return
3764             if ( $essential_whitespace_filter_r2{$typer}
3765 1303 100 66     5458 && !$essential_whitespace_filter_l2{$typel} );
3766              
3767             # Filter 3: Handle side comments: a space is only essential if the left
3768             # token ends in '$' For example, we do not want to create $#foo below:
3769              
3770             # sub t086
3771             # ( #foo)))
3772             # $ #foo)))
3773             # a #foo)))
3774             # ) #foo)))
3775             # { ... }
3776              
3777             # Also, I prefer not to put a ? and # together because ? used to be
3778             # a pattern delimiter and spacing was used if guessing was needed.
3779              
3780 1008 100       2642 if ( $typer eq '#' ) {
3781              
3782 6 100 66     39 return 1
      66        
3783             if ( $tokenl
3784             && ( $typel eq '?' || substr( $tokenl, -1 ) eq '$' ) );
3785 4         10 return;
3786             }
3787              
3788 1002   100     4982 my $tokenr_is_bareword = $tokenr =~ /^\w/ && $tokenr !~ /^\d/;
3789 1002         1895 my $tokenr_is_open_paren = $tokenr eq '(';
3790 1002         2141 my $token_joined = $tokenl . $tokenr;
3791 1002         2186 my $tokenl_is_dash = $tokenl eq '-';
3792              
3793             my $result =
3794              
3795             # never combine two bare words or numbers
3796             # examples: and ::ok(1)
3797             # return ::spw(...)
3798             # for bla::bla:: abc
3799             # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
3800             # $input eq"quit" to make $inputeq"quit"
3801             # my $size=-s::SINK if $file; <==OK but we won't do it
3802             # don't join something like: for bla::bla:: abc
3803             # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
3804             ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
3805             && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
3806              
3807             # do not combine a number with a concatenation dot
3808             # example: pom.caputo:
3809             # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
3810             || $typel eq 'n' && $tokenr eq '.'
3811             || $typer eq 'n' && $tokenl eq '.'
3812              
3813             # cases of a space before a bareword...
3814             || (
3815             $tokenr_is_bareword && (
3816              
3817             # do not join a minus with a bare word, because you might form
3818             # a file test operator. Example from Complex.pm:
3819             # if (CORE::abs($z - i) < $eps);
3820             # "z-i" would be taken as a file test.
3821             $tokenl_is_dash && length($tokenr) == 1
3822              
3823             # and something like this could become ambiguous without space
3824             # after the '-':
3825             # use constant III=>1;
3826             # $a = $b - III;
3827             # and even this:
3828             # $a = - III;
3829             || $tokenl_is_dash && $typer =~ /^[wC]$/
3830              
3831             # keep space between types Q & and a bareword
3832             || $is_type_with_space_before_bareword{$typel}
3833              
3834             # +-: binary plus and minus before a bareword could get
3835             # converted into unary plus and minus on next pass through the
3836             # tokenizer. This can lead to blinkers: cases b660 b670 b780
3837             # b781 b787 b788 b790 So we keep a space unless the +/- clearly
3838             # follows an operator
3839             || ( ( $typel eq '+' || $typel eq '-' )
3840             && $typell !~ /^[niC\)\}\]R]$/ )
3841              
3842             # keep a space between a token ending in '$' and any word;
3843             # this caused trouble: "die @$ if $@"
3844             || $typel eq 'i' && substr( $tokenl, -1, 1 ) eq '$'
3845              
3846             # don't combine $$ or $# with any alphanumeric
3847             # (testfile mangle.t with --mangle)
3848             || $tokenl eq '$$'
3849             || $tokenl eq '$#'
3850              
3851             )
3852             ) ## end $tokenr_is_bareword
3853              
3854             # OLD, not used
3855             # '= -' should not become =- or you will get a warning
3856             # about reversed -=
3857             # || ($tokenr eq '-')
3858              
3859             # do not join a bare word with a minus, like between 'Send' and
3860             # '-recipients' here <<snippets/space3.in>>
3861             # my $msg = new Fax::Send
3862             # -recipients => $to,
3863             # -data => $data;
3864             # This is the safest thing to do. If we had the token to the right of
3865             # the minus we could do a better check.
3866             #
3867             # And do not combine a bareword and a quote, like this:
3868             # oops "Your login, $Bad_Login, is not valid";
3869             # It can cause a syntax error if oops is a sub
3870             || $typel eq 'w' && ( $tokenr eq '-' || $typer eq 'Q' )
3871              
3872             # perl is very fussy about spaces before <<
3873             || substr( $tokenr, 0, 2 ) eq '<<'
3874              
3875             # avoid combining tokens to create new meanings. Example:
3876             # $a+ +$b must not become $a++$b
3877             || ( $is_digraph{$token_joined} )
3878             || $is_trigraph{$token_joined}
3879              
3880             # another example: do not combine these two &'s:
3881             # allow_options & &OPT_EXECCGI
3882             || $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) }
3883              
3884             # retain any space after possible filehandle
3885             # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
3886             # but no space for '$ {' even if '$' is marked as type 'Z', issue c221
3887             || ( $typel eq 'Z' && !( $tokenl eq '$' && $tokenr eq '{' ) )
3888              
3889             # Added 'Y' here 16 Jan 2021 to prevent -mangle option from removing
3890             # space after type Y. Otherwise, it will get parsed as type 'Z' later
3891             # and any space would have to be added back manually if desired.
3892             || $typel eq 'Y'
3893              
3894             # Perl is sensitive to whitespace after the + here:
3895             # $b = xvals $a + 0.1 * yvals $a;
3896             || $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/
3897              
3898             || (
3899             $tokenr_is_open_paren && (
3900              
3901             # keep paren separate in 'use Foo::Bar ()'
3902             ( $typel eq 'w' && $typell eq 'k' && $tokenll eq 'use' )
3903              
3904             # OLD: keep any space between filehandle and paren:
3905             # file mangle.t with --mangle:
3906             # NEW: this test is no longer necessary here (moved above)
3907             ## || $typel eq 'Y'
3908              
3909             # must have space between grep and left paren; "grep(" will fail
3910             || $is_sort_grep_map{$tokenl}
3911              
3912             # don't stick numbers next to left parens, as in:
3913             #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
3914             || $typel eq 'n'
3915             )
3916             ) ## end $tokenr_is_open_paren
3917              
3918             # retain any space after here doc operator ( hereerr.t)
3919             || $typel eq 'h'
3920              
3921             # be careful with a space around ++ and --, to avoid ambiguity as to
3922             # which token it applies
3923             || ( $typer eq 'pp' || $typer eq 'mm' ) && $tokenl !~ /^[\;\{\(\[]/
3924             || ( $typel eq '++' || $typel eq '--' )
3925             && $tokenr !~ /^[\;\}\)\]]/
3926              
3927             # need space after foreach my; for example, this will fail in
3928             # older versions of Perl:
3929             # foreach my$ft(@filetypes)...
3930             || (
3931             $tokenl eq 'my'
3932              
3933             && substr( $tokenr, 0, 1 ) eq '$'
3934              
3935             # /^(for|foreach)$/
3936             && $is_for_foreach{$tokenll}
3937             )
3938              
3939             # Keep space after like $^ if needed to avoid forming a different
3940             # special variable (issue c068). For example:
3941             # my $aa = $^ ? "none" : "ok";
3942             || ( $typel eq 'i'
3943             && length($tokenl) == 2
3944             && substr( $tokenl, 1, 1 ) eq '^'
3945 1002   33     47374 && $is_special_variable_char{ substr( $tokenr, 0, 1 ) } )
3946              
3947             # We must be sure that a space between a ? and a quoted string
3948             # remains if the space before the ? remains. [Loca.pm, lockarea]
3949             # ie,
3950             # $b=join $comma ? ',' : ':', @_; # ok
3951             # $b=join $comma?',' : ':', @_; # ok!
3952             # $b=join $comma ?',' : ':', @_; # error!
3953             # Not really required:
3954             ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
3955              
3956             # Space stacked labels...
3957             # Not really required: Perl seems to accept non-spaced labels.
3958             ## || $typel eq 'J' && $typer eq 'J'
3959              
3960             ; # the value of this long logic sequence is the result we want
3961 1002         3553 return $result;
3962             } ## end sub is_essential_whitespace
3963             } ## end closure is_essential_whitespace
3964              
3965             { ## begin closure new_secret_operator_whitespace
3966              
3967             my %secret_operators;
3968             my %is_leading_secret_token;
3969              
3970             BEGIN {
3971              
3972             # token lists for perl secret operators as compiled by Philippe Bruhat
3973             # at: https://metacpan.org/module/perlsecret
3974 38     38   935 %secret_operators = (
3975             'Goatse' => [qw#= ( ) =#], #=( )=
3976             'Venus1' => [qw#0 +#], # 0+
3977             'Venus2' => [qw#+ 0#], # +0
3978             'Enterprise' => [qw#) x ! !#], # ()x!!
3979             'Kite1' => [qw#~ ~ <>#], # ~~<>
3980             'Kite2' => [qw#~~ <>#], # ~~<>
3981             'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
3982             'Bang bang ' => [qw#! !#], # !!
3983             );
3984              
3985             # The following operators and constants are not included because they
3986             # are normally kept tight by perltidy:
3987             # ~~ <~>
3988             #
3989              
3990             # Make a lookup table indexed by the first token of each operator:
3991             # first token => [list, list, ...]
3992 38         262 foreach my $value ( values(%secret_operators) ) {
3993 304         549 my $tok = $value->[0];
3994 304         447 push @{ $is_leading_secret_token{$tok} }, $value;
  304         71918  
3995             }
3996             } ## end BEGIN
3997              
3998             sub new_secret_operator_whitespace {
3999              
4000 1     1 0 4 my ( $rlong_array, $rwhitespace_flags ) = @_;
4001              
4002             # Loop over all tokens in this line
4003 1         4 my ( $token, $type );
4004 1         2 my $jmax = @{$rlong_array} - 1;
  1         4  
4005 1         4 foreach my $j ( 0 .. $jmax ) {
4006              
4007 9         16 $token = $rlong_array->[$j]->[_TOKEN_];
4008 9         13 $type = $rlong_array->[$j]->[_TYPE_];
4009              
4010             # Skip unless this token might start a secret operator
4011 9 100       19 next if ( $type eq 'b' );
4012 6 100       16 next unless ( $is_leading_secret_token{$token} );
4013              
4014             # Loop over all secret operators with this leading token
4015 2         7 foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
  2         8  
4016 2         3 my $jend = $j - 1;
4017 2         3 foreach my $tok ( @{$rpattern} ) {
  2         8  
4018 4         7 $jend++;
4019 4 100 66     18 $jend++
4020              
4021             if ( $jend <= $jmax
4022             && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
4023 4 100 66     54 if ( $jend > $jmax
4024             || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
4025             {
4026 1         5 $jend = undef;
4027 1         2 last;
4028             }
4029             }
4030              
4031 2 100       17 if ($jend) {
4032              
4033             # set flags to prevent spaces within this operator
4034 1         5 foreach my $jj ( $j + 1 .. $jend ) {
4035 1         2 $rwhitespace_flags->[$jj] = WS_NO;
4036             }
4037 1         6 $j = $jend;
4038 1         4 last;
4039             }
4040             } ## End Loop over all operators
4041             } ## End loop over all tokens
4042 1         14 return;
4043             } ## end sub new_secret_operator_whitespace
4044             } ## end closure new_secret_operator_whitespace
4045              
4046             { ## begin closure set_bond_strengths
4047              
4048             # These routines and variables are involved in deciding where to break very
4049             # long lines.
4050              
4051             my %is_good_keyword_breakpoint;
4052             my %is_lt_gt_le_ge;
4053             my %is_container_token;
4054              
4055             my %binary_bond_strength_nospace;
4056             my %binary_bond_strength;
4057             my %nobreak_lhs;
4058             my %nobreak_rhs;
4059              
4060             my @bias_tokens;
4061             my %bias_hash;
4062             my %bias;
4063             my $delta_bias;
4064              
4065             sub initialize_bond_strength_hashes {
4066              
4067 554     554 0 1284 my @q;
4068 554         2721 @q = qw(if unless while until for foreach);
4069 554         3670 @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
4070              
4071 554         2293 @q = qw(lt gt le ge);
4072 554         3148 @is_lt_gt_le_ge{@q} = (1) x scalar(@q);
4073              
4074 554         2219 @q = qw/ ( [ { } ] ) /;
4075 554         2299 @is_container_token{@q} = (1) x scalar(@q);
4076              
4077             # The decision about where to break a line depends upon a "bond
4078             # strength" between tokens. The LOWER the bond strength, the MORE
4079             # likely a break. A bond strength may be any value but to simplify
4080             # things there are several pre-defined strength levels:
4081              
4082             # NO_BREAK => 10000;
4083             # VERY_STRONG => 100;
4084             # STRONG => 2.1;
4085             # NOMINAL => 1.1;
4086             # WEAK => 0.8;
4087             # VERY_WEAK => 0.55;
4088              
4089             # The strength values are based on trial-and-error, and need to be
4090             # tweaked occasionally to get desired results. Some comments:
4091             #
4092             # 1. Only relative strengths are important. small differences
4093             # in strengths can make big formatting differences.
4094             # 2. Each indentation level adds one unit of bond strength.
4095             # 3. A value of NO_BREAK makes an unbreakable bond
4096             # 4. A value of VERY_WEAK is the strength of a ','
4097             # 5. Values below NOMINAL are considered ok break points.
4098             # 6. Values above NOMINAL are considered poor break points.
4099             #
4100             # The bond strengths should roughly follow precedence order where
4101             # possible. If you make changes, please check the results very
4102             # carefully on a variety of scripts. Testing with the -extrude
4103             # options is particularly helpful in exercising all of the rules.
4104              
4105             # Wherever possible, bond strengths are defined in the following
4106             # tables. There are two main stages to setting bond strengths and
4107             # two types of tables:
4108             #
4109             # The first stage involves looking at each token individually and
4110             # defining left and right bond strengths, according to if we want
4111             # to break to the left or right side, and how good a break point it
4112             # is. For example tokens like =, ||, && make good break points and
4113             # will have low strengths, but one might want to break on either
4114             # side to put them at the end of one line or beginning of the next.
4115             #
4116             # The second stage involves looking at certain pairs of tokens and
4117             # defining a bond strength for that particular pair. This second
4118             # stage has priority.
4119              
4120             #---------------------------------------------------------------
4121             # Bond Strength BEGIN Section 1.
4122             # Set left and right bond strengths of individual tokens.
4123             #---------------------------------------------------------------
4124              
4125             # NOTE: NO_BREAK's set in this section first are HINTS which will
4126             # probably not be honored. Essential NO_BREAKS's should be set in
4127             # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
4128             # of this subroutine.
4129              
4130             # Note that we are setting defaults in this section. The user
4131             # cannot change bond strengths but can cause the left and right
4132             # bond strengths of any token type to be swapped through the use of
4133             # the -wba and -wbb flags. In this way the user can determine if a
4134             # breakpoint token should appear at the end of one line or the
4135             # beginning of the next line.
4136              
4137 554         12432 %right_bond_strength = ();
4138 554         9666 %left_bond_strength = ();
4139 554         4235 %binary_bond_strength_nospace = ();
4140 554         9800 %binary_bond_strength = ();
4141 554         1839 %nobreak_lhs = ();
4142 554         1743 %nobreak_rhs = ();
4143              
4144             # The hash keys in this section are token types, plus the text of
4145             # certain keywords like 'or', 'and'.
4146              
4147             # no break around possible filehandle
4148 554         2009 $left_bond_strength{'Z'} = NO_BREAK;
4149 554         1758 $right_bond_strength{'Z'} = NO_BREAK;
4150              
4151             # never put a bare word on a new line:
4152             # example print (STDERR, "bla"); will fail with break after (
4153 554         1624 $left_bond_strength{'w'} = NO_BREAK;
4154              
4155             # blanks always have infinite strength to force breaks after
4156             # real tokens
4157 554         1728 $right_bond_strength{'b'} = NO_BREAK;
4158              
4159             # try not to break on exponentiation
4160 554         2272 @q = qw# ** .. ... <=> #;
4161 554         2500 @left_bond_strength{@q} = (STRONG) x scalar(@q);
4162 554         2108 @right_bond_strength{@q} = (STRONG) x scalar(@q);
4163              
4164             # The comma-arrow has very low precedence but not a good break point
4165 554         1600 $left_bond_strength{'=>'} = NO_BREAK;
4166 554         1549 $right_bond_strength{'=>'} = NOMINAL;
4167              
4168             # ok to break after label
4169 554         1567 $left_bond_strength{'J'} = NO_BREAK;
4170 554         1563 $right_bond_strength{'J'} = NOMINAL;
4171 554         1456 $left_bond_strength{'j'} = STRONG;
4172 554         1459 $right_bond_strength{'j'} = STRONG;
4173 554         1601 $left_bond_strength{'A'} = STRONG;
4174 554         1410 $right_bond_strength{'A'} = STRONG;
4175              
4176 554         1373 $left_bond_strength{'->'} = STRONG;
4177 554         1374 $right_bond_strength{'->'} = VERY_STRONG;
4178              
4179 554         1479 $left_bond_strength{'CORE::'} = NOMINAL;
4180 554         1434 $right_bond_strength{'CORE::'} = NO_BREAK;
4181              
4182             # breaking AFTER modulus operator is ok:
4183 554         1721 @q = qw< % >;
4184 554         1944 @left_bond_strength{@q} = (STRONG) x scalar(@q);
4185 554         1803 @right_bond_strength{@q} =
4186             ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
4187              
4188             # Break AFTER math operators * and /
4189 554         1962 @q = qw< * / x >;
4190 554         2066 @left_bond_strength{@q} = (STRONG) x scalar(@q);
4191 554         2096 @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
4192              
4193             # Break AFTER weakest math operators + and -
4194             # Make them weaker than * but a bit stronger than '.'
4195 554         1996 @q = qw< + - >;
4196 554         1985 @left_bond_strength{@q} = (STRONG) x scalar(@q);
4197 554         1910 @right_bond_strength{@q} =
4198             ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
4199              
4200             # Define left strength of unary plus and minus (fixes case b511)
4201 554         1998 $left_bond_strength{p} = $left_bond_strength{'+'};
4202 554         1760 $left_bond_strength{m} = $left_bond_strength{'-'};
4203              
4204             # And make right strength of unary plus and minus very high.
4205             # Fixes cases b670 b790
4206 554         1571 $right_bond_strength{p} = NO_BREAK;
4207 554         1371 $right_bond_strength{m} = NO_BREAK;
4208              
4209             # breaking BEFORE these is just ok:
4210 554         1753 @q = qw# >> << #;
4211 554         1862 @right_bond_strength{@q} = (STRONG) x scalar(@q);
4212 554         2101 @left_bond_strength{@q} = (NOMINAL) x scalar(@q);
4213              
4214             # breaking before the string concatenation operator seems best
4215             # because it can be hard to see at the end of a line
4216 554         1677 $right_bond_strength{'.'} = STRONG;
4217 554         1682 $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
4218              
4219 554         1947 @q = qw< } ] ) R >;
4220 554         2258 @left_bond_strength{@q} = (STRONG) x scalar(@q);
4221 554         2079 @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
4222              
4223             # make these a little weaker than nominal so that they get
4224             # favored for end-of-line characters
4225 554         2298 @q = qw< != == =~ !~ ~~ !~~ >;
4226 554         2574 @left_bond_strength{@q} = (STRONG) x scalar(@q);
4227 554         2453 @right_bond_strength{@q} =
4228             ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
4229              
4230             # break AFTER these
4231 554         2255 @q = qw# < > | & >= <= #;
4232 554         2275 @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
4233 554         2397 @right_bond_strength{@q} =
4234             ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
4235              
4236             # breaking either before or after a quote is ok
4237             # but bias for breaking before a quote
4238 554         1683 $left_bond_strength{'Q'} = NOMINAL;
4239 554         1646 $right_bond_strength{'Q'} = NOMINAL + 0.02;
4240 554         1710 $left_bond_strength{'q'} = NOMINAL;
4241 554         1532 $right_bond_strength{'q'} = NOMINAL;
4242              
4243             # starting a line with a keyword is usually ok
4244 554         1397 $left_bond_strength{'k'} = NOMINAL;
4245              
4246             # we usually want to bond a keyword strongly to what immediately
4247             # follows, rather than leaving it stranded at the end of a line
4248 554         1388 $right_bond_strength{'k'} = STRONG;
4249              
4250 554         1447 $left_bond_strength{'G'} = NOMINAL;
4251 554         1351 $right_bond_strength{'G'} = STRONG;
4252              
4253             # assignment operators
4254 554         3238 @q = qw(
4255             = **= += *= &= <<= &&=
4256             -= /= |= >>= ||= //=
4257             .= %= ^=
4258             x=
4259             );
4260              
4261             # Default is to break AFTER various assignment operators
4262 554         4140 @left_bond_strength{@q} = (STRONG) x scalar(@q);
4263 554         3762 @right_bond_strength{@q} =
4264             ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
4265              
4266             # Default is to break BEFORE '&&' and '||' and '//'
4267             # set strength of '||' to same as '=' so that chains like
4268             # $a = $b || $c || $d will break before the first '||'
4269 554         1712 $right_bond_strength{'||'} = NOMINAL;
4270 554         2019 $left_bond_strength{'||'} = $right_bond_strength{'='};
4271              
4272             # same thing for '//'
4273 554         1586 $right_bond_strength{'//'} = NOMINAL;
4274 554         1731 $left_bond_strength{'//'} = $right_bond_strength{'='};
4275              
4276             # set strength of && a little higher than ||
4277 554         1583 $right_bond_strength{'&&'} = NOMINAL;
4278 554         1824 $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
4279              
4280 554         1475 $left_bond_strength{';'} = VERY_STRONG;
4281 554         1540 $right_bond_strength{';'} = VERY_WEAK;
4282 554         1410 $left_bond_strength{'f'} = VERY_STRONG;
4283              
4284             # make right strength of for ';' a little less than '='
4285             # to make for contents break after the ';' to avoid this:
4286             # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
4287             # $number_of_fields )
4288             # and make it weaker than ',' and 'and' too
4289 554         1463 $right_bond_strength{'f'} = VERY_WEAK - 0.03;
4290              
4291             # The strengths of ?/: should be somewhere between
4292             # an '=' and a quote (NOMINAL),
4293             # make strength of ':' slightly less than '?' to help
4294             # break long chains of ? : after the colons
4295 554         1443 $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
4296 554         1425 $right_bond_strength{':'} = NO_BREAK;
4297 554         1674 $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
4298 554         1424 $right_bond_strength{'?'} = NO_BREAK;
4299              
4300 554         1416 $left_bond_strength{','} = VERY_STRONG;
4301 554         1375 $right_bond_strength{','} = VERY_WEAK;
4302              
4303             # remaining digraphs and trigraphs not defined above
4304 554         2127 @q = qw( :: <> ++ --);
4305 554         2431 @left_bond_strength{@q} = (WEAK) x scalar(@q);
4306 554         2119 @right_bond_strength{@q} = (STRONG) x scalar(@q);
4307              
4308             # Set bond strengths of certain keywords
4309             # make 'or', 'err', 'and' slightly weaker than a ','
4310 554         1717 $left_bond_strength{'and'} = VERY_WEAK - 0.01;
4311 554         1605 $left_bond_strength{'or'} = VERY_WEAK - 0.02;
4312 554         1501 $left_bond_strength{'err'} = VERY_WEAK - 0.02;
4313 554         1398 $left_bond_strength{'xor'} = VERY_WEAK - 0.01;
4314 554         1264 $right_bond_strength{'and'} = NOMINAL;
4315 554         1337 $right_bond_strength{'or'} = NOMINAL;
4316 554         1412 $right_bond_strength{'err'} = NOMINAL;
4317 554         1414 $right_bond_strength{'xor'} = NOMINAL;
4318              
4319             #---------------------------------------------------------------
4320             # Bond Strength BEGIN Section 2.
4321             # Set binary rules for bond strengths between certain token types.
4322             #---------------------------------------------------------------
4323              
4324             # We have a little problem making tables which apply to the
4325             # container tokens. Here is a list of container tokens and
4326             # their types:
4327             #
4328             # type tokens // meaning
4329             # { {, [, ( // indent
4330             # } }, ], ) // outdent
4331             # [ [ // left non-structural [ (enclosing an array index)
4332             # ] ] // right non-structural square bracket
4333             # ( ( // left non-structural paren
4334             # ) ) // right non-structural paren
4335             # L { // left non-structural curly brace (enclosing a key)
4336             # R } // right non-structural curly brace
4337             #
4338             # Some rules apply to token types and some to just the token
4339             # itself. We solve the problem by combining type and token into a
4340             # new hash key for the container types.
4341             #
4342             # If a rule applies to a token 'type' then we need to make rules
4343             # for each of these 'type.token' combinations:
4344             # Type Type.Token
4345             # { {{, {[, {(
4346             # [ [[
4347             # ( ((
4348             # L L{
4349             # } }}, }], })
4350             # ] ]]
4351             # ) ))
4352             # R R}
4353             #
4354             # If a rule applies to a token then we need to make rules for
4355             # these 'type.token' combinations:
4356             # Token Type.Token
4357             # { {{, L{
4358             # [ {[, [[
4359             # ( {(, ((
4360             # } }}, R}
4361             # ] }], ]]
4362             # ) }), ))
4363              
4364             # allow long lines before final { in an if statement, as in:
4365             # if (..........
4366             # ..........)
4367             # {
4368             #
4369             # Otherwise, the line before the { tends to be too short.
4370              
4371 554         1845 $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
4372 554         1728 $binary_bond_strength{'(('}{'{{'} = NOMINAL;
4373              
4374             # break on something like '} (', but keep this stronger than a ','
4375             # example is in 'howe.pl'
4376 554         1708 $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
4377 554         1816 $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
4378              
4379             # keep matrix and hash indices together
4380             # but make them a little below STRONG to allow breaking open
4381             # something like {'some-word'}{'some-very-long-word'} at the }{
4382             # (bracebrk.t)
4383 554         1822 $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
4384 554         1609 $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
4385 554         1426 $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
4386 554         1473 $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
4387              
4388             # increase strength to the point where a break in the following
4389             # will be after the opening paren rather than at the arrow:
4390             # $a->$b($c);
4391 554         1588 $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
4392              
4393             # Added for c140 to make 'w ->' and 'i ->' behave the same
4394 554         1486 $binary_bond_strength{'w'}{'->'} = 1.45 * STRONG;
4395              
4396             # Note that the following alternative strength would make the break at the
4397             # '->' rather than opening the '('. Both have advantages and disadvantages.
4398             # $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; #
4399              
4400 554         1401 $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4401 554         1576 $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4402 554         1662 $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4403 554         1648 $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4404 554         1459 $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4405 554         1495 $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4406              
4407 554         1428 $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
4408 554         1320 $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
4409 554         1534 $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
4410 554         1400 $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
4411              
4412             #---------------------------------------------------------------
4413             # Binary NO_BREAK rules
4414             #---------------------------------------------------------------
4415              
4416             # use strict requires that bare word and => not be separated
4417 554         1531 $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
4418 554         1633 $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
4419              
4420             # Never break between a bareword and a following paren because
4421             # perl may give an error. For example, if a break is placed
4422             # between 'to_filehandle' and its '(' the following line will
4423             # give a syntax error [Carp.pm]: my( $no) =fileno(
4424             # to_filehandle( $in)) ;
4425 554         1526 $binary_bond_strength{'C'}{'(('} = NO_BREAK;
4426 554         1594 $binary_bond_strength{'C'}{'{('} = NO_BREAK;
4427 554         1490 $binary_bond_strength{'U'}{'(('} = NO_BREAK;
4428 554         1520 $binary_bond_strength{'U'}{'{('} = NO_BREAK;
4429              
4430             # use strict requires that bare word within braces not start new
4431             # line
4432 554         1595 $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
4433              
4434 554         1564 $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
4435              
4436             # The following two rules prevent a syntax error caused by breaking up
4437             # a construction like '{-y}'. The '-' quotes the 'y' and prevents
4438             # it from being taken as a transliteration. We have to keep
4439             # token types 'L m w' together to prevent this error.
4440 554         1423 $binary_bond_strength{'L{'}{'m'} = NO_BREAK;
4441 554         1632 $binary_bond_strength_nospace{'m'}{'w'} = NO_BREAK;
4442              
4443             # keep 'bareword-' together, but only if there is no space between
4444             # the word and dash. Do not keep together if there is a space.
4445             # example 'use perl6-alpha'
4446 554         1573 $binary_bond_strength_nospace{'w'}{'m'} = NO_BREAK;
4447              
4448             # use strict requires that bare word and => not be separated
4449 554         1436 $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
4450              
4451             # use strict does not allow separating type info from trailing { }
4452             # testfile is readmail.pl
4453 554         1585 $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
4454 554         1490 $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
4455              
4456             # As a defensive measure, do not break between a '(' and a
4457             # filehandle. In some cases, this can cause an error. For
4458             # example, the following program works:
4459             # my $msg="hi!\n";
4460             # print
4461             # ( STDOUT
4462             # $msg
4463             # );
4464             #
4465             # But this program fails:
4466             # my $msg="hi!\n";
4467             # print
4468             # (
4469             # STDOUT
4470             # $msg
4471             # );
4472             #
4473             # This is normally only a problem with the 'extrude' option
4474 554         1639 $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
4475 554         1558 $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
4476              
4477             # never break between sub name and opening paren
4478 554         1490 $binary_bond_strength{'w'}{'(('} = NO_BREAK;
4479 554         1445 $binary_bond_strength{'w'}{'{('} = NO_BREAK;
4480              
4481             # keep '}' together with ';'
4482 554         1508 $binary_bond_strength{'}}'}{';'} = NO_BREAK;
4483              
4484             # Breaking before a ++ can cause perl to guess wrong. For
4485             # example the following line will cause a syntax error
4486             # with -extrude if we break between '$i' and '++' [fixstyle2]
4487             # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
4488 554         1750 $nobreak_lhs{'++'} = NO_BREAK;
4489              
4490             # Do not break before a possible file handle
4491 554         1428 $nobreak_lhs{'Z'} = NO_BREAK;
4492              
4493             # use strict hates bare words on any new line. For
4494             # example, a break before the underscore here provokes the
4495             # wrath of use strict:
4496             # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
4497 554         1384 $nobreak_rhs{'F'} = NO_BREAK;
4498 554         1456 $nobreak_rhs{'CORE::'} = NO_BREAK;
4499              
4500             # To prevent the tokenizer from switching between types 'w' and 'G' we
4501             # need to avoid breaking between type 'G' and the following code block
4502             # brace. Fixes case b929.
4503 554         1470 $nobreak_rhs{G} = NO_BREAK;
4504              
4505             #---------------------------------------------------------------
4506             # Bond Strength BEGIN Section 3.
4507             # Define tables and values for applying a small bias to the above
4508             # values.
4509             #---------------------------------------------------------------
4510             # Adding a small 'bias' to strengths is a simple way to make a line
4511             # break at the first of a sequence of identical terms. For
4512             # example, to force long string of conditional operators to break
4513             # with each line ending in a ':', we can add a small number to the
4514             # bond strength of each ':' (colon.t)
4515 554         3243 @bias_tokens = qw( : && || f and or . ); # tokens which get bias
4516 554         1694 %bias_hash = map { $_ => 0 } @bias_tokens;
  3878         10397  
4517 554         1919 $delta_bias = 0.0001; # a very small strength level
4518 554         1459 return;
4519              
4520             } ## end sub initialize_bond_strength_hashes
4521              
4522 38     38   401 use constant DEBUG_BOND => 0;
  38         128  
  38         70626  
4523              
4524             sub set_bond_strengths {
4525              
4526 1110     1110 0 2486 my ($self) = @_;
4527              
4528             #-----------------------------------------------------------------
4529             # Define a 'bond strength' for each token pair in an output batch.
4530             # See comments above for definition of bond strength.
4531             #-----------------------------------------------------------------
4532              
4533 1110         2333 my $rbond_strength_to_go = [];
4534              
4535 1110         2322 my $rLL = $self->[_rLL_];
4536 1110         3161 my $rK_weld_right = $self->[_rK_weld_right_];
4537 1110         2176 my $rK_weld_left = $self->[_rK_weld_left_];
4538 1110         2213 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
4539              
4540             # patch-its always ok to break at end of line
4541 1110         2110 $nobreak_to_go[$max_index_to_go] = 0;
4542              
4543             # we start a new set of bias values for each line
4544 1110         9601 %bias = %bias_hash;
4545              
4546 1110         2921 my $code_bias = -.01; # bias for closing block braces
4547              
4548 1110         2092 my $type = 'b';
4549 1110         2149 my $token = SPACE;
4550 1110         1873 my $token_length = 1;
4551 1110         1701 my $last_type;
4552 1110         1958 my $last_nonblank_type = $type;
4553 1110         1920 my $last_nonblank_token = $token;
4554 1110         2746 my $list_str = $left_bond_strength{'?'};
4555              
4556 1110         3815 my ( $bond_str_1, $bond_str_2, $bond_str_3, $bond_str_4 );
4557              
4558 1110         0 my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
4559             $next_nonblank_type, $next_token, $next_type,
4560             $total_nesting_depth, );
4561              
4562             # main loop to compute bond strengths between each pair of tokens
4563 1110         2979 foreach my $i ( 0 .. $max_index_to_go ) {
4564 31228         41661 $last_type = $type;
4565 31228 100       54088 if ( $type ne 'b' ) {
4566 18537         24476 $last_nonblank_type = $type;
4567 18537         25360 $last_nonblank_token = $token;
4568             }
4569 31228         42882 $type = $types_to_go[$i];
4570              
4571             # strength on both sides of a blank is the same
4572 31228 100 66     70786 if ( $type eq 'b' && $last_type ne 'b' ) {
4573 11581         25383 $rbond_strength_to_go->[$i] = $rbond_strength_to_go->[ $i - 1 ];
4574 11581   100     39253 $nobreak_to_go[$i] ||= $nobreak_to_go[ $i - 1 ]; # fix for b1257
4575 11581         18349 next;
4576             }
4577              
4578 19647         27617 $token = $tokens_to_go[$i];
4579 19647         26910 $token_length = $token_lengths_to_go[$i];
4580 19647         27108 $block_type = $block_type_to_go[$i];
4581 19647         26367 $i_next = $i + 1;
4582 19647         27744 $next_type = $types_to_go[$i_next];
4583 19647         27211 $next_token = $tokens_to_go[$i_next];
4584 19647         27534 $total_nesting_depth = $nesting_depth_to_go[$i_next];
4585 19647 100       35974 $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
4586 19647         27509 $next_nonblank_type = $types_to_go[$i_next_nonblank];
4587 19647         26986 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
4588              
4589 19647         27184 my $seqno = $type_sequence_to_go[$i];
4590 19647         26915 my $next_nonblank_seqno = $type_sequence_to_go[$i_next_nonblank];
4591              
4592             # We are computing the strength of the bond between the current
4593             # token and the NEXT token.
4594              
4595             #---------------------------------------------------------------
4596             # Bond Strength Section 1:
4597             # First Approximation.
4598             # Use minimum of individual left and right tabulated bond
4599             # strengths.
4600             #---------------------------------------------------------------
4601 19647         34138 my $bsr = $right_bond_strength{$type};
4602 19647         32548 my $bsl = $left_bond_strength{$next_nonblank_type};
4603              
4604             # define right bond strengths of certain keywords
4605 19647 100 100     75855 if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
    100 100        
4606 76         181 $bsr = $right_bond_strength{$token};
4607             }
4608             elsif ( $token eq 'ne' or $token eq 'eq' ) {
4609 81         233 $bsr = NOMINAL;
4610             }
4611              
4612             # set terminal bond strength to the nominal value
4613             # this will cause good preceding breaks to be retained
4614 19647 100       34922 if ( $i_next_nonblank > $max_index_to_go ) {
4615 1110         2724 $bsl = NOMINAL;
4616              
4617             # But weaken the bond at a 'missing terminal comma'. If an
4618             # optional comma is missing at the end of a broken list, use
4619             # the strength of a comma anyway to make formatting the same as
4620             # if it were there. Fixes issue c133.
4621 1110 100 100     5753 if ( !defined($bsr) || $bsr > VERY_WEAK ) {
4622 555         1278 my $seqno_px = $parent_seqno_to_go[$max_index_to_go];
4623 555 100       1695 if ( $ris_list_by_seqno->{$seqno_px} ) {
4624 72         213 my $KK = $K_to_go[$max_index_to_go];
4625 72         380 my $Kn = $self->K_next_nonblank($KK);
4626 72         252 my $seqno_n = $rLL->[$Kn]->[_TYPE_SEQUENCE_];
4627 72 100 100     400 if ( $seqno_n && $seqno_n eq $seqno_px ) {
4628 17         54 $bsl = VERY_WEAK;
4629             }
4630             }
4631             }
4632             }
4633              
4634             # define right bond strengths of certain keywords
4635 19647 100 100     80983 if ( $next_nonblank_type eq 'k'
    100 100        
    50          
4636             && defined( $left_bond_strength{$next_nonblank_token} ) )
4637             {
4638 76         182 $bsl = $left_bond_strength{$next_nonblank_token};
4639             }
4640             elsif ($next_nonblank_token eq 'ne'
4641             or $next_nonblank_token eq 'eq' )
4642             {
4643 81         196 $bsl = NOMINAL;
4644             }
4645             elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
4646 0         0 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
4647             }
4648              
4649             # Use the minimum of the left and right strengths. Note: it might
4650             # seem that we would want to keep a NO_BREAK if either token has
4651             # this value. This didn't work, for example because in an arrow
4652             # list, it prevents the comma from separating from the following
4653             # bare word (which is probably quoted by its arrow). So necessary
4654             # NO_BREAK's have to be handled as special cases in the final
4655             # section.
4656 19647 100       35849 if ( !defined($bsr) ) { $bsr = VERY_STRONG }
  5679         8655  
4657 19647 100       34141 if ( !defined($bsl) ) { $bsl = VERY_STRONG }
  4290         6569  
4658 19647 100       35269 my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
4659 19647         24527 $bond_str_1 = $bond_str if (DEBUG_BOND);
4660              
4661             #---------------------------------------------------------------
4662             # Bond Strength Section 2:
4663             # Apply hardwired rules..
4664             #---------------------------------------------------------------
4665              
4666             # Patch to put terminal or clauses on a new line: Weaken the bond
4667             # at an || followed by die or similar keyword to make the terminal
4668             # or clause fall on a new line, like this:
4669             #
4670             # my $class = shift
4671             # || die "Cannot add broadcast: No class identifier found";
4672             #
4673             # Otherwise the break will be at the previous '=' since the || and
4674             # = have the same starting strength and the or is biased, like
4675             # this:
4676             #
4677             # my $class =
4678             # shift || die "Cannot add broadcast: No class identifier found";
4679             #
4680             # In any case if the user places a break at either the = or the ||
4681             # it should remain there.
4682 19647 100 100     58764 if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
      100        
4683              
4684             # /^(die|confess|croak|warn)$/
4685 89 100       437 if ( $is_die_confess_croak_warn{$next_nonblank_token} ) {
4686 4 50 33     25 if ( $want_break_before{$token} && $i > 0 ) {
4687 4         12 $rbond_strength_to_go->[ $i - 1 ] -= $delta_bias;
4688              
4689             # keep bond strength of a token and its following blank
4690             # the same
4691 4 100 66     27 if ( $types_to_go[ $i - 1 ] eq 'b' && $i > 2 ) {
4692 1         5 $rbond_strength_to_go->[ $i - 2 ] -= $delta_bias;
4693             }
4694             }
4695             else {
4696 0         0 $bond_str -= $delta_bias;
4697             }
4698             }
4699             }
4700              
4701             # good to break after end of code blocks
4702 19647 100 100     42543 if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
      100        
4703              
4704 194         449 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
4705 194         430 $code_bias += $delta_bias;
4706             }
4707              
4708 19647 100       33796 if ( $type eq 'k' ) {
4709              
4710             # allow certain control keywords to stand out
4711 1226 100 100     3359 if ( $next_nonblank_type eq 'k'
4712             && $is_last_next_redo_return{$token} )
4713             {
4714 5         18 $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
4715             }
4716              
4717             # Don't break after keyword my. This is a quick fix for a
4718             # rare problem with perl. An example is this line from file
4719             # Container.pm:
4720              
4721             # foreach my $question( Debian::DebConf::ConfigDb::gettree(
4722             # $this->{'question'} ) )
4723              
4724 1226 100       2848 if ( $token eq 'my' ) {
4725 233         543 $bond_str = NO_BREAK;
4726             }
4727              
4728             }
4729              
4730 19647 100 100     49585 if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
    100          
4731              
4732 728 100       2598 if ( $is_keyword_returning_list{$next_nonblank_token} ) {
4733 65 50       239 $bond_str = $list_str if ( $bond_str > $list_str );
4734             }
4735              
4736             # keywords like 'unless', 'if', etc, within statements
4737             # make good breaks
4738 728 100       1899 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
4739 19         59 $bond_str = VERY_WEAK / 1.05;
4740             }
4741             }
4742              
4743             # try not to break before a comma-arrow
4744             elsif ( $next_nonblank_type eq '=>' ) {
4745 890 100       2576 if ( $bond_str < STRONG ) { $bond_str = STRONG }
  185         342  
4746             }
4747              
4748             #---------------------------------------------------------------
4749             # Additional hardwired NOBREAK rules
4750             #---------------------------------------------------------------
4751              
4752             # map1.t -- correct for a quirk in perl
4753 19647 50 100     42179 if ( $token eq '('
      100        
      66        
4754             && $next_nonblank_type eq 'i'
4755             && $last_nonblank_type eq 'k'
4756             && $is_sort_map_grep{$last_nonblank_token} )
4757              
4758             # /^(sort|map|grep)$/ )
4759             {
4760 0         0 $bond_str = NO_BREAK;
4761             }
4762              
4763             # extrude.t: do not break before paren at:
4764             # -l pid_filename(
4765 19647 100 100     37518 if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
4766 2         18 $bond_str = NO_BREAK;
4767             }
4768              
4769             # OLD COMMENT: In older version of perl, use strict can cause
4770             # problems with breaks before bare words following opening parens.
4771             # For example, this will fail under older versions if a break is
4772             # made between '(' and 'MAIL':
4773              
4774             # use strict; open( MAIL, "a long filename or command"); close MAIL;
4775              
4776             # NEW COMMENT: Third fix for b1213:
4777             # This option does not seem to be needed any longer, and it can
4778             # cause instabilities. It can be turned off, but to minimize
4779             # changes to existing formatting it is retained only in the case
4780             # where the previous token was 'open' and there was no line break.
4781             # Even this could eventually be removed if it causes instability.
4782 19647 100       39419 if ( $type eq '{' ) {
    100          
4783              
4784 2364 50 100     9000 if ( $token eq '('
      100        
      66        
      33        
4785             && $next_nonblank_type eq 'w'
4786             && $last_nonblank_type eq 'k'
4787             && $last_nonblank_token eq 'open'
4788             && !$old_breakpoint_to_go[$i] )
4789             {
4790 0         0 $bond_str = NO_BREAK;
4791             }
4792             }
4793              
4794             # Do not break between a possible filehandle and a ? or / and do
4795             # not introduce a break after it if there is no blank
4796             # (extrude.t)
4797             elsif ( $type eq 'Z' ) {
4798              
4799             # don't break..
4800 3 100 66     44 if (
      66        
      33        
      66        
4801              
4802             # if there is no blank and we do not want one. Examples:
4803             # print $x++ # do not break after $x
4804             # print HTML"HELLO" # break ok after HTML
4805             (
4806             $next_type ne 'b'
4807             && defined( $want_left_space{$next_type} )
4808             && $want_left_space{$next_type} == WS_NO
4809             )
4810              
4811             # or we might be followed by the start of a quote,
4812             # and this is not an existing breakpoint; fixes c039.
4813             || !$old_breakpoint_to_go[$i]
4814             && substr( $next_nonblank_token, 0, 1 ) eq '/'
4815              
4816             )
4817             {
4818 2         5 $bond_str = NO_BREAK;
4819             }
4820             }
4821              
4822             # Breaking before a ? before a quote can cause trouble if
4823             # they are not separated by a blank.
4824             # Example: a syntax error occurs if you break before the ? here
4825             # my$logic=join$all?' && ':' || ',@regexps;
4826             # From: Professional_Perl_Programming_Code/multifind.pl
4827 19647 100       45725 if ( $next_nonblank_type eq '?' ) {
    100          
    100          
4828 125 100       740 $bond_str = NO_BREAK
4829             if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
4830             }
4831              
4832             # Breaking before a . followed by a number
4833             # can cause trouble if there is no intervening space
4834             # Example: a syntax error occurs if you break before the .2 here
4835             # $str .= pack($endian.2, ensurrogate($ord));
4836             # From: perl58/Unicode.pm
4837             elsif ( $next_nonblank_type eq '.' ) {
4838 116 50       442 $bond_str = NO_BREAK
4839             if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
4840             }
4841              
4842             # Fix for c039
4843             elsif ( $type eq 'w' ) {
4844 995 50 66     5449 $bond_str = NO_BREAK
      33        
4845             if ( !$old_breakpoint_to_go[$i]
4846             && substr( $next_nonblank_token, 0, 1 ) eq '/'
4847             && $next_nonblank_type ne '//' );
4848             }
4849              
4850 19647         24185 $bond_str_2 = $bond_str if (DEBUG_BOND);
4851              
4852             #---------------------------------------------------------------
4853             # End of hardwired rules
4854             #---------------------------------------------------------------
4855              
4856             #---------------------------------------------------------------
4857             # Bond Strength Section 3:
4858             # Apply table rules. These have priority over the above
4859             # hardwired rules.
4860             #---------------------------------------------------------------
4861              
4862 19647         24704 my $tabulated_bond_str;
4863 19647         26364 my $ltype = $type;
4864 19647         26172 my $rtype = $next_nonblank_type;
4865 19647 100 100     40718 if ( $seqno && $is_container_token{$token} ) {
4866 4947         8519 $ltype = $type . $token;
4867             }
4868              
4869 19647 100 100     41271 if ( $next_nonblank_seqno
4870             && $is_container_token{$next_nonblank_token} )
4871             {
4872 4830         8336 $rtype = $next_nonblank_type . $next_nonblank_token;
4873              
4874             # Alternate Fix #1 for issue b1299. This version makes the
4875             # decision as soon as possible. See Alternate Fix #2 also.
4876             # Do not separate a bareword identifier from its paren: b1299
4877             # This is currently needed for stability because if the bareword
4878             # gets separated from a preceding '->' and following '(' then
4879             # the tokenizer may switch from type 'i' to type 'w'. This
4880             # patch will prevent this by keeping it adjacent to its '('.
4881             ## if ( $next_nonblank_token eq '('
4882             ## && $ltype eq 'i'
4883             ## && substr( $token, 0, 1 ) =~ /^\w$/ )
4884             ## {
4885             ## $ltype = 'w';
4886             ## }
4887             }
4888              
4889             # apply binary rules which apply regardless of space between tokens
4890 19647 100       46725 if ( $binary_bond_strength{$ltype}{$rtype} ) {
4891 1654         3244 $bond_str = $binary_bond_strength{$ltype}{$rtype};
4892 1654         2569 $tabulated_bond_str = $bond_str;
4893             }
4894              
4895             # apply binary rules which apply only if no space between tokens
4896 19647 100       39457 if ( $binary_bond_strength_nospace{$ltype}{$next_type} ) {
4897 255         476 $bond_str = $binary_bond_strength{$ltype}{$next_type};
4898 255         444 $tabulated_bond_str = $bond_str;
4899             }
4900              
4901 19647 100 100     57793 if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
4902 49         165 $bond_str = NO_BREAK;
4903 49         194 $tabulated_bond_str = $bond_str;
4904             }
4905              
4906 19647         24880 $bond_str_3 = $bond_str if (DEBUG_BOND);
4907              
4908             # If the hardwired rules conflict with the tabulated bond
4909             # strength then there is an inconsistency that should be fixed
4910             DEBUG_BOND
4911             && $tabulated_bond_str
4912             && $bond_str_1
4913             && $bond_str_1 != $bond_str_2
4914             && $bond_str_2 != $tabulated_bond_str
4915 19647         23971 && do {
4916             print STDERR
4917             "BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
4918             };
4919              
4920             #-----------------------------------------------------------------
4921             # Bond Strength Section 4:
4922             # Modify strengths of certain tokens which often occur in sequence
4923             # by adding a small bias to each one in turn so that the breaks
4924             # occur from left to right.
4925             #
4926             # Note that we only changing strengths by small amounts here,
4927             # and usually increasing, so we should not be altering any NO_BREAKs.
4928             # Other routines which check for NO_BREAKs will use a tolerance
4929             # of one to avoid any problem.
4930             #-----------------------------------------------------------------
4931              
4932             # The bias tables use special keys:
4933             # $type - if not keyword
4934             # $token - if keyword, but map some keywords together
4935 19647 50       35123 my $left_key =
    100          
4936             $type eq 'k' ? $token eq 'err' ? 'or' : $token : $type;
4937 19647 50       32901 my $right_key =
    100          
4938             $next_nonblank_type eq 'k'
4939             ? $next_nonblank_token eq 'err'
4940             ? 'or'
4941             : $next_nonblank_token
4942             : $next_nonblank_type;
4943              
4944             # bias left token
4945 19647 100       35893 if ( defined( $bias{$left_key} ) ) {
4946 452 100       1610 if ( !$want_break_before{$left_key} ) {
4947 30         71 $bias{$left_key} += $delta_bias;
4948 30         57 $bond_str += $bias{$left_key};
4949             }
4950             }
4951              
4952             # bias right token
4953 19647 100       35869 if ( defined( $bias{$right_key} ) ) {
4954 451 100       1744 if ( $want_break_before{$right_key} ) {
4955              
4956             # for leading '.' align all but 'short' quotes; the idea
4957             # is to not place something like "\n" on a single line.
4958 421 100       1087 if ( $right_key eq '.' ) {
4959 115 100 100     712 unless (
      66        
4960             $last_nonblank_type eq '.'
4961             && ( $token_length <=
4962             $rOpts_short_concatenation_item_length )
4963             && ( !$is_closing_token{$token} )
4964             )
4965             {
4966 75         167 $bias{$right_key} += $delta_bias;
4967             }
4968             }
4969             else {
4970 306         643 $bias{$right_key} += $delta_bias;
4971             }
4972 421         797 $bond_str += $bias{$right_key};
4973             }
4974             }
4975              
4976 19647         24662 $bond_str_4 = $bond_str if (DEBUG_BOND);
4977              
4978             #---------------------------------------------------------------
4979             # Bond Strength Section 5:
4980             # Fifth Approximation.
4981             # Take nesting depth into account by adding the nesting depth
4982             # to the bond strength.
4983             #---------------------------------------------------------------
4984 19647         25031 my $strength;
4985              
4986 19647 100 100     53738 if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
4987 18241 100       29862 if ( $total_nesting_depth > 0 ) {
4988 14640         22585 $strength = $bond_str + $total_nesting_depth;
4989             }
4990             else {
4991 3601         5394 $strength = $bond_str;
4992             }
4993             }
4994             else {
4995 1406         2298 $strength = NO_BREAK;
4996              
4997             # For critical code such as lines with here targets we must
4998             # be absolutely sure that we do not allow a break. So for
4999             # these the nobreak flag exceeds 1 as a signal. Otherwise we
5000             # can run into trouble when small tolerances are added.
5001 1406 100 100     4770 $strength += 1
5002             if ( $nobreak_to_go[$i] && $nobreak_to_go[$i] > 1 );
5003             }
5004              
5005             #---------------------------------------------------------------
5006             # Bond Strength Section 6:
5007             # Sixth Approximation. Welds.
5008             #---------------------------------------------------------------
5009              
5010             # Do not allow a break within welds
5011 19647 100 100     38114 if ( $total_weld_count && $seqno ) {
5012 383         694 my $KK = $K_to_go[$i];
5013 383 100 66     1596 if ( $rK_weld_right->{$KK} ) {
    100          
5014 68         170 $strength = NO_BREAK;
5015             }
5016              
5017             # But encourage breaking after opening welded tokens
5018             elsif ($rK_weld_left->{$KK}
5019             && $is_opening_token{$token} )
5020             {
5021 27         88 $strength -= 1;
5022             }
5023             }
5024              
5025             # always break after side comment
5026 19647 100       35110 if ( $type eq '#' ) { $strength = 0 }
  50         144  
5027              
5028 19647         38129 $rbond_strength_to_go->[$i] = $strength;
5029              
5030             # Fix for case c001: be sure NO_BREAK's are enforced by later
5031             # routines, except at a '?' because '?' as quote delimiter is
5032             # deprecated.
5033 19647 100 100     43117 if ( $strength >= NO_BREAK && $next_nonblank_type ne '?' ) {
5034 2745   100     7125 $nobreak_to_go[$i] ||= 1;
5035             }
5036              
5037 19647         32142 DEBUG_BOND && do {
5038             my $str = substr( $token, 0, 15 );
5039             $str .= SPACE x ( 16 - length($str) );
5040             print STDOUT
5041             "BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
5042              
5043             # reset for next pass
5044             $bond_str_1 = $bond_str_2 = $bond_str_3 = $bond_str_4 = undef;
5045             };
5046              
5047             } ## end main loop
5048 1110         4639 return $rbond_strength_to_go;
5049             } ## end sub set_bond_strengths
5050             } ## end closure set_bond_strengths
5051              
5052             sub bad_pattern {
5053              
5054             # See if a pattern will compile. We have to use a string eval here,
5055             # but it should be safe because the pattern has been constructed
5056             # by this program.
5057 1112     1112 0 2677 my ($pattern) = @_;
5058 1112         109463 my $ok = eval "'##'=~/$pattern/";
5059 1112   33     9555 return !defined($ok) || $EVAL_ERROR;
5060             } ## end sub bad_pattern
5061              
5062             { ## begin closure prepare_cuddled_block_types
5063              
5064             my %no_cuddle;
5065              
5066             # Add keywords here which really should not be cuddled
5067             BEGIN {
5068 38     38   316 my @q = qw(if unless for foreach while);
5069 38         19982 @no_cuddle{@q} = (1) x scalar(@q);
5070             }
5071              
5072             sub prepare_cuddled_block_types {
5073              
5074             # the cuddled-else style, if used, is controlled by a hash that
5075             # we construct here
5076              
5077             # Include keywords here which should not be cuddled
5078              
5079 554     554 0 1507 my $cuddled_string = EMPTY_STRING;
5080 554 100       2321 if ( $rOpts->{'cuddled-else'} ) {
5081              
5082             # set the default
5083             $cuddled_string = 'elsif else continue catch finally'
5084 12 50       72 unless ( $rOpts->{'cuddled-block-list-exclusive'} );
5085              
5086             # This is the old equivalent but more complex version
5087             # $cuddled_string = 'if-elsif-else unless-elsif-else -continue ';
5088              
5089             # Add users other blocks to be cuddled
5090 12         36 my $cuddled_block_list = $rOpts->{'cuddled-block-list'};
5091 12 100       51 if ($cuddled_block_list) {
5092 2         10 $cuddled_string .= SPACE . $cuddled_block_list;
5093             }
5094              
5095             }
5096              
5097             # If we have a cuddled string of the form
5098             # 'try-catch-finally'
5099              
5100             # we want to prepare a hash of the form
5101              
5102             # $rcuddled_block_types = {
5103             # 'try' => {
5104             # 'catch' => 1,
5105             # 'finally' => 1
5106             # },
5107             # };
5108              
5109             # use -dcbl to dump this hash
5110              
5111             # Multiple such strings are input as a space or comma separated list
5112              
5113             # If we get two lists with the same leading type, such as
5114             # -cbl = "-try-catch-finally -try-catch-otherwise"
5115             # then they will get merged as follows:
5116             # $rcuddled_block_types = {
5117             # 'try' => {
5118             # 'catch' => 1,
5119             # 'finally' => 2,
5120             # 'otherwise' => 1,
5121             # },
5122             # };
5123             # This will allow either type of chain to be followed.
5124              
5125 554         1686 $cuddled_string =~ s/,/ /g; # allow space or comma separated lists
5126 554         1872 my @cuddled_strings = split /\s+/, $cuddled_string;
5127              
5128 554         1925 $rcuddled_block_types = {};
5129              
5130             # process each dash-separated string...
5131 554         1620 my $string_count = 0;
5132 554         1825 foreach my $string (@cuddled_strings) {
5133 66 50       153 next unless $string;
5134 66         163 my @words = split /-+/, $string; # allow multiple dashes
5135              
5136             # we could look for and report possible errors here...
5137 66 50       152 next unless ( @words > 0 );
5138              
5139             # allow either '-continue' or *-continue' for arbitrary starting type
5140 66         117 my $start = '*';
5141              
5142             # a single word without dashes is a secondary block type
5143 66 50       147 if ( @words > 1 ) {
5144 0         0 $start = shift @words;
5145             }
5146              
5147             # always make an entry for the leading word. If none follow, this
5148             # will still prevent a wildcard from matching this word.
5149 66 100       154 if ( !defined( $rcuddled_block_types->{$start} ) ) {
5150 12         53 $rcuddled_block_types->{$start} = {};
5151             }
5152              
5153             # The count gives the original word order in case we ever want it.
5154 66         109 $string_count++;
5155 66         103 my $word_count = 0;
5156 66         136 foreach my $word (@words) {
5157 66 50       145 next unless $word;
5158 66 50       173 if ( $no_cuddle{$word} ) {
5159 0         0 Warn(
5160             "## Ignoring keyword '$word' in -cbl; does not seem right\n"
5161             );
5162 0         0 next;
5163             }
5164 66         98 $word_count++;
5165 66         181 $rcuddled_block_types->{$start}->{$word} =
5166             1; #"$string_count.$word_count";
5167              
5168             # git#9: Remove this word from the list of desired one-line
5169             # blocks
5170 66         179 $want_one_line_block{$word} = 0;
5171             }
5172             }
5173 554         1335 return;
5174             } ## end sub prepare_cuddled_block_types
5175             } ## end closure prepare_cuddled_block_types
5176              
5177             sub dump_cuddled_block_list {
5178 0     0 0 0 my ($fh) = @_;
5179              
5180             # ORIGINAL METHOD: Here is the format of the cuddled block type hash
5181             # which controls this routine
5182             # my $rcuddled_block_types = {
5183             # 'if' => {
5184             # 'else' => 1,
5185             # 'elsif' => 1
5186             # },
5187             # 'try' => {
5188             # 'catch' => 1,
5189             # 'finally' => 1
5190             # },
5191             # };
5192              
5193             # SIMPLIFIED METHOD: the simplified method uses a wildcard for
5194             # the starting block type and puts all cuddled blocks together:
5195             # my $rcuddled_block_types = {
5196             # '*' => {
5197             # 'else' => 1,
5198             # 'elsif' => 1
5199             # 'catch' => 1,
5200             # 'finally' => 1
5201             # },
5202             # };
5203              
5204             # Both methods work, but the simplified method has proven to be adequate and
5205             # easier to manage.
5206              
5207 0         0 my $cuddled_string = $rOpts->{'cuddled-block-list'};
5208 0 0       0 $cuddled_string = EMPTY_STRING unless $cuddled_string;
5209              
5210 0         0 my $flags = EMPTY_STRING;
5211 0 0       0 $flags .= "-ce" if ( $rOpts->{'cuddled-else'} );
5212 0         0 $flags .= " -cbl='$cuddled_string'";
5213              
5214 0 0       0 unless ( $rOpts->{'cuddled-else'} ) {
5215 0         0 $flags .= "\nNote: You must specify -ce to generate a cuddled hash";
5216             }
5217              
5218 0         0 $fh->print(<<EOM);
5219             ------------------------------------------------------------------------
5220             Hash of cuddled block types prepared for a run with these parameters:
5221             $flags
5222             ------------------------------------------------------------------------
5223             EOM
5224              
5225 38     38   28426 use Data::Dumper;
  38         272525  
  38         174100  
5226 0         0 $fh->print( Dumper($rcuddled_block_types) );
5227              
5228 0         0 $fh->print(<<EOM);
5229             ------------------------------------------------------------------------
5230             EOM
5231 0         0 return;
5232             } ## end sub dump_cuddled_block_list
5233              
5234             sub make_static_block_comment_pattern {
5235              
5236             # create the pattern used to identify static block comments
5237 554     554 0 1760 $static_block_comment_pattern = '^\s*##';
5238              
5239             # allow the user to change it
5240 554 100       2668 if ( $rOpts->{'static-block-comment-prefix'} ) {
5241 1         3 my $prefix = $rOpts->{'static-block-comment-prefix'};
5242 1         6 $prefix =~ s/^\s*//;
5243 1         3 my $pattern = $prefix;
5244              
5245             # user may give leading caret to force matching left comments only
5246 1 50       7 if ( $prefix !~ /^\^#/ ) {
5247 1 50       4 if ( $prefix !~ /^#/ ) {
5248 0         0 Die(
5249             "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"
5250             );
5251             }
5252 1         3 $pattern = '^\s*' . $prefix;
5253             }
5254 1 50       9 if ( bad_pattern($pattern) ) {
5255 0         0 Die(
5256             "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"
5257             );
5258             }
5259 1         6 $static_block_comment_pattern = $pattern;
5260             }
5261 554         1212 return;
5262             } ## end sub make_static_block_comment_pattern
5263              
5264             sub make_format_skipping_pattern {
5265 1108     1108 0 3609 my ( $opt_name, $default ) = @_;
5266 1108         2859 my $param = $rOpts->{$opt_name};
5267 1108 100       3093 unless ($param) { $param = $default }
  1106         2228  
5268 1108         4554 $param =~ s/^\s*//;
5269 1108 50       5257 if ( $param !~ /^#/ ) {
5270 0         0 Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
5271             }
5272 1108         3409 my $pattern = '^' . $param . '\s';
5273 1108 50       3721 if ( bad_pattern($pattern) ) {
5274 0         0 Die(
5275             "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
5276             );
5277             }
5278 1108         4189 return $pattern;
5279             } ## end sub make_format_skipping_pattern
5280              
5281             sub make_non_indenting_brace_pattern {
5282              
5283             # Create the pattern used to identify static side comments.
5284             # Note that we are ending the pattern in a \s. This will allow
5285             # the pattern to be followed by a space and some text, or a newline.
5286             # The pattern is used in sub 'non_indenting_braces'
5287 554     554 0 1729 $non_indenting_brace_pattern = '^#<<<\s';
5288              
5289             # allow the user to change it
5290 554 100       2630 if ( $rOpts->{'non-indenting-brace-prefix'} ) {
5291 1         3 my $prefix = $rOpts->{'non-indenting-brace-prefix'};
5292 1         5 $prefix =~ s/^\s*//;
5293 1 50       7 if ( $prefix !~ /^#/ ) {
5294 0         0 Die("ERROR: the -nibp parameter '$prefix' must begin with '#'\n");
5295             }
5296 1         5 my $pattern = '^' . $prefix . '\s';
5297 1 50       5 if ( bad_pattern($pattern) ) {
5298 0         0 Die(
5299             "ERROR: the -nibp prefix '$prefix' causes the invalid regex '$pattern'\n"
5300             );
5301             }
5302 1         3 $non_indenting_brace_pattern = $pattern;
5303             }
5304 554         1224 return;
5305             } ## end sub make_non_indenting_brace_pattern
5306              
5307             sub make_closing_side_comment_list_pattern {
5308              
5309             # turn any input list into a regex for recognizing selected block types
5310 554     554 0 1590 $closing_side_comment_list_pattern = '^\w+';
5311 554 50 66     2351 if ( defined( $rOpts->{'closing-side-comment-list'} )
5312             && $rOpts->{'closing-side-comment-list'} )
5313             {
5314             $closing_side_comment_list_pattern =
5315 1         5 make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
5316             }
5317 554         1054 return;
5318             } ## end sub make_closing_side_comment_list_pattern
5319              
5320             sub make_sub_matching_pattern {
5321              
5322             # Patterns for standardizing matches to block types for regular subs and
5323             # anonymous subs. Examples
5324             # 'sub process' is a named sub
5325             # 'sub ::m' is a named sub
5326             # 'sub' is an anonymous sub
5327             # 'sub:' is a label, not a sub
5328             # 'sub :' is a label, not a sub ( block type will be <sub:> )
5329             # sub'_ is a named sub ( block type will be <sub '_> )
5330             # 'substr' is a keyword
5331             # So note that named subs always have a space after 'sub'
5332 554     554 0 2187 $SUB_PATTERN = '^sub\s'; # match normal sub
5333 554         1484 $ASUB_PATTERN = '^sub$'; # match anonymous sub
5334              
5335             # Note (see also RT #133130): These patterns are used by
5336             # sub make_block_pattern, which is used for making most patterns.
5337             # So this sub needs to be called before other pattern-making routines.
5338              
5339 554 50       2351 if ( $rOpts->{'sub-alias-list'} ) {
5340              
5341             # Note that any 'sub-alias-list' has been preprocessed to
5342             # be a trimmed, space-separated list which includes 'sub'
5343             # for example, it might be 'sub method fun'
5344 554         1598 my $sub_alias_list = $rOpts->{'sub-alias-list'};
5345 554         3456 $sub_alias_list =~ s/\s+/\|/g;
5346 554         5263 $SUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
5347 554         3510 $ASUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
5348             }
5349 554         1420 return;
5350             } ## end sub make_sub_matching_pattern
5351              
5352             sub make_bl_pattern {
5353              
5354             # Set defaults lists to retain historical default behavior for -bl:
5355 554     554 0 1610 my $bl_list_string = '*';
5356 554         1450 my $bl_exclusion_list_string = 'sort map grep eval asub';
5357              
5358 554 50 66     2272 if ( defined( $rOpts->{'brace-left-list'} )
5359             && $rOpts->{'brace-left-list'} )
5360             {
5361 1         4 $bl_list_string = $rOpts->{'brace-left-list'};
5362             }
5363 554 100       2260 if ( $bl_list_string =~ /\bsub\b/ ) {
5364             $rOpts->{'opening-sub-brace-on-new-line'} ||=
5365 1   33     9 $rOpts->{'opening-brace-on-new-line'};
5366             }
5367 554 100       2168 if ( $bl_list_string =~ /\basub\b/ ) {
5368             $rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||=
5369 1   33     7 $rOpts->{'opening-brace-on-new-line'};
5370             }
5371              
5372 554         1587 $bl_pattern = make_block_pattern( '-bll', $bl_list_string );
5373              
5374             # for -bl, a list with '*' turns on -sbl and -asbl
5375 554 100       3761 if ( $bl_pattern =~ /\.\*/ ) {
5376             $rOpts->{'opening-sub-brace-on-new-line'} ||=
5377 553   100     4271 $rOpts->{'opening-brace-on-new-line'};
5378             $rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||=
5379 553   66     3701 $rOpts->{'opening-anonymous-brace-on-new-line'};
5380             }
5381              
5382 554 50 66     2428 if ( defined( $rOpts->{'brace-left-exclusion-list'} )
5383             && $rOpts->{'brace-left-exclusion-list'} )
5384             {
5385 1         5 $bl_exclusion_list_string = $rOpts->{'brace-left-exclusion-list'};
5386 1 50       6 if ( $bl_exclusion_list_string =~ /\bsub\b/ ) {
5387 0         0 $rOpts->{'opening-sub-brace-on-new-line'} = 0;
5388             }
5389 1 50       4 if ( $bl_exclusion_list_string =~ /\basub\b/ ) {
5390 0         0 $rOpts->{'opening-anonymous-sub-brace-on-new-line'} = 0;
5391             }
5392             }
5393              
5394             $bl_exclusion_pattern =
5395 554         1805 make_block_pattern( '-blxl', $bl_exclusion_list_string );
5396 554         2045 return;
5397             } ## end sub make_bl_pattern
5398              
5399             sub make_bli_pattern {
5400              
5401             # default list of block types for which -bli would apply
5402 554     554 0 1516 my $bli_list_string = 'if else elsif unless while for foreach do : sub';
5403 554         1682 my $bli_exclusion_list_string = SPACE;
5404              
5405 554 50 66     2355 if ( defined( $rOpts->{'brace-left-and-indent-list'} )
5406             && $rOpts->{'brace-left-and-indent-list'} )
5407             {
5408 3         13 $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
5409             }
5410              
5411 554         2567 $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
5412              
5413 554 50 66     2708 if ( defined( $rOpts->{'brace-left-and-indent-exclusion-list'} )
5414             && $rOpts->{'brace-left-and-indent-exclusion-list'} )
5415             {
5416             $bli_exclusion_list_string =
5417 1         3 $rOpts->{'brace-left-and-indent-exclusion-list'};
5418             }
5419             $bli_exclusion_pattern =
5420 554         1854 make_block_pattern( '-blixl', $bli_exclusion_list_string );
5421 554         1862 return;
5422             } ## end sub make_bli_pattern
5423              
5424             sub make_keyword_group_list_pattern {
5425              
5426             # turn any input list into a regex for recognizing selected block types.
5427             # Here are the defaults:
5428 554     554 0 1468 $keyword_group_list_pattern = '^(our|local|my|use|require|)$';
5429 554         1534 $keyword_group_list_comment_pattern = EMPTY_STRING;
5430 554 0 33     2237 if ( defined( $rOpts->{'keyword-group-blanks-list'} )
5431             && $rOpts->{'keyword-group-blanks-list'} )
5432             {
5433 0         0 my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'};
5434 0         0 my @keyword_list;
5435             my @comment_list;
5436 0         0 foreach my $word (@words) {
5437 0 0 0     0 if ( $word eq 'BC' || $word eq 'SBC' ) {
5438 0         0 push @comment_list, $word;
5439 0 0       0 if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' }
  0         0  
5440             }
5441             else {
5442 0         0 push @keyword_list, $word;
5443             }
5444             }
5445             $keyword_group_list_pattern =
5446 0         0 make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} );
5447 0         0 $keyword_group_list_comment_pattern =
5448             make_block_pattern( '-kgbl', join( SPACE, @comment_list ) );
5449             }
5450 554         1108 return;
5451             } ## end sub make_keyword_group_list_pattern
5452              
5453             sub make_block_brace_vertical_tightness_pattern {
5454              
5455             # turn any input list into a regex for recognizing selected block types
5456 554     554 0 1690 $block_brace_vertical_tightness_pattern =
5457             '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
5458 554 0 33     2381 if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
5459             && $rOpts->{'block-brace-vertical-tightness-list'} )
5460             {
5461             $block_brace_vertical_tightness_pattern =
5462             make_block_pattern( '-bbvtl',
5463 0         0 $rOpts->{'block-brace-vertical-tightness-list'} );
5464             }
5465 554         1175 return;
5466             } ## end sub make_block_brace_vertical_tightness_pattern
5467              
5468             sub make_blank_line_pattern {
5469              
5470 554     554 0 1600 $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
5471 554         1383 my $key = 'blank-lines-before-closing-block-list';
5472 554 50 66     2115 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
5473             $blank_lines_before_closing_block_pattern =
5474 1         6 make_block_pattern( '-blbcl', $rOpts->{$key} );
5475             }
5476              
5477 554         1299 $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
5478 554         1232 $key = 'blank-lines-after-opening-block-list';
5479 554 50 66     2217 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
5480             $blank_lines_after_opening_block_pattern =
5481 1         4 make_block_pattern( '-blaol', $rOpts->{$key} );
5482             }
5483 554         1102 return;
5484             } ## end sub make_blank_line_pattern
5485              
5486             sub make_block_pattern {
5487              
5488             # given a string of block-type keywords, return a regex to match them
5489             # The only tricky part is that labels are indicated with a single ':'
5490             # and the 'sub' token text may have additional text after it (name of
5491             # sub).
5492             #
5493             # Example:
5494             #
5495             # input string: "if else elsif unless while for foreach do : sub";
5496             # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
5497              
5498             # Minor Update:
5499             #
5500             # To distinguish between anonymous subs and named subs, use 'sub' to
5501             # indicate a named sub, and 'asub' to indicate an anonymous sub
5502              
5503 2219     2219 0 5378 my ( $abbrev, $string ) = @_;
5504 2219         5120 my @list = split_words($string);
5505 2219         4350 my @words = ();
5506 2219         3556 my %seen;
5507 2219         4802 for my $i (@list) {
5508 8838 100       16473 if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
  557         2012  
  557         2710  
5509 8281 50       15843 next if $seen{$i};
5510 8281         16059 $seen{$i} = 1;
5511 8281 100       34448 if ( $i eq 'sub' ) {
    100          
    50          
    50          
    100          
    50          
5512             }
5513             elsif ( $i eq 'asub' ) {
5514             }
5515             elsif ( $i eq ';' ) {
5516 0         0 push @words, ';';
5517             }
5518             elsif ( $i eq '{' ) {
5519 0         0 push @words, '\{';
5520             }
5521             elsif ( $i eq ':' ) {
5522 551         2862 push @words, '\w+:';
5523             }
5524             elsif ( $i =~ /^\w/ ) {
5525 6623         12439 push @words, $i;
5526             }
5527             else {
5528 0         0 Warn("unrecognized block type $i after $abbrev, ignoring\n");
5529             }
5530             }
5531              
5532             # Fix 2 for c091, prevent the pattern from matching an empty string
5533             # '1 ' is an impossible block name.
5534 1662 100       5392 if ( !@words ) { push @words, "1 " }
  555         2197  
5535              
5536 1662         6527 my $pattern = '(' . join( '|', @words ) . ')$';
5537 1662         3215 my $sub_patterns = EMPTY_STRING;
5538 1662 100       4305 if ( $seen{'sub'} ) {
5539 553         1772 $sub_patterns .= '|' . $SUB_PATTERN;
5540             }
5541 1662 100       4599 if ( $seen{'asub'} ) {
5542 554         2556 $sub_patterns .= '|' . $ASUB_PATTERN;
5543             }
5544 1662 100       3984 if ($sub_patterns) {
5545 1106         3398 $pattern = '(' . $pattern . $sub_patterns . ')';
5546             }
5547 1662         3898 $pattern = '^' . $pattern;
5548 1662         7298 return $pattern;
5549             } ## end sub make_block_pattern
5550              
5551             sub make_static_side_comment_pattern {
5552              
5553             # create the pattern used to identify static side comments
5554 554     554 0 1494 $static_side_comment_pattern = '^##';
5555              
5556             # allow the user to change it
5557 554 50       2446 if ( $rOpts->{'static-side-comment-prefix'} ) {
5558 0         0 my $prefix = $rOpts->{'static-side-comment-prefix'};
5559 0         0 $prefix =~ s/^\s*//;
5560 0         0 my $pattern = '^' . $prefix;
5561 0 0       0 if ( bad_pattern($pattern) ) {
5562 0         0 Die(
5563             "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"
5564             );
5565             }
5566 0         0 $static_side_comment_pattern = $pattern;
5567             }
5568 554         1107 return;
5569             } ## end sub make_static_side_comment_pattern
5570              
5571             sub make_closing_side_comment_prefix {
5572              
5573             # Be sure we have a valid closing side comment prefix
5574 554     554 0 1655 my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
5575 554         1129 my $csc_prefix_pattern;
5576 554 100       2225 if ( !defined($csc_prefix) ) {
5577 552         1379 $csc_prefix = '## end';
5578 552         1389 $csc_prefix_pattern = '^##\s+end';
5579             }
5580             else {
5581 2         8 my $test_csc_prefix = $csc_prefix;
5582 2 50       15 if ( $test_csc_prefix !~ /^#/ ) {
5583 0         0 $test_csc_prefix = '#' . $test_csc_prefix;
5584             }
5585              
5586             # make a regex to recognize the prefix
5587 2         4 my $test_csc_prefix_pattern = $test_csc_prefix;
5588              
5589             # escape any special characters
5590 2         7 $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
5591              
5592 2         5 $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
5593              
5594             # allow exact number of intermediate spaces to vary
5595 2         11 $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
5596              
5597             # make sure we have a good pattern
5598             # if we fail this we probably have an error in escaping
5599             # characters.
5600              
5601 2 50       11 if ( bad_pattern($test_csc_prefix_pattern) ) {
5602              
5603             # shouldn't happen..must have screwed up escaping, above
5604 0         0 if (DEVEL_MODE) {
5605             Fault(<<EOM);
5606             Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'
5607             EOM
5608             }
5609              
5610             # just warn and keep going with defaults
5611             Warn(
5612 0         0 "Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
5613             );
5614 0         0 Warn("Please consider using a simpler -cscp prefix\n");
5615 0         0 Warn("Using default -cscp instead; please check output\n");
5616             }
5617             else {
5618 2         6 $csc_prefix = $test_csc_prefix;
5619 2         7 $csc_prefix_pattern = $test_csc_prefix_pattern;
5620             }
5621             }
5622 554         1767 $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
5623 554         1497 $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
5624 554         1251 return;
5625             } ## end sub make_closing_side_comment_prefix
5626              
5627             ##################################################
5628             # CODE SECTION 4: receive lines from the tokenizer
5629             ##################################################
5630              
5631             { ## begin closure write_line
5632              
5633             my $nesting_depth;
5634              
5635             # Variables used by sub check_sequence_numbers:
5636             my $last_seqno;
5637             my %saw_opening_seqno;
5638             my %saw_closing_seqno;
5639             my $initial_seqno;
5640              
5641             sub initialize_write_line {
5642              
5643 555     555 0 1355 $nesting_depth = undef;
5644              
5645 555         1394 $last_seqno = SEQ_ROOT;
5646 555         1273 %saw_opening_seqno = ();
5647 555         1285 %saw_closing_seqno = ();
5648              
5649 555         1138 return;
5650             } ## end sub initialize_write_line
5651              
5652             sub check_sequence_numbers {
5653              
5654             # Routine for checking sequence numbers. This only needs to be
5655             # done occasionally in DEVEL_MODE to be sure everything is working
5656             # correctly.
5657 0     0 0 0 my ( $rtokens, $rtoken_type, $rtype_sequence, $input_line_no ) = @_;
5658 0         0 my $jmax = @{$rtokens} - 1;
  0         0  
5659 0 0       0 return unless ( $jmax >= 0 );
5660 0         0 foreach my $j ( 0 .. $jmax ) {
5661 0         0 my $seqno = $rtype_sequence->[$j];
5662 0         0 my $token = $rtokens->[$j];
5663 0         0 my $type = $rtoken_type->[$j];
5664 0 0       0 $seqno = EMPTY_STRING unless ( defined($seqno) );
5665 0         0 my $err_msg =
5666             "Error at j=$j, line number $input_line_no, seqno='$seqno', type='$type', tok='$token':\n";
5667              
5668 0 0       0 if ( !$seqno ) {
5669              
5670             # Sequence numbers are generated for opening tokens, so every opening
5671             # token should be sequenced. Closing tokens will be unsequenced
5672             # if they do not have a matching opening token.
5673 0 0 0     0 if ( $is_opening_sequence_token{$token}
      0        
5674             && $type ne 'q'
5675             && $type ne 'Q' )
5676             {
5677 0         0 Fault(
5678             <<EOM
5679             $err_msg Unexpected opening token without sequence number
5680             EOM
5681             );
5682             }
5683             }
5684             else {
5685              
5686             # Save starting seqno to identify sequence method:
5687             # New method starts with 2 and has continuous numbering
5688             # Old method starts with >2 and may have gaps
5689 0 0       0 if ( !defined($initial_seqno) ) { $initial_seqno = $seqno }
  0         0  
5690              
5691 0 0       0 if ( $is_opening_sequence_token{$token} ) {
    0          
5692              
5693             # New method should have continuous numbering
5694 0 0 0     0 if ( $initial_seqno == 2 && $seqno != $last_seqno + 1 ) {
5695 0         0 Fault(
5696             <<EOM
5697             $err_msg Unexpected opening sequence number: previous seqno=$last_seqno, but seqno= $seqno
5698             EOM
5699             );
5700             }
5701 0         0 $last_seqno = $seqno;
5702              
5703             # Numbers must be unique
5704 0 0       0 if ( $saw_opening_seqno{$seqno} ) {
5705 0         0 my $lno = $saw_opening_seqno{$seqno};
5706 0         0 Fault(
5707             <<EOM
5708             $err_msg Already saw an opening tokens at line $lno with this sequence number
5709             EOM
5710             );
5711             }
5712 0         0 $saw_opening_seqno{$seqno} = $input_line_no;
5713             }
5714              
5715             # only one closing item per seqno
5716             elsif ( $is_closing_sequence_token{$token} ) {
5717 0 0       0 if ( $saw_closing_seqno{$seqno} ) {
5718 0         0 my $lno = $saw_closing_seqno{$seqno};
5719 0         0 Fault(
5720             <<EOM
5721             $err_msg Already saw a closing token with this seqno at line $lno
5722             EOM
5723             );
5724             }
5725 0         0 $saw_closing_seqno{$seqno} = $input_line_no;
5726              
5727             # Every closing seqno must have an opening seqno
5728 0 0       0 if ( !$saw_opening_seqno{$seqno} ) {
5729 0         0 Fault(
5730             <<EOM
5731             $err_msg Saw a closing token but no opening token with this seqno
5732             EOM
5733             );
5734             }
5735             }
5736              
5737             # Sequenced items must be opening or closing
5738             else {
5739 0         0 Fault(
5740             <<EOM
5741             $err_msg Unexpected token type with a sequence number
5742             EOM
5743             );
5744             }
5745             }
5746             }
5747 0         0 return;
5748             } ## end sub check_sequence_numbers
5749              
5750             sub store_block_type {
5751 965     965 0 2545 my ( $self, $block_type, $seqno ) = @_;
5752              
5753 965 50       2494 return if ( !$block_type );
5754              
5755             # Save the type of a block in a hash using sequence number as key
5756 965         2565 $self->[_rblock_type_of_seqno_]->{$seqno} = $block_type;
5757              
5758             # and save named subs and anynymous subs in separate hashes so that
5759             # we only have to do the pattern tests once.
5760 965 100       8937 if ( $block_type =~ /$ASUB_PATTERN/ ) {
    100          
5761 173         667 $self->[_ris_asub_block_]->{$seqno} = 1;
5762             }
5763             elsif ( $block_type =~ /$SUB_PATTERN/ ) {
5764 116         353 $self->[_ris_sub_block_]->{$seqno} = 1;
5765             }
5766 965         2149 return;
5767             } ## end sub store_block_type
5768              
5769             sub write_line {
5770              
5771             # This routine receives lines one-by-one from the tokenizer and stores
5772             # them in a format suitable for further processing. After the last
5773             # line has been sent, the tokenizer will call sub 'finish_formatting'
5774             # to do the actual formatting.
5775              
5776 7647     7647 0 14721 my ( $self, $line_of_tokens_old ) = @_;
5777              
5778 7647         12949 my $rLL = $self->[_rLL_];
5779 7647         14441 my $line_of_tokens = {};
5780 7647         17637 foreach (
5781             qw(
5782             _curly_brace_depth
5783             _ending_in_quote
5784             _guessed_indentation_level
5785             _line_number
5786             _line_text
5787             _line_type
5788             _paren_depth
5789             _quote_character
5790             _square_bracket_depth
5791             _starting_in_quote
5792             )
5793             )
5794             {
5795 76470         154648 $line_of_tokens->{$_} = $line_of_tokens_old->{$_};
5796             }
5797              
5798 7647         13568 my $line_type = $line_of_tokens_old->{_line_type};
5799 7647         10809 my $tee_output;
5800              
5801 7647         12462 my $Klimit = $self->[_Klimit_];
5802 7647         11397 my $Kfirst;
5803              
5804             # Handle line of non-code
5805 7647 100       15706 if ( $line_type ne 'CODE' ) {
5806 169   66     942 $tee_output ||= $rOpts_tee_pod
      66        
5807             && substr( $line_type, 0, 3 ) eq 'POD';
5808              
5809 169         363 $line_of_tokens->{_level_0} = 0;
5810 169         324 $line_of_tokens->{_ci_level_0} = 0;
5811 169         354 $line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING;
5812 169         305 $line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING;
5813 169         335 $line_of_tokens->{_ended_in_blank_token} = undef;
5814              
5815             }
5816              
5817             # Handle line of code
5818             else {
5819              
5820 7478         12215 my $rtokens = $line_of_tokens_old->{_rtokens};
5821 7478         10287 my $jmax = @{$rtokens} - 1;
  7478         13163  
5822              
5823 7478 100       15128 if ( $jmax >= 0 ) {
5824              
5825 6672 100       14765 $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
5826              
5827             #----------------------------
5828             # get the tokens on this line
5829             #----------------------------
5830 6672         19608 $self->write_line_inner_loop( $line_of_tokens_old,
5831             $line_of_tokens );
5832              
5833             # update Klimit for added tokens
5834 6672         8994 $Klimit = @{$rLL} - 1;
  6672         11296  
5835              
5836             } ## end if ( $jmax >= 0 )
5837             else {
5838              
5839             # blank line
5840 806         2302 $line_of_tokens->{_level_0} = 0;
5841 806         1741 $line_of_tokens->{_ci_level_0} = 0;
5842 806         1628 $line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING;
5843 806         1597 $line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING;
5844 806         1691 $line_of_tokens->{_ended_in_blank_token} = undef;
5845              
5846             }
5847              
5848 7478   66     38580 $tee_output ||=
      66        
5849             $rOpts_tee_block_comments
5850             && $jmax == 0
5851             && $rLL->[$Kfirst]->[_TYPE_] eq '#';
5852              
5853 7478   100     28058 $tee_output ||=
      100        
5854             $rOpts_tee_side_comments
5855             && defined($Kfirst)
5856             && $Klimit > $Kfirst
5857             && $rLL->[$Klimit]->[_TYPE_] eq '#';
5858              
5859             } ## end if ( $line_type eq 'CODE')
5860              
5861             # Finish storing line variables
5862 7647         29771 $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
5863 7647         13752 $self->[_Klimit_] = $Klimit;
5864 7647         12604 my $rlines = $self->[_rlines_];
5865 7647         10923 push @{$rlines}, $line_of_tokens;
  7647         14345  
5866              
5867 7647 100       16275 if ($tee_output) {
5868 5         9 my $fh_tee = $self->[_fh_tee_];
5869 5         9 my $line_text = $line_of_tokens_old->{_line_text};
5870 5 50       20 $fh_tee->print($line_text) if ($fh_tee);
5871             }
5872              
5873 7647         66200 return;
5874             } ## end sub write_line
5875              
5876             sub write_line_inner_loop {
5877 6672     6672 0 12530 my ( $self, $line_of_tokens_old, $line_of_tokens ) = @_;
5878              
5879             #---------------------------------------------------------------------
5880             # Copy the tokens on one line received from the tokenizer to their new
5881             # storage locations.
5882             #---------------------------------------------------------------------
5883              
5884             # Input parameters:
5885             # $line_of_tokens_old = line received from tokenizer
5886             # $line_of_tokens = line of tokens being formed for formatter
5887              
5888 6672         10600 my $rtokens = $line_of_tokens_old->{_rtokens};
5889 6672         9240 my $jmax = @{$rtokens} - 1;
  6672         10497  
5890 6672 50       15605 if ( $jmax < 0 ) {
5891              
5892             # safety check; shouldn't happen
5893 0         0 DEVEL_MODE && Fault("unexpected jmax=$jmax\n");
5894 0         0 return;
5895             }
5896              
5897 6672         11583 my $line_index = $line_of_tokens_old->{_line_number} - 1;
5898 6672         10377 my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
5899 6672         10077 my $rblock_type = $line_of_tokens_old->{_rblock_type};
5900 6672         10034 my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
5901 6672         9869 my $rlevels = $line_of_tokens_old->{_rlevels};
5902 6672         9834 my $rci_levels = $line_of_tokens_old->{_rci_levels};
5903              
5904 6672         9643 my $rLL = $self->[_rLL_];
5905 6672         10228 my $rSS = $self->[_rSS_];
5906 6672         9946 my $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
5907              
5908 6672         9265 DEVEL_MODE
5909             && check_sequence_numbers( $rtokens, $rtoken_type,
5910             $rtype_sequence, $line_index + 1 );
5911              
5912             # Find the starting nesting depth ...
5913             # It must be the value of variable 'level' of the first token
5914             # because the nesting depth is used as a token tag in the
5915             # vertical aligner and is compared to actual levels.
5916             # So vertical alignment problems will occur with any other
5917             # starting value.
5918 6672 100       14743 if ( !defined($nesting_depth) ) {
5919 552         1553 $nesting_depth = $rlevels->[0];
5920 552 50       1846 $nesting_depth = 0 if ( $nesting_depth < 0 );
5921 552         1660 $rdepth_of_opening_seqno->[SEQ_ROOT] = $nesting_depth - 1;
5922             }
5923              
5924 6672         10079 my $j = -1;
5925              
5926             # NOTE: coding efficiency is critical in this loop over all tokens
5927 6672         9960 foreach my $token ( @{$rtokens} ) {
  6672         12245  
5928              
5929             # Do not clip the 'level' variable yet. We will do this
5930             # later, in sub 'store_token_to_go'. The reason is that in
5931             # files with level errors, the logic in 'weld_cuddled_else'
5932             # uses a stack logic that will give bad welds if we clip
5933             # levels here.
5934             ## $j++;
5935             ## if ( $rlevels->[$j] < 0 ) { $rlevels->[$j] = 0 }
5936              
5937 51311         70652 my $seqno = EMPTY_STRING;
5938              
5939             # Handle tokens with sequence numbers ...
5940             # note the ++ increment hidden here for efficiency
5941 51311 100       94364 if ( $rtype_sequence->[ ++$j ] ) {
5942 9116         14532 $seqno = $rtype_sequence->[$j];
5943 9116         12906 my $sign = 1;
5944 9116 100       26015 if ( $is_opening_token{$token} ) {
    100          
    100          
    50          
5945 4371         6185 $self->[_K_opening_container_]->{$seqno} = @{$rLL};
  4371         15678  
5946 4371         9437 $rdepth_of_opening_seqno->[$seqno] = $nesting_depth;
5947 4371         6747 $nesting_depth++;
5948              
5949             # Save a sequenced block type at its opening token.
5950             # Note that unsequenced block types can occur in
5951             # unbalanced code with errors but are ignored here.
5952 4371 100       11766 $self->store_block_type( $rblock_type->[$j], $seqno )
5953             if ( $rblock_type->[$j] );
5954             }
5955             elsif ( $is_closing_token{$token} ) {
5956              
5957             # The opening depth should always be defined, and
5958             # it should equal $nesting_depth-1. To protect
5959             # against unforseen error conditions, however, we
5960             # will check this and fix things if necessary. For
5961             # a test case see issue c055.
5962 4371         8254 my $opening_depth = $rdepth_of_opening_seqno->[$seqno];
5963 4371 50       9351 if ( !defined($opening_depth) ) {
5964 0         0 $opening_depth = $nesting_depth - 1;
5965 0 0       0 $opening_depth = 0 if ( $opening_depth < 0 );
5966 0         0 $rdepth_of_opening_seqno->[$seqno] = $opening_depth;
5967              
5968             # This is not fatal but should not happen. The
5969             # tokenizer generates sequence numbers
5970             # incrementally upon encountering each new
5971             # opening token, so every positive sequence
5972             # number should correspond to an opening token.
5973 0         0 DEVEL_MODE && Fault(<<EOM);
5974             No opening token seen for closing token = '$token' at seq=$seqno at depth=$opening_depth
5975             EOM
5976             }
5977 4371         6660 $self->[_K_closing_container_]->{$seqno} = @{$rLL};
  4371         10406  
5978 4371         7069 $nesting_depth = $opening_depth;
5979 4371         6908 $sign = -1;
5980             }
5981             elsif ( $token eq '?' ) {
5982 187         368 $self->[_K_opening_ternary_]->{$seqno} = @{$rLL};
  187         756  
5983             }
5984             elsif ( $token eq ':' ) {
5985 187         441 $sign = -1;
5986 187         377 $self->[_K_closing_ternary_]->{$seqno} = @{$rLL};
  187         485  
5987             }
5988              
5989             # The only sequenced types output by the tokenizer are
5990             # the opening & closing containers and the ternary
5991             # types. So we would only get here if the tokenizer has
5992             # been changed to mark some other tokens with sequence
5993             # numbers, or if an error has been introduced in a
5994             # hash such as %is_opening_container
5995             else {
5996 0         0 DEVEL_MODE && Fault(<<EOM);
5997             Unexpected sequenced token '$token' of type '$rtoken_type->[$j]', sequence=$seqno arrived from tokenizer.
5998             Expecting only opening or closing container tokens or ternary tokens with sequence numbers.
5999             EOM
6000             }
6001              
6002 9116 100       16700 if ( $sign > 0 ) {
6003 4558         6299 $self->[_Iss_opening_]->[$seqno] = @{$rSS};
  4558         9222  
6004              
6005             # For efficiency, we find the maximum level of
6006             # opening tokens of any type. The actual maximum
6007             # level will be that of their contents which is 1
6008             # greater. That will be fixed in sub
6009             # 'finish_formatting'.
6010 4558         7805 my $level = $rlevels->[$j];
6011 4558 100       10895 if ( $level > $self->[_maximum_level_] ) {
6012 836         1966 $self->[_maximum_level_] = $level;
6013 836         1951 $self->[_maximum_level_at_line_] = $line_index + 1;
6014             }
6015             }
6016 4558         6222 else { $self->[_Iss_closing_]->[$seqno] = @{$rSS} }
  4558         9002  
6017 9116         12824 push @{$rSS}, $sign * $seqno;
  9116         17151  
6018              
6019             }
6020              
6021 51311         64981 my @tokary;
6022 51311         191932 @tokary[
6023              
6024             _TOKEN_,
6025             _TYPE_,
6026             _TYPE_SEQUENCE_,
6027             _LEVEL_,
6028             _CI_LEVEL_,
6029             _LINE_INDEX_,
6030              
6031             ] = (
6032              
6033             $token,
6034             $rtoken_type->[$j],
6035             $seqno,
6036             $rlevels->[$j],
6037             $rci_levels->[$j],
6038             $line_index,
6039              
6040             );
6041 51311         69610 push @{$rLL}, \@tokary;
  51311         121380  
6042             } ## end token loop
6043              
6044             # Need to remember if we can trim the input line
6045 6672         19475 $line_of_tokens->{_ended_in_blank_token} = $rtoken_type->[$jmax] eq 'b';
6046              
6047             # Values needed by Logger
6048 6672         14090 $line_of_tokens->{_level_0} = $rlevels->[0];
6049 6672         12653 $line_of_tokens->{_ci_level_0} = $rci_levels->[0];
6050             $line_of_tokens->{_nesting_blocks_0} =
6051 6672         14440 $line_of_tokens_old->{_nesting_blocks_0};
6052             $line_of_tokens->{_nesting_tokens_0} =
6053 6672         13403 $line_of_tokens_old->{_nesting_tokens_0};
6054              
6055 6672         14608 return;
6056              
6057             } ## end sub write_line_inner_loop
6058              
6059             } ## end closure write_line
6060              
6061             #############################################
6062             # CODE SECTION 5: Pre-process the entire file
6063             #############################################
6064              
6065             sub finish_formatting {
6066              
6067 555     555 0 1987 my ( $self, $severe_error ) = @_;
6068              
6069             # The file has been tokenized and is ready to be formatted.
6070             # All of the relevant data is stored in $self, ready to go.
6071              
6072             # Returns:
6073             # true if input file was copied verbatim due to errors
6074             # false otherwise
6075              
6076             # Some of the code in sub break_lists is not robust enough to process code
6077             # with arbitrary brace errors. The simplest fix is to just return the file
6078             # verbatim if there are brace errors. This fixes issue c160.
6079 555   33     3564 $severe_error ||= get_saw_brace_error();
6080              
6081             # Check the maximum level. If it is extremely large we will give up and
6082             # output the file verbatim. Note that the actual maximum level is 1
6083             # greater than the saved value, so we fix that here.
6084 555         1728 $self->[_maximum_level_] += 1;
6085 555         1455 my $maximum_level = $self->[_maximum_level_];
6086 555         3185 my $maximum_table_index = $#maximum_line_length_at_level;
6087 555 50 33     3497 if ( !$severe_error && $maximum_level >= $maximum_table_index ) {
6088 0   0     0 $severe_error ||= 1;
6089 0         0 Warn(<<EOM);
6090             The maximum indentation level, $maximum_level, exceeds the builtin limit of $maximum_table_index.
6091             Something may be wrong; formatting will be skipped.
6092             EOM
6093             }
6094              
6095             # Dump any requested block summary data
6096 555 50       2411 if ( $rOpts->{'dump-block-summary'} ) {
6097 0 0       0 if ($severe_error) { Exit(1) }
  0         0  
6098 0         0 $self->dump_block_summary();
6099 0         0 Exit(0);
6100             }
6101              
6102             # output file verbatim if severe error or no formatting requested
6103 555 50 33     3538 if ( $severe_error || $rOpts->{notidy} ) {
6104 0         0 $self->dump_verbatim();
6105 0         0 $self->wrapup($severe_error);
6106 0         0 return 1;
6107             }
6108              
6109             # Update the 'save_logfile' flag based to include any tokenization errors.
6110             # We can save time by skipping logfile calls if it is not going to be saved.
6111 555         1508 my $logger_object = $self->[_logger_object_];
6112 555 100       1881 if ($logger_object) {
6113 553         2732 my $save_logfile = $logger_object->get_save_logfile();
6114 553         1546 $self->[_save_logfile_] = $save_logfile;
6115 553         1381 my $file_writer_object = $self->[_file_writer_object_];
6116 553         3104 $file_writer_object->set_save_logfile($save_logfile);
6117             }
6118              
6119             {
6120 555         1275 my $rix_side_comments = $self->set_CODE_type();
  555         4107  
6121              
6122 555         4384 $self->find_non_indenting_braces($rix_side_comments);
6123              
6124             # Handle any requested side comment deletions. It is easier to get
6125             # this done here rather than farther down the pipeline because IO
6126             # lines take a different route, and because lines with deleted HSC
6127             # become BL lines. We have already handled any tee requests in sub
6128             # getline, so it is safe to delete side comments now.
6129 555 100 100     3417 $self->delete_side_comments($rix_side_comments)
6130             if ( $rOpts_delete_side_comments
6131             || $rOpts_delete_closing_side_comments );
6132             }
6133              
6134             # Verify that the line hash does not have any unknown keys.
6135 555         1160 $self->check_line_hashes() if (DEVEL_MODE);
6136              
6137             {
6138             # Make a pass through all tokens, adding or deleting any whitespace as
6139             # required. Also make any other changes, such as adding semicolons.
6140             # All token changes must be made here so that the token data structure
6141             # remains fixed for the rest of this iteration.
6142 555         1100 my ( $error, $rqw_lines ) = $self->respace_tokens();
  555         3390  
6143 555 50       2233 if ($error) {
6144 0         0 $self->dump_verbatim();
6145 0         0 $self->wrapup();
6146 0         0 return 1;
6147             }
6148              
6149             # calling set_ci after respace allows it to use type counts
6150 555         3693 $self->set_ci();
6151              
6152 555         4500 $self->find_multiline_qw($rqw_lines);
6153             }
6154              
6155 555         3776 $self->examine_vertical_tightness_flags();
6156              
6157 555         3825 $self->set_excluded_lp_containers();
6158              
6159 555         3083 $self->keep_old_line_breaks();
6160              
6161             # Implement any welding needed for the -wn or -cb options
6162 555         2636 $self->weld_containers();
6163              
6164             # Collect info needed to implement the -xlp style
6165 555 100 100     2579 $self->xlp_collapsed_lengths()
6166             if ( $rOpts_line_up_parentheses && $rOpts_extended_line_up_parentheses );
6167              
6168             # Locate small nested blocks which should not be broken
6169 555         3517 $self->mark_short_nested_blocks();
6170              
6171 555         2793 $self->special_indentation_adjustments();
6172              
6173             # Verify that the main token array looks OK. If this ever causes a fault
6174             # then place similar checks before the sub calls above to localize the
6175             # problem.
6176 555         1002 $self->check_rLL("Before 'process_all_lines'") if (DEVEL_MODE);
6177              
6178             # Finishes formatting and write the result to the line sink.
6179             # Eventually this call should just change the 'rlines' data according to the
6180             # new line breaks and then return so that we can do an internal iteration
6181             # before continuing with the next stages of formatting.
6182 555         3420 $self->process_all_lines();
6183              
6184             # A final routine to tie up any loose ends
6185 555         4313 $self->wrapup();
6186 555         2666 return;
6187             } ## end sub finish_formatting
6188              
6189             my %is_loop_type;
6190              
6191             BEGIN {
6192 38     38   284 my @q = qw( for foreach while do until );
6193 38         26712 @{is_loop_type}{@q} = (1) x scalar(@q);
6194             }
6195              
6196             sub find_level_info {
6197              
6198             # Find level ranges and total variations of all code blocks in this file.
6199              
6200             # Returns:
6201             # ref to hash with block info, with seqno as key (see below)
6202              
6203 0     0 0 0 my ($self) = @_;
6204              
6205             # The array _rSS_ has the complete container tree for this file.
6206 0         0 my $rSS = $self->[_rSS_];
6207              
6208             # We will be ignoring everything except code block containers
6209 0         0 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
6210              
6211 0         0 my @stack;
6212             my %level_info;
6213              
6214             # TREE_LOOP:
6215 0         0 foreach my $sseq ( @{$rSS} ) {
  0         0  
6216 0         0 my $stack_depth = @stack;
6217 0 0       0 my $seq_next = $sseq > 0 ? $sseq : -$sseq;
6218              
6219 0 0       0 next if ( !$rblock_type_of_seqno->{$seq_next} );
6220 0 0       0 if ( $sseq > 0 ) {
6221              
6222             # STACK_LOOP:
6223 0         0 my $item;
6224 0         0 foreach my $seq (@stack) {
6225 0         0 $item = $level_info{$seq};
6226 0 0       0 if ( $item->{maximum_depth} < $stack_depth ) {
6227 0         0 $item->{maximum_depth} = $stack_depth;
6228             }
6229 0         0 $item->{block_count}++;
6230             } ## end STACK LOOP
6231              
6232 0         0 push @stack, $seq_next;
6233 0         0 my $block_type = $rblock_type_of_seqno->{$seq_next};
6234              
6235             # If this block is a loop nested within a loop, then we
6236             # will mark it as an 'inner_loop'. This is a useful
6237             # complexity measure.
6238 0         0 my $is_inner_loop = 0;
6239 0 0 0     0 if ( $is_loop_type{$block_type} && defined($item) ) {
6240 0         0 $is_inner_loop = $is_loop_type{ $item->{block_type} };
6241             }
6242              
6243 0         0 $level_info{$seq_next} = {
6244             starting_depth => $stack_depth,
6245             maximum_depth => $stack_depth,
6246             block_count => 1,
6247             block_type => $block_type,
6248             is_inner_loop => $is_inner_loop,
6249             };
6250             }
6251             else {
6252 0         0 my $seq_test = pop @stack;
6253              
6254             # error check
6255 0 0       0 if ( $seq_test != $seq_next ) {
6256              
6257             # Shouldn't happen - the $rSS array must have an error
6258 0         0 DEVEL_MODE && Fault("stack error finding total depths\n");
6259              
6260 0         0 %level_info = ();
6261 0         0 last;
6262             }
6263             }
6264             } ## end TREE_LOOP
6265 0         0 return \%level_info;
6266             } ## end sub find_level_info
6267              
6268             sub find_loop_label {
6269              
6270 0     0 0 0 my ( $self, $seqno ) = @_;
6271              
6272             # Given:
6273             # $seqno = sequence number of a block of code for a loop
6274             # Return:
6275             # $label = the loop label text, if any, or an empty string
6276              
6277 0         0 my $rLL = $self->[_rLL_];
6278 0         0 my $rlines = $self->[_rlines_];
6279 0         0 my $K_opening_container = $self->[_K_opening_container_];
6280              
6281 0         0 my $label = EMPTY_STRING;
6282 0         0 my $K_opening = $K_opening_container->{$seqno};
6283              
6284             # backup to the line with the opening paren, if any, in case the
6285             # keyword is on a different line
6286 0         0 my $Kp = $self->K_previous_code($K_opening);
6287 0 0       0 return $label unless ( defined($Kp) );
6288 0 0       0 if ( $rLL->[$Kp]->[_TOKEN_] eq ')' ) {
6289 0         0 $seqno = $rLL->[$Kp]->[_TYPE_SEQUENCE_];
6290 0         0 $K_opening = $K_opening_container->{$seqno};
6291             }
6292              
6293 0 0       0 return $label unless ( defined($K_opening) );
6294 0         0 my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_];
6295              
6296             # look for a label within a few lines; allow a couple of blank lines
6297 0         0 foreach my $lx ( reverse( $lx_open - 3 .. $lx_open ) ) {
6298 0 0       0 last if ( $lx < 0 );
6299 0         0 my $line_of_tokens = $rlines->[$lx];
6300 0         0 my $line_type = $line_of_tokens->{_line_type};
6301              
6302             # stop search on a non-code line
6303 0 0       0 last if ( $line_type ne 'CODE' );
6304              
6305 0         0 my $rK_range = $line_of_tokens->{_rK_range};
6306 0         0 my ( $Kfirst, $Klast ) = @{$rK_range};
  0         0  
6307              
6308             # skip a blank line
6309 0 0       0 next if ( !defined($Kfirst) );
6310              
6311             # check for a lable
6312 0 0       0 if ( $rLL->[$Kfirst]->[_TYPE_] eq 'J' ) {
6313 0         0 $label = $rLL->[$Kfirst]->[_TOKEN_];
6314 0         0 last;
6315             }
6316              
6317             # quit the search if we are above the starting line
6318 0 0       0 last if ( $lx < $lx_open );
6319             }
6320              
6321 0         0 return $label;
6322             } ## end sub find_loop_label
6323              
6324             { ## closure find_mccabe_count
6325             my %is_mccabe_logic_keyword;
6326             my %is_mccabe_logic_operator;
6327              
6328             BEGIN {
6329 38     38   265 my @q = (qw( && || ||= &&= ? <<= >>= ));
6330 38         228 @is_mccabe_logic_operator{@q} = (1) x scalar(@q);
6331              
6332 38         191 @q = (qw( and or xor if else elsif unless until while for foreach ));
6333 38         96564 @is_mccabe_logic_keyword{@q} = (1) x scalar(@q);
6334             } ## end BEGIN
6335              
6336             sub find_mccabe_count {
6337 0     0 0 0 my ($self) = @_;
6338              
6339             # Find the cumulative mccabe count to each token
6340             # Return '$rmccabe_count_sum' = ref to array with cumulative
6341             # mccabe count to each token $K
6342              
6343             # NOTE: This sub currently follows the definitions in Perl::Critic
6344              
6345 0         0 my $rmccabe_count_sum;
6346 0         0 my $rLL = $self->[_rLL_];
6347 0         0 my $count = 0;
6348 0         0 my $Klimit = $self->[_Klimit_];
6349 0         0 foreach my $KK ( 0 .. $Klimit ) {
6350 0         0 $rmccabe_count_sum->{$KK} = $count;
6351 0         0 my $type = $rLL->[$KK]->[_TYPE_];
6352 0 0       0 if ( $type eq 'k' ) {
    0          
6353 0         0 my $token = $rLL->[$KK]->[_TOKEN_];
6354 0 0       0 if ( $is_mccabe_logic_keyword{$token} ) { $count++ }
  0         0  
6355             }
6356             elsif ( $is_mccabe_logic_operator{$type} ) {
6357 0         0 $count++;
6358             }
6359             }
6360 0         0 $rmccabe_count_sum->{ $Klimit + 1 } = $count;
6361 0         0 return $rmccabe_count_sum;
6362             } ## end sub find_mccabe_count
6363             } ## end closure find_mccabe_count
6364              
6365             sub find_code_line_count {
6366 0     0 0 0 my ($self) = @_;
6367              
6368             # Find the cumulative number of lines of code, excluding blanks,
6369             # comments and pod.
6370             # Return '$rcode_line_count' = ref to array with cumulative
6371             # code line count for each input line number.
6372              
6373 0         0 my $rcode_line_count;
6374 0         0 my $rLL = $self->[_rLL_];
6375 0         0 my $rlines = $self->[_rlines_];
6376 0         0 my $ix_line = -1;
6377 0         0 my $code_line_count = 0;
6378              
6379             # loop over all lines
6380 0         0 foreach my $line_of_tokens ( @{$rlines} ) {
  0         0  
6381 0         0 $ix_line++;
6382              
6383             # what type of line?
6384 0         0 my $line_type = $line_of_tokens->{_line_type};
6385              
6386             # if 'CODE' it must be non-blank and non-comment
6387 0 0       0 if ( $line_type eq 'CODE' ) {
    0          
6388 0         0 my $rK_range = $line_of_tokens->{_rK_range};
6389 0         0 my ( $Kfirst, $Klast ) = @{$rK_range};
  0         0  
6390              
6391 0 0       0 if ( defined($Kfirst) ) {
6392              
6393             # it is non-blank
6394 0 0       0 my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1;
6395 0 0 0     0 if ( $jmax > 0 || $rLL->[$Klast]->[_TYPE_] ne '#' ) {
6396              
6397             # ok, it is a non-comment
6398 0         0 $code_line_count++;
6399             }
6400             }
6401             }
6402              
6403             # Count all other special line types except pod;
6404             # For a list of line types see sub 'process_all_lines'
6405 0         0 elsif ( $line_type !~ /^POD/ ) { $code_line_count++ }
6406              
6407             # Store the cumulative count using the input line index
6408 0         0 $rcode_line_count->[$ix_line] = $code_line_count;
6409             }
6410 0         0 return $rcode_line_count;
6411             } ## end sub find_code_line_count
6412              
6413             sub find_selected_packages {
6414              
6415 0     0 0 0 my ( $self, $rdump_block_types ) = @_;
6416              
6417             # returns a list of all selected package statements in a file
6418 0         0 my @package_list;
6419              
6420 0 0 0     0 unless ( $rdump_block_types->{'*'}
      0        
6421             || $rdump_block_types->{'package'}
6422             || $rdump_block_types->{'class'} )
6423             {
6424 0         0 return \@package_list;
6425             }
6426              
6427 0         0 my $rLL = $self->[_rLL_];
6428 0         0 my $Klimit = $self->[_Klimit_];
6429 0         0 my $rlines = $self->[_rlines_];
6430              
6431 0         0 my $K_closing_container = $self->[_K_closing_container_];
6432 0         0 my @package_sweep;
6433 0         0 foreach my $KK ( 0 .. $Klimit ) {
6434 0         0 my $item = $rLL->[$KK];
6435 0         0 my $type = $item->[_TYPE_];
6436 0 0       0 if ( $type ne 'i' ) {
6437 0         0 next;
6438             }
6439 0         0 my $token = $item->[_TOKEN_];
6440 0 0 0     0 if ( substr( $token, 0, 7 ) eq 'package' && $token =~ /^package\s/
      0        
      0        
6441             || substr( $token, 0, 5 ) eq 'class' && $token =~ /^class\s/ )
6442             {
6443              
6444 0         0 $token =~ s/\s+/ /g;
6445 0         0 my ( $keyword, $name ) = split /\s+/, $token, 2;
6446              
6447 0         0 my $lx_start = $item->[_LINE_INDEX_];
6448 0         0 my $level = $item->[_LEVEL_];
6449 0         0 my $parent_seqno = $self->parent_seqno_by_K($KK);
6450              
6451             # Skip a class BLOCK because it will be handled as a block
6452 0 0       0 if ( $keyword eq 'class' ) {
6453 0         0 my $line_of_tokens = $rlines->[$lx_start];
6454 0         0 my $rK_range = $line_of_tokens->{_rK_range};
6455 0         0 my ( $K_first, $K_last ) = @{$rK_range};
  0         0  
6456 0 0       0 if ( $rLL->[$K_last]->[_TYPE_] eq '#' ) {
6457 0         0 $K_last = $self->K_previous_code($K_last);
6458             }
6459 0 0       0 if ( defined($K_last) ) {
6460 0         0 my $seqno_class = $rLL->[$K_last]->[_TYPE_SEQUENCE_];
6461             my $block_type_next =
6462 0         0 $self->[_rblock_type_of_seqno_]->{$seqno_class};
6463              
6464             # these block types are currently marked 'package'
6465             # but may be 'class' in the future, so allow both.
6466 0 0 0     0 if ( defined($block_type_next)
6467             && $block_type_next =~ /^(class|package)\b/ )
6468             {
6469 0         0 next;
6470             }
6471             }
6472             }
6473              
6474 0         0 my $K_closing = $Klimit;
6475 0 0       0 if ( $parent_seqno != SEQ_ROOT ) {
6476 0         0 my $Kc = $K_closing_container->{$parent_seqno};
6477 0 0       0 if ( defined($Kc) ) {
6478 0         0 $K_closing = $Kc;
6479             }
6480             }
6481              
6482             # This package ends any previous package at this level
6483 0 0       0 if ( defined( my $ix = $package_sweep[$level] ) ) {
6484 0         0 my $rpk = $package_list[$ix];
6485 0         0 my $Kc = $rpk->{K_closing};
6486 0 0       0 if ( $Kc > $KK ) {
6487 0         0 $rpk->{K_closing} = $KK - 1;
6488             }
6489             }
6490 0         0 $package_sweep[$level] = @package_list;
6491              
6492             # max_change and block_count are not currently reported 'package'
6493 0         0 push @package_list,
6494             {
6495             line_start => $lx_start + 1,
6496             K_opening => $KK,
6497             K_closing => $Klimit,
6498             name => $name,
6499             type => $keyword,
6500             level => $level,
6501             max_change => 0,
6502             block_count => 0,
6503             };
6504             }
6505             }
6506              
6507 0         0 return \@package_list;
6508             } ## end sub find_selected_packages
6509              
6510             sub find_selected_blocks {
6511              
6512 0     0 0 0 my ( $self, $rdump_block_types ) = @_;
6513              
6514             # Find blocks needed for --dump-block-summary
6515             # Returns:
6516             # $rslected_blocks = ref to a list of information on the selected blocks
6517              
6518 0         0 my $rLL = $self->[_rLL_];
6519 0         0 my $rlines = $self->[_rlines_];
6520 0         0 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
6521 0         0 my $K_opening_container = $self->[_K_opening_container_];
6522 0         0 my $K_closing_container = $self->[_K_closing_container_];
6523 0         0 my $ris_asub_block = $self->[_ris_asub_block_];
6524 0         0 my $ris_sub_block = $self->[_ris_sub_block_];
6525              
6526 0         0 my $dump_all_types = $rdump_block_types->{'*'};
6527              
6528             # Get level variation info for code blocks
6529 0         0 my $rlevel_info = $self->find_level_info();
6530              
6531 0         0 my @selected_blocks;
6532              
6533             #---------------------------------------------------
6534             # BEGIN loop over all blocks to find selected blocks
6535             #---------------------------------------------------
6536 0         0 foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
  0         0  
6537              
6538 0         0 my $type;
6539 0         0 my $name = EMPTY_STRING;
6540 0         0 my $block_type = $rblock_type_of_seqno->{$seqno};
6541 0         0 my $K_opening = $K_opening_container->{$seqno};
6542 0         0 my $K_closing = $K_closing_container->{$seqno};
6543 0         0 my $level = $rLL->[$K_opening]->[_LEVEL_];
6544              
6545 0         0 my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_];
6546 0         0 my $line_of_tokens = $rlines->[$lx_open];
6547 0         0 my $rK_range = $line_of_tokens->{_rK_range};
6548 0         0 my ( $Kfirst, $Klast ) = @{$rK_range};
  0         0  
6549 0 0 0     0 if ( !defined($Kfirst) || !defined($Klast) || $Kfirst > $K_opening ) {
      0        
6550 0         0 my $line_type = $line_of_tokens->{_line_type};
6551              
6552             # shouldn't happen
6553 0         0 my $CODE_type = $line_of_tokens->{_code_type};
6554 0         0 DEVEL_MODE && Fault(<<EOM);
6555             unexpected line_type=$line_type at line $lx_open, code type=$CODE_type
6556             EOM
6557 0         0 next;
6558             }
6559              
6560 0         0 my ( $max_change, $block_count, $inner_loop_plus ) =
6561             ( 0, 0, EMPTY_STRING );
6562 0         0 my $item = $rlevel_info->{$seqno};
6563 0 0       0 if ( defined($item) ) {
6564 0         0 my $starting_depth = $item->{starting_depth};
6565 0         0 my $maximum_depth = $item->{maximum_depth};
6566 0         0 $block_count = $item->{block_count};
6567 0         0 $max_change = $maximum_depth - $starting_depth + 1;
6568              
6569             # this is a '+' character if this block is an inner loops
6570 0 0       0 $inner_loop_plus = $item->{is_inner_loop} ? '+' : EMPTY_STRING;
6571             }
6572              
6573             # Skip closures unless type 'closure' is explicitly requested
6574 0 0 0     0 if ( ( $block_type eq '}' || $block_type eq ';' )
    0 0        
    0 0        
    0 0        
    0 0        
    0 0        
      0        
      0        
      0        
      0        
      0        
6575             && $rdump_block_types->{'closure'} )
6576             {
6577 0         0 $type = 'closure';
6578             }
6579              
6580             # Both 'sub' and 'asub' select an anonymous sub.
6581             # This allows anonymous subs to be explicitely selected
6582             elsif (
6583             $ris_asub_block->{$seqno}
6584             && ( $dump_all_types
6585             || $rdump_block_types->{'sub'}
6586             || $rdump_block_types->{'asub'} )
6587             )
6588             {
6589 0         0 $type = 'asub';
6590              
6591             # Look back to try to find some kind of name, such as
6592             # my $var = sub { - var is type 'i'
6593             # var => sub { - var is type 'w'
6594             # -var => sub { - var is type 'w'
6595             # 'var' => sub { - var is type 'Q'
6596 0         0 my ( $saw_equals, $saw_fat_comma, $blank_count );
6597 0         0 foreach my $KK ( reverse( $Kfirst .. $K_opening - 1 ) ) {
6598 0         0 my $token_type = $rLL->[$KK]->[_TYPE_];
6599 0 0       0 if ( $token_type eq 'b' ) { $blank_count++; next }
  0         0  
  0         0  
6600 0 0       0 if ( $token_type eq '=>' ) { $saw_fat_comma++; next }
  0         0  
  0         0  
6601 0 0       0 if ( $token_type eq '=' ) { $saw_equals++; next }
  0         0  
  0         0  
6602 0 0 0     0 if ( $token_type eq 'i' && $saw_equals
      0        
      0        
      0        
6603             || ( $token_type eq 'w' || $token_type eq 'Q' )
6604             && $saw_fat_comma )
6605             {
6606 0         0 $name = $rLL->[$KK]->[_TOKEN_];
6607 0         0 last;
6608             }
6609             }
6610             }
6611             elsif ( $ris_sub_block->{$seqno}
6612             && ( $dump_all_types || $rdump_block_types->{'sub'} ) )
6613             {
6614 0         0 $type = 'sub';
6615              
6616             # what we want:
6617             # $block_type $name
6618             # 'sub setidentifier($)' => 'setidentifier'
6619             # 'method setidentifier($)' => 'setidentifier'
6620 0         0 my @parts = split /\s+/, $block_type;
6621 0         0 $name = $parts[1];
6622 0         0 $name =~ s/\(.*$//;
6623             }
6624             elsif (
6625             $block_type =~ /^(package|class)\b/
6626             && ( $dump_all_types
6627             || $rdump_block_types->{'package'}
6628             || $rdump_block_types->{'class'} )
6629             )
6630             {
6631 0         0 $type = 'class';
6632 0         0 my @parts = split /\s+/, $block_type;
6633 0         0 $name = $parts[1];
6634 0         0 $name =~ s/\(.*$//;
6635             }
6636             elsif (
6637             $is_loop_type{$block_type}
6638             && ( $dump_all_types
6639             || $rdump_block_types->{$block_type}
6640             || $rdump_block_types->{ $block_type . $inner_loop_plus }
6641             || $rdump_block_types->{$inner_loop_plus} )
6642             )
6643             {
6644 0         0 $type = $block_type . $inner_loop_plus;
6645             }
6646             elsif ( $dump_all_types || $rdump_block_types->{$block_type} ) {
6647 0 0       0 if ( $is_loop_type{$block_type} ) {
6648 0         0 $name = $self->find_loop_label($seqno);
6649             }
6650 0         0 $type = $block_type;
6651             }
6652             else {
6653 0         0 next;
6654             }
6655              
6656 0         0 push @selected_blocks,
6657             {
6658             K_opening => $K_opening,
6659             K_closing => $K_closing,
6660             line_start => $lx_open + 1,
6661             name => $name,
6662             type => $type,
6663             level => $level,
6664             max_change => $max_change,
6665             block_count => $block_count,
6666             };
6667             } ## END loop to get info for selected blocks
6668 0         0 return \@selected_blocks;
6669             } ## end sub find_selected_blocks
6670              
6671             sub dump_block_summary {
6672 0     0 0 0 my ($self) = @_;
6673              
6674             # Dump information about selected code blocks to STDOUT
6675             # This sub is called when
6676             # --dump-block-summary (-dbs) is set.
6677              
6678             # The following controls are available:
6679             # --dump-block-types=s (-dbt=s), where s is a list of block types
6680             # (if else elsif for foreach while do ... sub) ; default is 'sub'
6681             # --dump-block-minimum-lines=n (-dbml=n), where n is the minimum
6682             # number of lines for a block to be included; default is 20.
6683              
6684 0         0 my $rOpts_dump_block_types = $rOpts->{'dump-block-types'};
6685 0 0       0 if ( !defined($rOpts_dump_block_types) ) { $rOpts_dump_block_types = 'sub' }
  0         0  
6686 0         0 $rOpts_dump_block_types =~ s/^\s+//;
6687 0         0 $rOpts_dump_block_types =~ s/\s+$//;
6688 0         0 my @list = split /\s+/, $rOpts_dump_block_types;
6689 0         0 my %dump_block_types;
6690 0         0 @{dump_block_types}{@list} = (1) x scalar(@list);
6691              
6692             # Get block info
6693 0         0 my $rselected_blocks = $self->find_selected_blocks( \%dump_block_types );
6694              
6695             # Get package info
6696 0         0 my $rpackage_list = $self->find_selected_packages( \%dump_block_types );
6697              
6698 0 0 0     0 return if ( !@{$rselected_blocks} && !@{$rpackage_list} );
  0         0  
  0         0  
6699              
6700 0         0 my $input_stream_name = get_input_stream_name();
6701              
6702             # Get code line count
6703 0         0 my $rcode_line_count = $self->find_code_line_count();
6704              
6705             # Get mccabe count
6706 0         0 my $rmccabe_count_sum = $self->find_mccabe_count();
6707              
6708 0         0 my $rOpts_dump_block_minimum_lines = $rOpts->{'dump-block-minimum-lines'};
6709 0 0       0 if ( !defined($rOpts_dump_block_minimum_lines) ) {
6710 0         0 $rOpts_dump_block_minimum_lines = 20;
6711             }
6712              
6713 0         0 my $rLL = $self->[_rLL_];
6714              
6715             # merge blocks and packages, add various counts, filter and print to STDOUT
6716 0         0 my $routput_lines = [];
6717 0         0 foreach my $item ( @{$rselected_blocks}, @{$rpackage_list} ) {
  0         0  
  0         0  
6718              
6719 0         0 my $K_opening = $item->{K_opening};
6720 0         0 my $K_closing = $item->{K_closing};
6721              
6722             # define total number of lines
6723 0         0 my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_];
6724 0         0 my $lx_close = $rLL->[$K_closing]->[_LINE_INDEX_];
6725 0         0 my $line_count = $lx_close - $lx_open + 1;
6726              
6727             # define total number of lines of code excluding blanks, comments, pod
6728 0         0 my $code_lines_open = $rcode_line_count->[$lx_open];
6729 0         0 my $code_lines_close = $rcode_line_count->[$lx_close];
6730 0         0 my $code_lines = 0;
6731 0 0 0     0 if ( defined($code_lines_open) && defined($code_lines_close) ) {
6732 0         0 $code_lines = $code_lines_close - $code_lines_open + 1;
6733             }
6734              
6735             # filter out blocks below the selected code line limit
6736 0 0       0 if ( $code_lines < $rOpts_dump_block_minimum_lines ) {
6737 0         0 next;
6738             }
6739              
6740             # add mccabe_count for this block
6741 0         0 my $mccabe_closing = $rmccabe_count_sum->{ $K_closing + 1 };
6742 0         0 my $mccabe_opening = $rmccabe_count_sum->{$K_opening};
6743 0         0 my $mccabe_count = 1; # add 1 to match Perl::Critic
6744 0 0 0     0 if ( defined($mccabe_opening) && defined($mccabe_closing) ) {
6745 0         0 $mccabe_count += $mccabe_closing - $mccabe_opening;
6746             }
6747              
6748             # Store the final set of print variables
6749 0         0 push @{$routput_lines}, [
6750              
6751             $input_stream_name,
6752             $item->{line_start},
6753             $line_count,
6754             $code_lines,
6755             $item->{type},
6756             $item->{name},
6757             $item->{level},
6758             $item->{max_change},
6759             $item->{block_count},
6760 0         0 $mccabe_count,
6761              
6762             ];
6763             }
6764              
6765 0 0       0 return unless @{$routput_lines};
  0         0  
6766              
6767             # Sort blocks and packages on starting line number
6768 0         0 my @sorted_lines = sort { $a->[1] <=> $b->[1] } @{$routput_lines};
  0         0  
  0         0  
6769              
6770 0         0 print STDOUT
6771             "file,line,line_count,code_lines,type,name,level,max_change,block_count,mccabe_count\n";
6772              
6773 0         0 foreach my $rline_vars (@sorted_lines) {
6774 0         0 my $line = join( ",", @{$rline_vars} ) . "\n";
  0         0  
6775 0         0 print STDOUT $line;
6776             }
6777 0         0 return;
6778             } ## end sub dump_block_summary
6779              
6780             sub set_ci {
6781              
6782 555     555 0 1670 my ($self) = @_;
6783              
6784             # Set the basic continuation indentation (ci) for all tokens.
6785             # This is a replacement for the values previously computed in
6786             # sub Perl::Tidy::Tokenizer::tokenizer_wrapup. In most cases it
6787             # produces identical results, but in a few cases it is an improvement.
6788              
6789 38     38   438 use constant DEBUG_SET_CI => 0;
  38         139  
  38         6663  
6790              
6791             # The following flag values are temporarily available for experimentation:
6792             # -exp=ci0 OLD: use ci values computed by tokenizer
6793             # -exp=ci1 MIXED: old ci values except for new ci for comments.
6794             # -exp=ci2 NEW: ci values computed by this sub
6795 555         1307 my $calculate_ci = 2; # current default
6796 555 50       3543 if ( DEVEL_MODE || DEBUG_SET_CI ) { $calculate_ci = 2 }
6797 0 50       0 elsif ($rOpts->{'experimental'}
6798             && $rOpts->{'experimental'} =~ /\bci(\d+)\b/ )
6799             {
6800 0         0 $calculate_ci = $1;
6801             }
6802 555 50       2031 return if ( !$calculate_ci );
6803 555         1579 my $ci_comments_only = $calculate_ci == 1;
6804              
6805             # This turns on an optional piece of logic which makes the new and
6806             # old computations of ci agree. It has almost no effect on actual
6807             # programs but is useful for testing.
6808 38     38   430 use constant SET_CI_OPTION_0 => 1;
  38         116  
  38         222940  
6809              
6810             # This is slightly different from the hash in in break_lists
6811             # with a similar name (removed '?' and ':' to fix t007 and others)
6812 555         1155 my %is_logical_container_for_ci;
6813 555         3237 my @q = qw# if elsif unless while and or err not && | || ! #;
6814 555         5756 @is_logical_container_for_ci{@q} = (1) x scalar(@q);
6815              
6816             # This is slightly different from a tokenizer hash with a similar name:
6817 555         1300 my %is_container_label_type_for_ci;
6818 555         2939 @q = qw# k && | || ? : ! #;
6819 555         3373 @is_container_label_type_for_ci{@q} = (1) x scalar(@q);
6820              
6821             # Undo ci of closing list paren followed by these binary operators:
6822             # - initially defined for issue t027, then
6823             # - added '=' for t015
6824             # - added '=~' for 'locale.in'
6825             # - added '<=>' for 'corelist.in'
6826             # Note:
6827             # See @value_requestor_type for more that might be included
6828             # See also @is_binary_type
6829 555         1258 my %bin_op_type;
6830 555         3732 @q = qw# . ** -> + - / * = != ^ < > % >= <= =~ !~ <=> x #;
6831 555         5592 @bin_op_type{@q} = (1) x scalar(@q);
6832              
6833 555         1344 my %is_list_end_type;
6834 555         2321 @q = qw( ; { } );
6835 555         1397 push @q, ',';
6836 555         2398 @is_list_end_type{@q} = (1) x scalar(@q);
6837              
6838 555         1501 my $rLL = $self->[_rLL_];
6839 555         1242 my $Klimit = $self->[_Klimit_];
6840 555 100       2069 return unless defined($Klimit);
6841              
6842 552         1331 my $token = ';';
6843 552         1152 my $type = ';';
6844 552         1298 my $last_token = $token;
6845 552         1272 my $last_type = $type;
6846 552         1082 my $ci_last = 0;
6847 552         1074 my $ci_next = 0;
6848 552         1098 my $ci_next_next = 1;
6849 552         1326 my $rstack = [];
6850              
6851 552         1253 my $seq_root = SEQ_ROOT;
6852 552         6794 my $rparent = {
6853             _seqno => $seq_root,
6854             _ci_open => 0,
6855             _ci_open_next => 0,
6856             _ci_close => 0,
6857             _ci_close_next => 0,
6858             _container_type => 'Block',
6859             _ci_next_next => $ci_next_next,
6860             _comma_count => 0,
6861             _semicolon_count => 0,
6862             _Kc => undef,
6863             };
6864              
6865             # Debug stuff
6866 552         1584 my @debug_lines;
6867             my %saw_ci_diff;
6868              
6869 552         1329 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
6870 552         1347 my $ris_sub_block = $self->[_ris_sub_block_];
6871 552         1331 my $ris_asub_block = $self->[_ris_asub_block_];
6872 552         1280 my $K_opening_container = $self->[_K_opening_container_];
6873 552         1239 my $K_closing_container = $self->[_K_closing_container_];
6874 552         1228 my $K_opening_ternary = $self->[_K_opening_ternary_];
6875 552         1164 my $K_closing_ternary = $self->[_K_closing_ternary_];
6876 552         1213 my $rlines = $self->[_rlines_];
6877 552         1204 my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
6878              
6879 552         1630 my $want_break_before_comma = $want_break_before{','};
6880              
6881             my $map_block_follows = sub {
6882              
6883             # return true if a sort/map/etc block follows the closing brace
6884             # of container $seqno
6885 104     104   313 my ($seqno) = @_;
6886 104         241 my $Kc = $K_closing_container->{$seqno};
6887 104 50       322 return unless defined($Kc);
6888 104         394 my $Kcn = $self->K_next_code($Kc);
6889 104 50       360 return unless defined($Kcn);
6890 104         262 my $seqno_n = $rLL->[$Kcn]->[_TYPE_SEQUENCE_];
6891              
6892             #return if ( defined($seqno_n) );
6893 104 100       434 return if ($seqno_n);
6894 34         298 my $Knn = $self->K_next_code($Kcn);
6895 34 50       199 return unless defined($Knn);
6896 34         98 my $seqno_nn = $rLL->[$Knn]->[_TYPE_SEQUENCE_];
6897 34 100       162 return unless ($seqno_nn);
6898 25         79 my $K_nno = $K_opening_container->{$seqno_nn};
6899 25 100 66     201 return unless $K_nno && $K_nno == $Knn;
6900 13         40 my $block_type = $rblock_type_of_seqno->{$seqno_nn};
6901              
6902 13 100       57 if ($block_type) {
6903 6         22 return $is_block_with_ci{$block_type};
6904             }
6905 7         29 return;
6906 552         4995 };
6907              
6908             my $redo_preceding_comment_ci = sub {
6909              
6910             # We need to reset the ci of the previous comment(s)
6911 187     187   474 my ( $K, $ci ) = @_;
6912 187         638 my $Km = $self->K_previous_code($K);
6913 187 50       796 return if ( !defined($Km) );
6914 187         754 foreach my $Kt ( $Km + 1 .. $K - 1 ) {
6915 180 50       695 if ( $rLL->[$Kt]->[_TYPE_] eq '#' ) {
6916 0         0 $rLL->[$Kt]->[_CI_LEVEL_] = $ci;
6917             }
6918             }
6919 187         405 return;
6920 552         3479 };
6921              
6922             # Definitions of the sequence of ci_values being maintained:
6923             # $ci_last = the ci value of the previous non-blank, non-comment token
6924             # $ci_this = the ci value to be stored for this token at index $KK
6925             # $ci_next = the normal ci for the next token, set by the previous tok
6926             # $ci_next_next = the normal next value of $ci_next in this container
6927              
6928             #--------------------------
6929             # Main loop over all tokens
6930             #--------------------------
6931 552         1449 my $KK = -1;
6932 552         1183 foreach my $rtoken_K ( @{$rLL} ) {
  552         1979  
6933              
6934 58381         70797 $KK++;
6935 58381         83588 $type = $rtoken_K->[_TYPE_];
6936              
6937             #------------------
6938             # Section 1. Blanks
6939             #------------------
6940 58381 100       98074 if ( $type eq 'b' ) {
6941              
6942 22224         30916 $rtoken_K->[_CI_LEVEL_] = $ci_next;
6943              
6944             # 'next' to avoid saving last_ values for blanks and commas
6945 22224         32618 next;
6946             }
6947              
6948             #--------------------
6949             # Section 2. Comments
6950             #--------------------
6951 36157 100       58763 if ( $type eq '#' ) {
6952              
6953 1092         1825 my $ci_this = $ci_next;
6954              
6955             # If at '#' in ternary before a ? or :, use that level to make
6956             # the comment line up with the next ? or : line. (see c202/t052)
6957             # i.e. if a nested ? follows, we increase the '#' level by 1, and
6958             # if a nested : follows, we decrease the '#' level by 1.
6959             # This is the only place where this sub changes a _LEVEL_ value.
6960 1092         1646 my $Kn;
6961 1092         2059 my $parent_container_type = $rparent->{_container_type};
6962 1092 100       2626 if ( $parent_container_type eq 'Ternary' ) {
6963 4         10 $Kn = $self->K_next_code($KK);
6964 4 50       11 if ($Kn) {
6965 4         9 my $type_kn = $rLL->[$Kn]->[_TYPE_];
6966 4 50       12 if ( $is_ternary{$type_kn} ) {
6967 4         7 my $level_KK = $rLL->[$KK]->[_LEVEL_];
6968 4         8 my $level_Kn = $rLL->[$Kn]->[_LEVEL_];
6969 4         10 $rLL->[$KK]->[_LEVEL_] = $rLL->[$Kn]->[_LEVEL_];
6970              
6971             # and use the ci of a terminating ':'
6972 4 50       9 if ( $Kn == $rparent->{_Kc} ) {
6973 4         9 $ci_this = $rparent->{_ci_close};
6974             }
6975             }
6976             }
6977             }
6978              
6979             # Undo ci for a block comment followed by a closing token or , or ;
6980             # provided that the parent container:
6981             # - ends without ci, or
6982             # - starts ci=0 and is a comma list or this follows a closing type
6983             # - has a level jump
6984 1092 50 66     2783 if (
      66        
6985             $ci_this
6986             && (
6987             !$rparent->{_ci_close}
6988             || (
6989             !$rparent->{_ci_open_next}
6990             && ( ( $rparent->{_comma_count} || $last_type eq ',' )
6991             || $is_closing_type{$last_type} )
6992             )
6993             )
6994             )
6995             {
6996             # Be sure this is a block comment
6997 37         102 my $lx = $rtoken_K->[_LINE_INDEX_];
6998 37         95 my $rK_range = $rlines->[$lx]->{_rK_range};
6999 37         65 my $Kfirst;
7000 37 50       108 if ($rK_range) { $Kfirst = $rK_range->[0] }
  37         86  
7001 37 100 66     236 if ( defined($Kfirst) && $Kfirst == $KK ) {
7002              
7003             # Look for trailing closing token
7004             # [ and possibly ',' or ';' ]
7005 11 50       70 $Kn = $self->K_next_code($KK) if ( !$Kn );
7006 11         37 my $Kc = $rparent->{_Kc};
7007 11 0 66     117 if (
      33        
      66        
7008             $Kn
7009             && $Kc
7010             && (
7011             $Kn == $Kc
7012              
7013             # only look for comma if -wbb=',' is set
7014             # to minimize changes to existing formatting
7015             || ( $rLL->[$Kn]->[_TYPE_] eq ','
7016             && $want_break_before_comma
7017             && $parent_container_type eq 'List' )
7018              
7019             # do not look ahead for a bare ';' because
7020             # it changes old formatting with little benefit.
7021             ## || ( $rLL->[$Kn]->[_TYPE_] eq ';'
7022             ## && $parent_container_type eq 'Block' )
7023             )
7024             )
7025             {
7026              
7027             # Be sure container has a level jump
7028 0         0 my $level_KK = $rLL->[$KK]->[_LEVEL_];
7029 0         0 my $level_Kc = $rLL->[$Kc]->[_LEVEL_];
7030 0 0       0 if ( $level_Kc < $level_KK ) {
7031 0         0 $ci_this = 0;
7032             }
7033             }
7034             }
7035             }
7036              
7037 1092         1718 $ci_next = $ci_this;
7038 1092         1845 $rtoken_K->[_CI_LEVEL_] = $ci_this;
7039              
7040             # 'next' to avoid saving last_ values for blanks and commas
7041 1092         2196 next;
7042             }
7043              
7044             #------------------------------------------------------------
7045             # Section 3. Continuing with non-blank and non-comment tokens
7046             #------------------------------------------------------------
7047              
7048 35065         50012 $token = $rtoken_K->[_TOKEN_];
7049              
7050             # Set ci values appropriate for most tokens:
7051 35065         45547 my $ci_this = $ci_next;
7052 35065         43136 $ci_next = $ci_next_next;
7053              
7054             # Now change these ci values as necessary for special cases...
7055              
7056             #----------------------------
7057             # Section 4. Container tokens
7058             #----------------------------
7059 35065 100 100     146202 if ( $rtoken_K->[_TYPE_SEQUENCE_] ) {
    100 100        
    100 100        
    100          
    100          
7060              
7061 9116         14379 my $seqno = $rtoken_K->[_TYPE_SEQUENCE_];
7062              
7063             #-------------------------------------
7064             # Section 4.1 Opening container tokens
7065             #-------------------------------------
7066 9116 100       18251 if ( $is_opening_sequence_token{$token} ) {
7067              
7068 4558         7234 my $level = $rtoken_K->[_LEVEL_];
7069              
7070             # Default ci values for the closing token, to be modified
7071             # as necessary:
7072 4558         6562 my $ci_close = $ci_next;
7073 4558         6375 my $ci_close_next = $ci_next_next;
7074              
7075             my $Kc =
7076             $type eq '?'
7077             ? $K_closing_ternary->{$seqno}
7078 4558 100       9827 : $K_closing_container->{$seqno};
7079              
7080             # $Kn = $self->K_next_nonblank($KK);
7081 4558         6232 my $Kn;
7082 4558 50       9219 if ( $KK < $Klimit ) {
7083 4558         6685 $Kn = $KK + 1;
7084 4558 100 66     15169 if ( $rLL->[$Kn]->[_TYPE_] eq 'b' && $Kn < $Klimit ) {
7085 3119         4860 $Kn += 1;
7086             }
7087             }
7088              
7089             # $Kcn = $self->K_next_code($Kc);
7090 4558         6177 my $Kcn;
7091 4558 100 66     13949 if ( $Kc && $Kc < $Klimit ) {
7092 4439         6279 $Kcn = $Kc + 1;
7093 4439 100 100     13760 if ( $rLL->[$Kcn]->[_TYPE_] eq 'b' && $Kcn < $Klimit ) {
7094 2327         3625 $Kcn += 1;
7095             }
7096 4439 100       9407 if ( $rLL->[$Kcn]->[_TYPE_] eq '#' ) {
7097 90         494 $Kcn = $self->K_next_code($Kcn);
7098             }
7099             }
7100              
7101 4558 50       9662 my $opening_level_jump =
7102             $Kn ? $rLL->[$Kn]->[_LEVEL_] - $level : 0;
7103              
7104             # initialize ci_next_next to its standard value
7105 4558         6538 $ci_next_next = 1;
7106              
7107             # Default: ci of first item of list with level jump is same as
7108             # ci of first item of container
7109 4558 100       9014 if ( $opening_level_jump > 0 ) {
7110 3841         6303 $ci_next = $rparent->{_ci_open_next};
7111             }
7112              
7113 4558         6788 my ( $comma_count, $semicolon_count );
7114 4558         7388 my $rtype_count = $rtype_count_by_seqno->{$seqno};
7115 4558 100       8861 if ($rtype_count) {
7116 1885         4053 $comma_count = $rtype_count->{','};
7117 1885         3328 $semicolon_count = $rtype_count->{';'};
7118              
7119             # Do not include a terminal semicolon in the count (the
7120             # comma_count has already been corrected by respace_tokens)
7121             # We only need to know if there are semicolons or not, so
7122             # for speed we can just do this test if the count is 1.
7123 1885 100 100     6009 if ( $semicolon_count && $semicolon_count == 1 ) {
7124 393         1549 my $Kcm = $self->K_previous_code($Kc);
7125 393 100       1548 if ( $rLL->[$Kcm]->[_TYPE_] eq ';' ) {
7126 373         773 $semicolon_count--;
7127             }
7128             }
7129             }
7130              
7131 4558         6281 my $container_type;
7132              
7133             #-------------------------
7134             # Section 4.1.1 Code Block
7135             #-------------------------
7136 4558         7829 my $block_type = $rblock_type_of_seqno->{$seqno};
7137 4558 100       10517 if ($block_type) {
    100          
7138 964         3564 $container_type = 'Block';
7139              
7140             # set default depending on block type
7141 964         1603 $ci_close = 0;
7142              
7143             my $no_semicolon =
7144             $is_block_without_semicolon{$block_type}
7145 964   100     5163 || $ris_sub_block->{$seqno}
7146             || $last_type eq 'J';
7147              
7148 964 100       2351 if ( !$no_semicolon ) {
7149              
7150             # Optional fix for block types sort/map/etc which use
7151             # zero ci at terminal brace if previous keyword had
7152             # zero ci. This will cause sort/map/grep filter blocks
7153             # to line up. Note that sub 'undo_ci' will also try to
7154             # do this, so this is not a critical operation.
7155 538 100       1782 if ( $is_block_with_ci{$block_type} ) {
7156 347         804 my $parent_seqno = $rparent->{_seqno};
7157             my $rtype_count_p =
7158 347         679 $rtype_count_by_seqno->{$parent_seqno};
7159 347 100 100     2198 if (
      100        
      100        
7160              
7161             # only do this within containers
7162             $parent_seqno != SEQ_ROOT
7163              
7164             # only in containers without ',' and ';'
7165             && !$rparent->{_comma_count}
7166             && !$rparent->{_semicolon_count}
7167              
7168             && $map_block_follows->($seqno)
7169             )
7170             {
7171 6 50       16 if ($ci_last) {
7172 0         0 $ci_close = $ci_this;
7173             }
7174             }
7175             else {
7176 341         659 $ci_close = $ci_this;
7177             }
7178             }
7179              
7180             # keep ci if certain operators follow (fix c202/t024)
7181 538 100 100     1923 if ( !$ci_close && $Kcn ) {
7182 174         406 my $type_kcn = $rLL->[$Kcn]->[_TYPE_];
7183 174         324 my $token_kcn = $rLL->[$Kcn]->[_TOKEN_];
7184 174 100 100     1664 if ( $type_kcn =~ /^(\.|\&\&|\|\|)$/
      66        
7185             || $type_kcn eq 'k' && $is_and_or{$token_kcn} )
7186             {
7187 1         3 $ci_close = $ci_this;
7188             }
7189             }
7190             }
7191              
7192 964 100       2564 if ( $rparent->{_container_type} ne 'Ternary' ) {
7193 958         1576 $ci_this = 0;
7194             }
7195 964         1490 $ci_next = 0;
7196 964         1553 $ci_close_next = $ci_close;
7197             }
7198              
7199             #----------------------
7200             # Section 4.1.2 Ternary
7201             #----------------------
7202             elsif ( $type eq '?' ) {
7203 187         504 $container_type = 'Ternary';
7204 187 100 66     946 if ( $rparent->{_container_type} eq 'List'
7205             && !$rparent->{_ci_open_next} )
7206             {
7207 52         116 $ci_this = 0;
7208 52         101 $ci_close = 0;
7209             }
7210              
7211             # redo ci of any preceding comments if necessary
7212             # at an outermost ? (which has no level jump)
7213 187 50       525 if ( !$opening_level_jump ) {
7214 187         516 $redo_preceding_comment_ci->( $KK, $ci_this );
7215             }
7216             }
7217              
7218             #-------------------------------
7219             # Section 4.1.3 Logical or List?
7220             #-------------------------------
7221             else {
7222             my $is_logical = $is_container_label_type_for_ci{$last_type}
7223 3407   100     16138 && $is_logical_container_for_ci{$last_token}
7224              
7225             # Part 1 of optional patch to get agreement with previous
7226             # ci This makes almost no difference in a typical program
7227             # because we will seldom break within an array index.
7228             || $type eq '[' && SET_CI_OPTION_0;
7229              
7230 3407 100 100     11170 if ( !$is_logical && $token eq '(' ) {
7231              
7232             # 'foreach' and 'for' paren contents are treated as
7233             # logical except for C-style 'for'
7234 1894 100 66     8021 if ( $last_type eq 'k' ) {
    100          
    100          
7235 433   66     2068 $is_logical ||= $last_token eq 'foreach';
7236              
7237             # C-style 'for' container will be type 'List'
7238 433 100       1130 if ( $last_token eq 'for' ) {
7239             $is_logical =
7240 28   100     184 !( $rtype_count && $rtype_count->{'f'} );
7241             }
7242             }
7243              
7244             # Check for 'for' and 'foreach' loops with iterators
7245             elsif ( $last_type eq 'i' && defined($Kcn) ) {
7246 590         1336 my $seqno_kcn = $rLL->[$Kcn]->[_TYPE_SEQUENCE_];
7247 590         1182 my $type_kcn = $rLL->[$Kcn]->[_TOKEN_];
7248 590 100 100     2068 if ( $seqno_kcn && $type_kcn eq '{' ) {
7249             my $block_type_kcn =
7250 47         118 $rblock_type_of_seqno->{$seqno_kcn};
7251 47   66     406 $is_logical ||= $block_type_kcn
      66        
7252             && ( $block_type_kcn eq 'for'
7253             || $block_type_kcn eq 'foreach' );
7254             }
7255              
7256             # Search backwards for 'for'/'foreach' with
7257             # iterator in case user is running from an editor
7258             # and did not include the block (fixes case
7259             # 'xci.in').
7260 590         1869 my $Km = $self->K_previous_code($KK);
7261 590         1989 foreach ( 0 .. 2 ) {
7262 602         1602 $Km = $self->K_previous_code($Km);
7263 602 100       2097 last unless defined($Km);
7264 600 100       2045 last unless $rLL->[$Km]->[_TYPE_] eq 'k';
7265 49         113 my $tok = $rLL->[$Km]->[_TOKEN_];
7266 49 100       215 next if $tok eq 'my';
7267 37   33     193 $is_logical ||=
      66        
7268             ( $tok eq 'for' || $tok eq 'foreach' );
7269 37         103 last;
7270             }
7271             }
7272             elsif ( $last_token eq '(' ) {
7273             $is_logical ||=
7274 61   66     424 $rparent->{_container_type} eq 'Logical';
7275             }
7276             }
7277              
7278             #------------------------
7279             # Section 4.1.3.1 Logical
7280             #------------------------
7281 3407 100       6430 if ($is_logical) {
7282 598         1143 $container_type = 'Logical';
7283              
7284             # Pass ci though an '!'
7285 598 100       1508 if ( $last_type eq '!' ) { $ci_this = $ci_last }
  6         23  
7286              
7287 598         956 $ci_next_next = 0;
7288 598         921 $ci_close_next = $ci_this;
7289              
7290             # Part 2 of optional patch to get agreement with
7291             # previous ci
7292 598 100 100     2216 if ( $type eq '[' && SET_CI_OPTION_0 ) {
7293              
7294 307         523 $ci_next_next = $ci_this;
7295              
7296             # Undo ci at a chain of indexes or hash keys
7297 307 100       832 if ( $last_type eq '}' ) {
7298 7         16 $ci_this = $ci_last;
7299             }
7300             }
7301              
7302 598 100       1365 if ($opening_level_jump) {
7303 291         534 $ci_next = 0;
7304             }
7305             }
7306              
7307             #---------------------
7308             # Section 4.1.3.2 List
7309             #---------------------
7310             else {
7311              
7312             # Here 'List' is a catchall for none of the above types
7313 2809         4507 $container_type = 'List';
7314              
7315             # lists in blocks ...
7316 2809 100       6155 if ( $rparent->{_container_type} eq 'Block' ) {
7317              
7318             # undo ci if another closing token follows
7319 1652 100       3669 if ( defined($Kcn) ) {
7320 1651         3308 my $closing_level_jump =
7321             $rLL->[$Kcn]->[_LEVEL_] - $level;
7322 1651 100       3885 if ( $closing_level_jump < 0 ) {
7323 58         145 $ci_close = $ci_this;
7324             }
7325             }
7326             }
7327              
7328             # lists not in blocks ...
7329             else {
7330              
7331 1157 100       3224 if ( !$rparent->{_comma_count} ) {
7332              
7333 566         1115 $ci_close = $ci_this;
7334              
7335             # undo ci at binary op after right paren if no
7336             # commas in container; fixes t027, t028
7337 566 100 66     2397 if ( $ci_close_next != $ci_close
      100        
7338             && defined($Kcn)
7339             && $bin_op_type{ $rLL->[$Kcn]->[_TYPE_] } )
7340             {
7341 20         45 $ci_close_next = $ci_close;
7342             }
7343             }
7344              
7345 1157 100       2807 if ( $rparent->{_container_type} eq 'Ternary' ) {
7346 55         130 $ci_next = 0;
7347             }
7348             }
7349              
7350             # Undo ci at a chain of indexes or hash keys
7351 2809 50 66     8454 if ( $token ne '(' && $last_type eq '}' ) {
7352 0         0 $ci_this = $ci_close = $ci_last;
7353             }
7354             }
7355             }
7356              
7357             #---------------------------------------
7358             # Section 4.1.4 Store opening token info
7359             #---------------------------------------
7360              
7361             # Most closing tokens should align with their opening tokens.
7362 4558 100 100     18871 if (
      100        
      100        
7363             $type eq '{'
7364             && $token ne '('
7365             && $is_list_end_type{$last_type}
7366              
7367             # avoid asub blocks, which may have prototypes ending in '}'
7368             && !$ris_asub_block->{$seqno}
7369             )
7370             {
7371 724         1287 $ci_close = $ci_this;
7372             }
7373              
7374             # Closing ci must never be less than opening
7375 4558 50       9032 if ( $ci_close < $ci_this ) { $ci_close = $ci_this }
  0         0  
7376              
7377 4558         6183 push @{$rstack}, $rparent;
  4558         8357  
7378 4558         30131 $rparent = {
7379             _seqno => $seqno,
7380             _container_type => $container_type,
7381             _ci_next_next => $ci_next_next,
7382             _ci_open => $ci_this,
7383             _ci_open_next => $ci_next,
7384             _ci_close => $ci_close,
7385             _ci_close_next => $ci_close_next,
7386             _comma_count => $comma_count,
7387             _semicolon_count => $semicolon_count,
7388             _Kc => $Kc,
7389             };
7390             }
7391              
7392             #-------------------------------------
7393             # Section 4.2 Closing container tokens
7394             #-------------------------------------
7395             else {
7396              
7397 4558         8563 my $seqno_test = $rparent->{_seqno};
7398 4558 50       9298 if ( $seqno_test ne $seqno ) {
7399              
7400             # Shouldn't happen if we are processing balanced text.
7401             # (Unbalanced text should go out verbatim)
7402 0         0 DEVEL_MODE
7403             && Fault("stack error: $seqno_test != $seqno\n");
7404             }
7405              
7406             # Use ci_this, ci_next values set by the matching opening token:
7407 4558         6744 $ci_this = $rparent->{_ci_close};
7408 4558         6485 $ci_next = $rparent->{_ci_close_next};
7409 4558         6408 my $ci_open_old = $rparent->{_ci_open};
7410              
7411             # Then pop the stack and use the parent ci_next_next value:
7412 4558 50       6014 if ( @{$rstack} ) {
  4558         9017  
7413 4558         5874 $rparent = pop @{$rstack};
  4558         11873  
7414 4558         7707 $ci_next_next = $rparent->{_ci_next_next};
7415             }
7416             else {
7417              
7418             # Shouldn't happen if we are processing balanced text.
7419 0         0 DEVEL_MODE && Fault("empty stack - shouldn't happen\n");
7420             }
7421              
7422             # Fix: undo ci at a closing token followed by a closing token.
7423             # Goal is to keep formatting independent of the existence of a
7424             # trailing comma or semicolon.
7425 4558 100 100     16939 if ( $ci_this > 0 && !$ci_open_old && !$rparent->{_ci_close} ) {
      100        
7426 205         493 my $Kc = $rparent->{_Kc};
7427 205         835 my $Kn = $self->K_next_code($KK);
7428 205 100 66     958 if ( $Kc && $Kn && $Kc == $Kn ) {
      100        
7429 5         15 $ci_this = $ci_next = 0;
7430             }
7431             }
7432             }
7433             }
7434              
7435             #---------------------------------
7436             # Section 5. Semicolons and Labels
7437             #---------------------------------
7438             # The next token after a ';' and label (type 'J') starts a new stmt
7439             # The ci after a C-style for ';' (type 'f') is handled similarly.
7440             # TODO: There is type 'f' redundant coding in sub respace which can
7441             # be removed if this becomes the standard routine for computing ci.
7442             elsif ( $type eq ';' || $type eq 'J' || $type eq 'f' ) {
7443 2674         4964 $ci_next = 0;
7444 2674 100       6822 if ( $is_closing_type{$last_type} ) { $ci_this = $ci_last }
  1248         2120  
7445             }
7446              
7447             #--------------------
7448             # Section 6. Keywords
7449             #--------------------
7450             # Undo ci after a format statement
7451             elsif ( $type eq 'k' ) {
7452 2804 100       7983 if ( substr( $token, 0, 6 ) eq 'format' ) {
7453 1         3 $ci_next = 0;
7454             }
7455             }
7456              
7457             #------------------
7458             # Section 7. Commas
7459             #------------------
7460             # A comma and the subsequent item normally have ci undone
7461             # unless ci has been set at a lower level
7462             elsif ( $type eq ',' ) {
7463              
7464 3034 100       7418 if ( $rparent->{_container_type} eq 'List' ) {
7465 2815         4624 $ci_this = $ci_next = $rparent->{_ci_open_next};
7466             }
7467             }
7468              
7469             #---------------------------------
7470             # Section 8. Hanging side comments
7471             #---------------------------------
7472             # Treat hanging side comments like blanks
7473             elsif ( $type eq 'q' && $token eq EMPTY_STRING ) {
7474 54         104 $ci_next = $ci_this;
7475              
7476 54         95 $rtoken_K->[_CI_LEVEL_] = $ci_this;
7477              
7478             # 'next' to avoid saving last_ values for blanks and commas
7479 54         106 next;
7480             }
7481              
7482             # Save debug info if requested
7483 35011         44687 DEBUG_SET_CI && do {
7484              
7485             my $seqno = $rtoken_K->[_TYPE_SEQUENCE_];
7486             my $level = $rtoken_K->[_LEVEL_];
7487             my $ci = $rtoken_K->[_CI_LEVEL_];
7488             if ( $ci > 1 ) { $ci = 1 }
7489              
7490             my $tok = $token;
7491             my $last_tok = $last_token;
7492             $tok =~ s/\t//g;
7493             $last_tok =~ s/\t//g;
7494             $tok = length($tok) > 3 ? substr( $tok, 0, 8 ) : $tok;
7495             $last_tok =
7496             length($last_tok) > 3 ? substr( $last_tok, 0, 8 ) : $last_tok;
7497             $tok =~ s/["']//g;
7498             $last_tok =~ s/["']//g;
7499             my $block_type;
7500             $block_type = $rblock_type_of_seqno->{$seqno} if ($seqno);
7501             $block_type = EMPTY_STRING unless ($block_type);
7502             my $ptype = $rparent->{_container_type};
7503             my $pname = $ptype;
7504              
7505             my $error =
7506             $ci_this == $ci ? EMPTY_STRING : $type eq 'b' ? "error" : "ERROR";
7507             if ($error) { $saw_ci_diff{$KK} = 1 }
7508              
7509             my $lno = $rtoken_K->[_LINE_INDEX_] + 1;
7510             $debug_lines[$KK] = <<EOM;
7511             $lno\t$ci\t$ci_this\t$ci_next\t$last_type\t$last_tok\t$type\t$tok\t$seqno\t$level\t$pname\t$block_type\t$error
7512             EOM
7513             };
7514              
7515             #----------------------------------
7516             # Store the ci value for this token
7517             #----------------------------------
7518 35011 50       62444 $rtoken_K->[_CI_LEVEL_] = $ci_this
7519              
7520             # do not store in hybrid testing mode
7521             if ( !$ci_comments_only );
7522              
7523             # Remember last nonblank, non-comment token info for the next pass
7524 35011         43667 $ci_last = $ci_this;
7525 35011         46307 $last_token = $token;
7526 35011         52902 $last_type = $type;
7527              
7528             } ## End main loop over tokens
7529              
7530             #----------------------
7531             # Post-loop operations:
7532             #----------------------
7533              
7534             # if the logfile is saved, we need to save the leading ci of
7535             # each old line of code.
7536 552 100       3597 if ( $self->[_save_logfile_] ) {
7537 2         5 foreach my $line_of_tokens ( @{$rlines} ) {
  2         6  
7538 10         16 my $line_type = $line_of_tokens->{_line_type};
7539 10 100       24 next if ( $line_type ne 'CODE' );
7540 7         11 my ( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} };
  7         18  
7541 7 100       16 next if ( !defined($Kfirst) );
7542 6         29 $line_of_tokens->{_ci_level_0} = $rLL->[$Kfirst]->[_CI_LEVEL_];
7543             }
7544             }
7545              
7546 552         1208 if (DEBUG_SET_CI) {
7547             my @output_lines;
7548             foreach my $KK ( 0 .. $Klimit ) {
7549             my $line = $debug_lines[$KK];
7550             if ($line) {
7551             my $Kp = $self->K_previous_code($KK);
7552             my $Kn = $self->K_next_code($KK);
7553             if ( DEBUG_SET_CI > 1
7554             || $Kp && $saw_ci_diff{$Kp}
7555             || $saw_ci_diff{$KK}
7556             || $Kn && $saw_ci_diff{$Kn} )
7557             {
7558             push @output_lines, $line;
7559             }
7560             }
7561             }
7562             if (@output_lines) {
7563             unshift @output_lines, <<EOM;
7564             lno\tci\tci_this\tci_next\tlast_type\tlast_tok\ttype\ttok\tseqno\tlevel\tpname\tblock_type\terror?
7565             EOM
7566             foreach my $line (@output_lines) {
7567             chomp $line;
7568             print STDERR $line, "\n";
7569             }
7570             }
7571             }
7572              
7573 552         14809 return;
7574             } ## end sub set_ci
7575              
7576             sub set_CODE_type {
7577 555     555 0 1707 my ($self) = @_;
7578              
7579             # Examine each line of code and set a flag '$CODE_type' to describe it.
7580             # Also return a list of lines with side comments.
7581              
7582 555         1579 my $rLL = $self->[_rLL_];
7583 555         1344 my $rlines = $self->[_rlines_];
7584              
7585 555         1450 my $rOpts_format_skipping_begin = $rOpts->{'format-skipping-begin'};
7586 555         1350 my $rOpts_format_skipping_end = $rOpts->{'format-skipping-end'};
7587             my $rOpts_static_block_comment_prefix =
7588 555         2871 $rOpts->{'static-block-comment-prefix'};
7589              
7590             # Remember indexes of lines with side comments
7591 555         1265 my @ix_side_comments;
7592              
7593 555         1277 my $In_format_skipping_section = 0;
7594 555         1243 my $Saw_VERSION_in_this_file = 0;
7595 555         1032 my $has_side_comment = 0;
7596 555         1099 my $last_line_had_side_comment = 0;
7597 555         1553 my ( $Kfirst, $Klast );
7598 555         0 my $CODE_type;
7599              
7600             # Loop to set CODE_type
7601              
7602             # Possible CODE_types
7603             # 'VB' = Verbatim - line goes out verbatim (a quote)
7604             # 'FS' = Format Skipping - line goes out verbatim
7605             # 'BL' = Blank Line
7606             # 'HSC' = Hanging Side Comment - fix this hanging side comment
7607             # 'SBCX'= Static Block Comment Without Leading Space
7608             # 'SBC' = Static Block Comment
7609             # 'BC' = Block Comment - an ordinary full line comment
7610             # 'IO' = Indent Only - line goes out unchanged except for indentation
7611             # 'NIN' = No Internal Newlines - line does not get broken
7612             # 'VER' = VERSION statement
7613             # '' = ordinary line of code with no restrictions
7614              
7615 555         1110 my $ix_line = -1;
7616 555         1140 foreach my $line_of_tokens ( @{$rlines} ) {
  555         1660  
7617 7647         9792 $ix_line++;
7618 7647         13746 my $line_type = $line_of_tokens->{_line_type};
7619              
7620 7647         10092 my $last_CODE_type = $CODE_type;
7621 7647         10632 $CODE_type = EMPTY_STRING;
7622              
7623 7647 100       13899 if ( $line_type ne 'CODE' ) {
7624 169         278 next;
7625             }
7626              
7627 7478         12843 my $input_line = $line_of_tokens->{_line_text};
7628              
7629 7478         9998 my $Klast_prev = $Klast;
7630 7478         9494 ( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} };
  7478         15406  
7631 7478 100       14131 my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1;
7632              
7633 7478         9589 my $is_block_comment;
7634 7478 100 100     25366 if ( $jmax >= 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
7635 1114 100       2689 if ( $jmax == 0 ) { $is_block_comment = 1; }
  786         1508  
7636 328         1020 else { $has_side_comment = 1 }
7637             }
7638              
7639             # Write line verbatim if we are in a formatting skip section
7640 7478 100       13371 if ($In_format_skipping_section) {
7641              
7642             # Note: extra space appended to comment simplifies pattern matching
7643 57 100 66     460 if (
    50 66        
      33        
      0        
      33        
      0        
      0        
7644             $is_block_comment
7645              
7646             # optional fast pre-check
7647             && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#>>>'
7648             || $rOpts_format_skipping_end )
7649              
7650             && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
7651             /$format_skipping_pattern_end/
7652             )
7653             {
7654 13         48 $In_format_skipping_section = 0;
7655 13         36 my $input_line_no = $line_of_tokens->{_line_number};
7656 13         64 write_logfile_entry(
7657             "Line $input_line_no: Exiting format-skipping section\n");
7658             }
7659             elsif (
7660             $is_block_comment
7661              
7662             # optional fast pre-check
7663             && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#<<<'
7664             || $rOpts_format_skipping_begin )
7665              
7666             && $rOpts_format_skipping
7667             && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
7668             /$format_skipping_pattern_begin/
7669             )
7670             {
7671             # warn of duplicate starting comment lines, git #118
7672 0         0 my $input_line_no = $line_of_tokens->{_line_number};
7673 0         0 warning(
7674             "Already in format-skipping section which started at line $In_format_skipping_section\n",
7675             $input_line_no
7676             );
7677             }
7678 57         100 $CODE_type = 'FS';
7679 57         92 next;
7680             }
7681              
7682             # Check for a continued quote..
7683 7421 100       13953 if ( $line_of_tokens->{_starting_in_quote} ) {
7684              
7685             # A line which is entirely a quote or pattern must go out
7686             # verbatim. Note: the \n is contained in $input_line.
7687 47 100       160 if ( $jmax <= 0 ) {
7688 28 50 33     96 if ( $self->[_save_logfile_] && $input_line =~ /\t/ ) {
7689 0         0 my $input_line_number = $line_of_tokens->{_line_number};
7690 0         0 $self->note_embedded_tab($input_line_number);
7691             }
7692 28         48 $CODE_type = 'VB';
7693 28         48 next;
7694             }
7695             }
7696              
7697             # See if we are entering a formatting skip section
7698 7393 100 100     16828 if (
      100        
      100        
      100        
7699             $is_block_comment
7700              
7701             # optional fast pre-check
7702             && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#<<<'
7703             || $rOpts_format_skipping_begin )
7704              
7705             && $rOpts_format_skipping
7706             && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
7707             /$format_skipping_pattern_begin/
7708             )
7709             {
7710 13         54 my $input_line_no = $line_of_tokens->{_line_number};
7711 13         41 $In_format_skipping_section = $input_line_no;
7712 13         65 write_logfile_entry(
7713             "Line $input_line_no: Entering format-skipping section\n");
7714 13         35 $CODE_type = 'FS';
7715 13         56 next;
7716             }
7717              
7718             # ignore trailing blank tokens (they will get deleted later)
7719 7380 100 100     21063 if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
7720 145         294 $jmax--;
7721             }
7722              
7723             # blank line..
7724 7380 100       13173 if ( $jmax < 0 ) {
7725 801         1671 $CODE_type = 'BL';
7726 801         1513 next;
7727             }
7728              
7729             # Handle comments
7730 6579 100       11441 if ($is_block_comment) {
7731              
7732             # see if this is a static block comment (starts with ## by default)
7733 760         1354 my $is_static_block_comment = 0;
7734 760         1679 my $no_leading_space = substr( $input_line, 0, 1 ) eq '#';
7735 760 100 100     4019 if (
      100        
      100        
7736              
7737             # optional fast pre-check
7738             (
7739             substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 2 ) eq '##'
7740             || $rOpts_static_block_comment_prefix
7741             )
7742              
7743             && $rOpts_static_block_comments
7744             && $input_line =~ /$static_block_comment_pattern/
7745             )
7746             {
7747 21         54 $is_static_block_comment = 1;
7748             }
7749              
7750             # Check for comments which are line directives
7751             # Treat exactly as static block comments without leading space
7752             # reference: perlsyn, near end, section Plain Old Comments (Not!)
7753             # example: '# line 42 "new_filename.plx"'
7754 760 100 100     3741 if (
7755             $no_leading_space
7756             && $input_line =~ /^\# \s*
7757             line \s+ (\d+) \s*
7758             (?:\s("?)([^"]+)\2)? \s*
7759             $/x
7760             )
7761             {
7762 2         7 $is_static_block_comment = 1;
7763             }
7764              
7765             # look for hanging side comment ...
7766 760 100 100     2416 if (
      66        
7767             $last_line_had_side_comment # this follows as side comment
7768             && !$no_leading_space # with some leading space, and
7769             && !$is_static_block_comment # this is not a static comment
7770             )
7771             {
7772              
7773             # continuing an existing HSC chain?
7774 61 100 33     646 if ( $last_CODE_type eq 'HSC' ) {
    100 66        
      33        
      33        
7775 26         65 $has_side_comment = 1;
7776 26         47 $CODE_type = 'HSC';
7777 26         51 next;
7778             }
7779              
7780             # starting a new HSC chain?
7781             elsif (
7782              
7783             $rOpts->{'hanging-side-comments'} # user is allowing
7784             # hanging side comments
7785             # like this
7786              
7787             && ( defined($Klast_prev) && $Klast_prev > 1 )
7788              
7789             # and the previous side comment was not static (issue c070)
7790             && !(
7791             $rOpts->{'static-side-comments'}
7792             && $rLL->[$Klast_prev]->[_TOKEN_] =~
7793             /$static_side_comment_pattern/
7794             )
7795              
7796             )
7797             {
7798              
7799             # and it is not a closing side comment (issue c070).
7800 33         88 my $K_penult = $Klast_prev - 1;
7801 33 100       132 $K_penult -= 1 if ( $rLL->[$K_penult]->[_TYPE_] eq 'b' );
7802 33   66     246 my $follows_csc =
7803             ( $rLL->[$K_penult]->[_TOKEN_] eq '}'
7804             && $rLL->[$K_penult]->[_TYPE_] eq '}'
7805             && $rLL->[$Klast_prev]->[_TOKEN_] =~
7806             /$closing_side_comment_prefix_pattern/ );
7807              
7808 33 50       108 if ( !$follows_csc ) {
7809 33         94 $has_side_comment = 1;
7810 33         93 $CODE_type = 'HSC';
7811 33         70 next;
7812             }
7813             }
7814             }
7815              
7816 701 100 66     2815 if ($is_static_block_comment) {
    50 33        
7817 23 100       90 $CODE_type = $no_leading_space ? 'SBCX' : 'SBC';
7818 23         51 next;
7819             }
7820             elsif ($last_line_had_side_comment
7821             && !$rOpts_maximum_consecutive_blank_lines
7822             && $rLL->[$Kfirst]->[_LEVEL_] > 0 )
7823             {
7824             # Emergency fix to keep a block comment from becoming a hanging
7825             # side comment. This fix is for the case that blank lines
7826             # cannot be inserted. There is related code in sub
7827             # 'process_line_of_CODE'
7828 0         0 $CODE_type = 'SBCX';
7829 0         0 next;
7830             }
7831             else {
7832 678         1262 $CODE_type = 'BC';
7833 678         1426 next;
7834             }
7835             }
7836              
7837             # End of comments. Handle a line of normal code:
7838              
7839 5819 100       10232 if ($rOpts_indent_only) {
7840 12         24 $CODE_type = 'IO';
7841 12         20 next;
7842             }
7843              
7844 5807 100       10178 if ( !$rOpts_add_newlines ) {
7845 64         92 $CODE_type = 'NIN';
7846 64         103 next;
7847             }
7848              
7849             # Patch needed for MakeMaker. Do not break a statement
7850             # in which $VERSION may be calculated. See MakeMaker.pm;
7851             # this is based on the coding in it.
7852             # The first line of a file that matches this will be eval'd:
7853             # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
7854             # Examples:
7855             # *VERSION = \'1.01';
7856             # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
7857             # We will pass such a line straight through without breaking
7858             # it unless -npvl is used.
7859              
7860             # Patch for problem reported in RT #81866, where files
7861             # had been flattened into a single line and couldn't be
7862             # tidied without -npvl. There are two parts to this patch:
7863             # First, it is not done for a really long line (80 tokens for now).
7864             # Second, we will only allow up to one semicolon
7865             # before the VERSION. We need to allow at least one semicolon
7866             # for statements like this:
7867             # require Exporter; our $VERSION = $Exporter::VERSION;
7868             # where both statements must be on a single line for MakeMaker
7869              
7870 5743 100 66     27744 if ( !$Saw_VERSION_in_this_file
      100        
7871             && $jmax < 80
7872             && $input_line =~
7873             /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
7874             {
7875 4         14 $Saw_VERSION_in_this_file = 1;
7876 4         15 write_logfile_entry("passing VERSION line; -npvl deactivates\n");
7877              
7878             # This code type has lower priority than others
7879 4         10 $CODE_type = 'VER';
7880 4         21 next;
7881             }
7882             }
7883             continue {
7884 7647         14752 $line_of_tokens->{_code_type} = $CODE_type;
7885              
7886 7647         10078 $last_line_had_side_comment = $has_side_comment;
7887 7647 100       15972 if ($has_side_comment) {
7888 387         971 push @ix_side_comments, $ix_line;
7889 387         803 $has_side_comment = 0;
7890             }
7891             }
7892              
7893 555         3442 return \@ix_side_comments;
7894             } ## end sub set_CODE_type
7895              
7896             sub find_non_indenting_braces {
7897              
7898 555     555 0 1895 my ( $self, $rix_side_comments ) = @_;
7899              
7900             # Find and mark all non-indenting braces in this file.
7901              
7902             # Given:
7903             # $rix_side_comments = index of lines which have side comments
7904             # Find and save the line indexes of these special side comments in:
7905             # $self->[_rseqno_non_indenting_brace_by_ix_];
7906              
7907             # Non-indenting braces are opening braces of the form
7908             # { #<<< ...
7909             # which do not cause an increase in indentation level.
7910             # They are enabled with the --non-indenting-braces, or -nib, flag.
7911              
7912 555 100       2210 return unless ( $rOpts->{'non-indenting-braces'} );
7913 554         1495 my $rLL = $self->[_rLL_];
7914 554 100 66     2193 return unless ( defined($rLL) && @{$rLL} );
  554         1952  
7915 551         1379 my $rlines = $self->[_rlines_];
7916 551         1320 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
7917 551         1429 my $rseqno_non_indenting_brace_by_ix =
7918             $self->[_rseqno_non_indenting_brace_by_ix_];
7919              
7920 551         1228 foreach my $ix ( @{$rix_side_comments} ) {
  551         1742  
7921 381         681 my $line_of_tokens = $rlines->[$ix];
7922 381         722 my $line_type = $line_of_tokens->{_line_type};
7923 381 50       971 if ( $line_type ne 'CODE' ) {
7924              
7925             # shouldn't happen
7926 0         0 DEVEL_MODE && Fault("unexpected line_type=$line_type\n");
7927 0         0 next;
7928             }
7929 381         704 my $rK_range = $line_of_tokens->{_rK_range};
7930 381         587 my ( $Kfirst, $Klast ) = @{$rK_range};
  381         847  
7931 381 50 33     1736 unless ( defined($Kfirst) && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
7932              
7933             # shouldn't happen
7934 0         0 DEVEL_MODE && Fault("did not get a comment\n");
7935 0         0 next;
7936             }
7937 381 100       985 next unless ( $Klast > $Kfirst ); # maybe HSC
7938 322         617 my $token_sc = $rLL->[$Klast]->[_TOKEN_];
7939 322         600 my $K_m = $Klast - 1;
7940 322         657 my $type_m = $rLL->[$K_m]->[_TYPE_];
7941 322 100 66     1351 if ( $type_m eq 'b' && $K_m > $Kfirst ) {
7942 316         560 $K_m--;
7943 316         602 $type_m = $rLL->[$K_m]->[_TYPE_];
7944             }
7945 322         606 my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
7946 322 100       1268 if ($seqno_m) {
7947 111         241 my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
7948              
7949             # The pattern ends in \s but we have removed the newline, so
7950             # we added it back for the match. That way we require an exact
7951             # match to the special string and also allow additional text.
7952 111         220 $token_sc .= "\n";
7953 111 100 100     1252 if ( $block_type_m
      100        
7954             && $is_opening_type{$type_m}
7955             && $token_sc =~ /$non_indenting_brace_pattern/ )
7956             {
7957 6         27 $rseqno_non_indenting_brace_by_ix->{$ix} = $seqno_m;
7958             }
7959             }
7960             }
7961 551         1381 return;
7962             } ## end sub find_non_indenting_braces
7963              
7964             sub delete_side_comments {
7965 10     10 0 51 my ( $self, $rix_side_comments ) = @_;
7966              
7967             # Given a list of indexes of lines with side comments, handle any
7968             # requested side comment deletions.
7969              
7970 10         30 my $rLL = $self->[_rLL_];
7971 10         27 my $rlines = $self->[_rlines_];
7972 10         25 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
7973 10         30 my $rseqno_non_indenting_brace_by_ix =
7974             $self->[_rseqno_non_indenting_brace_by_ix_];
7975              
7976 10         34 foreach my $ix ( @{$rix_side_comments} ) {
  10         35  
7977 23         41 my $line_of_tokens = $rlines->[$ix];
7978 23         40 my $line_type = $line_of_tokens->{_line_type};
7979              
7980             # This fault shouldn't happen because we only saved CODE lines with
7981             # side comments in the TASK 1 loop above.
7982 23 50       62 if ( $line_type ne 'CODE' ) {
7983 0         0 if (DEVEL_MODE) {
7984             my $lno = $ix + 1;
7985             Fault(<<EOM);
7986             Hit unexpected line_type = '$line_type' near line $lno while deleting side comments, should be 'CODE'
7987             EOM
7988             }
7989 0         0 next;
7990             }
7991              
7992 23         39 my $CODE_type = $line_of_tokens->{_code_type};
7993 23         41 my $rK_range = $line_of_tokens->{_rK_range};
7994 23         40 my ( $Kfirst, $Klast ) = @{$rK_range};
  23         48  
7995              
7996 23 50 33     115 unless ( defined($Kfirst) && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
7997 0         0 if (DEVEL_MODE) {
7998             my $lno = $ix + 1;
7999             Fault(<<EOM);
8000             Did not find side comment near line $lno while deleting side comments
8001             EOM
8002             }
8003 0         0 next;
8004             }
8005              
8006 23   33     186 my $delete_side_comment =
8007             $rOpts_delete_side_comments
8008             && ( $Klast > $Kfirst || $CODE_type eq 'HSC' )
8009             && (!$CODE_type
8010             || $CODE_type eq 'HSC'
8011             || $CODE_type eq 'IO'
8012             || $CODE_type eq 'NIN' );
8013              
8014             # Do not delete special control side comments
8015 23 50       68 if ( $rseqno_non_indenting_brace_by_ix->{$ix} ) {
8016 0         0 $delete_side_comment = 0;
8017             }
8018              
8019 23 0 66     93 if (
      66        
      0        
      33        
8020             $rOpts_delete_closing_side_comments
8021             && !$delete_side_comment
8022             && $Klast > $Kfirst
8023             && ( !$CODE_type
8024             || $CODE_type eq 'HSC'
8025             || $CODE_type eq 'IO'
8026             || $CODE_type eq 'NIN' )
8027             )
8028             {
8029 3         10 my $token = $rLL->[$Klast]->[_TOKEN_];
8030 3         12 my $K_m = $Klast - 1;
8031 3         8 my $type_m = $rLL->[$K_m]->[_TYPE_];
8032 3 50 33     35 if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- }
  3         8  
8033 3         9 my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
8034 3 100       14 if ($seqno_m) {
8035 2         5 my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
8036 2 50 33     129 if ( $block_type_m
      33        
8037             && $token =~ /$closing_side_comment_prefix_pattern/
8038             && $block_type_m =~ /$closing_side_comment_list_pattern/ )
8039             {
8040 2         8 $delete_side_comment = 1;
8041             }
8042             }
8043             } ## end if ( $rOpts_delete_closing_side_comments...)
8044              
8045 23 100       55 if ($delete_side_comment) {
8046              
8047             # We are actually just changing the side comment to a blank.
8048             # This may produce multiple blanks in a row, but sub respace_tokens
8049             # will check for this and fix it.
8050 22         55 $rLL->[$Klast]->[_TYPE_] = 'b';
8051 22         39 $rLL->[$Klast]->[_TOKEN_] = SPACE;
8052              
8053             # The -io option outputs the line text, so we have to update
8054             # the line text so that the comment does not reappear.
8055 22 100       53 if ( $CODE_type eq 'IO' ) {
8056 2         4 my $line = EMPTY_STRING;
8057 2         7 foreach my $KK ( $Kfirst .. $Klast - 1 ) {
8058 18         30 $line .= $rLL->[$KK]->[_TOKEN_];
8059             }
8060 2         14 $line =~ s/\s+$//;
8061 2         10 $line_of_tokens->{_line_text} = $line . "\n";
8062             }
8063              
8064             # If we delete a hanging side comment the line becomes blank.
8065 22 100       64 if ( $CODE_type eq 'HSC' ) { $line_of_tokens->{_code_type} = 'BL' }
  5         13  
8066             }
8067             }
8068 10         34 return;
8069             } ## end sub delete_side_comments
8070              
8071             sub dump_verbatim {
8072 0     0 0 0 my $self = shift;
8073              
8074             # Dump the input file to the output verbatim. This is called when
8075             # there is a severe error and formatted output cannot be made.
8076 0         0 my $rlines = $self->[_rlines_];
8077 0         0 foreach my $line ( @{$rlines} ) {
  0         0  
8078 0         0 my $input_line = $line->{_line_text};
8079 0         0 $self->write_unindented_line($input_line);
8080             }
8081 0         0 return;
8082             } ## end sub dump_verbatim
8083              
8084             my %wU;
8085             my %wiq;
8086             my %is_wit;
8087             my %is_sigil;
8088             my %is_nonlist_keyword;
8089             my %is_nonlist_type;
8090             my %is_s_y_m_slash;
8091             my %is_unexpected_equals;
8092              
8093             BEGIN {
8094              
8095             # added 'U' to fix cases b1125 b1126 b1127
8096 38     38   288 my @q = qw(w U);
8097 38         187 @{wU}{@q} = (1) x scalar(@q);
8098              
8099 38         276 @q = qw(w i q Q G C Z);
8100 38         198 @{wiq}{@q} = (1) x scalar(@q);
8101              
8102 38         121 @q = qw(w i t);
8103 38         147 @{is_wit}{@q} = (1) x scalar(@q);
8104              
8105 38         132 @q = qw($ & % * @);
8106 38         178 @{is_sigil}{@q} = (1) x scalar(@q);
8107              
8108             # Parens following these keywords will not be marked as lists. Note that
8109             # 'for' is not included and is handled separately, by including 'f' in the
8110             # hash %is_counted_type, since it may or may not be a c-style for loop.
8111 38         131 @q = qw( if elsif unless and or );
8112 38         148 @is_nonlist_keyword{@q} = (1) x scalar(@q);
8113              
8114             # Parens following these types will not be marked as lists
8115 38         93 @q = qw( && || );
8116 38         150 @is_nonlist_type{@q} = (1) x scalar(@q);
8117              
8118 38         123 @q = qw( s y m / );
8119 38         185 @is_s_y_m_slash{@q} = (1) x scalar(@q);
8120              
8121 38         137 @q = qw( = == != );
8122 38         535342 @is_unexpected_equals{@q} = (1) x scalar(@q);
8123              
8124             } ## end BEGIN
8125              
8126             { #<<< begin closure respace_tokens
8127              
8128             my $rLL_new; # This will be the new array of tokens
8129              
8130             # These are variables in $self
8131             my $rLL;
8132             my $length_function;
8133             my $is_encoded_data;
8134              
8135             my $K_closing_ternary;
8136             my $K_opening_ternary;
8137             my $rchildren_of_seqno;
8138             my $rhas_broken_code_block;
8139             my $rhas_broken_list;
8140             my $rhas_broken_list_with_lec;
8141             my $rhas_code_block;
8142             my $rhas_list;
8143             my $rhas_ternary;
8144             my $ris_assigned_structure;
8145             my $ris_broken_container;
8146             my $ris_excluded_lp_container;
8147             my $ris_list_by_seqno;
8148             my $ris_permanently_broken;
8149             my $rlec_count_by_seqno;
8150             my $roverride_cab3;
8151             my $rparent_of_seqno;
8152             my $rtype_count_by_seqno;
8153             my $rblock_type_of_seqno;
8154              
8155             my $K_opening_container;
8156             my $K_closing_container;
8157              
8158             my %K_first_here_doc_by_seqno;
8159              
8160             my $last_nonblank_code_type;
8161             my $last_nonblank_code_token;
8162             my $last_nonblank_block_type;
8163             my $last_last_nonblank_code_type;
8164             my $last_last_nonblank_code_token;
8165              
8166             my %seqno_stack;
8167             my %K_old_opening_by_seqno;
8168             my $depth_next;
8169             my $depth_next_max;
8170              
8171             my $cumulative_length;
8172              
8173             # Variables holding the current line info
8174             my $Ktoken_vars;
8175             my $Kfirst_old;
8176             my $Klast_old;
8177             my $Klast_old_code;
8178             my $CODE_type;
8179              
8180             my $rwhitespace_flags;
8181              
8182             sub initialize_respace_tokens_closure {
8183              
8184 552     552 0 1676 my ($self) = @_;
8185              
8186 552         1687 $rLL_new = []; # This is the new array
8187              
8188 552         7625 $rLL = $self->[_rLL_];
8189 552         3836 $length_function = $self->[_length_function_];
8190 552         1691 $is_encoded_data = $self->[_is_encoded_data_];
8191              
8192 552         1600 $K_closing_ternary = $self->[_K_closing_ternary_];
8193 552         1731 $K_opening_ternary = $self->[_K_opening_ternary_];
8194 552         4085 $rchildren_of_seqno = $self->[_rchildren_of_seqno_];
8195 552         1415 $rhas_broken_code_block = $self->[_rhas_broken_code_block_];
8196 552         1507 $rhas_broken_list = $self->[_rhas_broken_list_];
8197 552         1382 $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
8198 552         1394 $rhas_code_block = $self->[_rhas_code_block_];
8199 552         1455 $rhas_list = $self->[_rhas_list_];
8200 552         1471 $rhas_ternary = $self->[_rhas_ternary_];
8201 552         1568 $ris_assigned_structure = $self->[_ris_assigned_structure_];
8202 552         2161 $ris_broken_container = $self->[_ris_broken_container_];
8203 552         1315 $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
8204 552         1787 $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
8205 552         1427 $ris_permanently_broken = $self->[_ris_permanently_broken_];
8206 552         1412 $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
8207 552         1356 $roverride_cab3 = $self->[_roverride_cab3_];
8208 552         1228 $rparent_of_seqno = $self->[_rparent_of_seqno_];
8209 552         2552 $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
8210 552         1238 $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
8211              
8212 552         1476 %K_first_here_doc_by_seqno = ();
8213              
8214 552         1389 $last_nonblank_code_type = ';';
8215 552         1252 $last_nonblank_code_token = ';';
8216 552         1152 $last_nonblank_block_type = EMPTY_STRING;
8217 552         1234 $last_last_nonblank_code_type = ';';
8218 552         1156 $last_last_nonblank_code_token = ';';
8219              
8220 552         1924 %seqno_stack = ();
8221 552         2158 %K_old_opening_by_seqno = (); # Note: old K index
8222 552         1279 $depth_next = 0;
8223 552         1142 $depth_next_max = 0;
8224              
8225             # we will be setting token lengths as we go
8226 552         1156 $cumulative_length = 0;
8227              
8228 552         1168 $Ktoken_vars = undef; # the old K value of $rtoken_vars
8229 552         1047 $Kfirst_old = undef; # min K of old line
8230 552         1027 $Klast_old = undef; # max K of old line
8231 552         1058 $Klast_old_code = undef; # K of last token if side comment
8232 552         1102 $CODE_type = EMPTY_STRING;
8233              
8234             # Set the whitespace flags, which indicate the token spacing preference.
8235 552         4537 $rwhitespace_flags = $self->set_whitespace_flags();
8236              
8237             # Note that $K_opening_container and $K_closing_container have values
8238             # defined in sub get_line() for the previous K indexes. They were needed
8239             # in case option 'indent-only' was set, and we didn't get here. We no
8240             # longer need those and will eliminate them now to avoid any possible
8241             # mixing of old and new values. This must be done AFTER the call to
8242             # set_whitespace_flags, which needs these.
8243 552         3550 $K_opening_container = $self->[_K_opening_container_] = {};
8244 552         2974 $K_closing_container = $self->[_K_closing_container_] = {};
8245              
8246 552         1313 return;
8247              
8248             } ## end sub initialize_respace_tokens_closure
8249              
8250             sub respace_tokens {
8251              
8252 555     555 0 1344 my $self = shift;
8253              
8254             #--------------------------------------------------------------------------
8255             # This routine is called once per file to do as much formatting as possible
8256             # before new line breaks are set.
8257             #--------------------------------------------------------------------------
8258              
8259             # Return parameters:
8260             # Set $severe_error=true if processing must terminate immediately
8261 555         2749 my ( $severe_error, $rqw_lines );
8262              
8263             # We change any spaces in --indent-only mode
8264 555 100       2303 if ( $rOpts->{'indent-only'} ) {
8265              
8266             # We need to define lengths for -indent-only to avoid undefs, even
8267             # though these values are not actually needed for option --indent-only.
8268              
8269 3         20 $rLL = $self->[_rLL_];
8270 3         20 $length_function = $self->[_length_function_];
8271 3         12 $cumulative_length = 0;
8272              
8273 3         7 foreach my $item ( @{$rLL} ) {
  3         10  
8274 122         171 my $token = $item->[_TOKEN_];
8275 122         201 my $token_length = $length_function->($token);
8276 122         154 $cumulative_length += $token_length;
8277 122         159 $item->[_TOKEN_LENGTH_] = $token_length;
8278 122         178 $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
8279             }
8280              
8281 3         18 return ( $severe_error, $rqw_lines );
8282             }
8283              
8284             # This routine makes all necessary and possible changes to the tokenization
8285             # after the initial tokenization of the file. This is a tedious routine,
8286             # but basically it consists of inserting and deleting whitespace between
8287             # nonblank tokens according to the selected parameters. In a few cases
8288             # non-space characters are added, deleted or modified.
8289              
8290             # The goal of this routine is to create a new token array which only needs
8291             # the definition of new line breaks and padding to complete formatting. In
8292             # a few cases we have to cheat a little to achieve this goal. In
8293             # particular, we may not know if a semicolon will be needed, because it
8294             # depends on how the line breaks go. To handle this, we include the
8295             # semicolon as a 'phantom' which can be displayed as normal or as an empty
8296             # string.
8297              
8298             # Method: The old tokens are copied one-by-one, with changes, from the old
8299             # linear storage array $rLL to a new array $rLL_new.
8300              
8301             # (re-)initialize closure variables for this problem
8302 552         2832 $self->initialize_respace_tokens_closure();
8303              
8304             #--------------------------------
8305             # Main over all lines of the file
8306             #--------------------------------
8307 552         1636 my $rlines = $self->[_rlines_];
8308 552         1547 my $line_type = EMPTY_STRING;
8309 552         1175 my $last_K_out;
8310              
8311 552         1326 foreach my $line_of_tokens ( @{$rlines} ) {
  552         1862  
8312              
8313 7628         14935 my $input_line_number = $line_of_tokens->{_line_number};
8314 7628         11634 my $last_line_type = $line_type;
8315 7628         13092 $line_type = $line_of_tokens->{_line_type};
8316 7628 100       15541 next unless ( $line_type eq 'CODE' );
8317 7459         12650 $CODE_type = $line_of_tokens->{_code_type};
8318              
8319 7459 100       14572 if ( $CODE_type eq 'BL' ) {
8320 803         1894 my $seqno = $seqno_stack{ $depth_next - 1 };
8321 803 100       2067 if ( defined($seqno) ) {
8322 79         257 $self->[_rblank_and_comment_count_]->{$seqno} += 1;
8323             $self->set_permanently_broken($seqno)
8324 79 100 66     496 if (!$ris_permanently_broken->{$seqno}
8325             && $rOpts_maximum_consecutive_blank_lines );
8326             }
8327             }
8328              
8329 7459         11801 my $rK_range = $line_of_tokens->{_rK_range};
8330 7459         11888 my ( $Kfirst, $Klast ) = @{$rK_range};
  7459         14531  
8331 7459 100       15674 next unless defined($Kfirst);
8332 6656         12251 ( $Kfirst_old, $Klast_old ) = ( $Kfirst, $Klast );
8333 6656         9585 $Klast_old_code = $Klast_old;
8334              
8335             # Be sure an old K value is defined for sub store_token
8336 6656         8972 $Ktoken_vars = $Kfirst;
8337              
8338             # Check for correct sequence of token indexes...
8339             # An error here means that sub write_line() did not correctly
8340             # package the tokenized lines as it received them. If we
8341             # get a fault here it has not output a continuous sequence
8342             # of K values. Or a line of CODE may have been mis-marked as
8343             # something else. There is no good way to continue after such an
8344             # error.
8345 6656 100       12075 if ( defined($last_K_out) ) {
8346 6107 50       13844 if ( $Kfirst != $last_K_out + 1 ) {
8347 0         0 Fault_Warn(
8348             "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
8349             );
8350 0         0 $severe_error = 1;
8351 0         0 return ( $severe_error, $rqw_lines );
8352             }
8353             }
8354             else {
8355              
8356             # The first token should always have been given index 0 by sub
8357             # write_line()
8358 549 50       1897 if ( $Kfirst != 0 ) {
8359 0         0 Fault("Program Bug: first K is $Kfirst but should be 0");
8360             }
8361             }
8362 6656         9336 $last_K_out = $Klast;
8363              
8364             # Handle special lines of code
8365 6656 100 100     18069 if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
      100        
8366              
8367             # CODE_types are as follows.
8368             # 'BL' = Blank Line
8369             # 'VB' = Verbatim - line goes out verbatim
8370             # 'FS' = Format Skipping - line goes out verbatim, no blanks
8371             # 'IO' = Indent Only - only indentation may be changed
8372             # 'NIN' = No Internal Newlines - line does not get broken
8373             # 'HSC'=Hanging Side Comment - fix this hanging side comment
8374             # 'BC'=Block Comment - an ordinary full line comment
8375             # 'SBC'=Static Block Comment - a block comment which does not get
8376             # indented
8377             # 'SBCX'=Static Block Comment Without Leading Space
8378             # 'VER'=VERSION statement
8379             # '' or (undefined) - no restrictions
8380              
8381             # For a hanging side comment we insert an empty quote before
8382             # the comment so that it becomes a normal side comment and
8383             # will be aligned by the vertical aligner
8384 849 100       2254 if ( $CODE_type eq 'HSC' ) {
8385              
8386             # Safety Check: This must be a line with one token (a comment)
8387 54         136 my $rvars_Kfirst = $rLL->[$Kfirst];
8388 54 50 33     317 if ( $Kfirst == $Klast && $rvars_Kfirst->[_TYPE_] eq '#' ) {
8389              
8390             # Note that even if the flag 'noadd-whitespace' is set, we
8391             # will make an exception here and allow a blank to be
8392             # inserted to push the comment to the right. We can think
8393             # of this as an adjustment of indentation rather than
8394             # whitespace between tokens. This will also prevent the
8395             # hanging side comment from getting converted to a block
8396             # comment if whitespace gets deleted, as for example with
8397             # the -extrude and -mangle options.
8398 54         186 my $rcopy =
8399             copy_token_as_type( $rvars_Kfirst, 'q', EMPTY_STRING );
8400 54         177 $self->store_token($rcopy);
8401 54         178 $rcopy = copy_token_as_type( $rvars_Kfirst, 'b', SPACE );
8402 54         183 $self->store_token($rcopy);
8403 54         196 $self->store_token($rvars_Kfirst);
8404 54         145 next;
8405             }
8406             else {
8407              
8408             # This line was mis-marked by sub scan_comment. Catch in
8409             # DEVEL_MODE, otherwise try to repair and keep going.
8410 0         0 Fault(
8411             "Program bug. A hanging side comment has been mismarked"
8412             ) if (DEVEL_MODE);
8413              
8414 0         0 $CODE_type = EMPTY_STRING;
8415 0         0 $line_of_tokens->{_code_type} = $CODE_type;
8416             }
8417             }
8418              
8419             # Copy tokens unchanged
8420 795         2176 foreach my $KK ( $Kfirst .. $Klast ) {
8421 1249         1955 $Ktoken_vars = $KK;
8422 1249         3674 $self->store_token( $rLL->[$KK] );
8423             }
8424 795         1676 next;
8425             }
8426              
8427             # Handle normal line..
8428              
8429             # Define index of last token before any side comment for comma counts
8430 5807         12260 my $type_end = $rLL->[$Klast_old_code]->[_TYPE_];
8431 5807 100 100     22650 if ( ( $type_end eq '#' || $type_end eq 'b' )
      66        
8432             && $Klast_old_code > $Kfirst_old )
8433             {
8434 470         824 $Klast_old_code--;
8435 470 100 66     2154 if ( $rLL->[$Klast_old_code]->[_TYPE_] eq 'b'
8436             && $Klast_old_code > $Kfirst_old )
8437             {
8438 319         583 $Klast_old_code--;
8439             }
8440             }
8441              
8442             # Insert any essential whitespace between lines
8443             # if last line was normal CODE.
8444             # Patch for rt #125012: use K_previous_code rather than '_nonblank'
8445             # because comments may disappear.
8446             # Note that we must do this even if --noadd-whitespace is set
8447 5807 100       12164 if ( $last_line_type eq 'CODE' ) {
8448 5500         10141 my $type_next = $rLL->[$Kfirst]->[_TYPE_];
8449 5500         9159 my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
8450 5500 100       13727 if (
8451             is_essential_whitespace(
8452             $last_last_nonblank_code_token,
8453             $last_last_nonblank_code_type,
8454             $last_nonblank_code_token,
8455             $last_nonblank_code_type,
8456             $token_next,
8457             $type_next,
8458             )
8459             )
8460             {
8461 127         426 $self->store_token();
8462             }
8463             }
8464              
8465             #-----------------------------------------------
8466             # Inner loop to respace tokens on a line of code
8467             #-----------------------------------------------
8468              
8469             # The inner loop is in a separate sub for clarity
8470 5807         14702 $self->respace_tokens_inner_loop( $Kfirst, $Klast, $input_line_number );
8471              
8472             } # End line loop
8473              
8474             # finalize data structures
8475 552         4681 $self->respace_post_loop_ops();
8476              
8477             # Reset memory to be the new array
8478 552         1314 $self->[_rLL_] = $rLL_new;
8479 552         1165 my $Klimit;
8480 552 100       1184 if ( @{$rLL_new} ) { $Klimit = @{$rLL_new} - 1 }
  552         1922  
  549         1069  
  549         1368  
8481 552         1259 $self->[_Klimit_] = $Klimit;
8482              
8483             # During development, verify that the new array still looks okay.
8484 552         969 DEVEL_MODE && $self->check_token_array();
8485              
8486             # update the token limits of each line
8487 552         3469 ( $severe_error, $rqw_lines ) = $self->resync_lines_and_tokens();
8488              
8489 552         2313 return ( $severe_error, $rqw_lines );
8490             } ## end sub respace_tokens
8491              
8492             sub respace_tokens_inner_loop {
8493              
8494 5807     5807 0 11701 my ( $self, $Kfirst, $Klast, $input_line_number ) = @_;
8495              
8496             #-----------------------------------------------------------------
8497             # Loop to copy all tokens on one line, making any spacing changes,
8498             # while also collecting information needed by later subs.
8499             #-----------------------------------------------------------------
8500 5807         13136 foreach my $KK ( $Kfirst .. $Klast ) {
8501              
8502             # TODO: consider eliminating this closure var by passing directly to
8503             # store_token following pattern of store_token_to_go.
8504 49886         65860 $Ktoken_vars = $KK;
8505              
8506 49886         71155 my $rtoken_vars = $rLL->[$KK];
8507 49886         74233 my $type = $rtoken_vars->[_TYPE_];
8508              
8509             # Handle a blank space ...
8510 49886 100       88381 if ( $type eq 'b' ) {
8511              
8512             # Delete it if not wanted by whitespace rules
8513             # or we are deleting all whitespace
8514             # Note that whitespace flag is a flag indicating whether a
8515             # white space BEFORE the token is needed
8516 15110 100       28075 next if ( $KK >= $Klast ); # skip terminal blank
8517 14950         21887 my $Knext = $KK + 1;
8518              
8519 14950 50       25881 if ($rOpts_freeze_whitespace) {
8520 0         0 $self->store_token($rtoken_vars);
8521 0         0 next;
8522             }
8523              
8524 14950         21514 my $ws = $rwhitespace_flags->[$Knext];
8525 14950 100 100     44351 if ( $ws == -1
8526             || $rOpts_delete_old_whitespace )
8527             {
8528              
8529 752         1517 my $token_next = $rLL->[$Knext]->[_TOKEN_];
8530 752         1307 my $type_next = $rLL->[$Knext]->[_TYPE_];
8531              
8532 752         1709 my $do_not_delete = is_essential_whitespace(
8533             $last_last_nonblank_code_token,
8534             $last_last_nonblank_code_type,
8535             $last_nonblank_code_token,
8536             $last_nonblank_code_type,
8537             $token_next,
8538             $type_next,
8539             );
8540              
8541             # Note that repeated blanks will get filtered out here
8542 752 100       2167 next unless ($do_not_delete);
8543             }
8544              
8545             # make it just one character
8546 14311         26381 $rtoken_vars->[_TOKEN_] = SPACE;
8547 14311         32415 $self->store_token($rtoken_vars);
8548 14311         25347 next;
8549             }
8550              
8551 34776         52957 my $token = $rtoken_vars->[_TOKEN_];
8552              
8553             # Handle a sequenced token ... i.e. one of ( ) { } [ ] ? :
8554 34776 100       110422 if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) {
    100          
    100          
    100          
    100          
    100          
    100          
    100          
8555              
8556             # One of ) ] } ...
8557 9062 100       20681 if ( $is_closing_token{$token} ) {
8558              
8559 4345         8055 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
8560 4345         7877 my $block_type = $rblock_type_of_seqno->{$type_sequence};
8561              
8562             #---------------------------------------------
8563             # check for semicolon addition in a code block
8564             #---------------------------------------------
8565 4345 100       8273 if ($block_type) {
8566              
8567             # if not preceded by a ';' ..
8568 965 100       3189 if ( $last_nonblank_code_type ne ';' ) {
8569              
8570             # tentatively insert a semicolon if appropriate
8571             $self->add_phantom_semicolon($KK)
8572 537 100       2699 if $rOpts->{'add-semicolons'};
8573             }
8574             }
8575              
8576             #----------------------------------------------------------
8577             # check for addition/deletion of a trailing comma in a list
8578             #----------------------------------------------------------
8579             else {
8580              
8581             # if this is a list ..
8582 3380         5558 my $rtype_count = $rtype_count_by_seqno->{$type_sequence};
8583 3380 100 100     15739 if ( $rtype_count
      100        
      100        
8584             && $rtype_count->{','}
8585             && !$rtype_count->{';'}
8586             && !$rtype_count->{'f'} )
8587             {
8588              
8589             # if NOT preceded by a comma..
8590 1020 100       2703 if ( $last_nonblank_code_type ne ',' ) {
8591              
8592             # insert a comma if requested
8593 735 100 66     2402 if ( $rOpts_add_trailing_commas
8594             && %trailing_comma_rules )
8595             {
8596             $self->add_trailing_comma( $KK, $Kfirst,
8597 24         74 $trailing_comma_rules{$token} );
8598             }
8599             }
8600              
8601             # if preceded by a comma ..
8602             else {
8603              
8604             # delete a trailing comma if requested
8605 285         529 my $deleted;
8606 285 100 66     2761 if ( $rOpts_delete_trailing_commas
8607             && %trailing_comma_rules )
8608             {
8609             $deleted =
8610             $self->delete_trailing_comma( $KK, $Kfirst,
8611 60         177 $trailing_comma_rules{$token} );
8612             }
8613              
8614             # delete a weld-interfering comma if requested
8615 285 50 100     1371 if ( !$deleted
      66        
8616             && $rOpts_delete_weld_interfering_commas
8617             && $is_closing_type{
8618             $last_last_nonblank_code_type} )
8619             {
8620 1         8 $self->delete_weld_interfering_comma($KK);
8621             }
8622             }
8623             }
8624             }
8625             }
8626             }
8627              
8628             # Modify certain tokens here for whitespace
8629             # The following is not yet done, but could be:
8630             # sub (x x x)
8631             # ( $type =~ /^[wit]$/ )
8632             elsif ( $is_wit{$type} ) {
8633              
8634             # index() is several times faster than a regex test with \s here
8635             ## $token =~ /\s/
8636 7042 100 66     29109 if ( index( $token, SPACE ) > 0 || index( $token, "\t" ) > 0 ) {
8637              
8638             # change '$ var' to '$var' etc
8639             # change '@ ' to '@'
8640             # Examples: <<snippets/space1.in>>
8641 159         510 my $ord = ord( substr( $token, 1, 1 ) );
8642 159 100 66     1116 if (
      33        
8643              
8644             # quick test for possible blank at second char
8645             $ord > 0 && ( $ord < ORD_PRINTABLE_MIN
8646             || $ord > ORD_PRINTABLE_MAX )
8647             )
8648             {
8649 6         44 my ( $sigil, $word ) = split /\s+/, $token, 2;
8650              
8651             # $sigil =~ /^[\$\&\%\*\@]$/ )
8652 6 100       33 if ( $is_sigil{$sigil} ) {
8653 5         9 $token = $sigil;
8654 5 50       15 $token .= $word if ( defined($word) ); # fix c104
8655 5         10 $rtoken_vars->[_TOKEN_] = $token;
8656             }
8657             }
8658              
8659             # Trim certain spaces in identifiers
8660 159 50       543 if ( $type eq 'i' ) {
8661              
8662 159 100       2192 if ( $token =~ /$SUB_PATTERN/ ) {
    100          
8663              
8664             # -spp = 0 : no space before opening prototype paren
8665             # -spp = 1 : stable (follow input spacing)
8666             # -spp = 2 : always space before opening prototype paren
8667 128 100 66     873 if ( !defined($rOpts_space_prototype_paren)
    100          
    50          
8668             || $rOpts_space_prototype_paren == 1 )
8669             {
8670             ## default: stable
8671             }
8672             elsif ( $rOpts_space_prototype_paren == 0 ) {
8673 5         27 $token =~ s/\s+\(/\(/;
8674             }
8675             elsif ( $rOpts_space_prototype_paren == 2 ) {
8676 5         27 $token =~ s/\(/ (/;
8677             }
8678              
8679             # one space max, and no tabs
8680 128         831 $token =~ s/\s+/ /g;
8681 128         428 $rtoken_vars->[_TOKEN_] = $token;
8682              
8683 128         540 $self->[_ris_special_identifier_token_]->{$token} =
8684             'sub';
8685              
8686             }
8687              
8688             # clean up spaces in package identifiers, like
8689             # "package Bob::Dog;"
8690             elsif ( $token =~ /^(package|class)\s/ ) {
8691 25         130 $token =~ s/\s+/ /g;
8692 25         70 $rtoken_vars->[_TOKEN_] = $token;
8693              
8694 25         92 $self->[_ris_special_identifier_token_]->{$token} =
8695             'package';
8696             }
8697              
8698             # trim identifiers of trailing blanks which can occur
8699             # under some unusual circumstances, such as if the
8700             # identifier 'witch' has trailing blanks on input here:
8701             #
8702             # sub
8703             # witch
8704             # () # prototype may be on new line ...
8705             # ...
8706 159         439 my $ord_ch = ord( substr( $token, -1, 1 ) );
8707 159 50 33     1092 if (
      33        
8708              
8709             # quick check for possible ending space
8710             $ord_ch > 0 && ( $ord_ch < ORD_PRINTABLE_MIN
8711             || $ord_ch > ORD_PRINTABLE_MAX )
8712             )
8713             {
8714 0         0 $token =~ s/\s+$//g;
8715 0         0 $rtoken_vars->[_TOKEN_] = $token;
8716             }
8717             }
8718             }
8719             }
8720              
8721             # handle semicolons
8722             elsif ( $type eq ';' ) {
8723              
8724             # Remove unnecessary semicolons, but not after bare
8725             # blocks, where it could be unsafe if the brace is
8726             # mis-tokenized.
8727 2388 100 100     19388 if (
      100        
8728             $rOpts->{'delete-semicolons'}
8729             && (
8730             (
8731             $last_nonblank_block_type
8732             && $last_nonblank_code_type eq '}'
8733             && (
8734             $is_block_without_semicolon{
8735             $last_nonblank_block_type}
8736             || $last_nonblank_block_type =~ /$SUB_PATTERN/
8737             || $last_nonblank_block_type =~ /^\w+:$/
8738             )
8739             )
8740             || $last_nonblank_code_type eq ';'
8741             )
8742             )
8743             {
8744              
8745             # This looks like a deletable semicolon, but even if a
8746             # semicolon can be deleted it is not necessarily best to do
8747             # so. We apply these additional rules for deletion:
8748             # - Always ok to delete a ';' at the end of a line
8749             # - Never delete a ';' before a '#' because it would
8750             # promote it to a block comment.
8751             # - If a semicolon is not at the end of line, then only
8752             # delete if it is followed by another semicolon or closing
8753             # token. This includes the comment rule. It may take
8754             # two passes to get to a final state, but it is a little
8755             # safer. For example, keep the first semicolon here:
8756             # eval { sub bubba { ok(0) }; ok(0) } || ok(1);
8757             # It is not required but adds some clarity.
8758 16         32 my $ok_to_delete = 1;
8759 16 100       35 if ( $KK < $Klast ) {
8760 15         44 my $Kn = $self->K_next_nonblank($KK);
8761 15 100 66     58 if ( defined($Kn) && $Kn <= $Klast ) {
8762 14         31 my $next_nonblank_token_type = $rLL->[$Kn]->[_TYPE_];
8763 14   66     44 $ok_to_delete = $next_nonblank_token_type eq ';'
8764             || $next_nonblank_token_type eq '}';
8765             }
8766             }
8767              
8768             # do not delete only nonblank token in a file
8769             else {
8770 1         11 my $Kp = $self->K_previous_code( undef, $rLL_new );
8771 1         5 my $Kn = $self->K_next_nonblank($KK);
8772 1   33     10 $ok_to_delete = defined($Kn) || defined($Kp);
8773             }
8774              
8775 16 100       34 if ($ok_to_delete) {
8776 13         41 $self->note_deleted_semicolon($input_line_number);
8777 13         26 next;
8778             }
8779             else {
8780 3         11 write_logfile_entry("Extra ';'\n");
8781             }
8782             }
8783             }
8784              
8785             # Old patch to add space to something like "x10".
8786             # Note: This is now done in the Tokenizer, but this code remains
8787             # for reference.
8788             elsif ( $type eq 'n' ) {
8789 1856 50 33     6928 if ( substr( $token, 0, 1 ) eq 'x' && $token =~ /^x\d+/ ) {
8790 0         0 $token =~ s/x/x /;
8791 0         0 $rtoken_vars->[_TOKEN_] = $token;
8792 0         0 if (DEVEL_MODE) {
8793             Fault(<<EOM);
8794             Near line $input_line_number, Unexpected need to split a token '$token' - this should now be done by the Tokenizer
8795             EOM
8796             }
8797             }
8798             }
8799              
8800             # check for a qw quote
8801             elsif ( $type eq 'q' ) {
8802              
8803             # trim blanks from right of qw quotes
8804             # (To avoid trimming qw quotes use -ntqw; the tokenizer handles
8805             # this)
8806 274         1959 $token =~ s/\s*$//;
8807 274         681 $rtoken_vars->[_TOKEN_] = $token;
8808 274 50 66     880 if ( $self->[_save_logfile_] && $token =~ /\t/ ) {
8809 0         0 $self->note_embedded_tab($input_line_number);
8810             }
8811 274 100 66     843 if ( $rwhitespace_flags->[$KK] == WS_YES
      100        
      100        
8812 257         1612 && @{$rLL_new}
8813             && $rLL_new->[-1]->[_TYPE_] ne 'b'
8814             && $rOpts_add_whitespace )
8815             {
8816 66         226 $self->store_token();
8817             }
8818 274         788 $self->store_token($rtoken_vars);
8819 274         665 next;
8820             } ## end if ( $type eq 'q' )
8821              
8822             # delete repeated commas if requested
8823             elsif ( $type eq ',' ) {
8824 2957 100 100     7575 if ( $last_nonblank_code_type eq ','
8825             && $rOpts->{'delete-repeated-commas'} )
8826             {
8827             # Could note this deletion as a possible future update:
8828             ## $self->note_deleted_comma($input_line_number);
8829 3         6 next;
8830             }
8831              
8832             # remember input line index of first comma if -wtc is used
8833 2954 100       6606 if (%trailing_comma_rules) {
8834 259         520 my $seqno = $seqno_stack{ $depth_next - 1 };
8835 259 100 66     1051 if ( defined($seqno)
8836             && !defined( $self->[_rfirst_comma_line_index_]->{$seqno} )
8837             )
8838             {
8839 112         307 $self->[_rfirst_comma_line_index_]->{$seqno} =
8840             $rtoken_vars->[_LINE_INDEX_];
8841             }
8842             }
8843             }
8844              
8845             # change 'LABEL :' to 'LABEL:'
8846             elsif ( $type eq 'J' ) {
8847 79         333 $token =~ s/\s+//g;
8848 79         163 $rtoken_vars->[_TOKEN_] = $token;
8849             }
8850              
8851             # check a quote for problems
8852             elsif ( $type eq 'Q' ) {
8853 2458 100       6679 $self->check_Q( $KK, $Kfirst, $input_line_number )
8854             if ( $self->[_save_logfile_] );
8855             }
8856              
8857             # Store this token with possible previous blank
8858 34486 100 100     70351 if ( $rwhitespace_flags->[$KK] == WS_YES
      100        
      100        
8859 22174         92191 && @{$rLL_new}
8860             && $rLL_new->[-1]->[_TYPE_] ne 'b'
8861             && $rOpts_add_whitespace )
8862             {
8863 7465         15545 $self->store_token();
8864             }
8865 34486         63935 $self->store_token($rtoken_vars);
8866              
8867             } # End token loop
8868              
8869 5807         13096 return;
8870             } ## end sub respace_tokens_inner_loop
8871              
8872             sub respace_post_loop_ops {
8873              
8874 552     552 0 1883 my ($self) = @_;
8875              
8876             # Walk backwards through the tokens, making forward links to sequence items.
8877 552 100       1147 if ( @{$rLL_new} ) {
  552         1965  
8878 549         1035 my $KNEXT;
8879 549         1597 foreach my $KK ( reverse( 0 .. @{$rLL_new} - 1 ) ) {
  549         5540  
8880 58259         82946 $rLL_new->[$KK]->[_KNEXT_SEQ_ITEM_] = $KNEXT;
8881 58259 100       100535 if ( $rLL_new->[$KK]->[_TYPE_SEQUENCE_] ) { $KNEXT = $KK }
  9110         13052  
8882             }
8883 549         3732 $self->[_K_first_seq_item_] = $KNEXT;
8884             }
8885              
8886             # Find and remember lists by sequence number
8887 552         1877 my %is_C_style_for;
8888 552         1556 foreach my $seqno ( keys %{$K_opening_container} ) {
  552         3911  
8889 4368         7032 my $K_opening = $K_opening_container->{$seqno};
8890 4368 50       7930 next unless defined($K_opening);
8891              
8892             # code errors may leave undefined closing tokens
8893 4368         6455 my $K_closing = $K_closing_container->{$seqno};
8894 4368 50       7984 next unless defined($K_closing);
8895              
8896 4368         6772 my $lx_open = $rLL_new->[$K_opening]->[_LINE_INDEX_];
8897 4368         6491 my $lx_close = $rLL_new->[$K_closing]->[_LINE_INDEX_];
8898 4368         6301 my $line_diff = $lx_close - $lx_open;
8899 4368         7654 $ris_broken_container->{$seqno} = $line_diff;
8900              
8901             # See if this is a list
8902 4368         5639 my $is_list;
8903 4368         6755 my $rtype_count = $rtype_count_by_seqno->{$seqno};
8904 4368 100       8289 if ($rtype_count) {
8905 1885         3538 my $comma_count = $rtype_count->{','};
8906 1885         4807 my $fat_comma_count = $rtype_count->{'=>'};
8907 1885         2923 my $semicolon_count = $rtype_count->{';'};
8908 1885 100       4317 if ( $rtype_count->{'f'} ) {
8909 17         60 $semicolon_count += $rtype_count->{'f'};
8910 17         50 $is_C_style_for{$seqno} = 1;
8911             }
8912              
8913             # We will define a list to be a container with one or more commas
8914             # and no semicolons. Note that we have included the semicolons
8915             # in a 'for' container in the semicolon count to keep c-style for
8916             # statements from being formatted as lists.
8917 1885 100 100     7933 if ( ( $comma_count || $fat_comma_count ) && !$semicolon_count ) {
      100        
8918 1218         2082 $is_list = 1;
8919              
8920             # We need to do one more check for a parenthesized list:
8921             # At an opening paren following certain tokens, such as 'if',
8922             # we do not want to format the contents as a list.
8923 1218 100       4713 if ( $rLL_new->[$K_opening]->[_TOKEN_] eq '(' ) {
8924 731         2570 my $Kp = $self->K_previous_code( $K_opening, $rLL_new );
8925 731 100       1767 if ( defined($Kp) ) {
8926 730         1403 my $type_p = $rLL_new->[$Kp]->[_TYPE_];
8927 730         1378 my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
8928             $is_list =
8929             $type_p eq 'k'
8930             ? !$is_nonlist_keyword{$token_p}
8931 730 100       2466 : !$is_nonlist_type{$type_p};
8932             }
8933             }
8934             }
8935             }
8936              
8937             # Look for a block brace marked as uncertain. If the tokenizer thinks
8938             # its guess is uncertain for the type of a brace following an unknown
8939             # bareword then it adds a trailing space as a signal. We can fix the
8940             # type here now that we have had a better look at the contents of the
8941             # container. This fixes case b1085. To find the corresponding code in
8942             # Tokenizer.pm search for 'b1085' with an editor.
8943 4368         6851 my $block_type = $rblock_type_of_seqno->{$seqno};
8944 4368 100 100     10797 if ( $block_type && substr( $block_type, -1, 1 ) eq SPACE ) {
8945              
8946             # Always remove the trailing space
8947 18         190 $block_type =~ s/\s+$//;
8948              
8949             # Try to filter out parenless sub calls
8950 18         108 my $Knn1 = $self->K_next_nonblank( $K_opening, $rLL_new );
8951 18         51 my $Knn2;
8952 18 50       78 if ( defined($Knn1) ) {
8953 18         54 $Knn2 = $self->K_next_nonblank( $Knn1, $rLL_new );
8954             }
8955 18 50       127 my $type_nn1 = defined($Knn1) ? $rLL_new->[$Knn1]->[_TYPE_] : 'b';
8956 18 50       80 my $type_nn2 = defined($Knn2) ? $rLL_new->[$Knn2]->[_TYPE_] : 'b';
8957              
8958             # if ( $type_nn1 =~ /^[wU]$/ && $type_nn2 =~ /^[wiqQGCZ]$/ ) {
8959 18 100 100     121 if ( $wU{$type_nn1} && $wiq{$type_nn2} ) {
8960 6         15 $is_list = 0;
8961             }
8962              
8963             # Convert to a hash brace if it looks like it holds a list
8964 18 100       55 if ($is_list) {
8965              
8966 1         2 $block_type = EMPTY_STRING;
8967              
8968 1         4 $rLL_new->[$K_opening]->[_CI_LEVEL_] = 1;
8969 1         3 $rLL_new->[$K_closing]->[_CI_LEVEL_] = 1;
8970             }
8971              
8972 18         44 $rblock_type_of_seqno->{$seqno} = $block_type;
8973             }
8974              
8975             # Handle a list container
8976 4368 100 100     16262 if ( $is_list && !$block_type ) {
    100 100        
8977 1202         2669 $ris_list_by_seqno->{$seqno} = $seqno;
8978 1202         2201 my $seqno_parent = $rparent_of_seqno->{$seqno};
8979 1202         1850 my $depth = 0;
8980 1202   66     4835 while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
8981 1209         1726 $depth++;
8982              
8983             # for $rhas_list we need to save the minimum depth
8984 1209 100 100     3808 if ( !$rhas_list->{$seqno_parent}
8985             || $rhas_list->{$seqno_parent} > $depth )
8986             {
8987 619         1234 $rhas_list->{$seqno_parent} = $depth;
8988             }
8989              
8990 1209 100       2414 if ($line_diff) {
8991 391         730 $rhas_broken_list->{$seqno_parent} = 1;
8992              
8993             # Patch1: We need to mark broken lists with non-terminal
8994             # line-ending commas for the -bbx=2 parameter. This insures
8995             # that the list will stay broken. Otherwise the flag
8996             # -bbx=2 can be unstable. This fixes case b789 and b938.
8997              
8998             # Patch2: Updated to also require either one fat comma or
8999             # one more line-ending comma. Fixes cases b1069 b1070
9000             # b1072 b1076.
9001 391 100 100     1653 if (
      100        
9002             $rlec_count_by_seqno->{$seqno}
9003             && ( $rlec_count_by_seqno->{$seqno} > 1
9004             || $rtype_count_by_seqno->{$seqno}->{'=>'} )
9005             )
9006             {
9007 177         391 $rhas_broken_list_with_lec->{$seqno_parent} = 1;
9008             }
9009             }
9010 1209         4277 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
9011             }
9012             }
9013              
9014             # Handle code blocks ...
9015             # The -lp option needs to know if a container holds a code block
9016             elsif ( $block_type && $rOpts_line_up_parentheses ) {
9017 43         98 my $seqno_parent = $rparent_of_seqno->{$seqno};
9018 43   66     224 while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
9019 71         129 $rhas_code_block->{$seqno_parent} = 1;
9020 71         120 $rhas_broken_code_block->{$seqno_parent} = $line_diff;
9021 71         223 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
9022             }
9023             }
9024             }
9025              
9026             # Find containers with ternaries, needed for -lp formatting.
9027 552         2458 foreach my $seqno ( keys %{$K_opening_ternary} ) {
  552         2736  
9028 187         493 my $seqno_parent = $rparent_of_seqno->{$seqno};
9029 187   66     1007 while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
9030 153         299 $rhas_ternary->{$seqno_parent} = 1;
9031 153         580 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
9032             }
9033             }
9034              
9035             # Turn off -lp for containers with here-docs with text within a container,
9036             # since they have their own fixed indentation. Fixes case b1081.
9037 552 100       2258 if ($rOpts_line_up_parentheses) {
9038 31         121 foreach my $seqno ( keys %K_first_here_doc_by_seqno ) {
9039 1         3 my $Kh = $K_first_here_doc_by_seqno{$seqno};
9040 1         2 my $Kc = $K_closing_container->{$seqno};
9041 1         3 my $line_Kh = $rLL_new->[$Kh]->[_LINE_INDEX_];
9042 1         2 my $line_Kc = $rLL_new->[$Kc]->[_LINE_INDEX_];
9043 1 50       5 next if ( $line_Kh == $line_Kc );
9044 0         0 $ris_excluded_lp_container->{$seqno} = 1;
9045             }
9046             }
9047              
9048             # Set a flag to turn off -cab=3 in complex structures. Otherwise,
9049             # instability can occur. When it is overridden the behavior of the closest
9050             # match, -cab=2, will be used instead. This fixes cases b1096 b1113.
9051 552 50       2291 if ( $rOpts_comma_arrow_breakpoints == 3 ) {
9052 0         0 foreach my $seqno ( keys %{$K_opening_container} ) {
  0         0  
9053              
9054 0         0 my $rtype_count = $rtype_count_by_seqno->{$seqno};
9055 0 0 0     0 next unless ( $rtype_count && $rtype_count->{'=>'} );
9056              
9057             # override -cab=3 if this contains a sub-list
9058 0 0       0 if ( !defined( $roverride_cab3->{$seqno} ) ) {
9059 0 0       0 if ( $rhas_list->{$seqno} ) {
9060 0         0 $roverride_cab3->{$seqno} = 2;
9061             }
9062              
9063             # or if this is a sub-list of its parent container
9064             else {
9065 0         0 my $seqno_parent = $rparent_of_seqno->{$seqno};
9066 0 0 0     0 if ( defined($seqno_parent)
9067             && $ris_list_by_seqno->{$seqno_parent} )
9068             {
9069 0         0 $roverride_cab3->{$seqno} = 2;
9070             }
9071             }
9072             }
9073             }
9074             }
9075              
9076             # Add -ci to C-style for loops (issue c154)
9077             # This is much easier to do here than in the tokenizer.
9078 552         1951 foreach my $seqno ( keys %is_C_style_for ) {
9079 17         50 my $K_opening = $K_opening_container->{$seqno};
9080 17         40 my $K_closing = $K_closing_container->{$seqno};
9081 17         40 my $type_last = 'f';
9082 17         72 for my $KK ( $K_opening + 1 .. $K_closing - 1 ) {
9083 567 100       960 $rLL_new->[$KK]->[_CI_LEVEL_] = $type_last eq 'f' ? 0 : 1;
9084 567         816 my $type = $rLL_new->[$KK]->[_TYPE_];
9085 567 100 100     1384 if ( $type ne 'b' && $type ne '#' ) { $type_last = $type }
  310         476  
9086             }
9087             }
9088              
9089 552         1492 return;
9090             } ## end sub respace_post_loop_ops
9091              
9092             sub set_permanently_broken {
9093 164     164 0 498 my ( $self, $seqno ) = @_;
9094              
9095             # Mark this container, and all of its parent containers, as being
9096             # permanently broken (for example, by containing a blank line). This
9097             # is needed for certain list formatting operations.
9098 164         684 while ( defined($seqno) ) {
9099 407         788 $ris_permanently_broken->{$seqno} = 1;
9100 407         1029 $seqno = $rparent_of_seqno->{$seqno};
9101             }
9102 164         390 return;
9103             } ## end sub set_permanently_broken
9104              
9105             sub store_token {
9106              
9107 58313     58313 0 92145 my ( $self, $item ) = @_;
9108              
9109             #------------------------------------------
9110             # Store one token during respace operations
9111             #------------------------------------------
9112              
9113             # Input parameter:
9114             # if defined => reference to a token
9115             # if undef => make and store a blank space
9116              
9117             # NOTE: called once per token so coding efficiency is critical.
9118              
9119             # If no arg, then make and store a blank space
9120 58313 100       100470 if ( !$item ) {
9121              
9122             # - Never start the array with a space, and
9123             # - Never store two consecutive spaces
9124 7658 50 33     10371 if ( @{$rLL_new} && $rLL_new->[-1]->[_TYPE_] ne 'b' ) {
  7658         25420  
9125              
9126             # Note that the level and ci_level of newly created spaces should
9127             # be the same as the previous token. Otherwise the coding for the
9128             # -lp option can create a blinking state in some rare cases.
9129             # (see b1109, b1110).
9130 7658         15276 $item = [];
9131 7658         16514 $item->[_TYPE_] = 'b';
9132 7658         13813 $item->[_TOKEN_] = SPACE;
9133 7658         16247 $item->[_TYPE_SEQUENCE_] = EMPTY_STRING;
9134 7658         12560 $item->[_LINE_INDEX_] = $rLL_new->[-1]->[_LINE_INDEX_];
9135 7658         13778 $item->[_LEVEL_] = $rLL_new->[-1]->[_LEVEL_];
9136 7658         12617 $item->[_CI_LEVEL_] = $rLL_new->[-1]->[_CI_LEVEL_];
9137             }
9138 0         0 else { return }
9139             }
9140              
9141             # The next multiple assignment statements are significantly faster than
9142             # doing them one-by-one.
9143             my (
9144              
9145             $type,
9146             $token,
9147             $type_sequence,
9148              
9149 58313         76974 ) = @{$item}[
  58313         115044  
9150              
9151             _TYPE_,
9152             _TOKEN_,
9153             _TYPE_SEQUENCE_,
9154              
9155             ];
9156              
9157             # Set the token length. Later it may be adjusted again if phantom or
9158             # ignoring side comment lengths.
9159 58313 100       101146 my $token_length =
9160             $is_encoded_data ? $length_function->($token) : length($token);
9161              
9162             # handle blanks
9163 58313 100       104666 if ( $type eq 'b' ) {
    100          
9164              
9165             # Do not output consecutive blanks. This situation should have been
9166             # prevented earlier, but it is worth checking because later routines
9167             # make this assumption.
9168 22308 100 66     28799 if ( @{$rLL_new} && $rLL_new->[-1]->[_TYPE_] eq 'b' ) {
  22308         70833  
9169 5         18 return;
9170             }
9171             }
9172              
9173             # handle comments
9174             elsif ( $type eq '#' ) {
9175              
9176             # trim comments if necessary
9177 1091         2598 my $ord = ord( substr( $token, -1, 1 ) );
9178 1091 100 66     7076 if (
      66        
      66        
9179             $ord > 0
9180             && ( $ord < ORD_PRINTABLE_MIN
9181             || $ord > ORD_PRINTABLE_MAX )
9182             && $token =~ s/\s+$//
9183             )
9184             {
9185 20         152 $token_length = $length_function->($token);
9186 20         71 $item->[_TOKEN_] = $token;
9187             }
9188              
9189 1091         2018 my $ignore_sc_length = $rOpts_ignore_side_comment_lengths;
9190              
9191             # Ignore length of '## no critic' comments even if -iscl is not set
9192 1091 100 100     8469 if ( !$ignore_sc_length
      100        
      100        
      100        
9193             && !$rOpts_ignore_perlcritic_comments
9194             && $token_length > 10
9195             && substr( $token, 1, 1 ) eq '#'
9196             && $token =~ /^##\s*no\s+critic\b/ )
9197             {
9198              
9199             # Is it a side comment or a block comment?
9200 7 100       27 if ( $Ktoken_vars > $Kfirst_old ) {
9201              
9202             # This is a side comment. If we do not ignore its length, and
9203             # -iscl has not been set, then the line could be broken and
9204             # perlcritic will complain. So this is essential:
9205 3   50     23 $ignore_sc_length ||= 1;
9206              
9207             # It would be a good idea to also make this behave like a
9208             # static side comment, but this is not essential and would
9209             # change existing formatting. So we will leave it to the user
9210             # to set -ssc if desired.
9211             }
9212             else {
9213              
9214             # This is a full-line (block) comment.
9215             # It would be a good idea to make this behave like a static
9216             # block comment, but this is not essential and would change
9217             # existing formatting. So we will leave it to the user to
9218             # set -sbc if desired
9219             }
9220             }
9221              
9222             # Set length of ignored side comments as just 1
9223 1091 100 100     2772 if ( $ignore_sc_length && ( !$CODE_type || $CODE_type eq 'HSC' ) ) {
      100        
9224 17         33 $token_length = 1;
9225             }
9226              
9227 1091         2473 my $seqno = $seqno_stack{ $depth_next - 1 };
9228 1091 100       2716 if ( defined($seqno) ) {
9229 296 100       854 $self->[_rblank_and_comment_count_]->{$seqno} += 1
9230             if ( $CODE_type eq 'BC' );
9231             $self->set_permanently_broken($seqno)
9232 296 100       1046 if !$ris_permanently_broken->{$seqno};
9233             }
9234             }
9235              
9236             # handle non-blanks and non-comments
9237             else {
9238              
9239 34914         44723 my $block_type;
9240              
9241             # check for a sequenced item (i.e., container or ?/:)
9242 34914 100       59399 if ($type_sequence) {
9243              
9244             # This will be the index of this item in the new array
9245 9110         15673 my $KK_new = @{$rLL_new};
  9110         14391  
9246              
9247 9110 100       21963 if ( $is_opening_token{$token} ) {
    100          
9248              
9249 4368         10084 $K_opening_container->{$type_sequence} = $KK_new;
9250 4368         7468 $block_type = $rblock_type_of_seqno->{$type_sequence};
9251              
9252             # Fix for case b1100: Count a line ending in ', [' as having
9253             # a line-ending comma. Otherwise, these commas can be hidden
9254             # with something like --opening-square-bracket-right
9255 4368 100 100     11090 if ( $last_nonblank_code_type eq ','
      100        
9256             && $Ktoken_vars == $Klast_old_code
9257             && $Ktoken_vars > $Kfirst_old )
9258             {
9259 5         17 $rlec_count_by_seqno->{$type_sequence}++;
9260             }
9261              
9262 4368 100 100     14679 if ( $last_nonblank_code_type eq '='
9263             || $last_nonblank_code_type eq '=>' )
9264             {
9265 394         1211 $ris_assigned_structure->{$type_sequence} =
9266             $last_nonblank_code_type;
9267             }
9268              
9269 4368         8922 my $seqno_parent = $seqno_stack{ $depth_next - 1 };
9270 4368 100       9484 $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
9271 4368         6183 push @{ $rchildren_of_seqno->{$seqno_parent} }, $type_sequence;
  4368         12831  
9272 4368         9858 $rparent_of_seqno->{$type_sequence} = $seqno_parent;
9273 4368         8185 $seqno_stack{$depth_next} = $type_sequence;
9274 4368         8096 $K_old_opening_by_seqno{$type_sequence} = $Ktoken_vars;
9275 4368         8190 $depth_next++;
9276              
9277 4368 100       9656 if ( $depth_next > $depth_next_max ) {
9278 1231         2367 $depth_next_max = $depth_next;
9279             }
9280             }
9281             elsif ( $is_closing_token{$token} ) {
9282              
9283 4368         9646 $K_closing_container->{$type_sequence} = $KK_new;
9284 4368         7475 $block_type = $rblock_type_of_seqno->{$type_sequence};
9285              
9286             # Do not include terminal commas in counts
9287 4368 100 66     16977 if ( $last_nonblank_code_type eq ','
9288             || $last_nonblank_code_type eq '=>' )
9289             {
9290             $rtype_count_by_seqno->{$type_sequence}
9291 300         769 ->{$last_nonblank_code_type}--;
9292              
9293 300 50 66     1945 if ( $Ktoken_vars == $Kfirst_old
      66        
9294             && $last_nonblank_code_type eq ','
9295             && $rlec_count_by_seqno->{$type_sequence} )
9296             {
9297 165         362 $rlec_count_by_seqno->{$type_sequence}--;
9298             }
9299             }
9300              
9301             # Update the stack...
9302 4368         7321 $depth_next--;
9303             }
9304             else {
9305              
9306             # For ternary, note parent but do not include as child
9307 374         1193 my $seqno_parent = $seqno_stack{ $depth_next - 1 };
9308 374 100       1071 $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
9309 374         939 $rparent_of_seqno->{$type_sequence} = $seqno_parent;
9310              
9311             # These are not yet used but could be useful
9312 374 100       1419 if ( $token eq '?' ) {
    50          
9313 187         521 $K_opening_ternary->{$type_sequence} = $KK_new;
9314             }
9315             elsif ( $token eq ':' ) {
9316 187         578 $K_closing_ternary->{$type_sequence} = $KK_new;
9317             }
9318             else {
9319              
9320             # We really shouldn't arrive here, just being cautious:
9321             # The only sequenced types output by the tokenizer are the
9322             # opening & closing containers and the ternary types. Each
9323             # of those was checked above. So we would only get here
9324             # if the tokenizer has been changed to mark some other
9325             # tokens with sequence numbers.
9326 0         0 if (DEVEL_MODE) {
9327             Fault(
9328             "Unexpected token type with sequence number: type='$type', seqno='$type_sequence'"
9329             );
9330             }
9331             }
9332             }
9333             }
9334              
9335             # Remember the most recent two non-blank, non-comment tokens.
9336             # NOTE: the phantom semicolon code may change the output stack
9337             # without updating these values. Phantom semicolons are considered
9338             # the same as blanks for now, but future needs might change that.
9339             # See the related note in sub 'add_phantom_semicolon'.
9340 34914         47571 $last_last_nonblank_code_type = $last_nonblank_code_type;
9341 34914         46063 $last_last_nonblank_code_token = $last_nonblank_code_token;
9342              
9343 34914         44925 $last_nonblank_code_type = $type;
9344 34914         45833 $last_nonblank_code_token = $token;
9345 34914         43904 $last_nonblank_block_type = $block_type;
9346              
9347             # count selected types
9348 34914 100       67815 if ( $is_counted_type{$type} ) {
9349 6564         14473 my $seqno = $seqno_stack{ $depth_next - 1 };
9350 6564 100       13750 if ( defined($seqno) ) {
9351 4868         11364 $rtype_count_by_seqno->{$seqno}->{$type}++;
9352              
9353             # Count line-ending commas for -bbx
9354 4868 100 100     15428 if ( $type eq ',' && $Ktoken_vars == $Klast_old_code ) {
9355 978         2045 $rlec_count_by_seqno->{$seqno}++;
9356             }
9357              
9358             # Remember index of first here doc target
9359 4868 100 66     11286 if ( $type eq 'h' && !$K_first_here_doc_by_seqno{$seqno} ) {
9360 6         16 my $KK_new = @{$rLL_new};
  6         18  
9361 6         17 $K_first_here_doc_by_seqno{$seqno} = $KK_new;
9362             }
9363             }
9364             }
9365             }
9366              
9367             # cumulative length is the length sum including this token
9368 58308         77166 $cumulative_length += $token_length;
9369              
9370 58308         80616 $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
9371 58308         79434 $item->[_TOKEN_LENGTH_] = $token_length;
9372              
9373             # For reference, here is how to get the parent sequence number.
9374             # This is not used because it is slower than finding it on the fly
9375             # in sub parent_seqno_by_K:
9376              
9377             # my $seqno_parent =
9378             # $type_sequence && $is_opening_token{$token}
9379             # ? $seqno_stack{ $depth_next - 2 }
9380             # : $seqno_stack{ $depth_next - 1 };
9381             # my $KK = @{$rLL_new};
9382             # $rseqno_of_parent_by_K->{$KK} = $seqno_parent;
9383              
9384             # and finally, add this item to the new array
9385 58308         72729 push @{$rLL_new}, $item;
  58308         98692  
9386 58308         104830 return;
9387             } ## end sub store_token
9388              
9389             sub add_phantom_semicolon {
9390              
9391 530     530 0 2209 my ( $self, $KK ) = @_;
9392              
9393             # The token at old index $KK is a closing block brace, and not preceded
9394             # by a semicolon. Before we push it onto the new token list, we may
9395             # want to add a phantom semicolon which can be activated if the the
9396             # block is broken on output.
9397              
9398             # We are only adding semicolons for certain block types
9399 530         1179 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
9400 530 50       1383 return unless ($type_sequence);
9401 530         1166 my $block_type = $rblock_type_of_seqno->{$type_sequence};
9402 530 50       1320 return unless ($block_type);
9403             return
9404 530 100 100     4518 unless ( $ok_to_add_semicolon_for_block_type{$block_type}
      100        
9405             || $block_type =~ /^(sub|package)/
9406             || $block_type =~ /^\w+\:$/ );
9407              
9408             # Find the most recent token in the new token list
9409 304         1430 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
9410 304 50       964 return unless ( defined($Kp) ); # shouldn't happen except for bad input
9411              
9412 304         702 my $type_p = $rLL_new->[$Kp]->[_TYPE_];
9413 304         640 my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
9414 304         660 my $type_sequence_p = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
9415              
9416             # Do not add a semicolon if...
9417             return
9418             if (
9419              
9420             # it would follow a comment (and be isolated)
9421             $type_p eq '#'
9422              
9423             # it follows a code block ( because they are not always wanted
9424             # there and may add clutter)
9425 304 50 100     3270 || $type_sequence_p && $rblock_type_of_seqno->{$type_sequence_p}
      100        
      66        
      66        
      66        
9426              
9427             # it would follow a label
9428             || $type_p eq 'J'
9429              
9430             # it would be inside a 'format' statement (and cause syntax error)
9431             || ( $type_p eq 'k'
9432             && $token_p =~ /format/ )
9433              
9434             );
9435              
9436             # Do not add a semicolon if it would impede a weld with an immediately
9437             # following closing token...like this
9438             # { ( some code ) }
9439             # ^--No semicolon can go here
9440              
9441             # look at the previous token... note use of the _NEW rLL array here,
9442             # but sequence numbers are invariant.
9443 170         421 my $seqno_inner = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
9444              
9445             # If it is also a CLOSING token we have to look closer...
9446 170 100 66     783 if (
      33        
      66        
9447             $seqno_inner
9448             && $is_closing_token{$token_p}
9449              
9450             # we only need to look if there is just one inner container..
9451             && defined( $rchildren_of_seqno->{$type_sequence} )
9452 39         185 && @{ $rchildren_of_seqno->{$type_sequence} } == 1
9453             )
9454             {
9455              
9456             # Go back and see if the corresponding two OPENING tokens are also
9457             # together. Note that we are using the OLD K indexing here:
9458 34         92 my $K_outer_opening = $K_old_opening_by_seqno{$type_sequence};
9459 34 50       114 if ( defined($K_outer_opening) ) {
9460 34         154 my $K_nxt = $self->K_next_nonblank($K_outer_opening);
9461 34 50       116 if ( defined($K_nxt) ) {
9462 34         83 my $seqno_nxt = $rLL->[$K_nxt]->[_TYPE_SEQUENCE_];
9463              
9464             # Is the next token after the outer opening the same as
9465             # our inner closing (i.e. same sequence number)?
9466             # If so, do not insert a semicolon here.
9467 34 100 66     185 return if ( $seqno_nxt && $seqno_nxt == $seqno_inner );
9468             }
9469             }
9470             }
9471              
9472             # We will insert an empty semicolon here as a placeholder. Later, if
9473             # it becomes the last token on a line, we will bring it to life. The
9474             # advantage of doing this is that (1) we just have to check line
9475             # endings, and (2) the phantom semicolon has zero width and therefore
9476             # won't cause needless breaks of one-line blocks.
9477 162         398 my $Ktop = -1;
9478 162 100 100     947 if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
9479             && $want_left_space{';'} == WS_NO )
9480             {
9481              
9482             # convert the blank into a semicolon..
9483             # be careful: we are working on the new stack top
9484             # on a token which has been stored.
9485 123         557 my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE );
9486              
9487             # Convert the existing blank to:
9488             # a phantom semicolon for one_line_block option = 0 or 1
9489             # a real semicolon for one_line_block option = 2
9490 123         273 my $tok = EMPTY_STRING;
9491 123         258 my $len_tok = 0;
9492 123 100       374 if ( $rOpts_one_line_block_semicolons == 2 ) {
9493 3         6 $tok = ';';
9494 3         7 $len_tok = 1;
9495             }
9496              
9497 123         273 $rLL_new->[$Ktop]->[_TOKEN_] = $tok;
9498 123         248 $rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok;
9499 123         280 $rLL_new->[$Ktop]->[_TYPE_] = ';';
9500              
9501 123         414 $self->[_rtype_count_by_seqno_]->{$type_sequence}->{';'}++;
9502              
9503             # NOTE: we are changing the output stack without updating variables
9504             # $last_nonblank_code_type, etc. Future needs might require that
9505             # those variables be updated here. For now, it seems ok to skip
9506             # this.
9507              
9508             # Then store a new blank
9509 123         379 $self->store_token($rcopy);
9510             }
9511             else {
9512              
9513             # Patch for issue c078: keep line indexes in order. If the top
9514             # token is a space that we are keeping (due to '-wls=';') then
9515             # we have to check that old line indexes stay in order.
9516             # In very rare
9517             # instances in which side comments have been deleted and converted
9518             # into blanks, we may have filtered down multiple blanks into just
9519             # one. In that case the top blank may have a higher line number
9520             # than the previous nonblank token. Although the line indexes of
9521             # blanks are not really significant, we need to keep them in order
9522             # in order to pass error checks.
9523 39 100       140 if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' ) {
9524 1         3 my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_];
9525 1         7 my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_];
9526 1 50       6 if ( $new_top_ix < $old_top_ix ) {
9527 0         0 $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix;
9528             }
9529             }
9530              
9531 39         183 my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', EMPTY_STRING );
9532 39         128 $self->store_token($rcopy);
9533             }
9534 162         611 return;
9535             } ## end sub add_phantom_semicolon
9536              
9537             sub add_trailing_comma {
9538              
9539             # Implement the --add-trailing-commas flag to the line end before index $KK:
9540              
9541 24     24 0 60 my ( $self, $KK, $Kfirst, $trailing_comma_rule ) = @_;
9542              
9543             # Input parameter:
9544             # $KK = index of closing token in old ($rLL) token list
9545             # which starts a new line and is not preceded by a comma
9546             # $Kfirst = index of first token on the current line of input tokens
9547             # $add_flags = user control flags
9548              
9549             # For example, we might want to add a comma here:
9550              
9551             # bless {
9552             # _name => $name,
9553             # _price => $price,
9554             # _rebate => $rebate <------ location of possible bare comma
9555             # }, $pkg;
9556             # ^-------------------closing token at index $KK on new line
9557              
9558             # Do not add a comma if it would follow a comment
9559 24         75 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
9560 24 50       69 return unless ( defined($Kp) );
9561 24         54 my $type_p = $rLL_new->[$Kp]->[_TYPE_];
9562 24 50       61 return if ( $type_p eq '#' );
9563              
9564             # see if the user wants a trailing comma here
9565 24         73 my $match =
9566             $self->match_trailing_comma_rule( $KK, $Kfirst, $Kp,
9567             $trailing_comma_rule, 1 );
9568              
9569             # if so, add a comma
9570 24 100       69 if ($match) {
9571 11         43 my $Knew = $self->store_new_token( ',', ',', $Kp );
9572             }
9573              
9574 24         53 return;
9575              
9576             } ## end sub add_trailing_comma
9577              
9578             sub delete_trailing_comma {
9579              
9580 60     60 0 131 my ( $self, $KK, $Kfirst, $trailing_comma_rule ) = @_;
9581              
9582             # Apply the --delete-trailing-commas flag to the comma before index $KK
9583              
9584             # Input parameter:
9585             # $KK = index of a closing token in OLD ($rLL) token list
9586             # which is preceded by a comma on the same line.
9587             # $Kfirst = index of first token on the current line of input tokens
9588             # $delete_option = user control flag
9589              
9590             # Returns true if the comma was deleted
9591              
9592             # For example, we might want to delete this comma:
9593             # my @asset = ("FASMX", "FASGX", "FASIX",);
9594             # | |^--------token at index $KK
9595             # | ^------comma of interest
9596             # ^-------------token at $Kfirst
9597              
9598             # Verify that the previous token is a comma. Note that we are working in
9599             # the new token list $rLL_new.
9600 60         160 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
9601 60 50       131 return unless ( defined($Kp) );
9602 60 50       145 if ( $rLL_new->[$Kp]->[_TYPE_] ne ',' ) {
9603              
9604             # there must be a '#' between the ',' and closing token; give up.
9605 0         0 return;
9606             }
9607              
9608             # Do not delete commas when formatting under stress to avoid instability.
9609             # This fixes b1389, b1390, b1391, b1392. The $high_stress_level has
9610             # been found to work well for trailing commas.
9611 60 50       146 if ( $rLL_new->[$Kp]->[_LEVEL_] >= $high_stress_level ) {
9612 0         0 return;
9613             }
9614              
9615             # See if the user wants this trailing comma
9616 60         175 my $match =
9617             $self->match_trailing_comma_rule( $KK, $Kfirst, $Kp,
9618             $trailing_comma_rule, 0 );
9619              
9620             # Patch: the --noadd-whitespace flag can cause instability in complex
9621             # structures. In this case do not delete the comma. Fixes b1409.
9622 60 50 66     186 if ( !$match && !$rOpts_add_whitespace ) {
9623 0         0 my $Kn = $self->K_next_nonblank($KK);
9624 0 0       0 if ( defined($Kn) ) {
9625 0         0 my $type_n = $rLL->[$Kn]->[_TYPE_];
9626 0 0 0     0 if ( $type_n ne ';' && $type_n ne '#' ) { return }
  0         0  
9627             }
9628             }
9629              
9630             # If no match, delete it
9631 60 100       153 if ( !$match ) {
9632              
9633 48         125 return $self->unstore_last_nonblank_token(',');
9634             }
9635 12         24 return;
9636              
9637             } ## end sub delete_trailing_comma
9638              
9639             sub delete_weld_interfering_comma {
9640              
9641 1     1 0 4 my ( $self, $KK ) = @_;
9642              
9643             # Apply the flag '--delete-weld-interfering-commas' to the comma
9644             # before index $KK
9645              
9646             # Input parameter:
9647             # $KK = index of a closing token in OLD ($rLL) token list
9648             # which is preceded by a comma on the same line.
9649              
9650             # Returns true if the comma was deleted
9651              
9652             # For example, we might want to delete this comma:
9653              
9654             # my $tmpl = { foo => {no_override => 1, default => 42}, };
9655             # || ^------$KK
9656             # |^---$Kp
9657             # $Kpp---^
9658             #
9659             # Note that:
9660             # index $KK is in the old $rLL array, but
9661             # indexes $Kp and $Kpp are in the new $rLL_new array.
9662              
9663 1         4 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
9664 1 50       5 return unless ($type_sequence);
9665              
9666             # Find the previous token and verify that it is a comma.
9667 1         8 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
9668 1 50       6 return unless ( defined($Kp) );
9669 1 50       8 if ( $rLL_new->[$Kp]->[_TYPE_] ne ',' ) {
9670              
9671             # it is not a comma, so give up ( it is probably a '#' )
9672 0         0 return;
9673             }
9674              
9675             # This must be the only comma in this list
9676 1         2 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence};
9677             return
9678             unless ( defined($rtype_count)
9679             && $rtype_count->{','}
9680 1 50 33     11 && $rtype_count->{','} == 1 );
      33        
9681              
9682             # Back up to the previous closing token
9683 1         6 my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
9684 1 50       10 return unless ( defined($Kpp) );
9685 1         6 my $seqno_pp = $rLL_new->[$Kpp]->[_TYPE_SEQUENCE_];
9686 1         8 my $type_pp = $rLL_new->[$Kpp]->[_TYPE_];
9687              
9688             # The containers must be nesting (i.e., sequence numbers must differ by 1 )
9689 1 50 33     10 if ( $seqno_pp && $is_closing_type{$type_pp} ) {
9690 1 50       5 if ( $seqno_pp == $type_sequence + 1 ) {
9691              
9692             # remove the ',' from the top of the new token list
9693 1         6 return $self->unstore_last_nonblank_token(',');
9694             }
9695             }
9696 0         0 return;
9697              
9698             } ## end sub delete_weld_interfering_comma
9699              
9700             sub unstore_last_nonblank_token {
9701              
9702 49     49 0 107 my ( $self, $type ) = @_;
9703              
9704             # remove the most recent nonblank token from the new token list
9705             # Input parameter:
9706             # $type = type to be removed (for safety check)
9707              
9708             # Returns true if success
9709             # false if error
9710              
9711             # This was written and is used for removing commas, but might
9712             # be useful for other tokens. If it is ever used for other tokens
9713             # then the issue of what to do about the other variables, such
9714             # as token counts and the '$last...' vars needs to be considered.
9715              
9716             # Safety check, shouldn't happen
9717 49 50       71 if ( @{$rLL_new} < 3 ) {
  49         146  
9718 0         0 DEVEL_MODE && Fault("not enough tokens on stack to remove '$type'\n");
9719 0         0 return;
9720             }
9721              
9722 49         90 my ( $rcomma, $rblank );
9723              
9724             # case 1: pop comma from top of stack
9725 49 100 33     232 if ( $rLL_new->[-1]->[_TYPE_] eq $type ) {
    50          
9726 6         15 $rcomma = pop @{$rLL_new};
  6         18  
9727             }
9728              
9729             # case 2: pop blank and then comma from top of stack
9730             elsif ($rLL_new->[-1]->[_TYPE_] eq 'b'
9731             && $rLL_new->[-2]->[_TYPE_] eq $type )
9732             {
9733 43         68 $rblank = pop @{$rLL_new};
  43         87  
9734 43         81 $rcomma = pop @{$rLL_new};
  43         74  
9735             }
9736              
9737             # case 3: error, shouldn't happen unless bad call
9738             else {
9739 0         0 DEVEL_MODE && Fault("Could not find token type '$type' to remove\n");
9740 0         0 return;
9741             }
9742              
9743             # A note on updating vars set by sub store_token for this comma: If we
9744             # reduce the comma count by 1 then we also have to change the variable
9745             # $last_nonblank_code_type to be $last_last_nonblank_code_type because
9746             # otherwise sub store_token is going to ALSO reduce the comma count.
9747             # Alternatively, we can leave the count alone and the
9748             # $last_nonblank_code_type alone. Then sub store_token will produce
9749             # the correct result. This is simpler and is done here.
9750              
9751             # Now add a blank space after the comma if appropriate.
9752             # Some unusual spacing controls might need another iteration to
9753             # reach a final state.
9754 49 50       162 if ( $rLL_new->[-1]->[_TYPE_] ne 'b' ) {
9755 49 100       121 if ( defined($rblank) ) {
9756 43         76 $rblank->[_CUMULATIVE_LENGTH_] -= 1; # fix for deleted comma
9757 43         66 push @{$rLL_new}, $rblank;
  43         82  
9758             }
9759             }
9760 49         121 return 1;
9761             } ## end sub unstore_last_nonblank_token
9762              
9763             sub match_trailing_comma_rule {
9764              
9765 84     84 0 180 my ( $self, $KK, $Kfirst, $Kp, $trailing_comma_rule, $if_add ) = @_;
9766              
9767             # Decide if a trailing comma rule is matched.
9768              
9769             # Input parameter:
9770             # $KK = index of closing token in old ($rLL) token list which follows
9771             # the location of a possible trailing comma. See diagram below.
9772             # $Kfirst = (old) index of first token on the current line of input tokens
9773             # $Kp = index of previous nonblank token in new ($rLL_new) array
9774             # $trailing_comma_rule = packed user control flags
9775             # $if_add = true if adding comma, false if deleting comma
9776              
9777             # Returns:
9778             # false if no match
9779             # true if match
9780              
9781             # For example, we might be checking for addition of a comma here:
9782              
9783             # bless {
9784             # _name => $name,
9785             # _price => $price,
9786             # _rebate => $rebate <------ location of possible trailing comma
9787             # }, $pkg;
9788             # ^-------------------closing token at index $KK
9789              
9790 84 50       172 return unless ($trailing_comma_rule);
9791 84         127 my ( $trailing_comma_style, $paren_flag ) = @{$trailing_comma_rule};
  84         223  
9792              
9793             # List of $trailing_comma_style values:
9794             # undef stable: do not change
9795             # '0' : no list should have a trailing comma
9796             # '1' or '*' : every list should have a trailing comma
9797             # 'm' a multi-line list should have a trailing commas
9798             # 'b' trailing commas should be 'bare' (comma followed by newline)
9799             # 'h' lists of key=>value pairs with a bare trailing comma
9800             # 'i' same as s=h but also include any list with no more than about one
9801             # comma per line
9802             # ' ' or -wtc not defined : leave trailing commas unchanged [DEFAULT].
9803              
9804             # Note: an interesting generalization would be to let an upper case
9805             # letter denote the negation of styles 'm', 'b', 'h', 'i'. This might
9806             # be useful for undoing operations. It would be implemented as a wrapper
9807             # around this routine.
9808              
9809             #-----------------------------------------
9810             # No style defined : do not add or delete
9811             #-----------------------------------------
9812 84 50       178 if ( !defined($trailing_comma_style) ) { return !$if_add }
  0         0  
9813              
9814             #----------------------------------------
9815             # Set some flags describing this location
9816             #----------------------------------------
9817 84         133 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
9818 84 50       172 return unless ($type_sequence);
9819 84         146 my $closing_token = $rLL->[$KK]->[_TOKEN_];
9820 84         133 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence};
9821 84 50 33     319 return unless ( defined($rtype_count) && $rtype_count->{','} );
9822             my $is_permanently_broken =
9823 84         165 $self->[_ris_permanently_broken_]->{$type_sequence};
9824              
9825             # Note that _ris_broken_container_ also stores the line diff
9826             # but it is not available at this early stage.
9827 84         140 my $K_opening = $self->[_K_opening_container_]->{$type_sequence};
9828 84 50       229 return if ( !defined($K_opening) );
9829              
9830             # multiline definition 1: opening and closing tokens on different lines
9831 84         147 my $iline_o = $rLL_new->[$K_opening]->[_LINE_INDEX_];
9832 84         152 my $iline_c = $rLL->[$KK]->[_LINE_INDEX_];
9833 84         147 my $line_diff_containers = $iline_c - $iline_o;
9834 84         145 my $has_multiline_containers = $line_diff_containers > 0;
9835              
9836             # multiline definition 2: first and last commas on different lines
9837 84         156 my $iline_first = $self->[_rfirst_comma_line_index_]->{$type_sequence};
9838 84         129 my $iline_last = $rLL_new->[$Kp]->[_LINE_INDEX_];
9839 84         130 my $has_multiline_commas;
9840 84         119 my $line_diff_commas = 0;
9841 84 50       160 if ( !defined($iline_first) ) {
9842              
9843             # shouldn't happen if caller checked comma count
9844 0         0 my $type_kp = $rLL_new->[$Kp]->[_TYPE_];
9845 0         0 Fault(
9846             "at line $iline_last but line of first comma not defined, at Kp=$Kp, type=$type_kp\n"
9847             ) if (DEVEL_MODE);
9848             }
9849             else {
9850 84         131 $line_diff_commas = $iline_last - $iline_first;
9851 84         129 $has_multiline_commas = $line_diff_commas > 0;
9852             }
9853              
9854             # To avoid instability in edge cases, when adding commas we uses the
9855             # multiline_commas definition, but when deleting we use multiline
9856             # containers. This fixes b1384, b1396, b1397, b1398, b1400.
9857 84 100       174 my $is_multiline =
9858             $if_add ? $has_multiline_commas : $has_multiline_containers;
9859              
9860 84   100     283 my $is_bare_multiline_comma = $is_multiline && $KK == $Kfirst;
9861              
9862 84         131 my $match;
9863              
9864             #----------------------------
9865             # 0 : does not match any list
9866             #----------------------------
9867 84 100 66     456 if ( $trailing_comma_style eq '0' ) {
    100 66        
    100          
    100          
    50          
9868 12         18 $match = 0;
9869             }
9870              
9871             #------------------------------
9872             # '*' or '1' : matches any list
9873             #------------------------------
9874             elsif ( $trailing_comma_style eq '*' || $trailing_comma_style eq '1' ) {
9875 4         7 $match = 1;
9876             }
9877              
9878             #-----------------------------
9879             # 'm' matches a Multiline list
9880             #-----------------------------
9881             elsif ( $trailing_comma_style eq 'm' ) {
9882 20         58 $match = $is_multiline;
9883             }
9884              
9885             #----------------------------------
9886             # 'b' matches a Bare trailing comma
9887             #----------------------------------
9888             elsif ( $trailing_comma_style eq 'b' ) {
9889 16         33 $match = $is_bare_multiline_comma;
9890             }
9891              
9892             #--------------------------------------------------------------------------
9893             # 'h' matches a bare hash list with about 1 comma and 1 fat comma per line.
9894             # 'i' matches a bare stable list with about 1 comma per line.
9895             #--------------------------------------------------------------------------
9896             elsif ( $trailing_comma_style eq 'h' || $trailing_comma_style eq 'i' ) {
9897              
9898             # We can treat these together because they are similar.
9899             # The set of 'i' matches includes the set of 'h' matches.
9900              
9901             # the trailing comma must be bare for both 'h' and 'i'
9902 32 100       101 return if ( !$is_bare_multiline_comma );
9903              
9904             # There must be no more than one comma per line for both 'h' and 'i'
9905             # The new_comma_count here will include the trailing comma.
9906 10         26 my $new_comma_count = $rtype_count->{','};
9907 10 100       36 $new_comma_count += 1 if ($if_add);
9908 10         34 my $excess_commas = $new_comma_count - $line_diff_commas - 1;
9909 10 100       27 if ( $excess_commas > 0 ) {
9910              
9911             # Exception for a special edge case for option 'i': if the trailing
9912             # comma is followed by a blank line or comment, then it cannot be
9913             # covered. Then we can safely accept a small list to avoid
9914             # instability (issue b1443).
9915 2 50 66     79 if ( $trailing_comma_style eq 'i'
    50 33        
      66        
      33        
      33        
      0        
9916             && $iline_c - $rLL_new->[$Kp]->[_LINE_INDEX_] > 1
9917             && $new_comma_count <= 2 )
9918             {
9919 0         0 $match = 1;
9920             }
9921              
9922             # Patch for instability issue b1456: -boc can trick this test; so
9923             # skip it when deleting commas to avoid possible instability
9924             # with option 'h' in combination with -atc -dtc -boc;
9925             elsif (
9926             $trailing_comma_style eq 'h'
9927              
9928             # this is a deletion (due to -dtc)
9929             && !$if_add
9930              
9931             # -atc is also set
9932             && $rOpts_add_trailing_commas
9933              
9934             # -boc is set and active
9935             && $rOpts_break_at_old_comma_breakpoints
9936             && !$rOpts_ignore_old_breakpoints
9937             )
9938             {
9939             # ignore this test
9940             }
9941              
9942             else {
9943 2         11 return;
9944             }
9945             }
9946              
9947             # a list of key=>value pairs with at least 2 fat commas is a match
9948             # for both 'h' and 'i'
9949 8         16 my $fat_comma_count = $rtype_count->{'=>'};
9950 8 100 66     47 if ( !$match && $fat_comma_count && $fat_comma_count >= 2 ) {
      66        
9951              
9952             # comma count (including trailer) and fat comma count must differ by
9953             # by no more than 1. This allows for some small variations.
9954 4         13 my $comma_diff = $new_comma_count - $fat_comma_count;
9955 4   33     26 $match = ( $comma_diff >= -1 && $comma_diff <= 1 );
9956             }
9957              
9958             # For 'i' only, a list that can be shown to be stable is a match
9959 8 100 100     35 if ( !$match && $trailing_comma_style eq 'i' ) {
9960 2   66     11 $match = (
9961             $is_permanently_broken
9962             || ( $rOpts_break_at_old_comma_breakpoints
9963             && !$rOpts_ignore_old_breakpoints )
9964             );
9965             }
9966             }
9967              
9968             #-------------------------------------------------------------------------
9969             # Unrecognized parameter. This should have been caught in the input check.
9970             #-------------------------------------------------------------------------
9971             else {
9972              
9973 0         0 DEVEL_MODE && Fault("Unrecognized parameter '$trailing_comma_style'\n");
9974              
9975             # do not add or delete
9976 0         0 return !$if_add;
9977             }
9978              
9979             # Now do any special paren check
9980 60 0 66     195 if ( $match
      33        
      33        
      0        
9981             && $paren_flag
9982             && $paren_flag ne '1'
9983             && $paren_flag ne '*'
9984             && $closing_token eq ')' )
9985             {
9986 0   0     0 $match &&=
9987             $self->match_paren_control_flag( $type_sequence, $paren_flag,
9988             $rLL_new );
9989             }
9990              
9991             # Fix for b1379, b1380, b1381, b1382, b1384 part 1. Mark trailing commas
9992             # for use by -vtc logic to avoid instability when -dtc and -atc are both
9993             # active.
9994 60 100       121 if ($match) {
9995 23 100 100     156 if ( $if_add && $rOpts_delete_trailing_commas
      66        
      66        
9996             || !$if_add && $rOpts_add_trailing_commas )
9997             {
9998 17         55 $self->[_ris_bare_trailing_comma_by_seqno_]->{$type_sequence} = 1;
9999              
10000             # The combination of -atc and -dtc and -cab=3 can be unstable
10001             # (b1394). So we deactivate -cab=3 in this case.
10002             # A value of '0' or '4' is required for stability of case b1451.
10003 17 50       55 if ( $rOpts_comma_arrow_breakpoints == 3 ) {
10004 0         0 $self->[_roverride_cab3_]->{$type_sequence} = 0;
10005             }
10006             }
10007             }
10008 60         175 return $match;
10009             } ## end sub match_trailing_comma_rule
10010              
10011             sub store_new_token {
10012              
10013 11     11 0 34 my ( $self, $type, $token, $Kp ) = @_;
10014              
10015             # Create and insert a completely new token into the output stream
10016              
10017             # Input parameters:
10018             # $type = the token type
10019             # $token = the token text
10020             # $Kp = index of the previous token in the new list, $rLL_new
10021              
10022             # Returns:
10023             # $Knew = index in $rLL_new of the new token
10024              
10025             # This operation is a little tricky because we are creating a new token and
10026             # we have to take care to follow the requested whitespace rules.
10027              
10028 11         19 my $Ktop = @{$rLL_new} - 1;
  11         24  
10029 11   66     53 my $top_is_space = $Ktop >= 0 && $rLL_new->[$Ktop]->[_TYPE_] eq 'b';
10030 11         22 my $Knew;
10031 11 100 66     37 if ( $top_is_space && $want_left_space{$type} == WS_NO ) {
10032              
10033             #----------------------------------------------------
10034             # Method 1: Convert the top blank into the new token.
10035             #----------------------------------------------------
10036              
10037             # Be Careful: we are working on the top of the new stack, on a token
10038             # which has been stored.
10039              
10040 2         12 my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE );
10041              
10042 2         4 $Knew = $Ktop;
10043 2         4 $rLL_new->[$Knew]->[_TOKEN_] = $token;
10044 2         6 $rLL_new->[$Knew]->[_TOKEN_LENGTH_] = length($token);
10045 2         5 $rLL_new->[$Knew]->[_TYPE_] = $type;
10046              
10047             # NOTE: we are changing the output stack without updating variables
10048             # $last_nonblank_code_type, etc. Future needs might require that
10049             # those variables be updated here. For now, we just update the
10050             # type counts as necessary.
10051              
10052 2 50       7 if ( $is_counted_type{$type} ) {
10053 2         6 my $seqno = $seqno_stack{ $depth_next - 1 };
10054 2 50       5 if ($seqno) {
10055 2         5 $self->[_rtype_count_by_seqno_]->{$seqno}->{$type}++;
10056             }
10057             }
10058              
10059             # Then store a new blank
10060 2         9 $self->store_token($rcopy);
10061             }
10062             else {
10063              
10064             #----------------------------------------
10065             # Method 2: Use the normal storage method
10066             #----------------------------------------
10067              
10068             # Patch for issue c078: keep line indexes in order. If the top
10069             # token is a space that we are keeping (due to '-wls=...) then
10070             # we have to check that old line indexes stay in order.
10071             # In very rare
10072             # instances in which side comments have been deleted and converted
10073             # into blanks, we may have filtered down multiple blanks into just
10074             # one. In that case the top blank may have a higher line number
10075             # than the previous nonblank token. Although the line indexes of
10076             # blanks are not really significant, we need to keep them in order
10077             # in order to pass error checks.
10078 9 50       26 if ($top_is_space) {
10079 0         0 my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_];
10080 0         0 my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_];
10081 0 0       0 if ( $new_top_ix < $old_top_ix ) {
10082 0         0 $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix;
10083             }
10084             }
10085              
10086 9         35 my $rcopy = copy_token_as_type( $rLL_new->[$Kp], $type, $token );
10087 9         28 $self->store_token($rcopy);
10088 9         21 $Knew = @{$rLL_new} - 1;
  9         28  
10089             }
10090 11         32 return $Knew;
10091             } ## end sub store_new_token
10092              
10093             sub check_Q {
10094              
10095             # Check that a quote looks okay, and report possible problems
10096             # to the logfile.
10097              
10098 1     1 0 5 my ( $self, $KK, $Kfirst, $line_number ) = @_;
10099 1         3 my $token = $rLL->[$KK]->[_TOKEN_];
10100 1 50       24 if ( $token =~ /\t/ ) {
10101 0         0 $self->note_embedded_tab($line_number);
10102             }
10103              
10104             # The remainder of this routine looks for something like
10105             # '$var = s/xxx/yyy/;'
10106             # in case it should have been '$var =~ s/xxx/yyy/;'
10107              
10108             # Start by looking for a token beginning with one of: s y m / tr
10109             return
10110 1 50 33     18 unless ( $is_s_y_m_slash{ substr( $token, 0, 1 ) }
10111             || substr( $token, 0, 2 ) eq 'tr' );
10112              
10113             # ... and preceded by one of: = == !=
10114 0         0 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
10115 0 0       0 return unless ( defined($Kp) );
10116 0         0 my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
10117 0 0       0 return unless ( $is_unexpected_equals{$previous_nonblank_type} );
10118 0         0 my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
10119              
10120 0         0 my $previous_nonblank_type_2 = 'b';
10121 0         0 my $previous_nonblank_token_2 = EMPTY_STRING;
10122 0         0 my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
10123 0 0       0 if ( defined($Kpp) ) {
10124 0         0 $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
10125 0         0 $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
10126             }
10127              
10128 0         0 my $next_nonblank_token = EMPTY_STRING;
10129 0         0 my $Kn = $KK + 1;
10130 0         0 my $Kmax = @{$rLL} - 1;
  0         0  
10131 0 0 0     0 if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
  0         0  
10132 0 0       0 if ( $Kn <= $Kmax ) {
10133 0         0 $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
10134             }
10135              
10136 0         0 my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
10137 0         0 my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
10138              
10139 0 0 0     0 if (
      0        
      0        
      0        
10140              
10141             # preceded by simple scalar
10142             $previous_nonblank_type_2 eq 'i'
10143             && $previous_nonblank_token_2 =~ /^\$/
10144              
10145             # followed by some kind of termination
10146             # (but give complaint if we can not see far enough ahead)
10147             && $next_nonblank_token =~ /^[; \)\}]$/
10148              
10149             # scalar is not declared
10150             ## =~ /^(my|our|local)$/
10151             && !( $type_0 eq 'k' && $is_my_our_local{$token_0} )
10152             )
10153             {
10154 0         0 my $lno = 1 + $rLL_new->[$Kp]->[_LINE_INDEX_];
10155 0         0 my $guess = substr( $previous_nonblank_token, 0, 1 ) . '~';
10156 0         0 complain(
10157             "Line $lno: Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
10158             );
10159             }
10160 0         0 return;
10161             } ## end sub check_Q
10162              
10163             } ## end closure respace_tokens
10164              
10165             sub copy_token_as_type {
10166              
10167             # This provides a quick way to create a new token by
10168             # slightly modifying an existing token.
10169 293     293 0 769 my ( $rold_token, $type, $token ) = @_;
10170 293 50       843 if ( !defined($token) ) {
10171 0 0       0 if ( $type eq 'b' ) {
    0          
    0          
    0          
    0          
10172 0         0 $token = SPACE;
10173             }
10174             elsif ( $type eq 'q' ) {
10175 0         0 $token = EMPTY_STRING;
10176             }
10177             elsif ( $type eq '->' ) {
10178 0         0 $token = '->';
10179             }
10180             elsif ( $type eq ';' ) {
10181 0         0 $token = ';';
10182             }
10183             elsif ( $type eq ',' ) {
10184 0         0 $token = ',';
10185             }
10186             else {
10187              
10188             # Unexpected type ... this sub will work as long as both $token and
10189             # $type are defined, but we should catch any unexpected types during
10190             # development.
10191 0         0 if (DEVEL_MODE) {
10192             Fault(<<EOM);
10193             sub 'copy_token_as_type' received token type '$type' but expects just one of: 'b' 'q' '->' or ';'
10194             EOM
10195             }
10196              
10197             # Shouldn't get here
10198 0         0 $token = $type;
10199             }
10200             }
10201              
10202 293         509 my @rnew_token = @{$rold_token};
  293         1291  
10203 293         687 $rnew_token[_TYPE_] = $type;
10204 293         502 $rnew_token[_TOKEN_] = $token;
10205 293         527 $rnew_token[_TYPE_SEQUENCE_] = EMPTY_STRING;
10206 293         772 return \@rnew_token;
10207             } ## end sub copy_token_as_type
10208              
10209             sub K_next_code {
10210 513     513 0 1297 my ( $self, $KK, $rLL ) = @_;
10211              
10212             # return the index K of the next nonblank, non-comment token
10213 513 50 33     2304 return unless ( defined($KK) && $KK >= 0 );
10214              
10215             # use the standard array unless given otherwise
10216 513 50       1532 $rLL = $self->[_rLL_] unless ( defined($rLL) );
10217 513         792 my $Num = @{$rLL};
  513         965  
10218 513         994 my $Knnb = $KK + 1;
10219 513         1490 while ( $Knnb < $Num ) {
10220 878 50       2057 if ( !defined( $rLL->[$Knnb] ) ) {
10221              
10222             # We seem to have encountered a gap in our array.
10223             # This shouldn't happen because sub write_line() pushed
10224             # items into the $rLL array.
10225 0         0 Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
10226 0         0 return;
10227             }
10228 878 100 100     3128 if ( $rLL->[$Knnb]->[_TYPE_] ne 'b'
10229             && $rLL->[$Knnb]->[_TYPE_] ne '#' )
10230             {
10231 502         1269 return $Knnb;
10232             }
10233 376         833 $Knnb++;
10234             }
10235 11         72 return;
10236             } ## end sub K_next_code
10237              
10238             sub K_next_nonblank {
10239 541     541 0 1287 my ( $self, $KK, $rLL ) = @_;
10240              
10241             # return the index K of the next nonblank token, or
10242             # return undef if none
10243 541 50 33     2189 return unless ( defined($KK) && $KK >= 0 );
10244              
10245             # The third arg allows this routine to be used on any array. This is
10246             # useful in sub respace_tokens when we are copying tokens from an old $rLL
10247             # to a new $rLL array. But usually the third arg will not be given and we
10248             # will just use the $rLL array in $self.
10249 541 100       1484 $rLL = $self->[_rLL_] unless ( defined($rLL) );
10250 541         816 my $Num = @{$rLL};
  541         927  
10251 541         993 my $Knnb = $KK + 1;
10252 541 100       1386 return unless ( $Knnb < $Num );
10253 540 100       1465 return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
10254 454 50       1182 return unless ( ++$Knnb < $Num );
10255 454 50       1682 return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
10256              
10257             # Backup loop. Very unlikely to get here; it means we have neighboring
10258             # blanks in the token stream.
10259 0         0 $Knnb++;
10260 0         0 while ( $Knnb < $Num ) {
10261              
10262             # Safety check, this fault shouldn't happen: The $rLL array is the
10263             # main array of tokens, so all entries should be used. It is
10264             # initialized in sub write_line, and then re-initialized by sub
10265             # store_token() within sub respace_tokens. Tokens are pushed on
10266             # so there shouldn't be any gaps.
10267 0 0       0 if ( !defined( $rLL->[$Knnb] ) ) {
10268 0         0 Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
10269 0         0 return;
10270             }
10271 0 0       0 if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
  0         0  
10272 0         0 $Knnb++;
10273             }
10274 0         0 return;
10275             } ## end sub K_next_nonblank
10276              
10277             sub K_previous_code {
10278              
10279             # return the index K of the previous nonblank, non-comment token
10280             # Call with $KK=undef to start search at the top of the array
10281 2504     2504 0 5167 my ( $self, $KK, $rLL ) = @_;
10282              
10283             # use the standard array unless given otherwise
10284 2504 100       5853 $rLL = $self->[_rLL_] unless ( defined($rLL) );
10285 2504         3569 my $Num = @{$rLL};
  2504         4142  
10286 2504 100       8349 if ( !defined($KK) ) { $KK = $Num }
  1 50       6  
10287             elsif ( $KK > $Num ) {
10288              
10289             # This fault can be caused by a programming error in which a bad $KK is
10290             # given. The caller should make the first call with KK_new=undef to
10291             # avoid this error.
10292 0         0 Fault(
10293             "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
10294             ) if (DEVEL_MODE);
10295 0         0 return;
10296             }
10297 2504         3847 my $Kpnb = $KK - 1;
10298 2504         5218 while ( $Kpnb >= 0 ) {
10299 3563 100 100     11561 if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b'
10300             && $rLL->[$Kpnb]->[_TYPE_] ne '#' )
10301             {
10302 2501         5583 return $Kpnb;
10303             }
10304 1062         2151 $Kpnb--;
10305             }
10306 3         11 return;
10307             } ## end sub K_previous_code
10308              
10309             sub K_previous_nonblank {
10310              
10311             # return index of previous nonblank token before item K;
10312             # Call with $KK=undef to start search at the top of the array
10313 775     775 0 1776 my ( $self, $KK, $rLL ) = @_;
10314              
10315             # use the standard array unless given otherwise
10316 775 100       2041 $rLL = $self->[_rLL_] unless ( defined($rLL) );
10317 775         1170 my $Num = @{$rLL};
  775         1493  
10318 775 100       2264 if ( !defined($KK) ) { $KK = $Num }
  389 50       755  
10319             elsif ( $KK > $Num ) {
10320              
10321             # This fault can be caused by a programming error in which a bad $KK is
10322             # given. The caller should make the first call with KK_new=undef to
10323             # avoid this error.
10324 0         0 Fault(
10325             "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
10326             ) if (DEVEL_MODE);
10327 0         0 return;
10328             }
10329 775         1357 my $Kpnb = $KK - 1;
10330 775 100       1923 return unless ( $Kpnb >= 0 );
10331 766 100       2292 return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
10332 534 50       1395 return unless ( --$Kpnb >= 0 );
10333 534 50       1801 return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
10334              
10335             # Backup loop. We should not get here unless some routine
10336             # slipped repeated blanks into the token stream.
10337 0 0       0 return unless ( --$Kpnb >= 0 );
10338 0         0 while ( $Kpnb >= 0 ) {
10339 0 0       0 if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
  0         0  
10340 0         0 $Kpnb--;
10341             }
10342 0         0 return;
10343             } ## end sub K_previous_nonblank
10344              
10345             sub parent_seqno_by_K {
10346              
10347             # Return the sequence number of the parent container of token K, if any.
10348              
10349 208     208 0 346 my ( $self, $KK ) = @_;
10350 208         336 my $rLL = $self->[_rLL_];
10351              
10352             # The task is to jump forward to the next container token
10353             # and use the sequence number of either it or its parent.
10354              
10355             # For example, consider the following with seqno=5 of the '[' and ']'
10356             # being called with index K of the first token of each line:
10357              
10358             # # result
10359             # push @tests, # -
10360             # [ # -
10361             # sub { 99 }, 'do {&{%s} for 1,2}', # 5
10362             # '(&{})(&{})', undef, # 5
10363             # [ 2, 2, 0 ], 0 # 5
10364             # ]; # -
10365              
10366             # NOTE: The ending parent will be SEQ_ROOT for a balanced file. For
10367             # unbalanced files, last sequence number will either be undefined or it may
10368             # be at a deeper level. In either case we will just return SEQ_ROOT to
10369             # have a defined value and allow formatting to proceed.
10370 208         342 my $parent_seqno = SEQ_ROOT;
10371 208         408 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
10372 208 100       377 if ($type_sequence) {
10373 63         151 $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
10374             }
10375             else {
10376 145         212 my $Kt = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
10377 145 100       283 if ( defined($Kt) ) {
10378 122         236 $type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
10379 122         232 my $type = $rLL->[$Kt]->[_TYPE_];
10380              
10381             # if next container token is closing, it is the parent seqno
10382 122 100       202 if ( $is_closing_type{$type} ) {
10383 19         33 $parent_seqno = $type_sequence;
10384             }
10385              
10386             # otherwise we want its parent container
10387             else {
10388 103         184 $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
10389             }
10390             }
10391             }
10392 208 50       438 $parent_seqno = SEQ_ROOT unless ( defined($parent_seqno) );
10393 208         503 return $parent_seqno;
10394             } ## end sub parent_seqno_by_K
10395              
10396             sub is_in_block_by_i {
10397 316     316 0 961 my ( $self, $i ) = @_;
10398              
10399             # returns true if
10400             # token at i is contained in a BLOCK
10401             # or is at root level
10402             # or there is some kind of error (i.e. unbalanced file)
10403             # returns false otherwise
10404              
10405 316 50       1024 if ( $i < 0 ) {
10406 0         0 DEVEL_MODE && Fault("Bad call, i='$i'\n");
10407 0         0 return 1;
10408             }
10409              
10410 316         775 my $seqno = $parent_seqno_to_go[$i];
10411 316 100 66     3101 return 1 if ( !$seqno || $seqno eq SEQ_ROOT );
10412 141 100       622 return 1 if ( $self->[_rblock_type_of_seqno_]->{$seqno} );
10413 107         419 return;
10414             } ## end sub is_in_block_by_i
10415              
10416             sub is_in_list_by_i {
10417 1766     1766 0 4076 my ( $self, $i ) = @_;
10418              
10419             # returns true if token at i is contained in a LIST
10420             # returns false otherwise
10421 1766         3296 my $seqno = $parent_seqno_to_go[$i];
10422 1766 100 66     10039 return unless ( $seqno && $seqno ne SEQ_ROOT );
10423 588 100       1888 if ( $self->[_ris_list_by_seqno_]->{$seqno} ) {
10424 157         666 return 1;
10425             }
10426 431         1717 return;
10427             } ## end sub is_in_list_by_i
10428              
10429             sub is_list_by_K {
10430              
10431             # Return true if token K is in a list
10432 165     165 0 294 my ( $self, $KK ) = @_;
10433              
10434 165         319 my $parent_seqno = $self->parent_seqno_by_K($KK);
10435 165 50       345 return unless defined($parent_seqno);
10436 165         343 return $self->[_ris_list_by_seqno_]->{$parent_seqno};
10437             } ## end sub is_list_by_K
10438              
10439             sub is_list_by_seqno {
10440              
10441             # Return true if the immediate contents of a container appears to be a
10442             # list.
10443 46     46 0 81 my ( $self, $seqno ) = @_;
10444 46 50       95 return unless defined($seqno);
10445 46         105 return $self->[_ris_list_by_seqno_]->{$seqno};
10446             } ## end sub is_list_by_seqno
10447              
10448             sub resync_lines_and_tokens {
10449              
10450 552     552 0 1367 my $self = shift;
10451              
10452             # Re-construct the arrays of tokens associated with the original input
10453             # lines since they have probably changed due to inserting and deleting
10454             # blanks and a few other tokens.
10455              
10456             # Return parameters:
10457             # set severe_error = true if processing needs to terminate
10458 552         1111 my $severe_error;
10459 552         1482 my $rqw_lines = [];
10460              
10461 552         1515 my $rLL = $self->[_rLL_];
10462 552         1413 my $Klimit = $self->[_Klimit_];
10463 552         1354 my $rlines = $self->[_rlines_];
10464 552         1327 my @Krange_code_without_comments;
10465             my @Klast_valign_code;
10466              
10467             # This is the next token and its line index:
10468 552         1227 my $Knext = 0;
10469 552 100       1825 my $Kmax = defined($Klimit) ? $Klimit : -1;
10470              
10471             # Verify that old line indexes are in still order. If this error occurs,
10472             # check locations where sub 'respace_tokens' creates new tokens (like
10473             # blank spaces). It must have set a bad old line index.
10474 552         1124 if ( DEVEL_MODE && defined($Klimit) ) {
10475             my $iline = $rLL->[0]->[_LINE_INDEX_];
10476             foreach my $KK ( 1 .. $Klimit ) {
10477             my $iline_last = $iline;
10478             $iline = $rLL->[$KK]->[_LINE_INDEX_];
10479             if ( $iline < $iline_last ) {
10480             my $KK_m = $KK - 1;
10481             my $token_m = $rLL->[$KK_m]->[_TOKEN_];
10482             my $token = $rLL->[$KK]->[_TOKEN_];
10483             my $type_m = $rLL->[$KK_m]->[_TYPE_];
10484             my $type = $rLL->[$KK]->[_TYPE_];
10485             Fault(<<EOM);
10486             Line indexes out of order at index K=$KK:
10487             at KK-1 =$KK_m: old line=$iline_last, type='$type_m', token='$token_m'
10488             at KK =$KK: old line=$iline, type='$type', token='$token',
10489             EOM
10490             }
10491             }
10492             }
10493              
10494 552         1390 my $iline = -1;
10495 552         1137 foreach my $line_of_tokens ( @{$rlines} ) {
  552         1708  
10496 7628         10480 $iline++;
10497 7628         12891 my $line_type = $line_of_tokens->{_line_type};
10498 7628 100       14232 if ( $line_type eq 'CODE' ) {
10499              
10500             # Get the old number of tokens on this line
10501 7459         10983 my $rK_range_old = $line_of_tokens->{_rK_range};
10502 7459         9582 my ( $Kfirst_old, $Klast_old ) = @{$rK_range_old};
  7459         14073  
10503 7459         10577 my $Kdiff_old = 0;
10504 7459 100       13607 if ( defined($Kfirst_old) ) {
10505 6656         9311 $Kdiff_old = $Klast_old - $Kfirst_old;
10506             }
10507              
10508             # Find the range of NEW K indexes for the line:
10509             # $Kfirst = index of first token on line
10510             # $Klast = index of last token on line
10511 7459         10296 my ( $Kfirst, $Klast );
10512              
10513 7459         10089 my $Knext_beg = $Knext; # this will be $Kfirst if we find tokens
10514              
10515             # Optimization: Although the actual K indexes may be completely
10516             # changed after respacing, the number of tokens on any given line
10517             # will often be nearly unchanged. So we will see if we can start
10518             # our search by guessing that the new line has the same number
10519             # of tokens as the old line.
10520 7459         10217 my $Knext_guess = $Knext + $Kdiff_old;
10521 7459 100 100     27262 if ( $Knext_guess > $Knext
      100        
10522             && $Knext_guess < $Kmax
10523             && $rLL->[$Knext_guess]->[_LINE_INDEX_] <= $iline )
10524             {
10525              
10526             # the guess is good, so we can start our search here
10527 4539         6693 $Knext = $Knext_guess + 1;
10528             }
10529              
10530 7459   100     22700 while ($Knext <= $Kmax
10531             && $rLL->[$Knext]->[_LINE_INDEX_] <= $iline )
10532             {
10533 15982         43301 $Knext++;
10534             }
10535              
10536 7459 100       14216 if ( $Knext > $Knext_beg ) {
10537              
10538 6650         9055 $Klast = $Knext - 1;
10539              
10540             # Delete any terminal blank token
10541 6650 100       12891 if ( $rLL->[$Klast]->[_TYPE_] eq 'b' ) { $Klast -= 1 }
  5216         7275  
10542              
10543 6650 50       11240 if ( $Klast < $Knext_beg ) {
10544 0         0 $Klast = undef;
10545             }
10546             else {
10547              
10548 6650         8947 $Kfirst = $Knext_beg;
10549              
10550             # Save ranges of non-comment code. This will be used by
10551             # sub keep_old_line_breaks.
10552 6650 100       15406 if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) {
10553 5924         14575 push @Krange_code_without_comments, [ $Kfirst, $Klast ];
10554             }
10555              
10556             # Only save ending K indexes of code types which are blank
10557             # or 'VER'. These will be used for a convergence check.
10558             # See related code in sub 'convey_batch_to_vertical_aligner'
10559 6650         12122 my $CODE_type = $line_of_tokens->{_code_type};
10560 6650 100 100     15826 if ( !$CODE_type
10561             || $CODE_type eq 'VER' )
10562             {
10563 5742         9671 push @Klast_valign_code, $Klast;
10564             }
10565             }
10566             }
10567              
10568             # It is only safe to trim the actual line text if the input
10569             # line had a terminal blank token. Otherwise, we may be
10570             # in a quote.
10571 7459 100       15583 if ( $line_of_tokens->{_ended_in_blank_token} ) {
10572 145         1032 $line_of_tokens->{_line_text} =~ s/\s+$//;
10573             }
10574 7459         16134 $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
10575              
10576             # Deleting semicolons can create new empty code lines
10577             # which should be marked as blank
10578 7459 100       13810 if ( !defined($Kfirst) ) {
10579 809         1942 my $CODE_type = $line_of_tokens->{_code_type};
10580 809 100       2906 if ( !$CODE_type ) {
10581 1         4 $line_of_tokens->{_code_type} = 'BL';
10582             }
10583             }
10584             else {
10585              
10586             #---------------------------------------------------
10587             # save indexes of all lines with a 'q' at either end
10588             # for later use by sub find_multiline_qw
10589             #---------------------------------------------------
10590 6650 100 100     27221 if ( $rLL->[$Kfirst]->[_TYPE_] eq 'q'
10591             || $rLL->[$Klast]->[_TYPE_] eq 'q' )
10592             {
10593 227         430 push @{$rqw_lines}, $iline;
  227         603  
10594             }
10595             }
10596             }
10597             }
10598              
10599             # There shouldn't be any nodes beyond the last one. This routine is
10600             # relinking lines and tokens after the tokens have been respaced. A fault
10601             # here indicates some kind of bug has been introduced into the above loops.
10602             # There is not good way to keep going; we better stop here.
10603 552 50       3760 if ( $Knext <= $Kmax ) {
10604 0         0 Fault_Warn(
10605             "unexpected tokens at end of file when reconstructing lines");
10606 0         0 $severe_error = 1;
10607 0         0 return ( $severe_error, $rqw_lines );
10608             }
10609 552         1928 $self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments;
10610              
10611             # Setup the convergence test in the FileWriter based on line-ending indexes
10612 552         1333 my $file_writer_object = $self->[_file_writer_object_];
10613 552         4116 $file_writer_object->setup_convergence_test( \@Klast_valign_code );
10614              
10615 552         2201 return ( $severe_error, $rqw_lines );
10616              
10617             } ## end sub resync_lines_and_tokens
10618              
10619             sub check_for_old_break {
10620 32     32 0 48 my ( $self, $KK, $rkeep_break_hash, $rbreak_hash ) = @_;
10621              
10622             # This sub is called to help implement flags:
10623             # --keep-old-breakpoints-before and --keep-old-breakpoints-after
10624             # Given:
10625             # $KK = index of a token,
10626             # $rkeep_break_hash = user control for --keep-old-...
10627             # $rbreak_hash = hash of tokens where breaks are requested
10628             # Set $rbreak_hash as follows if a user break is requested:
10629             # = 1 make a hard break (flush the current batch)
10630             # best for something like leading commas (-kbb=',')
10631             # = 2 make a soft break (keep building current batch)
10632             # best for something like leading ->
10633              
10634 32         54 my $rLL = $self->[_rLL_];
10635              
10636 32         47 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
10637              
10638             # non-container tokens use the type as the key
10639 32 100       53 if ( !$seqno ) {
10640 25         37 my $type = $rLL->[$KK]->[_TYPE_];
10641 25 100       68 if ( $rkeep_break_hash->{$type} ) {
10642 7 50       31 $rbreak_hash->{$KK} = $is_soft_keep_break_type{$type} ? 2 : 1;
10643             }
10644             }
10645              
10646             # container tokens use the token as the key
10647             else {
10648 7         14 my $token = $rLL->[$KK]->[_TOKEN_];
10649 7         12 my $flag = $rkeep_break_hash->{$token};
10650 7 50       13 if ($flag) {
10651              
10652 0   0     0 my $match = $flag eq '1' || $flag eq '*';
10653              
10654             # check for special matching codes
10655 0 0       0 if ( !$match ) {
10656 0 0 0     0 if ( $token eq '(' || $token eq ')' ) {
    0 0        
10657 0         0 $match = $self->match_paren_control_flag( $seqno, $flag );
10658             }
10659             elsif ( $token eq '{' || $token eq '}' ) {
10660              
10661             # These tentative codes 'b' and 'B' for brace types are
10662             # placeholders for possible future brace types. They
10663             # are not documented and may be changed.
10664 0         0 my $block_type = $self->[_rblock_type_of_seqno_]->{$seqno};
10665 0 0       0 if ( $flag eq 'b' ) { $match = $block_type }
  0 0       0  
10666 0         0 elsif ( $flag eq 'B' ) { $match = !$block_type }
10667             else {
10668             # unknown code - no match
10669             }
10670             }
10671             }
10672 0 0       0 if ($match) {
10673 0         0 my $type = $rLL->[$KK]->[_TYPE_];
10674 0 0       0 $rbreak_hash->{$KK} = $is_soft_keep_break_type{$type} ? 2 : 1;
10675             }
10676             }
10677             }
10678 32         53 return;
10679             } ## end sub check_for_old_break
10680              
10681             sub keep_old_line_breaks {
10682              
10683             # Called once per file to find and mark any old line breaks which
10684             # should be kept. We will be translating the input hashes into
10685             # token indexes.
10686              
10687             # A flag is set as follows:
10688             # = 1 make a hard break (flush the current batch)
10689             # best for something like leading commas (-kbb=',')
10690             # = 2 make a soft break (keep building current batch)
10691             # best for something like leading ->
10692              
10693 555     555 0 1478 my ($self) = @_;
10694              
10695 555         1500 my $rLL = $self->[_rLL_];
10696 555         1331 my $rKrange_code_without_comments =
10697             $self->[_rKrange_code_without_comments_];
10698 555         1271 my $rbreak_before_Kfirst = $self->[_rbreak_before_Kfirst_];
10699 555         1264 my $rbreak_after_Klast = $self->[_rbreak_after_Klast_];
10700 555         1297 my $rbreak_container = $self->[_rbreak_container_];
10701              
10702             #----------------------------------------
10703             # Apply --break-at-old-method-breakpoints
10704             #----------------------------------------
10705              
10706             # This code moved here from sub break_lists to fix b1120
10707 555 100       2267 if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
10708 2         7 foreach my $item ( @{$rKrange_code_without_comments} ) {
  2         6  
10709 16         23 my ( $Kfirst, $Klast ) = @{$item};
  16         31  
10710 16         32 my $type = $rLL->[$Kfirst]->[_TYPE_];
10711 16         34 my $token = $rLL->[$Kfirst]->[_TOKEN_];
10712              
10713             # leading '->' use a value of 2 which causes a soft
10714             # break rather than a hard break
10715 16 100       48 if ( $type eq '->' ) {
    100          
10716 4         23 $rbreak_before_Kfirst->{$Kfirst} = 2;
10717             }
10718              
10719             # leading ')->' use a special flag to insure that both
10720             # opening and closing parens get opened
10721             # Fix for b1120: only for parens, not braces
10722             elsif ( $token eq ')' ) {
10723 2         11 my $Kn = $self->K_next_nonblank($Kfirst);
10724             next
10725 2 50 33     26 unless ( defined($Kn)
      33        
10726             && $Kn <= $Klast
10727             && $rLL->[$Kn]->[_TYPE_] eq '->' );
10728 2         6 my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_];
10729 2 50       5 next unless ($seqno);
10730              
10731             # Note: in previous versions there was a fix here to avoid
10732             # instability between conflicting -bom and -pvt or -pvtc flags.
10733             # The fix skipped -bom for a small line difference. But this
10734             # was troublesome, and instead the fix has been moved to
10735             # sub set_vertical_tightness_flags where priority is given to
10736             # the -bom flag over -pvt and -pvtc flags. Both opening and
10737             # closing paren flags are involved because even though -bom only
10738             # requests breaking before the closing paren, automated logic
10739             # opens the opening paren when the closing paren opens.
10740             # Relevant cases are b977, b1215, b1270, b1303
10741              
10742 2         6 $rbreak_container->{$seqno} = 1;
10743             }
10744             }
10745             }
10746              
10747             #---------------------------------------------------------------------
10748             # Apply --keep-old-breakpoints-before and --keep-old-breakpoints-after
10749             #---------------------------------------------------------------------
10750              
10751 555 100 66     3373 return unless ( %keep_break_before_type || %keep_break_after_type );
10752              
10753 1         5 foreach my $item ( @{$rKrange_code_without_comments} ) {
  1         5  
10754 16         21 my ( $Kfirst, $Klast ) = @{$item};
  16         29  
10755 16         40 $self->check_for_old_break( $Kfirst, \%keep_break_before_type,
10756             $rbreak_before_Kfirst );
10757 16         30 $self->check_for_old_break( $Klast, \%keep_break_after_type,
10758             $rbreak_after_Klast );
10759             }
10760 1         3 return;
10761             } ## end sub keep_old_line_breaks
10762              
10763             sub weld_containers {
10764              
10765             # Called once per file to do any welding operations requested by --weld*
10766             # flags.
10767 555     555 0 1551 my ($self) = @_;
10768              
10769             # This count is used to eliminate needless calls for weld checks elsewhere
10770 555         1298 $total_weld_count = 0;
10771              
10772 555 100       1839 return if ( $rOpts->{'indent-only'} );
10773 552 100       1789 return unless ($rOpts_add_newlines);
10774              
10775             # Important: sub 'weld_cuddled_blocks' must be called before
10776             # sub 'weld_nested_containers'. This is because the cuddled option needs to
10777             # use the original _LEVEL_ values of containers, but the weld nested
10778             # containers changes _LEVEL_ of welded containers.
10779              
10780             # Here is a good test case to be sure that both cuddling and welding
10781             # are working and not interfering with each other: <<snippets/ce_wn1.in>>
10782              
10783             # perltidy -wn -ce
10784              
10785             # if ($BOLD_MATH) { (
10786             # $labels, $comment,
10787             # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
10788             # ) } else { (
10789             # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
10790             # $after
10791             # ) }
10792              
10793 546 100       1163 $self->weld_cuddled_blocks() if ( %{$rcuddled_block_types} );
  546         1916  
10794              
10795 546 100       2238 if ( $rOpts->{'weld-nested-containers'} ) {
10796              
10797 23         179 $self->weld_nested_containers();
10798              
10799 23         168 $self->weld_nested_quotes();
10800             }
10801              
10802             #-------------------------------------------------------------
10803             # All welding is done. Finish setting up weld data structures.
10804             #-------------------------------------------------------------
10805              
10806 546         1612 my $rLL = $self->[_rLL_];
10807 546         1304 my $rK_weld_left = $self->[_rK_weld_left_];
10808 546         1307 my $rK_weld_right = $self->[_rK_weld_right_];
10809 546         1153 my $rweld_len_right_at_K = $self->[_rweld_len_right_at_K_];
10810              
10811 546         1478 my @K_multi_weld;
10812 546         1116 my @keys = keys %{$rK_weld_right};
  546         1951  
10813 546         1462 $total_weld_count = @keys;
10814              
10815             # First pass to process binary welds.
10816             # This loop is processed in unsorted order for efficiency.
10817 546         1992 foreach my $Kstart (@keys) {
10818 110         199 my $Kend = $rK_weld_right->{$Kstart};
10819              
10820             # An error here would be due to an incorrect initialization introduced
10821             # in one of the above weld routines, like sub weld_nested.
10822 110 50       366 if ( $Kend <= $Kstart ) {
10823 0         0 Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n")
10824             if (DEVEL_MODE);
10825 0         0 next;
10826             }
10827              
10828             # Set weld values for all tokens this welded pair
10829 110         340 foreach ( $Kstart + 1 .. $Kend ) {
10830 265         804 $rK_weld_left->{$_} = $Kstart;
10831             }
10832 110         343 foreach my $Kx ( $Kstart .. $Kend - 1 ) {
10833 265         540 $rK_weld_right->{$Kx} = $Kend;
10834 265         634 $rweld_len_right_at_K->{$Kx} =
10835             $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
10836             $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
10837             }
10838              
10839             # Remember the leftmost index of welds which continue to the right
10840 110 100 100     454 if ( defined( $rK_weld_right->{$Kend} )
10841             && !defined( $rK_weld_left->{$Kstart} ) )
10842             {
10843 17         58 push @K_multi_weld, $Kstart;
10844             }
10845             }
10846              
10847             # Second pass to process chains of welds (these are rare).
10848             # This has to be processed in sorted order.
10849 546 100       2216 if (@K_multi_weld) {
10850 9         29 my $Kend = -1;
10851 9         51 foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) {
  8         36  
10852              
10853             # Skip any interior K which was originally missing a left link
10854 17 50       56 next if ( $Kstart <= $Kend );
10855              
10856             # Find the end of this chain
10857 17         45 $Kend = $rK_weld_right->{$Kstart};
10858 17         39 my $Knext = $rK_weld_right->{$Kend};
10859 17         52 while ( defined($Knext) ) {
10860 19         36 $Kend = $Knext;
10861 19         50 $Knext = $rK_weld_right->{$Kend};
10862             }
10863              
10864             # Set weld values this chain
10865 17         51 foreach ( $Kstart + 1 .. $Kend ) {
10866 79         176 $rK_weld_left->{$_} = $Kstart;
10867             }
10868 17         58 foreach my $Kx ( $Kstart .. $Kend - 1 ) {
10869 79         147 $rK_weld_right->{$Kx} = $Kend;
10870 79         195 $rweld_len_right_at_K->{$Kx} =
10871             $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
10872             $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
10873             }
10874             }
10875             }
10876              
10877 546         1410 return;
10878             } ## end sub weld_containers
10879              
10880             sub cumulative_length_before_K {
10881 59     59 0 137 my ( $self, $KK ) = @_;
10882              
10883             # Returns the cumulative character length from the first token to
10884             # token before the token at index $KK.
10885 59         130 my $rLL = $self->[_rLL_];
10886 59 50       241 return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
10887             }
10888              
10889             sub weld_cuddled_blocks {
10890 12     12 0 41 my ($self) = @_;
10891              
10892             # Called once per file to handle cuddled formatting
10893              
10894 12         54 my $rK_weld_left = $self->[_rK_weld_left_];
10895 12         32 my $rK_weld_right = $self->[_rK_weld_right_];
10896 12         33 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
10897              
10898             # This routine implements the -cb flag by finding the appropriate
10899             # closing and opening block braces and welding them together.
10900 12 50       24 return unless ( %{$rcuddled_block_types} );
  12         55  
10901              
10902 12         34 my $rLL = $self->[_rLL_];
10903 12 50 33     58 return unless ( defined($rLL) && @{$rLL} );
  12         51  
10904              
10905 12         39 my $rbreak_container = $self->[_rbreak_container_];
10906 12         37 my $ris_broken_container = $self->[_ris_broken_container_];
10907 12         29 my $ris_cuddled_closing_brace = $self->[_ris_cuddled_closing_brace_];
10908 12         63 my $K_closing_container = $self->[_K_closing_container_];
10909              
10910             # A stack to remember open chains at all levels: This is a hash rather than
10911             # an array for safety because negative levels can occur in files with
10912             # errors. This allows us to keep processing with negative levels.
10913             # $in_chain{$level} = [$chain_type, $type_sequence];
10914 12         31 my %in_chain;
10915 12         44 my $CBO = $rOpts->{'cuddled-break-option'};
10916              
10917             # loop over structure items to find cuddled pairs
10918 12         38 my $level = 0;
10919 12         31 my $KNEXT = $self->[_K_first_seq_item_];
10920 12         71 while ( defined($KNEXT) ) {
10921 394         518 my $KK = $KNEXT;
10922 394         1627 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
10923 394         498 my $rtoken_vars = $rLL->[$KK];
10924 394         563 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
10925 394 50       657 if ( !$type_sequence ) {
10926 0 0       0 next if ( $KK == 0 ); # first token in file may not be container
10927              
10928             # A fault here implies that an error was made in the little loop at
10929             # the bottom of sub 'respace_tokens' which set the values of
10930             # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
10931             # loop control lines above.
10932 0         0 Fault("sequence = $type_sequence not defined at K=$KK")
10933             if (DEVEL_MODE);
10934 0         0 next;
10935             }
10936              
10937             # NOTE: we must use the original levels here. They can get changed
10938             # by sub 'weld_nested_containers', so this routine must be called
10939             # before sub 'weld_nested_containers'.
10940 394         521 my $last_level = $level;
10941 394         554 $level = $rtoken_vars->[_LEVEL_];
10942              
10943 394 100       786 if ( $level < $last_level ) { $in_chain{$last_level} = undef }
  72 100       184  
10944 72         197 elsif ( $level > $last_level ) { $in_chain{$level} = undef }
10945              
10946             # We are only looking at code blocks
10947 394         564 my $token = $rtoken_vars->[_TOKEN_];
10948 394         554 my $type = $rtoken_vars->[_TYPE_];
10949 394 100       807 next unless ( $type eq $token );
10950              
10951 218 100       622 if ( $token eq '{' ) {
    100          
10952              
10953 65         154 my $block_type = $rblock_type_of_seqno->{$type_sequence};
10954 65 50       128 if ( !$block_type ) {
10955              
10956             # patch for unrecognized block types which may not be labeled
10957 0         0 my $Kp = $self->K_previous_nonblank($KK);
10958 0   0     0 while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
10959 0         0 $Kp = $self->K_previous_nonblank($Kp);
10960             }
10961 0 0       0 next unless $Kp;
10962 0         0 $block_type = $rLL->[$Kp]->[_TOKEN_];
10963             }
10964 65 100       155 if ( $in_chain{$level} ) {
10965              
10966             # we are in a chain and are at an opening block brace.
10967             # See if we are welding this opening brace with the previous
10968             # block brace. Get their identification numbers:
10969 18         59 my $closing_seqno = $in_chain{$level}->[1];
10970 18         47 my $opening_seqno = $type_sequence;
10971              
10972             # The preceding block must be on multiple lines so that its
10973             # closing brace will start a new line.
10974 18 0 33     78 if ( !$ris_broken_container->{$closing_seqno}
10975             && !$rbreak_container->{$closing_seqno} )
10976             {
10977 0 0       0 next unless ( $CBO == 2 );
10978 0         0 $rbreak_container->{$closing_seqno} = 1;
10979             }
10980              
10981             # We can weld the closing brace to its following word ..
10982 18         43 my $Ko = $K_closing_container->{$closing_seqno};
10983 18         27 my $Kon;
10984 18 50       51 if ( defined($Ko) ) {
10985 18         60 $Kon = $self->K_next_nonblank($Ko);
10986             }
10987              
10988             # ..unless it is a comment
10989 18 50 33     104 if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) {
10990              
10991             # OK to weld these two tokens...
10992 18         71 $rK_weld_right->{$Ko} = $Kon;
10993 18         58 $rK_weld_left->{$Kon} = $Ko;
10994              
10995             # Set flag that we want to break the next container
10996             # so that the cuddled line is balanced.
10997 18 50       114 $rbreak_container->{$opening_seqno} = 1
10998             if ($CBO);
10999              
11000             # Remember which braces are cuddled.
11001             # The closing brace is used to set adjusted indentations.
11002             # The opening brace is not yet used but might eventually
11003             # be needed in setting adjusted indentation.
11004 18         61 $ris_cuddled_closing_brace->{$closing_seqno} = 1;
11005              
11006             }
11007              
11008             }
11009             else {
11010              
11011             # We are not in a chain. Start a new chain if we see the
11012             # starting block type.
11013 47 50       121 if ( $rcuddled_block_types->{$block_type} ) {
11014 0         0 $in_chain{$level} = [ $block_type, $type_sequence ];
11015             }
11016             else {
11017 47         90 $block_type = '*';
11018 47         211 $in_chain{$level} = [ $block_type, $type_sequence ];
11019             }
11020             }
11021             }
11022             elsif ( $token eq '}' ) {
11023 65 50       186 if ( $in_chain{$level} ) {
11024              
11025             # We are in a chain at a closing brace. See if this chain
11026             # continues..
11027 65         171 my $Knn = $self->K_next_code($KK);
11028 65 100       205 next unless $Knn;
11029              
11030 57         142 my $chain_type = $in_chain{$level}->[0];
11031 57         107 my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
11032 57 100       174 if (
11033             $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
11034             )
11035             {
11036              
11037             # Note that we do not weld yet because we must wait until
11038             # we we are sure that an opening brace for this follows.
11039 18         63 $in_chain{$level}->[1] = $type_sequence;
11040             }
11041 39         118 else { $in_chain{$level} = undef }
11042             }
11043             }
11044             }
11045 12         55 return;
11046             } ## end sub weld_cuddled_blocks
11047              
11048             sub find_nested_pairs {
11049 23     23 0 71 my $self = shift;
11050              
11051             # This routine is called once per file to do preliminary work needed for
11052             # the --weld-nested option. This information is also needed for adding
11053             # semicolons.
11054              
11055 23         79 my $rLL = $self->[_rLL_];
11056 23 50 33     116 return unless ( defined($rLL) && @{$rLL} );
  23         116  
11057 23         59 my $Num = @{$rLL};
  23         62  
11058              
11059 23         65 my $K_opening_container = $self->[_K_opening_container_];
11060 23         73 my $K_closing_container = $self->[_K_closing_container_];
11061 23         59 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
11062              
11063             # We define an array of pairs of nested containers
11064 23         52 my @nested_pairs;
11065              
11066             # Names of calling routines can either be marked as 'i' or 'w',
11067             # and they may invoke a sub call with an '->'. We will consider
11068             # any consecutive string of such types as a single unit when making
11069             # weld decisions. We also allow a leading !
11070 23         142 my $is_name_type = {
11071             'i' => 1,
11072             'w' => 1,
11073             'U' => 1,
11074             '->' => 1,
11075             '!' => 1,
11076             };
11077              
11078             # Loop over all closing container tokens
11079 23         55 foreach my $inner_seqno ( keys %{$K_closing_container} ) {
  23         156  
11080 248         394 my $K_inner_closing = $K_closing_container->{$inner_seqno};
11081              
11082             # See if it is immediately followed by another, outer closing token
11083 248         364 my $K_outer_closing = $K_inner_closing + 1;
11084 248 100 100     911 $K_outer_closing += 1
11085             if ( $K_outer_closing < $Num
11086             && $rLL->[$K_outer_closing]->[_TYPE_] eq 'b' );
11087              
11088 248 100       480 next unless ( $K_outer_closing < $Num );
11089 244         404 my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_];
11090 244 100       484 next unless ($outer_seqno);
11091 99         205 my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_];
11092 99 100       292 next unless ( $is_closing_token{$token_outer_closing} );
11093              
11094             # Simple filter: No commas or semicolons in the outer container
11095 77         177 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$outer_seqno};
11096 77 100       211 if ($rtype_count) {
11097 11 100 100     99 next if ( $rtype_count->{','} || $rtype_count->{';'} );
11098             }
11099              
11100             # Now we have to check the opening tokens.
11101 69         145 my $K_outer_opening = $K_opening_container->{$outer_seqno};
11102 69         138 my $K_inner_opening = $K_opening_container->{$inner_seqno};
11103 69 50 33     291 next unless defined($K_outer_opening) && defined($K_inner_opening);
11104              
11105 69         135 my $inner_blocktype = $rblock_type_of_seqno->{$inner_seqno};
11106 69         124 my $outer_blocktype = $rblock_type_of_seqno->{$outer_seqno};
11107              
11108             # Verify that the inner opening token is the next container after the
11109             # outer opening token.
11110 69         133 my $K_io_check = $rLL->[$K_outer_opening]->[_KNEXT_SEQ_ITEM_];
11111 69 50       168 next unless defined($K_io_check);
11112 69 100       202 if ( $K_io_check != $K_inner_opening ) {
11113              
11114             # The inner opening container does not immediately follow the outer
11115             # opening container, but we may still allow a weld if they are
11116             # separated by a sub signature. For example, we may have something
11117             # like this, where $K_io_check may be at the first 'x' instead of
11118             # 'io'. So we need to hop over the signature and see if we arrive
11119             # at 'io'.
11120              
11121             # oo io
11122             # | x x |
11123             # $obj->then( sub ( $code ) {
11124             # ...
11125             # return $c->render(text => '', status => $code);
11126             # } );
11127             # | |
11128             # ic oc
11129              
11130 8 100 100     63 next if ( !$inner_blocktype || $inner_blocktype ne 'sub' );
11131 2 50       15 next if $rLL->[$K_io_check]->[_TOKEN_] ne '(';
11132 2         8 my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_];
11133 2 50       14 next unless defined($seqno_signature);
11134 2         12 my $K_signature_closing = $K_closing_container->{$seqno_signature};
11135 2 50       7 next unless defined($K_signature_closing);
11136 2         9 my $K_test = $rLL->[$K_signature_closing]->[_KNEXT_SEQ_ITEM_];
11137             next
11138 2 50 33     19 unless ( defined($K_test) && $K_test == $K_inner_opening );
11139              
11140             # OK, we have arrived at 'io' in the above diagram. We should put
11141             # a limit on the length or complexity of the signature here. There
11142             # is no perfect way to do this, one way is to put a limit on token
11143             # count. For consistency with older versions, we should allow a
11144             # signature with a single variable to weld, but not with
11145             # multiple variables. A single variable as in 'sub ($code) {' can
11146             # have a $Kdiff of 2 to 4, depending on spacing.
11147              
11148             # But two variables like 'sub ($v1,$v2) {' can have a diff of 4 to
11149             # 7, depending on spacing. So to keep formatting consistent with
11150             # previous versions, we will also avoid welding if there is a comma
11151             # in the signature.
11152              
11153 2         6 my $Kdiff = $K_signature_closing - $K_io_check;
11154 2 50       7 next if ( $Kdiff > 4 );
11155              
11156             # backup comma count test; but we cannot get here with Kdiff<=4
11157 2         6 my $rtc = $self->[_rtype_count_by_seqno_]->{$seqno_signature};
11158 2 0 33     9 next if ( $rtc && $rtc->{','} );
11159             }
11160              
11161             # Yes .. this is a possible nesting pair.
11162             # They can be separated by a small amount.
11163 63         121 my $K_diff = $K_inner_opening - $K_outer_opening;
11164              
11165             # Count the number of nonblank characters separating them.
11166             # Note: the $nonblank_count includes the inner opening container
11167             # but not the outer opening container, so it will be >= 1.
11168 63 50       176 if ( $K_diff < 0 ) { next } # Shouldn't happen
  0         0  
11169 63         102 my $nonblank_count = 0;
11170 63         112 my $type;
11171             my $is_name;
11172              
11173             # Here is an example of a long identifier chain which counts as a
11174             # single nonblank here (this spans about 10 K indexes):
11175             # if ( !Boucherot::SetOfConnections->new->handler->execute(
11176             # ^--K_o_o ^--K_i_o
11177             # @array) )
11178 63         98 my $Kn_first = $K_outer_opening;
11179 63         109 my $Kn_last_nonblank;
11180             my $saw_comment;
11181              
11182 63         163 foreach my $Kn ( $K_outer_opening + 1 .. $K_inner_opening ) {
11183 198 100       459 next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' );
11184 117 100       267 if ( !$nonblank_count ) { $Kn_first = $Kn }
  64         105  
11185 117 100       289 if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; }
  60         107  
  60         122  
11186 57         104 $Kn_last_nonblank = $Kn;
11187              
11188             # skip chain of identifier tokens
11189 57         114 my $last_type = $type;
11190 57         87 my $last_is_name = $is_name;
11191 57         106 $type = $rLL->[$Kn]->[_TYPE_];
11192 57 50       160 if ( $type eq '#' ) { $saw_comment = 1; last }
  0         0  
  0         0  
11193 57         110 $is_name = $is_name_type->{$type};
11194 57 100 100     203 next if ( $is_name && $last_is_name );
11195              
11196             # do not count a possible leading - of bareword hash key
11197 48 100 66     141 next if ( $type eq 'm' && !$last_type );
11198              
11199 47         73 $nonblank_count++;
11200 47 100       119 last if ( $nonblank_count > 2 );
11201             }
11202              
11203             # Do not weld across a comment .. fix for c058.
11204 63 50       173 next if ($saw_comment);
11205              
11206             # Patch for b1104: do not weld to a paren preceded by sort/map/grep
11207             # because the special line break rules may cause a blinking state
11208 63 100 100     462 if ( defined($Kn_last_nonblank)
      100        
11209             && $rLL->[$K_inner_opening]->[_TOKEN_] eq '('
11210             && $rLL->[$Kn_last_nonblank]->[_TYPE_] eq 'k' )
11211             {
11212 2         12 my $token = $rLL->[$Kn_last_nonblank]->[_TOKEN_];
11213              
11214             # Turn off welding at sort/map/grep (
11215 2 50       9 if ( $is_sort_map_grep{$token} ) { $nonblank_count = 10 }
  0         0  
11216             }
11217              
11218 63         140 my $token_oo = $rLL->[$K_outer_opening]->[_TOKEN_];
11219              
11220 63 50 100     517 if (
      66        
      66        
      100        
      100        
      100        
      66        
      66        
      33        
      100        
11221              
11222             # 1: adjacent opening containers, like: do {{
11223             $nonblank_count == 1
11224              
11225             # 2. anonymous sub + prototype or sig: )->then( sub ($code) {
11226             # ... but it seems best not to stack two structural blocks, like
11227             # this
11228             # sub make_anon_with_my_sub { sub {
11229             # because it probably hides the structure a little too much.
11230             || ( $inner_blocktype
11231             && $inner_blocktype eq 'sub'
11232             && $rLL->[$Kn_first]->[_TOKEN_] eq 'sub'
11233             && !$outer_blocktype )
11234              
11235             # 3. short item following opening paren, like: fun( yyy (
11236             || $nonblank_count == 2 && $token_oo eq '('
11237              
11238             # 4. weld around fat commas, if requested (git #108), such as
11239             # elf->call_method( method_name_foo => {
11240             || ( $type eq '=>'
11241             && $nonblank_count <= 3
11242             && %weld_fat_comma_rules
11243             && $weld_fat_comma_rules{$token_oo} )
11244             )
11245             {
11246 57         164 push @nested_pairs,
11247             [ $inner_seqno, $outer_seqno, $K_inner_closing ];
11248             }
11249 63         159 next;
11250             }
11251              
11252             # The weld routine expects the pairs in order in the form
11253             # [$seqno_inner, $seqno_outer]
11254             # And they must be in the same order as the inner closing tokens
11255             # (otherwise, welds of three or more adjacent tokens will not work). The K
11256             # value of this inner closing token has temporarily been stored for
11257             # sorting.
11258             @nested_pairs =
11259              
11260             # Drop the K index after sorting (it would cause trouble downstream)
11261 57         224 map { [ $_->[0], $_->[1] ] }
11262              
11263             # Sort on the K values
11264 23         254 sort { $a->[2] <=> $b->[2] } @nested_pairs;
  52         170  
11265              
11266 23         128 return \@nested_pairs;
11267             } ## end sub find_nested_pairs
11268              
11269             sub match_paren_control_flag {
11270              
11271             # Decide if this paren is excluded by user request:
11272             # undef matches no parens
11273             # '*' matches all parens
11274             # 'k' matches only if the previous nonblank token is a perl builtin
11275             # keyword (such as 'if', 'while'),
11276             # 'K' matches if 'k' does not, meaning if the previous token is not a
11277             # keyword.
11278             # 'f' matches if the previous token is a function other than a keyword.
11279             # 'F' matches if 'f' does not.
11280             # 'w' matches if either 'k' or 'f' match.
11281             # 'W' matches if 'w' does not.
11282 6     6 0 18 my ( $self, $seqno, $flag, $rLL ) = @_;
11283              
11284             # Input parameters:
11285             # $seqno = sequence number of the container (should be paren)
11286             # $flag = the flag which defines what matches
11287             # $rLL = an optional alternate token list needed for respace operations
11288 6 50       24 $rLL = $self->[_rLL_] unless ( defined($rLL) );
11289              
11290 6 50       19 return 0 unless ( defined($flag) );
11291 6 50       19 return 0 if $flag eq '0';
11292 6 50       19 return 1 if $flag eq '1';
11293 6 50       16 return 1 if $flag eq '*';
11294 6 50       17 return 0 unless ($seqno);
11295 6         14 my $K_opening = $self->[_K_opening_container_]->{$seqno};
11296 6 50       18 return unless ( defined($K_opening) );
11297              
11298 6         11 my ( $is_f, $is_k, $is_w );
11299 6         14 my $Kp = $self->K_previous_nonblank( $K_opening, $rLL );
11300 6 50       17 if ( defined($Kp) ) {
11301 6         19 my $type_p = $rLL->[$Kp]->[_TYPE_];
11302              
11303             # keyword?
11304 6         17 $is_k = $type_p eq 'k';
11305              
11306             # function call?
11307 6         17 $is_f = $self->[_ris_function_call_paren_]->{$seqno};
11308              
11309             # either keyword or function call?
11310 6   100     21 $is_w = $is_k || $is_f;
11311             }
11312 6         11 my $match;
11313 6 50       24 if ( $flag eq 'k' ) { $match = $is_k }
  0 50       0  
    0          
    0          
    0          
    0          
11314 6         14 elsif ( $flag eq 'K' ) { $match = !$is_k }
11315 0         0 elsif ( $flag eq 'f' ) { $match = $is_f }
11316 0         0 elsif ( $flag eq 'F' ) { $match = !$is_f }
11317 0         0 elsif ( $flag eq 'w' ) { $match = $is_w }
11318 0         0 elsif ( $flag eq 'W' ) { $match = !$is_w }
11319 6         29 return $match;
11320             } ## end sub match_paren_control_flag
11321              
11322             sub is_excluded_weld {
11323              
11324             # decide if this weld is excluded by user request
11325 35     35 0 78 my ( $self, $KK, $is_leading ) = @_;
11326 35         59 my $rLL = $self->[_rLL_];
11327 35         56 my $rtoken_vars = $rLL->[$KK];
11328 35         61 my $token = $rtoken_vars->[_TOKEN_];
11329 35         59 my $rflags = $weld_nested_exclusion_rules{$token};
11330 35 100       120 return 0 unless ( defined($rflags) );
11331 14 100       46 my $flag = $is_leading ? $rflags->[0] : $rflags->[1];
11332 14 100       55 return 0 unless ( defined($flag) );
11333 10 100       36 return 1 if $flag eq '*';
11334 6         12 my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
11335 6         21 return $self->match_paren_control_flag( $seqno, $flag );
11336             } ## end sub is_excluded_weld
11337              
11338             # hashes to simplify welding logic
11339             my %type_ok_after_bareword;
11340             my %has_tight_paren;
11341              
11342             BEGIN {
11343              
11344             # types needed for welding RULE 6
11345 38     38   317 my @q = qw# => -> { ( [ #;
11346 38         247 @type_ok_after_bareword{@q} = (1) x scalar(@q);
11347              
11348             # these types do not 'like' to be separated from a following paren
11349 38         159 @q = qw(w i q Q G C Z U);
11350 38         1398 @{has_tight_paren}{@q} = (1) x scalar(@q);
11351             } ## end BEGIN
11352              
11353 38     38   425 use constant DEBUG_WELD => 0;
  38         2651  
  38         252983  
11354              
11355             sub setup_new_weld_measurements {
11356              
11357             # Define quantities to check for excess line lengths when welded.
11358             # Called by sub 'weld_nested_containers' and sub 'weld_nested_quotes'
11359              
11360 55     55 0 134 my ( $self, $Kouter_opening, $Kinner_opening ) = @_;
11361              
11362             # Given indexes of outer and inner opening containers to be welded:
11363             # $Kouter_opening, $Kinner_opening
11364              
11365             # Returns these variables:
11366             # $new_weld_ok = true (new weld ok) or false (do not start new weld)
11367             # $starting_indent = starting indentation
11368             # $starting_lentot = starting cumulative length
11369             # $msg = diagnostic message for debugging
11370              
11371 55         110 my $rLL = $self->[_rLL_];
11372 55         105 my $rlines = $self->[_rlines_];
11373              
11374 55         198 my $starting_level;
11375             my $starting_ci;
11376 55         0 my $starting_lentot;
11377 55         0 my $maximum_text_length;
11378 55         97 my $msg = EMPTY_STRING;
11379              
11380 55         112 my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
11381 55         115 my $rK_range = $rlines->[$iline_oo]->{_rK_range};
11382 55         116 my ( $Kfirst, $Klast ) = @{$rK_range};
  55         141  
11383              
11384             #-------------------------------------------------------------------------
11385             # We now define a reference index, '$Kref', from which to start measuring
11386             # This choice turns out to be critical for keeping welds stable during
11387             # iterations, so we go through a number of STEPS...
11388             #-------------------------------------------------------------------------
11389              
11390             # STEP 1: Our starting guess is to use measure from the first token of the
11391             # current line. This is usually a good guess.
11392 55         104 my $Kref = $Kfirst;
11393              
11394             # STEP 2: See if we should go back a little farther
11395 55         174 my $Kprev = $self->K_previous_nonblank($Kfirst);
11396 55 100       236 if ( defined($Kprev) ) {
11397              
11398             # Avoid measuring from between an opening paren and a previous token
11399             # which should stay close to it ... fixes b1185
11400 46         120 my $token_oo = $rLL->[$Kouter_opening]->[_TOKEN_];
11401 46         114 my $type_prev = $rLL->[$Kprev]->[_TYPE_];
11402 46 100 100     390 if ( $Kouter_opening == $Kfirst
    50 66        
      33        
11403             && $token_oo eq '('
11404             && $has_tight_paren{$type_prev} )
11405             {
11406 1         4 $Kref = $Kprev;
11407             }
11408              
11409             # Back up and count length from a token like '=' or '=>' if -lp
11410             # is used (this fixes b520)
11411             # ...or if a break is wanted before there
11412             elsif ($rOpts_line_up_parentheses
11413             || $want_break_before{$type_prev} )
11414             {
11415              
11416             # If there are other sequence items between the start of this line
11417             # and the opening token in question, then do not include tokens on
11418             # the previous line in length calculations. This check added to
11419             # fix case b1174 which had a '?' on the line
11420 0   0     0 my $no_previous_seq_item = $Kref == $Kouter_opening
11421             || $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_] == $Kouter_opening;
11422              
11423 0 0 0     0 if ( $no_previous_seq_item
11424             && substr( $type_prev, 0, 1 ) eq '=' )
11425             {
11426 0         0 $Kref = $Kprev;
11427              
11428             # Fix for b1144 and b1112: backup to the first nonblank
11429             # character before the =>, or to the start of its line.
11430 0 0       0 if ( $type_prev eq '=>' ) {
11431 0         0 my $iline_prev = $rLL->[$Kprev]->[_LINE_INDEX_];
11432 0         0 my $rK_range_prev = $rlines->[$iline_prev]->{_rK_range};
11433 0         0 my ( $Kfirst_prev, $Klast_prev ) = @{$rK_range_prev};
  0         0  
11434 0         0 foreach my $KK ( reverse( $Kfirst_prev .. $Kref - 1 ) ) {
11435 0 0       0 next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
11436 0         0 $Kref = $KK;
11437 0         0 last;
11438             }
11439             }
11440             }
11441             }
11442             }
11443              
11444             # STEP 3: Now look ahead for a ternary and, if found, use it.
11445             # This fixes case b1182.
11446             # Also look for a ')' at the same level and, if found, use it.
11447             # This fixes case b1224.
11448 55 100       194 if ( $Kref < $Kouter_opening ) {
11449 49         113 my $Knext = $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_];
11450 49         114 my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
11451 49         160 while ( $Knext < $Kouter_opening ) {
11452 14 100       67 if ( $rLL->[$Knext]->[_LEVEL_] == $level_oo ) {
11453 8 100 66     70 if ( $is_ternary{ $rLL->[$Knext]->[_TYPE_] }
11454             || $rLL->[$Knext]->[_TOKEN_] eq ')' )
11455             {
11456 4         9 $Kref = $Knext;
11457 4         8 last;
11458             }
11459             }
11460 10         25 $Knext = $rLL->[$Knext]->[_KNEXT_SEQ_ITEM_];
11461             }
11462             }
11463              
11464             # Define the starting measurements we will need
11465             $starting_lentot =
11466 55 100       192 $Kref <= 0 ? 0 : $rLL->[ $Kref - 1 ]->[_CUMULATIVE_LENGTH_];
11467 55         140 $starting_level = $rLL->[$Kref]->[_LEVEL_];
11468 55         119 $starting_ci = $rLL->[$Kref]->[_CI_LEVEL_];
11469              
11470 55         137 $maximum_text_length = $maximum_text_length_at_level[$starting_level] -
11471             $starting_ci * $rOpts_continuation_indentation;
11472              
11473             # STEP 4: Switch to using the outer opening token as the reference
11474             # point if a line break before it would make a longer line.
11475             # Fixes case b1055 and is also an alternate fix for b1065.
11476 55         100 my $starting_level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
11477 55 100       158 if ( $Kref < $Kouter_opening ) {
11478 49         106 my $starting_ci_oo = $rLL->[$Kouter_opening]->[_CI_LEVEL_];
11479 49         123 my $lentot_oo = $rLL->[ $Kouter_opening - 1 ]->[_CUMULATIVE_LENGTH_];
11480 49         105 my $maximum_text_length_oo =
11481             $maximum_text_length_at_level[$starting_level_oo] -
11482             $starting_ci_oo * $rOpts_continuation_indentation;
11483              
11484             # The excess length to any cumulative length K = lenK is either
11485             # $excess = $lenk - ($lentot + $maximum_text_length), or
11486             # $excess = $lenk - ($lentot_oo + $maximum_text_length_oo),
11487             # so the worst case (maximum excess) corresponds to the configuration
11488             # with minimum value of the sum: $lentot + $maximum_text_length
11489 49 100       152 if ( $lentot_oo + $maximum_text_length_oo <
11490             $starting_lentot + $maximum_text_length )
11491             {
11492 1         2 $Kref = $Kouter_opening;
11493 1         2 $starting_level = $starting_level_oo;
11494 1         2 $starting_ci = $starting_ci_oo;
11495 1         1 $starting_lentot = $lentot_oo;
11496 1         3 $maximum_text_length = $maximum_text_length_oo;
11497             }
11498             }
11499              
11500 55         95 my $new_weld_ok = 1;
11501              
11502             # STEP 5, fix b1020: Avoid problem areas with the -wn -lp combination. The
11503             # combination -wn -lp -dws -naws does not work well and can cause blinkers.
11504             # It will probably only occur in stress testing. For this situation we
11505             # will only start a new weld if we start at a 'good' location.
11506             # - Added 'if' to fix case b1032.
11507             # - Require blank before certain previous characters to fix b1111.
11508             # - Add ';' to fix case b1139
11509             # - Convert from '$ok_to_weld' to '$new_weld_ok' to fix b1162.
11510             # - relaxed constraints for b1227
11511             # - added skip if type is 'q' for b1349 and b1350 b1351 b1352 b1353
11512             # - added skip if type is 'Q' for b1447
11513 55 0 66     211 if ( $starting_ci
      33        
      33        
      0        
      0        
      0        
11514             && $rOpts_line_up_parentheses
11515             && $rOpts_delete_old_whitespace
11516             && !$rOpts_add_whitespace
11517             && $rLL->[$Kinner_opening]->[_TYPE_] ne 'q'
11518             && $rLL->[$Kinner_opening]->[_TYPE_] ne 'Q'
11519             && defined($Kprev) )
11520             {
11521 0         0 my $type_first = $rLL->[$Kfirst]->[_TYPE_];
11522 0         0 my $token_first = $rLL->[$Kfirst]->[_TOKEN_];
11523 0         0 my $type_prev = $rLL->[$Kprev]->[_TYPE_];
11524 0         0 my $type_pp = 'b';
11525 0 0       0 if ( $Kprev >= 0 ) { $type_pp = $rLL->[ $Kprev - 1 ]->[_TYPE_] }
  0         0  
11526 0 0 0     0 unless (
      0        
      0        
      0        
      0        
      0        
      0        
      0        
11527             $type_prev =~ /^[\,\.\;]/
11528             || $type_prev =~ /^[=\{\[\(\L]/
11529             && ( $type_pp eq 'b' || $type_pp eq '}' || $type_first eq 'k' )
11530             || $type_first =~ /^[=\,\.\;\{\[\(\L]/
11531             || $type_first eq '||'
11532             || (
11533             $type_first eq 'k'
11534             && ( $token_first eq 'if'
11535             || $token_first eq 'or' )
11536             )
11537             )
11538             {
11539 0         0 $msg =
11540             "Skipping weld: poor break with -lp and ci at type_first='$type_first' type_prev='$type_prev' type_pp=$type_pp\n";
11541 0         0 $new_weld_ok = 0;
11542             }
11543             }
11544 55         222 return ( $new_weld_ok, $maximum_text_length, $starting_lentot, $msg );
11545             } ## end sub setup_new_weld_measurements
11546              
11547             sub excess_line_length_for_Krange {
11548 10     10 0 48 my ( $self, $Kfirst, $Klast ) = @_;
11549              
11550             # returns $excess_length =
11551             # by how many characters a line composed of tokens $Kfirst .. $Klast will
11552             # exceed the allowed line length
11553              
11554 10         29 my $rLL = $self->[_rLL_];
11555 10 50       59 my $length_before_Kfirst =
11556             $Kfirst <= 0
11557             ? 0
11558             : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
11559              
11560             # backup before a side comment if necessary
11561 10         24 my $Kend = $Klast;
11562 10 50 33     39 if ( $rOpts_ignore_side_comment_lengths
11563             && $rLL->[$Klast]->[_TYPE_] eq '#' )
11564             {
11565 0         0 my $Kprev = $self->K_previous_nonblank($Klast);
11566 0 0 0     0 if ( defined($Kprev) && $Kprev >= $Kfirst ) { $Kend = $Kprev }
  0         0  
11567             }
11568              
11569             # get the length of the text
11570 10         32 my $length = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - $length_before_Kfirst;
11571              
11572             # get the size of the text window
11573 10         27 my $level = $rLL->[$Kfirst]->[_LEVEL_];
11574 10         25 my $ci_level = $rLL->[$Kfirst]->[_CI_LEVEL_];
11575 10         30 my $max_text_length = $maximum_text_length_at_level[$level] -
11576             $ci_level * $rOpts_continuation_indentation;
11577              
11578 10         23 my $excess_length = $length - $max_text_length;
11579              
11580 10         18 DEBUG_WELD
11581             && print
11582             "Kfirst=$Kfirst, Klast=$Klast, Kend=$Kend, level=$level, ci=$ci_level, max_text_length=$max_text_length, length=$length\n";
11583 10         39 return ($excess_length);
11584             } ## end sub excess_line_length_for_Krange
11585              
11586             sub weld_nested_containers {
11587 23     23 0 73 my ($self) = @_;
11588              
11589             # Called once per file for option '--weld-nested-containers'
11590              
11591 23         78 my $rK_weld_left = $self->[_rK_weld_left_];
11592 23         67 my $rK_weld_right = $self->[_rK_weld_right_];
11593              
11594             # This routine implements the -wn flag by "welding together"
11595             # the nested closing and opening tokens which were previously
11596             # identified by sub 'find_nested_pairs'. "welding" simply
11597             # involves setting certain hash values which will be checked
11598             # later during formatting.
11599              
11600 23         63 my $rLL = $self->[_rLL_];
11601 23         64 my $rlines = $self->[_rlines_];
11602 23         103 my $K_opening_container = $self->[_K_opening_container_];
11603 23         69 my $K_closing_container = $self->[_K_closing_container_];
11604 23         58 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
11605 23         61 my $ris_asub_block = $self->[_ris_asub_block_];
11606 23         61 my $rmax_vertical_tightness = $self->[_rmax_vertical_tightness_];
11607              
11608 23         64 my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
11609              
11610             # Find nested pairs of container tokens for any welding.
11611 23         154 my $rnested_pairs = $self->find_nested_pairs();
11612              
11613             # Return unless there are nested pairs to weld
11614 23 100 66     133 return unless defined($rnested_pairs) && @{$rnested_pairs};
  23         111  
11615              
11616             # NOTE: It would be nice to apply RULE 5 right here by deleting unwanted
11617             # pairs. But it isn't clear if this is possible because we don't know
11618             # which sequences might actually start a weld.
11619              
11620             my $rOpts_break_at_old_method_breakpoints =
11621 22         84 $rOpts->{'break-at-old-method-breakpoints'};
11622              
11623             # This array will hold the sequence numbers of the tokens to be welded.
11624 22         82 my @welds;
11625              
11626             # Variables needed for estimating line lengths
11627             my $maximum_text_length; # maximum spaces available for text
11628 22         0 my $starting_lentot; # cumulative text to start of current line
11629              
11630 22         50 my $iline_outer_opening = -1;
11631 22         57 my $weld_count_this_start = 0;
11632 22         51 my $weld_starts_in_block = 0;
11633              
11634             # OLD: $single_line_tol added to fix cases b1180 b1181
11635             # = $rOpts_continuation_indentation > $rOpts_indent_columns ? 1 : 0;
11636             # NEW: $single_line_tol=0 fixes b1212; and b1180-1181 work ok now
11637             # =1 for -vmll and -lp; fixes b1452, b1453, b1454
11638             # NOTE: the combination -vmll and -lp can be unstable, especially when
11639             # also combined with -wn. It may eventually be necessary to turn off -vmll
11640             # if -lp is set. For now, this works. The value '1' is a minimum which
11641             # works but can be increased if necessary.
11642 22 50 33     108 my $single_line_tol =
11643             $rOpts_variable_maximum_line_length && $rOpts_line_up_parentheses
11644             ? 1
11645             : 0;
11646              
11647 22         124 my $multiline_tol = $single_line_tol + 1 +
11648             max( $rOpts_indent_columns, $rOpts_continuation_indentation );
11649              
11650             # Define a welding cutoff level: do not start a weld if the inside
11651             # container level equals or exceeds this level.
11652              
11653             # We use the minimum of two criteria, either of which may be more
11654             # restrictive. The 'alpha' value is more restrictive in (b1206, b1252) and
11655             # the 'beta' value is more restrictive in other cases (b1243).
11656             # Reduced beta term from beta+3 to beta+2 to fix b1401. Previously:
11657             # my $weld_cutoff_level = min($stress_level_alpha, $stress_level_beta + 2);
11658             # This is now '$high_stress_level'.
11659              
11660             # The vertical tightness flags can throw off line length calculations.
11661             # This patch was added to fix instability issue b1284.
11662             # It works to always use a tol of 1 for 1 line block length tests, but
11663             # this restricted value keeps test case wn6.wn working as before.
11664             # It may be necessary to include '[' and '{' here in the future.
11665 22 50       153 my $one_line_tol = $opening_vertical_tightness{'('} ? 1 : 0;
11666              
11667             # Abbreviations:
11668             # _oo=outer opening, i.e. first of { {
11669             # _io=inner opening, i.e. second of { {
11670             # _oc=outer closing, i.e. second of } {
11671             # _ic=inner closing, i.e. first of } }
11672              
11673 22         53 my $previous_pair;
11674              
11675             # Main loop over nested pairs...
11676             # We are working from outermost to innermost pairs so that
11677             # level changes will be complete when we arrive at the inner pairs.
11678 22         74 while ( my $item = pop( @{$rnested_pairs} ) ) {
  79         310  
11679 57         95 my ( $inner_seqno, $outer_seqno ) = @{$item};
  57         159  
11680              
11681 57         121 my $Kouter_opening = $K_opening_container->{$outer_seqno};
11682 57         112 my $Kinner_opening = $K_opening_container->{$inner_seqno};
11683 57         120 my $Kouter_closing = $K_closing_container->{$outer_seqno};
11684 57         122 my $Kinner_closing = $K_closing_container->{$inner_seqno};
11685              
11686             # RULE: do not weld if inner container has <= 3 tokens unless the next
11687             # token is a heredoc (so we know there will be multiple lines)
11688 57 100       186 if ( $Kinner_closing - $Kinner_opening <= 4 ) {
11689 4         22 my $Knext_nonblank = $self->K_next_nonblank($Kinner_opening);
11690 4 50       31 next unless defined($Knext_nonblank);
11691 4         19 my $type = $rLL->[$Knext_nonblank]->[_TYPE_];
11692 4 50       26 next unless ( $type eq 'h' );
11693             }
11694              
11695 53         123 my $outer_opening = $rLL->[$Kouter_opening];
11696 53         95 my $inner_opening = $rLL->[$Kinner_opening];
11697 53         111 my $outer_closing = $rLL->[$Kouter_closing];
11698 53         99 my $inner_closing = $rLL->[$Kinner_closing];
11699              
11700             # RULE: do not weld to a hash brace. The reason is that it has a very
11701             # strong bond strength to the next token, so a line break after it
11702             # may not work. Previously we allowed welding to something like @{
11703             # but that caused blinking states (cases b751, b779).
11704 53 100       159 if ( $inner_opening->[_TYPE_] eq 'L' ) {
11705 1         3 next;
11706             }
11707              
11708             # RULE: do not weld to a square bracket which does not contain commas
11709 52 50       169 if ( $inner_opening->[_TYPE_] eq '[' ) {
11710 0         0 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$inner_seqno};
11711 0 0 0     0 next unless ( $rtype_count && $rtype_count->{','} );
11712              
11713             # Do not weld if there is text before a '[' such as here:
11714             # curr_opt ( @beg [2,5] )
11715             # It will not break into the desired sandwich structure.
11716             # This fixes case b109, 110.
11717 0         0 my $Kdiff = $Kinner_opening - $Kouter_opening;
11718 0 0       0 next if ( $Kdiff > 2 );
11719             next
11720 0 0 0     0 if ( $Kdiff == 2
11721             && $rLL->[ $Kouter_opening + 1 ]->[_TYPE_] ne 'b' );
11722              
11723             }
11724              
11725             # RULE: Avoid welding under stress. The idea is that we need to have a
11726             # little space* within a welded container to avoid instability. Note
11727             # that after each weld the level values are reduced, so long multiple
11728             # welds can still be made. This rule will seldom be a limiting factor
11729             # in actual working code. Fixes b1206, b1243.
11730 52         112 my $inner_level = $inner_opening->[_LEVEL_];
11731 52 50       153 if ( $inner_level >= $high_stress_level ) { next }
  0         0  
11732              
11733             # Set flag saying if this pair starts a new weld
11734 52   100     296 my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
11735              
11736             # Set flag saying if this pair is adjacent to the previous nesting pair
11737             # (even if previous pair was rejected as a weld)
11738 52   100     211 my $touch_previous_pair =
11739             defined($previous_pair) && $outer_seqno == $previous_pair->[0];
11740 52         101 $previous_pair = $item;
11741              
11742 52         100 my $do_not_weld_rule = 0;
11743 52         98 my $Msg = EMPTY_STRING;
11744 52         83 my $is_one_line_weld;
11745              
11746 52         111 my $iline_oo = $outer_opening->[_LINE_INDEX_];
11747 52         103 my $iline_io = $inner_opening->[_LINE_INDEX_];
11748 52         93 my $iline_ic = $inner_closing->[_LINE_INDEX_];
11749 52         101 my $iline_oc = $outer_closing->[_LINE_INDEX_];
11750 52         126 my $token_oo = $outer_opening->[_TOKEN_];
11751 52         109 my $token_io = $inner_opening->[_TOKEN_];
11752              
11753             # DO-NOT-WELD RULE 7: Do not weld if this conflicts with -bom
11754             # Added for case b973. Moved here from below to fix b1423.
11755 52 50 66     286 if ( !$do_not_weld_rule
      66        
11756             && $rOpts_break_at_old_method_breakpoints
11757             && $iline_io > $iline_oo )
11758             {
11759              
11760 0         0 foreach my $iline ( $iline_oo + 1 .. $iline_io ) {
11761 0         0 my $rK_range = $rlines->[$iline]->{_rK_range};
11762 0 0       0 next unless defined($rK_range);
11763 0         0 my ( $Kfirst, $Klast ) = @{$rK_range};
  0         0  
11764 0 0       0 next unless defined($Kfirst);
11765 0 0       0 if ( $rLL->[$Kfirst]->[_TYPE_] eq '->' ) {
11766 0         0 $do_not_weld_rule = 7;
11767 0         0 last;
11768             }
11769             }
11770             }
11771 52 50       146 next if ($do_not_weld_rule);
11772              
11773             # Turn off vertical tightness at possible one-line welds. Fixes b1402,
11774             # b1419, b1421, b1424, b1425. This also fixes issues b1338, b1339,
11775             # b1340, b1341, b1342, b1343, which previously used a separate fix.
11776             # Issue c161 is the latest and simplest check, using
11777             # $iline_ic==$iline_io as the test.
11778 52 50 66     271 if ( %opening_vertical_tightness
      66        
11779             && $iline_ic == $iline_io
11780             && $opening_vertical_tightness{$token_oo} )
11781             {
11782 0         0 $rmax_vertical_tightness->{$outer_seqno} = 0;
11783             }
11784              
11785 52   100     271 my $is_multiline_weld =
11786             $iline_oo == $iline_io
11787             && $iline_ic == $iline_oc
11788             && $iline_io != $iline_ic;
11789              
11790 52         91 if (DEBUG_WELD) {
11791             my $len_oo = $rLL->[$Kouter_opening]->[_CUMULATIVE_LENGTH_];
11792             my $len_io = $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_];
11793             $Msg .= <<EOM;
11794             Pair seqo=$outer_seqno seqi=$inner_seqno lines: loo=$iline_oo lio=$iline_io lic=$iline_ic loc=$iline_oc
11795             Koo=$Kouter_opening Kio=$Kinner_opening Kic=$Kinner_closing Koc=$Kouter_closing lenoo=$len_oo lenio=$len_io
11796             tokens '$token_oo' .. '$token_io'
11797             EOM
11798             }
11799              
11800             # DO-NOT-WELD RULE 0:
11801             # Avoid a new paren-paren weld if inner parens are 'sheared' (separated
11802             # by one line). This can produce instabilities (fixes b1250 b1251
11803             # 1256).
11804 52 0 66     270 if ( !$is_multiline_weld
      33        
      33        
11805             && $iline_ic == $iline_io + 1
11806             && $token_oo eq '('
11807             && $token_io eq '(' )
11808             {
11809 0         0 if (DEBUG_WELD) {
11810             $Msg .= "RULE 0: Not welding due to sheared inner parens\n";
11811             print $Msg;
11812             }
11813 0         0 next;
11814             }
11815              
11816             # If this pair is not adjacent to the previous pair (skipped or not),
11817             # then measure lengths from the start of line of oo.
11818 52 100 33     238 if (
      66        
11819             !$touch_previous_pair
11820              
11821             # Also do this if restarting at a new line; fixes case b965, s001
11822             || ( !$weld_count_this_start && $iline_oo > $iline_outer_opening )
11823             )
11824             {
11825              
11826             # Remember the line we are using as a reference
11827 48         91 $iline_outer_opening = $iline_oo;
11828 48         93 $weld_count_this_start = 0;
11829 48         103 $weld_starts_in_block = 0;
11830              
11831 48         224 ( my $new_weld_ok, $maximum_text_length, $starting_lentot, my $msg )
11832             = $self->setup_new_weld_measurements( $Kouter_opening,
11833             $Kinner_opening );
11834              
11835 48 0 0     170 if (
      33        
11836             !$new_weld_ok
11837             && ( $iline_oo != $iline_io
11838             || $iline_ic != $iline_oc )
11839             )
11840             {
11841 0         0 if (DEBUG_WELD) { print $msg}
11842 0         0 next;
11843             }
11844              
11845 48         121 my $rK_range = $rlines->[$iline_oo]->{_rK_range};
11846 48         88 my ( $Kfirst, $Klast ) = @{$rK_range};
  48         159  
11847              
11848             # An existing one-line weld is a line in which
11849             # (1) the containers are all on one line, and
11850             # (2) the line does not exceed the allowable length
11851 48 100       165 if ( $iline_oo == $iline_oc ) {
11852              
11853             # All the tokens are on one line, now check their length.
11854             # Start with the full line index range. We will reduce this
11855             # in the coding below in some cases.
11856 4         14 my $Kstart = $Kfirst;
11857 4         12 my $Kstop = $Klast;
11858              
11859             # Note that the following minimal choice for measuring will
11860             # work and will not cause any instabilities because it is
11861             # invariant:
11862              
11863             ## my $Kstart = $Kouter_opening;
11864             ## my $Kstop = $Kouter_closing;
11865              
11866             # But that can lead to some undesirable welds. So a little
11867             # more complicated method has been developed.
11868              
11869             # We are trying to avoid creating bad two-line welds when we are
11870             # working on long, previously un-welded input text, such as
11871              
11872             # INPUT (example of a long input line weld candidate):
11873             ## $mutation->transpos( $self->RNA->position($mutation->label, $atg_label));
11874              
11875             # GOOD two-line break: (not welded; result marked too long):
11876             ## $mutation->transpos(
11877             ## $self->RNA->position($mutation->label, $atg_label));
11878              
11879             # BAD two-line break: (welded; result if we weld):
11880             ## $mutation->transpos($self->RNA->position(
11881             ## $mutation->label, $atg_label));
11882              
11883             # We can only get an approximate estimate of the final length,
11884             # since the line breaks may change, and for -lp mode because
11885             # even the indentation is not yet known.
11886              
11887 4         14 my $level_first = $rLL->[$Kfirst]->[_LEVEL_];
11888 4         22 my $level_last = $rLL->[$Klast]->[_LEVEL_];
11889 4         14 my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
11890 4         12 my $level_oc = $rLL->[$Kouter_closing]->[_LEVEL_];
11891              
11892             # - measure to the end of the original line if balanced
11893             # - measure to the closing container if unbalanced (fixes b1230)
11894             #if ( $level_first != $level_last ) { $Kstop = $Kouter_closing }
11895 4 100       20 if ( $level_oc > $level_last ) { $Kstop = $Kouter_closing }
  1         8  
11896              
11897             # - measure from the start of the original line if balanced
11898             # - measure from the most previous token with same level
11899             # if unbalanced (b1232)
11900 4 100 100     34 if ( $Kouter_opening > $Kfirst && $level_oo > $level_first ) {
11901 1         4 $Kstart = $Kouter_opening;
11902              
11903 1         5 foreach
11904             my $KK ( reverse( $Kfirst + 1 .. $Kouter_opening - 1 ) )
11905             {
11906 1 50       12 next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
11907 1 50       6 last if ( $rLL->[$KK]->[_LEVEL_] < $level_oo );
11908 0         0 $Kstart = $KK;
11909             }
11910             }
11911              
11912 4         20 my $excess =
11913             $self->excess_line_length_for_Krange( $Kstart, $Kstop );
11914              
11915             # Coding simplified here for case b1219.
11916             # Increased tol from 0 to 1 when pvt>0 to fix b1284.
11917 4         14 $is_one_line_weld = $excess <= $one_line_tol;
11918             }
11919              
11920             # DO-NOT-WELD RULE 1:
11921             # Do not weld something that looks like the start of a two-line
11922             # function call, like this: <<snippets/wn6.in>>
11923             # $trans->add_transformation(
11924             # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
11925             # We will look for a semicolon after the closing paren.
11926              
11927             # We want to weld something complex, like this though
11928             # my $compass = uc( opposite_direction( line_to_canvas_direction(
11929             # @{ $coords[0] }, @{ $coords[1] } ) ) );
11930             # Otherwise we will get a 'blinker'. For example, the following
11931             # would become a blinker without this rule:
11932             # $Self->_Add( $SortOrderDisplay{ $Field
11933             # ->GenerateFieldForSelectSQL() } );
11934             # But it is okay to weld a two-line statement if it looks like
11935             # it was already welded, meaning that the two opening containers are
11936             # on a different line that the two closing containers. This is
11937             # necessary to prevent blinking of something like this with
11938             # perltidy -wn -pbp (starting indentation two levels deep):
11939              
11940             # $top_label->set_text( gettext(
11941             # "Unable to create personal directory - check permissions.") );
11942 48 100 100     212 if ( $iline_oc == $iline_oo + 1
      66        
11943             && $iline_io == $iline_ic
11944             && $token_oo eq '(' )
11945             {
11946              
11947             # Look for following semicolon...
11948 1         6 my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
11949 1 50       6 my $next_nonblank_type =
11950             defined($Knext_nonblank)
11951             ? $rLL->[$Knext_nonblank]->[_TYPE_]
11952             : 'b';
11953 1 50       5 if ( $next_nonblank_type eq ';' ) {
11954              
11955             # Then do not weld if no other containers between inner
11956             # opening and closing.
11957 1         3 my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
11958 1 50       11 if ( $Knext_seq_item == $Kinner_closing ) {
11959 0         0 $do_not_weld_rule = 1;
11960             }
11961             }
11962             }
11963             } ## end starting new weld sequence
11964              
11965             else {
11966              
11967             # set the 1-line flag if continuing a weld sequence; fixes b1239
11968 4         13 $is_one_line_weld = ( $iline_oo == $iline_oc );
11969             }
11970              
11971             # DO-NOT-WELD RULE 2:
11972             # Do not weld an opening paren to an inner one line brace block
11973             # We will just use old line numbers for this test and require
11974             # iterations if necessary for convergence
11975              
11976             # For example, otherwise we could cause the opening paren
11977             # in the following example to separate from the caller name
11978             # as here:
11979              
11980             # $_[0]->code_handler
11981             # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
11982              
11983             # Here is another example where we do not want to weld:
11984             # $wrapped->add_around_modifier(
11985             # sub { push @tracelog => 'around 1'; $_[0]->(); } );
11986              
11987             # If the one line sub block gets broken due to length or by the
11988             # user, then we can weld. The result will then be:
11989             # $wrapped->add_around_modifier( sub {
11990             # push @tracelog => 'around 1';
11991             # $_[0]->();
11992             # } );
11993              
11994             # Updated to fix cases b1082 b1102 b1106 b1115:
11995             # Also, do not weld to an intact inner block if the outer opening token
11996             # is on a different line. For example, this prevents oscillation
11997             # between these two states in case b1106:
11998              
11999             # return map{
12000             # ($_,[$self->$_(@_[1..$#_])])
12001             # }@every;
12002              
12003             # return map { (
12004             # $_, [ $self->$_( @_[ 1 .. $#_ ] ) ]
12005             # ) } @every;
12006              
12007             # The effect of this change on typical code is very minimal. Sometimes
12008             # it may take a second iteration to converge, but this gives protection
12009             # against blinking.
12010 52 100 66     387 if ( !$do_not_weld_rule
      100        
12011             && !$is_one_line_weld
12012             && $iline_ic == $iline_io )
12013             {
12014 6 50 66     38 $do_not_weld_rule = 2
12015             if ( $token_oo eq '(' || $iline_oo != $iline_io );
12016             }
12017              
12018             # DO-NOT-WELD RULE 2A:
12019             # Do not weld an opening asub brace in -lp mode if -asbl is set. This
12020             # helps avoid instabilities in one-line block formation, and fixes
12021             # b1241. Previously, the '$is_one_line_weld' flag was tested here
12022             # instead of -asbl, and this fixed most cases. But it turns out that
12023             # the real problem was the -asbl flag, and switching to this was
12024             # necessary to fixe b1268. This also fixes b1269, b1277, b1278.
12025 52 0 66     244 if ( !$do_not_weld_rule
      33        
      33        
12026             && $rOpts_line_up_parentheses
12027             && $rOpts_asbl
12028             && $ris_asub_block->{$outer_seqno} )
12029             {
12030 0         0 $do_not_weld_rule = '2A';
12031             }
12032              
12033             # DO-NOT-WELD RULE 3:
12034             # Do not weld if this makes our line too long.
12035             # Use a tolerance which depends on if the old tokens were welded
12036             # (fixes cases b746 b748 b749 b750 b752 b753 b754 b755 b756 b758 b759)
12037 52 100       140 if ( !$do_not_weld_rule ) {
12038              
12039             # Measure to a little beyond the inner opening token if it is
12040             # followed by a bare word, which may have unusual line break rules.
12041              
12042             # NOTE: Originally this was OLD RULE 6: do not weld to a container
12043             # which is followed on the same line by an unknown bareword token.
12044             # This can cause blinkers (cases b626, b611). But OK to weld one
12045             # line welds to fix cases b1057 b1064. For generality, OLD RULE 6
12046             # has been merged into RULE 3 here to also fix cases b1078 b1091.
12047              
12048 46         85 my $K_for_length = $Kinner_opening;
12049 46         157 my $Knext_io = $self->K_next_nonblank($Kinner_opening);
12050 46 50       171 next unless ( defined($Knext_io) ); # shouldn't happen
12051 46         131 my $type_io_next = $rLL->[$Knext_io]->[_TYPE_];
12052              
12053             # Note: may need to eventually also include other types here,
12054             # such as 'Z' and 'Y': if ($type_io_next =~ /^[ZYw]$/) {
12055 46 100       149 if ( $type_io_next eq 'w' ) {
12056 7         28 my $Knext_io2 = $self->K_next_nonblank($Knext_io);
12057 7 50       38 next unless ( defined($Knext_io2) );
12058 7         23 my $type_io_next2 = $rLL->[$Knext_io2]->[_TYPE_];
12059 7 50       32 if ( !$type_ok_after_bareword{$type_io_next2} ) {
12060 0         0 $K_for_length = $Knext_io2;
12061             }
12062             }
12063              
12064             # Use a tolerance for welds over multiple lines to avoid blinkers.
12065             # We can use zero tolerance if it looks like we are working on an
12066             # existing weld.
12067 46 100 100     221 my $tol =
12068             $is_one_line_weld || $is_multiline_weld
12069             ? $single_line_tol
12070             : $multiline_tol;
12071              
12072             # By how many characters does this exceed the text window?
12073 46         153 my $excess =
12074             $self->cumulative_length_before_K($K_for_length) -
12075             $starting_lentot + 1 + $tol -
12076             $maximum_text_length;
12077              
12078             # Old patch: Use '>=0' instead of '> 0' here to fix cases b995 b998
12079             # b1000 b1001 b1007 b1008 b1009 b1010 b1011 b1012 b1016 b1017 b1018
12080             # Revised patch: New tolerance definition allows going back to '> 0'
12081             # here. This fixes case b1124. See also cases b1087 and b1087a.
12082 46 50       140 if ( $excess > 0 ) { $do_not_weld_rule = 3 }
  0         0  
12083              
12084 46         76 if (DEBUG_WELD) {
12085             $Msg .=
12086             "RULE 3 test: excess length to K=$Kinner_opening is $excess > 0 with tol= $tol ?) \n";
12087             }
12088             }
12089              
12090             # DO-NOT-WELD RULE 4; implemented for git#10:
12091             # Do not weld an opening -ce brace if the next container is on a single
12092             # line, different from the opening brace. (This is very rare). For
12093             # example, given the following with -ce, we will avoid joining the {
12094             # and [
12095              
12096             # } else {
12097             # [ $_, length($_) ]
12098             # }
12099              
12100             # because this would produce a terminal one-line block:
12101              
12102             # } else { [ $_, length($_) ] }
12103              
12104             # which may not be what is desired. But given this input:
12105              
12106             # } else { [ $_, length($_) ] }
12107              
12108             # then we will do the weld and retain the one-line block
12109 52 100 100     284 if ( !$do_not_weld_rule && $rOpts->{'cuddled-else'} ) {
12110 2         6 my $block_type = $rblock_type_of_seqno->{$outer_seqno};
12111 2 100 66     10 if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
12112 1         3 my $io_line = $inner_opening->[_LINE_INDEX_];
12113 1         1 my $ic_line = $inner_closing->[_LINE_INDEX_];
12114 1         2 my $oo_line = $outer_opening->[_LINE_INDEX_];
12115 1 50 33     81 if ( $oo_line < $io_line && $ic_line == $io_line ) {
12116 0         0 $do_not_weld_rule = 4;
12117             }
12118             }
12119             }
12120              
12121             # DO-NOT-WELD RULE 5: do not include welds excluded by user
12122 52 100 100     265 if (
      100        
      100        
12123             !$do_not_weld_rule
12124             && %weld_nested_exclusion_rules
12125             && ( $self->is_excluded_weld( $Kouter_opening, $starting_new_weld )
12126             || $self->is_excluded_weld( $Kinner_opening, 0 ) )
12127             )
12128             {
12129 6         12 $do_not_weld_rule = 5;
12130             }
12131              
12132             # DO-NOT-WELD RULE 6: This has been merged into RULE 3 above.
12133              
12134 52 100       212 if ($do_not_weld_rule) {
    100          
12135              
12136             # After neglecting a pair, we start measuring from start of point
12137             # io ... but not if previous type does not like to be separated
12138             # from its container (fixes case b1184)
12139 12         43 my $Kprev = $self->K_previous_nonblank($Kinner_opening);
12140 12 50       66 my $type_prev = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'w';
12141 12 100       52 if ( !$has_tight_paren{$type_prev} ) {
12142 11         25 my $starting_level = $inner_opening->[_LEVEL_];
12143 11         26 my $starting_ci_level = $inner_opening->[_CI_LEVEL_];
12144 11         37 $starting_lentot =
12145             $self->cumulative_length_before_K($Kinner_opening);
12146 11         37 $maximum_text_length =
12147             $maximum_text_length_at_level[$starting_level] -
12148             $starting_ci_level * $rOpts_continuation_indentation;
12149             }
12150              
12151 12         23 if (DEBUG_WELD) {
12152             $Msg .= "Not welding due to RULE $do_not_weld_rule\n";
12153             print $Msg;
12154             }
12155              
12156             # Normally, a broken pair should not decrease indentation of
12157             # intermediate tokens:
12158             ## if ( $last_pair_broken ) { next }
12159             # However, for long strings of welded tokens, such as '{{{{{{...'
12160             # we will allow broken pairs to also remove indentation.
12161             # This will keep very long strings of opening and closing
12162             # braces from marching off to the right. We will do this if the
12163             # number of tokens in a weld before the broken weld is 4 or more.
12164             # This rule will mainly be needed for test scripts, since typical
12165             # welds have fewer than about 4 welded tokens.
12166 12 50 66     63 if ( !@welds || @{ $welds[-1] } < 4 ) { next }
  7         26  
  12         37  
12167             }
12168              
12169             # otherwise start new weld ...
12170             elsif ($starting_new_weld) {
12171 36         67 $weld_count_this_start++;
12172 36         61 if (DEBUG_WELD) {
12173             $Msg .= "Starting new weld\n";
12174             print $Msg;
12175             }
12176 36         87 push @welds, $item;
12177              
12178 36         140 my $parent_seqno = $self->parent_seqno_by_K($Kouter_closing);
12179             $weld_starts_in_block = $parent_seqno == SEQ_ROOT
12180 36   100     164 || $rblock_type_of_seqno->{$parent_seqno};
12181              
12182 36         144 $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
12183 36         121 $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
12184              
12185 36         129 $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
12186 36         161 $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
12187             }
12188              
12189             # ... or extend current weld
12190             else {
12191 4         13 $weld_count_this_start++;
12192 4         5 if (DEBUG_WELD) {
12193             $Msg .= "Extending current weld\n";
12194             print $Msg;
12195             }
12196 4         11 unshift @{ $welds[-1] }, $inner_seqno;
  4         19  
12197 4         13 $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
12198 4         17 $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
12199              
12200 4         12 $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
12201 4         10 $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
12202              
12203             # Keep a broken container broken at multiple welds. This might
12204             # also be useful for simple welds, but for now it is restricted
12205             # to multiple welds to minimize changes to existing coding. This
12206             # fixes b1429, b1430. Updated for issue c198: but allow a
12207             # line differences of 1 (simple shear) so that a simple shear
12208             # can remain or become a single line.
12209 4 100       21 if ( $iline_ic - $iline_io > 1 ) {
12210              
12211             # Only set this break if it is the last possible weld in this
12212             # chain. This will keep some extreme test cases unchanged.
12213 3   100     6 my $is_chain_end = !@{$rnested_pairs}
12214             || $rnested_pairs->[-1]->[1] != $inner_seqno;
12215 3 100       12 if ($is_chain_end) {
12216 2         7 $self->[_rbreak_container_]->{$inner_seqno} = 1;
12217             }
12218             }
12219             }
12220              
12221             # After welding, reduce the indentation level if all intermediate tokens
12222 40         109 my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
12223 40 50       137 if ( $dlevel != 0 ) {
12224 40         87 my $Kstart = $Kinner_opening;
12225 40         89 my $Kstop = $Kinner_closing;
12226 40         138 foreach my $KK ( $Kstart .. $Kstop ) {
12227 1143         1769 $rLL->[$KK]->[_LEVEL_] += $dlevel;
12228             }
12229              
12230             # Copy opening ci level to help break at = for -lp mode (case b1124)
12231 40         182 $rLL->[$Kinner_opening]->[_CI_LEVEL_] =
12232             $rLL->[$Kouter_opening]->[_CI_LEVEL_];
12233              
12234             # But only copy the closing ci level if the outer container is
12235             # in a block; otherwise poor results can be produced.
12236 40 100       170 if ($weld_starts_in_block) {
12237 39         132 $rLL->[$Kinner_closing]->[_CI_LEVEL_] =
12238             $rLL->[$Kouter_closing]->[_CI_LEVEL_];
12239             }
12240             }
12241             }
12242              
12243 22         113 return;
12244             } ## end sub weld_nested_containers
12245              
12246             sub weld_nested_quotes {
12247              
12248             # Called once per file for option '--weld-nested-containers'. This
12249             # does welding on qw quotes.
12250              
12251 23     23 0 65 my $self = shift;
12252              
12253             # See if quotes are excluded from welding
12254 23         75 my $rflags = $weld_nested_exclusion_rules{'q'};
12255 23 100 66     114 return if ( defined($rflags) && defined( $rflags->[1] ) );
12256              
12257 22         64 my $rK_weld_left = $self->[_rK_weld_left_];
12258 22         68 my $rK_weld_right = $self->[_rK_weld_right_];
12259              
12260 22         62 my $rLL = $self->[_rLL_];
12261 22 50 33     100 return unless ( defined($rLL) && @{$rLL} );
  22         90  
12262 22         56 my $Num = @{$rLL};
  22         55  
12263              
12264 22         50 my $K_opening_container = $self->[_K_opening_container_];
12265 22         52 my $K_closing_container = $self->[_K_closing_container_];
12266 22         64 my $rlines = $self->[_rlines_];
12267              
12268 22         51 my $starting_lentot;
12269             my $maximum_text_length;
12270              
12271             my $is_single_quote = sub {
12272 7     7   23 my ( $Kbeg, $Kend, $quote_type ) = @_;
12273 7         28 foreach my $K ( $Kbeg .. $Kend ) {
12274 71         119 my $test_type = $rLL->[$K]->[_TYPE_];
12275 71 100       171 next if ( $test_type eq 'b' );
12276 32 50       72 return if ( $test_type ne $quote_type );
12277             }
12278 7         37 return 1;
12279 22         175 };
12280              
12281             # Length tolerance - same as previously used for sub weld_nested
12282 22         109 my $multiline_tol =
12283             1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation );
12284              
12285             # look for single qw quotes nested in containers
12286 22         69 my $KNEXT = $self->[_K_first_seq_item_];
12287 22         115 while ( defined($KNEXT) ) {
12288 468         624 my $KK = $KNEXT;
12289 468         672 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
12290 468         596 my $rtoken_vars = $rLL->[$KK];
12291 468         632 my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
12292 468 50       783 if ( !$outer_seqno ) {
12293 0 0       0 next if ( $KK == 0 ); # first token in file may not be container
12294              
12295             # A fault here implies that an error was made in the little loop at
12296             # the bottom of sub 'respace_tokens' which set the values of
12297             # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
12298             # loop control lines above.
12299 0         0 Fault("sequence = $outer_seqno not defined at K=$KK")
12300             if (DEVEL_MODE);
12301 0         0 next;
12302             }
12303              
12304 468         644 my $token = $rtoken_vars->[_TOKEN_];
12305 468 100       1064 if ( $is_opening_token{$token} ) {
12306              
12307             # see if the next token is a quote of some type
12308 230         348 my $Kn = $KK + 1;
12309 230 100 66     758 $Kn += 1
12310             if ( $Kn < $Num && $rLL->[$Kn]->[_TYPE_] eq 'b' );
12311 230 50       442 next unless ( $Kn < $Num );
12312              
12313 230         395 my $next_token = $rLL->[$Kn]->[_TOKEN_];
12314 230         339 my $next_type = $rLL->[$Kn]->[_TYPE_];
12315             next
12316 230 100 100     1048 unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
      100        
12317             && substr( $next_token, 0, 1 ) eq 'q' );
12318              
12319             # The token before the closing container must also be a quote
12320 7         40 my $Kouter_closing = $K_closing_container->{$outer_seqno};
12321 7         27 my $Kinner_closing = $self->K_previous_nonblank($Kouter_closing);
12322 7 50       53 next unless $rLL->[$Kinner_closing]->[_TYPE_] eq $next_type;
12323              
12324             # This is an inner opening container
12325 7         20 my $Kinner_opening = $Kn;
12326              
12327             # Do not weld to single-line quotes. Nothing is gained, and it may
12328             # look bad.
12329 7 50       31 next if ( $Kinner_closing == $Kinner_opening );
12330              
12331             # Only weld to quotes delimited with container tokens. This is
12332             # because welding to arbitrary quote delimiters can produce code
12333             # which is less readable than without welding.
12334 7         27 my $closing_delimiter =
12335             substr( $rLL->[$Kinner_closing]->[_TOKEN_], -1, 1 );
12336             next
12337 7 50 33     35 unless ( $is_closing_token{$closing_delimiter}
12338             || $closing_delimiter eq '>' );
12339              
12340             # Now make sure that there is just a single quote in the container
12341             next
12342             unless (
12343 7 50       32 $is_single_quote->(
12344             $Kinner_opening + 1,
12345             $Kinner_closing - 1,
12346             $next_type
12347             )
12348             );
12349              
12350             # OK: This is a candidate for welding
12351 7         32 my $Msg = EMPTY_STRING;
12352 7         46 my $do_not_weld;
12353              
12354 7         21 my $Kouter_opening = $K_opening_container->{$outer_seqno};
12355 7         22 my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
12356 7         18 my $iline_io = $rLL->[$Kinner_opening]->[_LINE_INDEX_];
12357 7         20 my $iline_oc = $rLL->[$Kouter_closing]->[_LINE_INDEX_];
12358 7         20 my $iline_ic = $rLL->[$Kinner_closing]->[_LINE_INDEX_];
12359 7   66     43 my $is_old_weld =
12360             ( $iline_oo == $iline_io && $iline_ic == $iline_oc );
12361              
12362             # Fix for case b1189. If quote is marked as type 'Q' then only weld
12363             # if the two closing tokens are on the same input line. Otherwise,
12364             # the closing line will be output earlier in the pipeline than
12365             # other CODE lines and welding will not actually occur. This will
12366             # leave a half-welded structure with potential formatting
12367             # instability. This might be fixed by adding a check for a weld on
12368             # a closing Q token and sending it down the normal channel, but it
12369             # would complicate the code and is potentially risky.
12370             next
12371 7 50 66     48 if (!$is_old_weld
      33        
12372             && $next_type eq 'Q'
12373             && $iline_ic != $iline_oc );
12374              
12375             # If welded, the line must not exceed allowed line length
12376 7         30 ( my $ok_to_weld, $maximum_text_length, $starting_lentot, my $msg )
12377             = $self->setup_new_weld_measurements( $Kouter_opening,
12378             $Kinner_opening );
12379 7 50       38 if ( !$ok_to_weld ) {
12380 0         0 if (DEBUG_WELD) { print $msg}
12381 0         0 next;
12382             }
12383              
12384 7         27 my $length =
12385             $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
12386 7         17 my $excess = $length + $multiline_tol - $maximum_text_length;
12387              
12388 7 100       25 my $excess_max = ( $is_old_weld ? $multiline_tol : 0 );
12389 7 50       31 if ( $excess >= $excess_max ) {
12390 0         0 $do_not_weld = 1;
12391             }
12392              
12393 7         14 if (DEBUG_WELD) {
12394             if ( !$is_old_weld ) { $is_old_weld = EMPTY_STRING }
12395             $Msg .=
12396             "excess=$excess>=$excess_max, multiline_tol=$multiline_tol, is_old_weld='$is_old_weld'\n";
12397             }
12398              
12399             # Check weld exclusion rules for outer container
12400 7 50       34 if ( !$do_not_weld ) {
12401 7         24 my $is_leading = !defined( $rK_weld_left->{$Kouter_opening} );
12402 7 100       38 if ( $self->is_excluded_weld( $KK, $is_leading ) ) {
12403 1         3 if (DEBUG_WELD) {
12404             $Msg .=
12405             "No qw weld due to weld exclusion rules for outer container\n";
12406             }
12407 1         4 $do_not_weld = 1;
12408             }
12409             }
12410              
12411             # Check the length of the last line (fixes case b1039)
12412 7 100       37 if ( !$do_not_weld ) {
12413 6         26 my $rK_range_ic = $rlines->[$iline_ic]->{_rK_range};
12414 6         15 my ( $Kfirst_ic, $Klast_ic ) = @{$rK_range_ic};
  6         18  
12415 6         26 my $excess_ic =
12416             $self->excess_line_length_for_Krange( $Kfirst_ic,
12417             $Kouter_closing );
12418              
12419             # Allow extra space for additional welded closing container(s)
12420             # and a space and comma or semicolon.
12421             # NOTE: weld len has not been computed yet. Use 2 spaces
12422             # for now, correct for a single weld. This estimate could
12423             # be made more accurate if necessary.
12424             my $weld_len =
12425 6 100       32 defined( $rK_weld_right->{$Kouter_closing} ) ? 2 : 0;
12426 6 50       31 if ( $excess_ic + $weld_len + 2 > 0 ) {
12427 0         0 if (DEBUG_WELD) {
12428             $Msg .=
12429             "No qw weld due to excess ending line length=$excess_ic + $weld_len + 2 > 0\n";
12430             }
12431 0         0 $do_not_weld = 1;
12432             }
12433             }
12434              
12435 7 100       24 if ($do_not_weld) {
12436 1         2 if (DEBUG_WELD) {
12437             $Msg .= "Not Welding QW\n";
12438             print $Msg;
12439             }
12440 1         4 next;
12441             }
12442              
12443             # OK to weld
12444 6         15 if (DEBUG_WELD) {
12445             $Msg .= "Welding QW\n";
12446             print $Msg;
12447             }
12448              
12449 6         17 $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
12450 6         21 $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
12451              
12452 6         19 $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
12453 6         18 $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
12454              
12455             # Undo one indentation level if an extra level was added to this
12456             # multiline quote
12457             my $qw_seqno =
12458 6         18 $self->[_rstarting_multiline_qw_seqno_by_K_]->{$Kinner_opening};
12459 6 50 33     46 if ( $qw_seqno
12460             && $self->[_rmultiline_qw_has_extra_level_]->{$qw_seqno} )
12461             {
12462 0         0 foreach my $K ( $Kinner_opening + 1 .. $Kinner_closing - 1 ) {
12463 0         0 $rLL->[$K]->[_LEVEL_] -= 1;
12464             }
12465 0         0 $rLL->[$Kinner_opening]->[_CI_LEVEL_] = 0;
12466 0         0 $rLL->[$Kinner_closing]->[_CI_LEVEL_] = 0;
12467             }
12468              
12469             # undo CI for other welded quotes
12470             else {
12471              
12472 6         24 foreach my $K ( $Kinner_opening .. $Kinner_closing ) {
12473 74         152 $rLL->[$K]->[_CI_LEVEL_] = 0;
12474             }
12475             }
12476              
12477             # Change the level of a closing qw token to be that of the outer
12478             # containing token. This will allow -lp indentation to function
12479             # correctly in the vertical aligner.
12480             # Patch to fix c002: but not if it contains text
12481 6 50       62 if ( length( $rLL->[$Kinner_closing]->[_TOKEN_] ) == 1 ) {
12482 6         25 $rLL->[$Kinner_closing]->[_LEVEL_] =
12483             $rLL->[$Kouter_closing]->[_LEVEL_];
12484             }
12485             }
12486             }
12487 22         287 return;
12488             } ## end sub weld_nested_quotes
12489              
12490             sub is_welded_at_seqno {
12491              
12492 83     83 0 223 my ( $self, $seqno ) = @_;
12493              
12494             # given a sequence number:
12495             # return true if it is welded either left or right
12496             # return false otherwise
12497 83 50 33     386 return unless ( $total_weld_count && defined($seqno) );
12498 83         227 my $KK_o = $self->[_K_opening_container_]->{$seqno};
12499 83 50       231 return unless defined($KK_o);
12500             return defined( $self->[_rK_weld_left_]->{$KK_o} )
12501 83   100     538 || defined( $self->[_rK_weld_right_]->{$KK_o} );
12502             } ## end sub is_welded_at_seqno
12503              
12504             sub mark_short_nested_blocks {
12505              
12506             # This routine looks at the entire file and marks any short nested blocks
12507             # which should not be broken. The results are stored in the hash
12508             # $rshort_nested->{$type_sequence}
12509             # which will be true if the container should remain intact.
12510             #
12511             # For example, consider the following line:
12512              
12513             # sub cxt_two { sort { $a <=> $b } test_if_list() }
12514              
12515             # The 'sort' block is short and nested within an outer sub block.
12516             # Normally, the existence of the 'sort' block will force the sub block to
12517             # break open, but this is not always desirable. Here we will set a flag for
12518             # the sort block to prevent this. To give the user control, we will
12519             # follow the input file formatting. If either of the blocks is broken in
12520             # the input file then we will allow it to remain broken. Otherwise we will
12521             # set a flag to keep it together in later formatting steps.
12522              
12523             # The flag which is set here will be checked in two places:
12524             # 'sub process_line_of_CODE' and 'sub starting_one_line_block'
12525              
12526 555     555 0 1324 my $self = shift;
12527 555 100       2454 return if $rOpts->{'indent-only'};
12528              
12529 552         1352 my $rLL = $self->[_rLL_];
12530 552 100 66     2198 return unless ( defined($rLL) && @{$rLL} );
  552         2124  
12531              
12532 549 100       2176 return unless ( $rOpts->{'one-line-block-nesting'} );
12533              
12534 1         3 my $K_opening_container = $self->[_K_opening_container_];
12535 1         3 my $K_closing_container = $self->[_K_closing_container_];
12536 1         3 my $rbreak_container = $self->[_rbreak_container_];
12537 1         2 my $ris_broken_container = $self->[_ris_broken_container_];
12538 1         2 my $rshort_nested = $self->[_rshort_nested_];
12539 1         3 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
12540              
12541             # Variables needed for estimating line lengths
12542 1         2 my $maximum_text_length;
12543             my $starting_lentot;
12544 1         3 my $length_tol = 1;
12545              
12546             my $excess_length_to_K = sub {
12547 2     2   4 my ($K) = @_;
12548              
12549             # Estimate the length from the line start to a given token
12550 2         6 my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
12551 2         5 my $excess_length = $length + $length_tol - $maximum_text_length;
12552 2         7 return ($excess_length);
12553 1         6 };
12554              
12555             # loop over all containers
12556 1         2 my @open_block_stack;
12557 1         3 my $iline = -1;
12558 1         3 my $KNEXT = $self->[_K_first_seq_item_];
12559 1         4 while ( defined($KNEXT) ) {
12560 4         6 my $KK = $KNEXT;
12561 4         8 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
12562 4         7 my $rtoken_vars = $rLL->[$KK];
12563 4         7 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
12564 4 50       9 if ( !$type_sequence ) {
12565 0 0       0 next if ( $KK == 0 ); # first token in file may not be container
12566              
12567             # A fault here implies that an error was made in the little loop at
12568             # the bottom of sub 'respace_tokens' which set the values of
12569             # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
12570             # loop control lines above.
12571 0         0 Fault("sequence = $type_sequence not defined at K=$KK")
12572             if (DEVEL_MODE);
12573 0         0 next;
12574             }
12575              
12576             # Patch: do not mark short blocks with welds.
12577             # In some cases blinkers can form (case b690).
12578 4 50 33     12 if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) {
12579 0         0 next;
12580             }
12581              
12582             # We are just looking at code blocks
12583 4         6 my $token = $rtoken_vars->[_TOKEN_];
12584 4         7 my $type = $rtoken_vars->[_TYPE_];
12585 4 50       11 next unless ( $type eq $token );
12586 4 50       12 next unless ( $rblock_type_of_seqno->{$type_sequence} );
12587              
12588             # Keep a stack of all acceptable block braces seen.
12589             # Only consider blocks entirely on one line so dump the stack when line
12590             # changes.
12591 4         7 my $iline_last = $iline;
12592 4         5 $iline = $rLL->[$KK]->[_LINE_INDEX_];
12593 4 100       10 if ( $iline != $iline_last ) { @open_block_stack = () }
  1         3  
12594              
12595 4 100       10 if ( $token eq '}' ) {
12596 2 50       5 if (@open_block_stack) { pop @open_block_stack }
  2         4  
12597             }
12598 4 100       11 next unless ( $token eq '{' );
12599              
12600             # block must be balanced (bad scripts may be unbalanced)
12601 2         6 my $K_opening = $K_opening_container->{$type_sequence};
12602 2         3 my $K_closing = $K_closing_container->{$type_sequence};
12603 2 50 33     10 next unless ( defined($K_opening) && defined($K_closing) );
12604              
12605             # require that this block be entirely on one line
12606             next
12607             if ( $ris_broken_container->{$type_sequence}
12608 2 50 33     11 || $rbreak_container->{$type_sequence} );
12609              
12610             # See if this block fits on one line of allowed length (which may
12611             # be different from the input script)
12612 2 50       8 $starting_lentot =
12613             $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
12614 2         4 my $level = $rLL->[$KK]->[_LEVEL_];
12615 2         17 my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
12616 2         7 $maximum_text_length =
12617             $maximum_text_length_at_level[$level] -
12618             $ci_level * $rOpts_continuation_indentation;
12619              
12620             # Dump the stack if block is too long and skip this block
12621 2 50       6 if ( $excess_length_to_K->($K_closing) > 0 ) {
12622 0         0 @open_block_stack = ();
12623 0         0 next;
12624             }
12625              
12626             # OK, Block passes tests, remember it
12627 2         6 push @open_block_stack, $type_sequence;
12628              
12629             # We are only marking nested code blocks,
12630             # so check for a previous block on the stack
12631 2 100       7 next unless ( @open_block_stack > 1 );
12632              
12633             # Looks OK, mark this as a short nested block
12634 1         8 $rshort_nested->{$type_sequence} = 1;
12635              
12636             }
12637 1         7 return;
12638             } ## end sub mark_short_nested_blocks
12639              
12640             sub special_indentation_adjustments {
12641              
12642 555     555 0 1534 my ($self) = @_;
12643              
12644             # Called once per file to define the levels to be used for computing
12645             # actual indentation. These levels are initialized to be the structural
12646             # levels and then are adjusted if necessary for special purposes.
12647             # The adjustments are made either by changing _CI_LEVEL_ directly or
12648             # by setting modified levels in the array $self->[_radjusted_levels_].
12649              
12650             # NOTE: This routine is called after the weld routines, which may have
12651             # already adjusted the initial values of _LEVEL_, so we are making
12652             # adjustments on top of those levels. It would be nicer to have the
12653             # weld routines also use this adjustment, but that gets complicated
12654             # when we combine -gnu -wn and also have some welded quotes.
12655 555         1505 my $Klimit = $self->[_Klimit_];
12656 555         1258 my $rLL = $self->[_rLL_];
12657 555         1326 my $radjusted_levels = $self->[_radjusted_levels_];
12658              
12659 555 100       1751 return unless ( defined($Klimit) );
12660              
12661             # Initialize the adjusted levels to be the structural levels
12662 552         2028 foreach my $KK ( 0 .. $Klimit ) {
12663 58381         103578 $radjusted_levels->[$KK] = $rLL->[$KK]->[_LEVEL_];
12664             }
12665              
12666             # First set adjusted levels for any non-indenting braces.
12667 552         5592 $self->do_non_indenting_braces();
12668              
12669             # Adjust breaks and indentation list containers
12670 552         3471 $self->break_before_list_opening_containers();
12671              
12672             # Set adjusted levels for the whitespace cycle option.
12673 552         2831 $self->whitespace_cycle_adjustment();
12674              
12675 552         3099 $self->braces_left_setup();
12676              
12677             # Adjust continuation indentation if -bli is set
12678 552         2708 $self->bli_adjustment();
12679              
12680 552 100       1847 $self->extended_ci()
12681             if ($rOpts_extended_continuation_indentation);
12682              
12683             # Now clip any adjusted levels to be non-negative
12684 552         2465 $self->clip_adjusted_levels();
12685              
12686 552         1147 return;
12687             } ## end sub special_indentation_adjustments
12688              
12689             sub clip_adjusted_levels {
12690              
12691             # Replace any negative adjusted levels with zero.
12692             # Negative levels can occur in files with brace errors.
12693 552     552 0 1435 my ($self) = @_;
12694 552         1485 my $radjusted_levels = $self->[_radjusted_levels_];
12695 552 50 33     2096 return unless defined($radjusted_levels) && @{$radjusted_levels};
  552         2051  
12696 552         1222 my $min = min( @{$radjusted_levels} ); # fast check for min
  552         3422  
12697 552 50       2143 if ( $min < 0 ) {
12698              
12699             # slow loop, but rarely needed
12700 0 0       0 foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) }
  0         0  
  0         0  
12701             }
12702 552         1099 return;
12703             } ## end sub clip_adjusted_levels
12704              
12705             sub do_non_indenting_braces {
12706              
12707             # Called once per file to handle the --non-indenting-braces parameter.
12708             # Remove indentation within marked braces if requested
12709 552     552 0 1572 my ($self) = @_;
12710              
12711             # Any non-indenting braces have been found by sub find_non_indenting_braces
12712             # and are defined by the following hash:
12713 552         1542 my $rseqno_non_indenting_brace_by_ix =
12714             $self->[_rseqno_non_indenting_brace_by_ix_];
12715 552 100       1019 return unless ( %{$rseqno_non_indenting_brace_by_ix} );
  552         2199  
12716              
12717 2         4 my $rlines = $self->[_rlines_];
12718 2         5 my $K_opening_container = $self->[_K_opening_container_];
12719 2         4 my $K_closing_container = $self->[_K_closing_container_];
12720 2         4 my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
12721 2         4 my $radjusted_levels = $self->[_radjusted_levels_];
12722              
12723             # First locate all of the marked blocks
12724 2         4 my @K_stack;
12725 2         4 foreach my $ix ( keys %{$rseqno_non_indenting_brace_by_ix} ) {
  2         8  
12726 6         15 my $seqno = $rseqno_non_indenting_brace_by_ix->{$ix};
12727 6         11 my $KK = $K_opening_container->{$seqno};
12728 6         14 my $line_of_tokens = $rlines->[$ix];
12729 6         9 my $rK_range = $line_of_tokens->{_rK_range};
12730 6         9 my ( $Kfirst, $Klast ) = @{$rK_range};
  6         13  
12731 6         16 $rspecial_side_comment_type->{$Klast} = 'NIB';
12732 6         14 push @K_stack, [ $KK, 1 ];
12733 6         13 my $Kc = $K_closing_container->{$seqno};
12734 6 50       20 push @K_stack, [ $Kc, -1 ] if ( defined($Kc) );
12735             }
12736 2 50       9 return unless (@K_stack);
12737 2         16 @K_stack = sort { $a->[0] <=> $b->[0] } @K_stack;
  19         34  
12738              
12739             # Then loop to remove indentation within marked blocks
12740 2         6 my $KK_last = 0;
12741 2         5 my $ndeep = 0;
12742 2         6 foreach my $item (@K_stack) {
12743 12         14 my ( $KK, $inc ) = @{$item};
  12         25  
12744 12 100       26 if ( $ndeep > 0 ) {
12745              
12746 8         16 foreach ( $KK_last + 1 .. $KK ) {
12747 52         75 $radjusted_levels->[$_] -= $ndeep;
12748             }
12749              
12750             # We just subtracted the old $ndeep value, which only applies to a
12751             # '{'. The new $ndeep applies to a '}', so we undo the error.
12752 8 100       17 if ( $inc < 0 ) { $radjusted_levels->[$KK] += 1 }
  6         8  
12753             }
12754              
12755 12         18 $ndeep += $inc;
12756 12         21 $KK_last = $KK;
12757             }
12758 2         8 return;
12759             } ## end sub do_non_indenting_braces
12760              
12761             sub whitespace_cycle_adjustment {
12762              
12763 552     552 0 1251 my $self = shift;
12764              
12765             # Called once per file to implement the --whitespace-cycle option
12766 552         1366 my $rLL = $self->[_rLL_];
12767 552 50 33     2345 return unless ( defined($rLL) && @{$rLL} );
  552         2235  
12768 552         1467 my $radjusted_levels = $self->[_radjusted_levels_];
12769 552         1481 my $maximum_level = $self->[_maximum_level_];
12770              
12771 552 50 66     2259 if ( $rOpts_whitespace_cycle
      66        
12772             && $rOpts_whitespace_cycle > 0
12773             && $rOpts_whitespace_cycle < $maximum_level )
12774             {
12775              
12776 2         5 my $Kmax = @{$rLL} - 1;
  2         9  
12777              
12778 2         5 my $whitespace_last_level = -1;
12779 2         5 my @whitespace_level_stack = ();
12780 2         7 my $last_nonblank_type = 'b';
12781 2         4 my $last_nonblank_token = EMPTY_STRING;
12782 2         8 foreach my $KK ( 0 .. $Kmax ) {
12783 234         302 my $level_abs = $radjusted_levels->[$KK];
12784 234         287 my $level = $level_abs;
12785 234 100       392 if ( $level_abs < $whitespace_last_level ) {
12786 26         39 pop(@whitespace_level_stack);
12787             }
12788 234 100       441 if ( !@whitespace_level_stack ) {
    100          
12789 2         7 push @whitespace_level_stack, $level_abs;
12790             }
12791             elsif ( $level_abs > $whitespace_last_level ) {
12792 26         39 $level = $whitespace_level_stack[-1] +
12793             ( $level_abs - $whitespace_last_level );
12794              
12795 26 50 100     165 if (
      66        
      33        
      66        
      33        
12796             # 1 Try to break at a block brace
12797             (
12798             $level > $rOpts_whitespace_cycle
12799             && $last_nonblank_type eq '{'
12800             && $last_nonblank_token eq '{'
12801             )
12802              
12803             # 2 Then either a brace or bracket
12804             || ( $level > $rOpts_whitespace_cycle + 1
12805             && $last_nonblank_token =~ /^[\{\[]$/ )
12806              
12807             # 3 Then a paren too
12808             || $level > $rOpts_whitespace_cycle + 2
12809             )
12810             {
12811 1         4 $level = 1;
12812             }
12813 26         47 push @whitespace_level_stack, $level;
12814             }
12815 234         308 $level = $whitespace_level_stack[-1];
12816 234         286 $radjusted_levels->[$KK] = $level;
12817              
12818 234         304 $whitespace_last_level = $level_abs;
12819 234         364 my $type = $rLL->[$KK]->[_TYPE_];
12820 234         315 my $token = $rLL->[$KK]->[_TOKEN_];
12821 234 100       493 if ( $type ne 'b' ) {
12822 150         178 $last_nonblank_type = $type;
12823 150         280 $last_nonblank_token = $token;
12824             }
12825             }
12826             }
12827 552         1144 return;
12828             } ## end sub whitespace_cycle_adjustment
12829              
12830 38     38   426 use constant DEBUG_BBX => 0;
  38         124  
  38         62688  
12831              
12832             sub break_before_list_opening_containers {
12833              
12834 552     552 0 1575 my ($self) = @_;
12835              
12836             # This routine is called once per batch to implement parameters
12837             # --break-before-hash-brace=n and similar -bbx=n flags
12838             # and their associated indentation flags:
12839             # --break-before-hash-brace-and-indent and similar -bbxi=n
12840              
12841             # Nothing to do if none of the -bbx=n parameters has been set
12842 552 100       1723 return unless %break_before_container_types;
12843              
12844 7         19 my $rLL = $self->[_rLL_];
12845 7 50 33     29 return unless ( defined($rLL) && @{$rLL} );
  7         31  
12846              
12847             # Loop over all opening container tokens
12848 7         18 my $K_opening_container = $self->[_K_opening_container_];
12849 7         16 my $K_closing_container = $self->[_K_closing_container_];
12850 7         16 my $ris_broken_container = $self->[_ris_broken_container_];
12851 7         16 my $ris_permanently_broken = $self->[_ris_permanently_broken_];
12852 7         15 my $rhas_list = $self->[_rhas_list_];
12853 7         30 my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
12854 7         15 my $radjusted_levels = $self->[_radjusted_levels_];
12855 7         15 my $rparent_of_seqno = $self->[_rparent_of_seqno_];
12856 7         23 my $rlines = $self->[_rlines_];
12857 7         17 my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
12858 7         30 my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
12859 7         18 my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
12860 7         15 my $rK_weld_right = $self->[_rK_weld_right_];
12861 7         17 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
12862              
12863 7         50 my $length_tol =
12864             max( 1, $rOpts_continuation_indentation, $rOpts_indent_columns );
12865 7 50       20 if ($rOpts_ignore_old_breakpoints) {
12866              
12867             # Patch suggested by b1231; the old tol was excessive.
12868             ## $length_tol += $rOpts_maximum_line_length;
12869 0         0 $length_tol *= 2;
12870             }
12871              
12872 7         20 my $rbreak_before_container_by_seqno = {};
12873 7         14 my $rwant_reduced_ci = {};
12874 7         18 foreach my $seqno ( keys %{$K_opening_container} ) {
  7         32  
12875              
12876             #----------------------------------------------------------------
12877             # Part 1: Examine any -bbx=n flags
12878             #----------------------------------------------------------------
12879              
12880 47 100       91 next if ( $rblock_type_of_seqno->{$seqno} );
12881 45         73 my $KK = $K_opening_container->{$seqno};
12882              
12883             # This must be a list or contain a list.
12884             # Note1: switched from 'has_broken_list' to 'has_list' to fix b1024.
12885             # Note2: 'has_list' holds the depth to the sub-list. We will require
12886             # a depth of just 1
12887 45         91 my $is_list = $self->is_list_by_seqno($seqno);
12888 45         77 my $has_list = $rhas_list->{$seqno};
12889              
12890             # Fix for b1173: if welded opening container, use flag of innermost
12891             # seqno. Otherwise, the restriction $has_list==1 prevents triple and
12892             # higher welds from following the -BBX parameters.
12893 45 50       84 if ($total_weld_count) {
12894 0         0 my $KK_test = $rK_weld_right->{$KK};
12895 0 0       0 if ( defined($KK_test) ) {
12896 0         0 my $seqno_inner = $rLL->[$KK_test]->[_TYPE_SEQUENCE_];
12897 0   0     0 $is_list ||= $self->is_list_by_seqno($seqno_inner);
12898 0         0 $has_list = $rhas_list->{$seqno_inner};
12899             }
12900             }
12901              
12902 45 100 66     113 next unless ( $is_list || $has_list && $has_list == 1 );
      66        
12903              
12904 41         62 my $has_list_with_lec = $rhas_broken_list_with_lec->{$seqno};
12905              
12906             # Only for types of container tokens with a non-default break option
12907 41         69 my $token = $rLL->[$KK]->[_TOKEN_];
12908 41         65 my $break_option = $break_before_container_types{$token};
12909 41 100       91 next unless ($break_option);
12910              
12911             # Do not use -bbx under stress for stability ... fixes b1300
12912             # TODO: review this; do we also need to look at stress_level_lalpha?
12913 16         31 my $level = $rLL->[$KK]->[_LEVEL_];
12914 16 50       56 if ( $level >= $stress_level_beta ) {
12915 0         0 DEBUG_BBX
12916             && print
12917             "BBX: Switching off at $seqno: level=$level exceeds beta stress level=$stress_level_beta\n";
12918 0         0 next;
12919             }
12920              
12921             # Require previous nonblank to be '=' or '=>'
12922 16         63 my $Kprev = $KK - 1;
12923 16 50       67 next if ( $Kprev < 0 );
12924 16         49 my $prev_type = $rLL->[$Kprev]->[_TYPE_];
12925 16 50       43 if ( $prev_type eq 'b' ) {
12926 16         24 $Kprev--;
12927 16 50       40 next if ( $Kprev < 0 );
12928 16         34 $prev_type = $rLL->[$Kprev]->[_TYPE_];
12929             }
12930 16 100       46 next unless ( $is_equal_or_fat_comma{$prev_type} );
12931              
12932 14         30 my $ci = $rLL->[$KK]->[_CI_LEVEL_];
12933              
12934             #--------------------------------------------
12935             # New coding for option 2 (break if complex).
12936             #--------------------------------------------
12937             # This new coding uses clues which are invariant under formatting to
12938             # decide if a list is complex. For now it is only applied when -lp
12939             # and -vmll are used, but eventually it may become the standard method.
12940             # Fixes b1274, b1275, and others, including b1099.
12941 14 100       63 if ( $break_option == 2 ) {
12942              
12943 2 50 33     9 if ( $rOpts_line_up_parentheses
12944             || $rOpts_variable_maximum_line_length )
12945             {
12946              
12947             # Start with the basic definition of a complex list...
12948 0   0     0 my $is_complex = $is_list && $has_list;
12949              
12950             # and it is also complex if the parent is a list
12951 0 0       0 if ( !$is_complex ) {
12952 0         0 my $parent = $rparent_of_seqno->{$seqno};
12953 0 0       0 if ( $self->is_list_by_seqno($parent) ) {
12954 0         0 $is_complex = 1;
12955             }
12956             }
12957              
12958             # finally, we will call it complex if there are inner opening
12959             # and closing container tokens, not parens, within the outer
12960             # container tokens.
12961 0 0       0 if ( !$is_complex ) {
12962 0         0 my $Kp = $self->K_next_nonblank($KK);
12963 0 0       0 my $token_p = defined($Kp) ? $rLL->[$Kp]->[_TOKEN_] : 'b';
12964 0 0 0     0 if ( $is_opening_token{$token_p} && $token_p ne '(' ) {
12965              
12966 0         0 my $Kc = $K_closing_container->{$seqno};
12967 0         0 my $Km = $self->K_previous_nonblank($Kc);
12968 0 0       0 my $token_m =
12969             defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
12970              
12971             # ignore any optional ending comma
12972 0 0       0 if ( $token_m eq ',' ) {
12973 0         0 $Km = $self->K_previous_nonblank($Km);
12974 0 0       0 $token_m =
12975             defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
12976             }
12977              
12978             $is_complex ||=
12979 0   0     0 $is_closing_token{$token_m} && $token_m ne ')';
      0        
12980             }
12981             }
12982              
12983             # Convert to option 3 (always break) if complex
12984 0 0       0 next unless ($is_complex);
12985 0         0 $break_option = 3;
12986             }
12987             }
12988              
12989             # Fix for b1231: the has_list_with_lec does not cover all cases.
12990             # A broken container containing a list and with line-ending commas
12991             # will stay broken, so can be treated as if it had a list with lec.
12992             $has_list_with_lec ||=
12993             $has_list
12994             && $ris_broken_container->{$seqno}
12995 14   66     83 && $rlec_count_by_seqno->{$seqno};
      100        
12996              
12997 14         19 DEBUG_BBX
12998             && print STDOUT
12999             "BBX: Looking at seqno=$seqno, token = $token with option=$break_option\n";
13000              
13001             # -bbx=1 = stable, try to follow input
13002 14 50       60 if ( $break_option == 1 ) {
    100          
    50          
13003              
13004 0         0 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
13005 0         0 my $rK_range = $rlines->[$iline]->{_rK_range};
13006 0         0 my ( $Kfirst, $Klast ) = @{$rK_range};
  0         0  
13007 0 0       0 next unless ( $KK == $Kfirst );
13008             }
13009              
13010             # -bbx=2 => apply this style only for a 'complex' list
13011             elsif ( $break_option == 2 ) {
13012              
13013             # break if this list contains a broken list with line-ending comma
13014 2         3 my $ok_to_break;
13015 2         4 my $Msg = EMPTY_STRING;
13016 2 100       13 if ($has_list_with_lec) {
13017 1         3 $ok_to_break = 1;
13018 1         4 DEBUG_BBX && do { $Msg = "has list with lec;" };
13019             }
13020              
13021 2 100       5 if ( !$ok_to_break ) {
13022              
13023             # Turn off -xci if -bbx=2 and this container has a sublist but
13024             # not a broken sublist. This avoids creating blinkers. The
13025             # problem is that -xci can cause one-line lists to break open,
13026             # and thereby creating formatting instability.
13027             # This fixes cases b1033 b1036 b1037 b1038 b1042 b1043 b1044
13028             # b1045 b1046 b1047 b1051 b1052 b1061.
13029 1 50       3 if ($has_list) { $rno_xci_by_seqno->{$seqno} = 1 }
  0         0  
13030              
13031 1         3 my $parent = $rparent_of_seqno->{$seqno};
13032 1 50       5 if ( $self->is_list_by_seqno($parent) ) {
13033 1         4 DEBUG_BBX && do { $Msg = "parent is list" };
13034 1         2 $ok_to_break = 1;
13035             }
13036             }
13037              
13038 2 50       6 if ( !$ok_to_break ) {
13039 0         0 DEBUG_BBX
13040             && print STDOUT "Not breaking at seqno=$seqno: $Msg\n";
13041 0         0 next;
13042             }
13043              
13044             DEBUG_BBX
13045 2         2 && print STDOUT "OK to break at seqno=$seqno: $Msg\n";
13046              
13047             # Patch: turn off -xci if -bbx=2 and -lp
13048             # This fixes cases b1090 b1095 b1101 b1116 b1118 b1121 b1122
13049 2 50       5 $rno_xci_by_seqno->{$seqno} = 1 if ($rOpts_line_up_parentheses);
13050             }
13051              
13052             # -bbx=3 = always break
13053             elsif ( $break_option == 3 ) {
13054              
13055             # ok to break
13056             }
13057              
13058             # Shouldn't happen! Bad flag, but make behavior same as 3
13059             else {
13060             # ok to break
13061             }
13062              
13063             # Set a flag for actual implementation later in
13064             # sub insert_breaks_before_list_opening_containers
13065 14         33 $rbreak_before_container_by_seqno->{$seqno} = 1;
13066 14         18 DEBUG_BBX
13067             && print STDOUT "BBX: ok to break at seqno=$seqno\n";
13068              
13069             # -bbxi=0: Nothing more to do if the ci value remains unchanged
13070 14         32 my $ci_flag = $container_indentation_options{$token};
13071 14 100       38 next unless ($ci_flag);
13072              
13073             # -bbxi=1: This option removes ci and is handled in
13074             # later sub get_final_indentation
13075 4 100       11 if ( $ci_flag == 1 ) {
13076 2         6 $rwant_reduced_ci->{$seqno} = 1;
13077 2         5 next;
13078             }
13079              
13080             # -bbxi=2: This option changes the level ...
13081             # This option can conflict with -xci in some cases. We can turn off
13082             # -xci for this container to avoid blinking. For now, only do this if
13083             # -vmll is set. ( fixes b1335, b1336 )
13084 2 50       4 if ($rOpts_variable_maximum_line_length) {
13085 0         0 $rno_xci_by_seqno->{$seqno} = 1;
13086             }
13087              
13088             #----------------------------------------------------------------
13089             # Part 2: Perform tests before committing to changing ci and level
13090             #----------------------------------------------------------------
13091              
13092             # Before changing the ci level of the opening container, we need
13093             # to be sure that the container will be broken in the later stages of
13094             # formatting. We have to do this because we are working early in the
13095             # formatting pipeline. A problem can occur if we change the ci or
13096             # level of the opening token but do not actually break the container
13097             # open as expected. In most cases it wouldn't make any difference if
13098             # we changed ci or not, but there are some edge cases where this
13099             # can cause blinking states, so we need to try to only change ci if
13100             # the container will really be broken.
13101              
13102             # Only consider containers already broken
13103 2 50       6 next if ( !$ris_broken_container->{$seqno} );
13104              
13105             # Patch to fix issue b1305: the combination of -naws and ci>i appears
13106             # to cause an instability. It should almost never occur in practice.
13107             next
13108 2 50 33     9 if (!$rOpts_add_whitespace
13109             && $rOpts_continuation_indentation > $rOpts_indent_columns );
13110              
13111             # Always ok to change ci for permanently broken containers
13112 2 50       8 if ( $ris_permanently_broken->{$seqno} ) { }
    100          
13113              
13114             # Always OK if this list contains a broken sub-container with
13115             # a non-terminal line-ending comma
13116             elsif ($has_list_with_lec) { }
13117              
13118             # Otherwise, we are considering a single container...
13119             else {
13120              
13121             # A single container must have at least 1 line-ending comma:
13122 1 50       4 next unless ( $rlec_count_by_seqno->{$seqno} );
13123              
13124 1         3 my $OK;
13125              
13126             # Since it has a line-ending comma, it will stay broken if the
13127             # -boc flag is set
13128 1 50       3 if ($rOpts_break_at_old_comma_breakpoints) { $OK = 1 }
  0         0  
13129              
13130             # OK if the container contains multiple fat commas
13131             # Better: multiple lines with fat commas
13132 1 50 33     7 if ( !$OK && !$rOpts_ignore_old_breakpoints ) {
13133 1         4 my $rtype_count = $rtype_count_by_seqno->{$seqno};
13134 1 50       3 next unless ($rtype_count);
13135 1         3 my $fat_comma_count = $rtype_count->{'=>'};
13136 1         2 DEBUG_BBX
13137             && print STDOUT "BBX: fat comma count=$fat_comma_count\n";
13138 1 50 33     6 if ( $fat_comma_count && $fat_comma_count >= 2 ) { $OK = 1 }
  1         2  
13139             }
13140              
13141             # The last check we can make is to see if this container could
13142             # fit on a single line. Use the least possible indentation
13143             # estimate, ci=0, so we are not subtracting $ci *
13144             # $rOpts_continuation_indentation from tabulated
13145             # $maximum_text_length value.
13146 1 50       4 if ( !$OK ) {
13147 0         0 my $maximum_text_length = $maximum_text_length_at_level[$level];
13148 0         0 my $K_closing = $K_closing_container->{$seqno};
13149 0         0 my $length = $self->cumulative_length_before_K($K_closing) -
13150             $self->cumulative_length_before_K($KK);
13151 0         0 my $excess_length = $length - $maximum_text_length;
13152 0         0 DEBUG_BBX
13153             && print STDOUT
13154             "BBX: excess=$excess_length: maximum_text_length=$maximum_text_length, length=$length, ci=$ci\n";
13155              
13156             # OK if the net container definitely breaks on length
13157 0 0       0 if ( $excess_length > $length_tol ) {
13158 0         0 $OK = 1;
13159 0         0 DEBUG_BBX
13160             && print STDOUT "BBX: excess_length=$excess_length\n";
13161             }
13162              
13163             # Otherwise skip it
13164 0         0 else { next }
13165             }
13166             }
13167              
13168             #------------------------------------------------------------
13169             # Part 3: Looks OK: apply -bbx=n and any related -bbxi=n flag
13170             #------------------------------------------------------------
13171              
13172 2         3 DEBUG_BBX && print STDOUT "BBX: OK to break\n";
13173              
13174             # -bbhbi=n
13175             # -bbsbi=n
13176             # -bbpi=n
13177              
13178             # where:
13179              
13180             # n=0 default indentation (usually one ci)
13181             # n=1 outdent one ci
13182             # n=2 indent one level (minus one ci)
13183             # n=3 indent one extra ci [This may be dropped]
13184              
13185             # NOTE: We are adjusting indentation of the opening container. The
13186             # closing container will normally follow the indentation of the opening
13187             # container automatically, so this is not currently done.
13188 2 50       5 next unless ($ci);
13189              
13190             # option 1: outdent
13191 2 50       7 if ( $ci_flag == 1 ) {
    50          
13192 0         0 $ci -= 1;
13193             }
13194              
13195             # option 2: indent one level
13196             elsif ( $ci_flag == 2 ) {
13197 2         4 $ci -= 1;
13198 2         4 $radjusted_levels->[$KK] += 1;
13199             }
13200              
13201             # unknown option
13202             else {
13203             # Shouldn't happen - leave ci unchanged
13204             }
13205              
13206 2 50       8 $rLL->[$KK]->[_CI_LEVEL_] = $ci if ( $ci >= 0 );
13207             }
13208              
13209 7         27 $self->[_rbreak_before_container_by_seqno_] =
13210             $rbreak_before_container_by_seqno;
13211 7         20 $self->[_rwant_reduced_ci_] = $rwant_reduced_ci;
13212 7         18 return;
13213             } ## end sub break_before_list_opening_containers
13214              
13215 38     38   473 use constant DEBUG_XCI => 0;
  38         164  
  38         93002  
13216              
13217             sub extended_ci {
13218              
13219             # This routine implements the -xci (--extended-continuation-indentation)
13220             # flag. We add CI to interior tokens of a container which itself has CI but
13221             # only if a token does not already have CI.
13222              
13223             # To do this, we will locate opening tokens which themselves have
13224             # continuation indentation (CI). We track them with their sequence
13225             # numbers. These sequence numbers are called 'controlling sequence
13226             # numbers'. They apply continuation indentation to the tokens that they
13227             # contain. These inner tokens remember their controlling sequence numbers.
13228             # Later, when these inner tokens are output, they have to see if the output
13229             # lines with their controlling tokens were output with CI or not. If not,
13230             # then they must remove their CI too.
13231              
13232             # The controlling CI concept works hierarchically. But CI itself is not
13233             # hierarchical; it is either on or off. There are some rare instances where
13234             # it would be best to have hierarchical CI too, but not enough to be worth
13235             # the programming effort.
13236              
13237             # The operations to remove unwanted CI are done in sub 'undo_ci'.
13238              
13239 6     6 0 21 my ($self) = @_;
13240              
13241 6         26 my $rLL = $self->[_rLL_];
13242 6 50 33     30 return unless ( defined($rLL) && @{$rLL} );
  6         29  
13243              
13244 6         19 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
13245 6         16 my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
13246 6         41 my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
13247 6         18 my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
13248 6         19 my $ris_bli_container = $self->[_ris_bli_container_];
13249 6         16 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
13250              
13251 6         13 my %available_space;
13252              
13253             # Loop over all opening container tokens
13254 6         18 my $K_opening_container = $self->[_K_opening_container_];
13255 6         15 my $K_closing_container = $self->[_K_closing_container_];
13256 6         27 my @seqno_stack;
13257             my $seqno_top;
13258 6         0 my $KLAST;
13259 6         15 my $KNEXT = $self->[_K_first_seq_item_];
13260              
13261             # The following variable can be used to allow a little extra space to
13262             # avoid blinkers. A value $len_tol = 20 fixed the following
13263             # fixes cases: b1025 b1026 b1027 b1028 b1029 b1030 but NOT b1031.
13264             # It turned out that the real problem was mis-parsing a list brace as
13265             # a code block in a 'use' statement when the line length was extremely
13266             # small. A value of 0 works now, but a slightly larger value can
13267             # be used to minimize the chance of a blinker.
13268 6         15 my $len_tol = 0;
13269              
13270 6         27 while ( defined($KNEXT) ) {
13271              
13272             # Fix all tokens up to the next sequence item if we are changing CI
13273 204 100       346 if ($seqno_top) {
13274              
13275 150         245 my $is_list = $ris_list_by_seqno->{$seqno_top};
13276 150         228 my $space = $available_space{$seqno_top};
13277 150         210 my $count = 0;
13278 150         294 foreach my $Kt ( $KLAST + 1 .. $KNEXT - 1 ) {
13279              
13280 626 100       1161 next if ( $rLL->[$Kt]->[_CI_LEVEL_] );
13281              
13282             # But do not include tokens which might exceed the line length
13283             # and are not in a list.
13284             # ... This fixes case b1031
13285 304 50 66     867 if ( $is_list
      33        
13286             || $rLL->[$Kt]->[_TOKEN_LENGTH_] < $space
13287             || $rLL->[$Kt]->[_TYPE_] eq '#' )
13288             {
13289 304         397 $rLL->[$Kt]->[_CI_LEVEL_] = 1;
13290 304         1082 $rseqno_controlling_my_ci->{$Kt} = $seqno_top;
13291 304         503 $count++;
13292             }
13293             }
13294 150         287 $ris_seqno_controlling_ci->{$seqno_top} += $count;
13295             }
13296              
13297 204         259 $KLAST = $KNEXT;
13298 204         270 my $KK = $KNEXT;
13299 204         293 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
13300              
13301 204         293 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
13302              
13303             # see if we have reached the end of the current controlling container
13304 204 100 100     552 if ( $seqno_top && $seqno == $seqno_top ) {
13305 62         108 $seqno_top = pop @seqno_stack;
13306             }
13307              
13308             # Patch to fix some block types...
13309             # Certain block types arrive from the tokenizer without CI but should
13310             # have it for this option. These include anonymous subs and
13311             # do sort map grep eval
13312 204         311 my $block_type = $rblock_type_of_seqno->{$seqno};
13313 204 100 100     623 if ( $block_type && $is_block_with_ci{$block_type} ) {
    100          
13314 24         46 $rLL->[$KK]->[_CI_LEVEL_] = 1;
13315 24 100       57 if ($seqno_top) {
13316 16         53 $rseqno_controlling_my_ci->{$KK} = $seqno_top;
13317 16         30 $ris_seqno_controlling_ci->{$seqno_top}++;
13318             }
13319             }
13320              
13321             # If this does not have ci, update ci if necessary and continue looking
13322             elsif ( !$rLL->[$KK]->[_CI_LEVEL_] ) {
13323 66 100       131 if ($seqno_top) {
13324 50         83 $rLL->[$KK]->[_CI_LEVEL_] = 1;
13325 50         187 $rseqno_controlling_my_ci->{$KK} = $seqno_top;
13326 50         72 $ris_seqno_controlling_ci->{$seqno_top}++;
13327             }
13328 66         131 next;
13329             }
13330              
13331             # We are looking for opening container tokens with ci
13332 138         200 my $K_opening = $K_opening_container->{$seqno};
13333 138 100 100     514 next unless ( defined($K_opening) && $KK == $K_opening );
13334              
13335             # Make sure there is a corresponding closing container
13336             # (could be missing if the script has a brace error)
13337 62         102 my $K_closing = $K_closing_container->{$seqno};
13338 62 50       128 next unless defined($K_closing);
13339              
13340             # Skip if requested by -bbx to avoid blinkers
13341 62 50       126 next if ( $rno_xci_by_seqno->{$seqno} );
13342              
13343             # Skip if this is a -bli container (this fixes case b1065) Note: case
13344             # b1065 is also fixed by the update for b1055, so this update is not
13345             # essential now. But there does not seem to be a good reason to add
13346             # xci and bli together, so the update is retained.
13347 62 50       141 next if ( $ris_bli_container->{$seqno} );
13348              
13349             # Require different input lines. This will filter out a large number
13350             # of small hash braces and array brackets. If we accidentally filter
13351             # out an important container, it will get fixed on the next pass.
13352 62 50 66     225 if (
13353             $rLL->[$K_opening]->[_LINE_INDEX_] ==
13354             $rLL->[$K_closing]->[_LINE_INDEX_]
13355             && ( $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
13356             $rLL->[$K_opening]->[_CUMULATIVE_LENGTH_] >
13357             $rOpts_maximum_line_length )
13358             )
13359             {
13360 0         0 DEBUG_XCI
13361             && print "XCI: Skipping seqno=$seqno, require different lines\n";
13362 0         0 next;
13363             }
13364              
13365             # Do not apply -xci if adding extra ci will put the container contents
13366             # beyond the line length limit (fixes cases b899 b935)
13367 62         101 my $level = $rLL->[$K_opening]->[_LEVEL_];
13368 62         90 my $ci_level = $rLL->[$K_opening]->[_CI_LEVEL_];
13369 62         102 my $maximum_text_length =
13370             $maximum_text_length_at_level[$level] -
13371             $ci_level * $rOpts_continuation_indentation;
13372              
13373             # Fix for b1197 b1198 b1199 b1200 b1201 b1202
13374             # Do not apply -xci if we are running out of space
13375             # TODO: review this; do we also need to look at stress_level_alpha?
13376 62 50       128 if ( $level >= $stress_level_beta ) {
13377 0         0 DEBUG_XCI
13378             && print
13379             "XCI: Skipping seqno=$seqno, level=$level exceeds stress level=$stress_level_beta\n";
13380 0         0 next;
13381             }
13382              
13383             # remember how much space is available for patch b1031 above
13384 62         95 my $space =
13385             $maximum_text_length - $len_tol - $rOpts_continuation_indentation;
13386              
13387 62 50       116 if ( $space < 0 ) {
13388 0         0 DEBUG_XCI && print "XCI: Skipping seqno=$seqno, space=$space\n";
13389 0         0 next;
13390             }
13391 62         79 DEBUG_XCI && print "XCI: OK seqno=$seqno, space=$space\n";
13392              
13393 62         149 $available_space{$seqno} = $space;
13394              
13395             # This becomes the next controlling container
13396 62 100       139 push @seqno_stack, $seqno_top if ($seqno_top);
13397 62         134 $seqno_top = $seqno;
13398             }
13399 6         33 return;
13400             } ## end sub extended_ci
13401              
13402             sub braces_left_setup {
13403              
13404             # Called once per file to mark all -bl, -sbl, and -asbl containers
13405 552     552 0 1283 my $self = shift;
13406              
13407 552         1580 my $rOpts_bl = $rOpts->{'opening-brace-on-new-line'};
13408 552         1359 my $rOpts_sbl = $rOpts->{'opening-sub-brace-on-new-line'};
13409 552         1385 my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
13410 552 100 100     4048 return unless ( $rOpts_bl || $rOpts_sbl || $rOpts_asbl );
      66        
13411              
13412 23         61 my $rLL = $self->[_rLL_];
13413 23 50 33     113 return unless ( defined($rLL) && @{$rLL} );
  23         115  
13414              
13415             # We will turn on this hash for braces controlled by these flags:
13416 23         80 my $rbrace_left = $self->[_rbrace_left_];
13417              
13418 23         56 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
13419 23         56 my $ris_asub_block = $self->[_ris_asub_block_];
13420 23         53 my $ris_sub_block = $self->[_ris_sub_block_];
13421 23         51 foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
  23         147  
13422              
13423 62         137 my $block_type = $rblock_type_of_seqno->{$seqno};
13424              
13425             # use -asbl flag for an anonymous sub block
13426 62 100       153 if ( $ris_asub_block->{$seqno} ) {
    100          
13427 14 100       35 if ($rOpts_asbl) {
13428 10         36 $rbrace_left->{$seqno} = 1;
13429             }
13430             }
13431              
13432             # use -sbl flag for a named sub
13433             elsif ( $ris_sub_block->{$seqno} ) {
13434 4 50       23 if ($rOpts_sbl) {
13435 4         18 $rbrace_left->{$seqno} = 1;
13436             }
13437             }
13438              
13439             # use -bl flag if not a sub block of any type
13440             else {
13441 44 100 100     658 if ( $rOpts_bl
      100        
13442             && $block_type =~ /$bl_pattern/
13443             && $block_type !~ /$bl_exclusion_pattern/ )
13444             {
13445 21         56 $rbrace_left->{$seqno} = 1;
13446             }
13447             }
13448             }
13449 23         71 return;
13450             } ## end sub braces_left_setup
13451              
13452             sub bli_adjustment {
13453              
13454             # Called once per file to implement the --brace-left-and-indent option.
13455             # If -bli is set, adds one continuation indentation for certain braces
13456 552     552 0 1362 my $self = shift;
13457 552 100       1934 return unless ( $rOpts->{'brace-left-and-indent'} );
13458 6         21 my $rLL = $self->[_rLL_];
13459 6 50 33     49 return unless ( defined($rLL) && @{$rLL} );
  6         28  
13460              
13461 6         18 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
13462 6         20 my $ris_bli_container = $self->[_ris_bli_container_];
13463 6         19 my $rbrace_left = $self->[_rbrace_left_];
13464 6         14 my $K_opening_container = $self->[_K_opening_container_];
13465 6         14 my $K_closing_container = $self->[_K_closing_container_];
13466              
13467 6         22 foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
  6         43  
13468 49         94 my $block_type = $rblock_type_of_seqno->{$seqno};
13469 49 100 66     747 if ( $block_type
      100        
13470             && $block_type =~ /$bli_pattern/
13471             && $block_type !~ /$bli_exclusion_pattern/ )
13472             {
13473 25         67 $ris_bli_container->{$seqno} = 1;
13474 25         45 $rbrace_left->{$seqno} = 1;
13475 25         42 my $Ko = $K_opening_container->{$seqno};
13476 25         36 my $Kc = $K_closing_container->{$seqno};
13477 25 50 33     121 if ( defined($Ko) && defined($Kc) ) {
13478 25         73 $rLL->[$Kc]->[_CI_LEVEL_] = ++$rLL->[$Ko]->[_CI_LEVEL_];
13479             }
13480             }
13481             }
13482 6         24 return;
13483             } ## end sub bli_adjustment
13484              
13485             sub find_multiline_qw {
13486              
13487 555     555 0 2050 my ( $self, $rqw_lines ) = @_;
13488              
13489             # Multiline qw quotes are not sequenced items like containers { [ (
13490             # but behave in some respects in a similar way. So this routine finds them
13491             # and creates a separate sequence number system for later use.
13492              
13493             # This is straightforward because they always begin at the end of one line
13494             # and end at the beginning of a later line. This is true no matter how we
13495             # finally make our line breaks, so we can find them before deciding on new
13496             # line breaks.
13497              
13498             # Input parameter:
13499             # if $rqw_lines is defined it is a ref to array of all line index numbers
13500             # for which there is a type 'q' qw quote at either end of the line. This
13501             # was defined by sub resync_lines_and_tokens for efficiency.
13502             #
13503              
13504 555         1767 my $rlines = $self->[_rlines_];
13505              
13506             # if $rqw_lines is not defined (this will occur with -io option) then we
13507             # will have to scan all lines.
13508 555 100       1883 if ( !defined($rqw_lines) ) {
13509 3         9 $rqw_lines = [ 0 .. @{$rlines} - 1 ];
  3         13  
13510             }
13511              
13512             # if $rqw_lines is defined but empty, just return because there are no
13513             # multiline qw's
13514             else {
13515 552 100       1129 if ( !@{$rqw_lines} ) { return }
  552         2039  
  503         1342  
13516             }
13517              
13518 52         179 my $rstarting_multiline_qw_seqno_by_K = {};
13519 52         141 my $rending_multiline_qw_seqno_by_K = {};
13520 52         125 my $rKrange_multiline_qw_by_seqno = {};
13521 52         125 my $rmultiline_qw_has_extra_level = {};
13522              
13523 52         1938 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
13524              
13525 52         126 my $rLL = $self->[_rLL_];
13526 52         129 my $qw_seqno;
13527 52         148 my $num_qw_seqno = 0;
13528 52         114 my $K_start_multiline_qw;
13529              
13530             # For reference, here is the old loop, before $rqw_lines became available:
13531             ## foreach my $line_of_tokens ( @{$rlines} ) {
13532 52         135 foreach my $iline ( @{$rqw_lines} ) {
  52         176  
13533 246         437 my $line_of_tokens = $rlines->[$iline];
13534              
13535             # Note that these first checks are required in case we have to scan
13536             # all lines, not just lines with type 'q' at the ends.
13537 246         488 my $line_type = $line_of_tokens->{_line_type};
13538 246 50       560 next unless ( $line_type eq 'CODE' );
13539 246         456 my $rK_range = $line_of_tokens->{_rK_range};
13540 246         373 my ( $Kfirst, $Klast ) = @{$rK_range};
  246         492  
13541 246 100 66     922 next unless ( defined($Kfirst) && defined($Klast) ); # skip blank line
13542              
13543             # Continuing a sequence of qw lines ...
13544 243 100       558 if ( defined($K_start_multiline_qw) ) {
13545 137         277 my $type = $rLL->[$Kfirst]->[_TYPE_];
13546              
13547             # shouldn't happen
13548 137 50       363 if ( $type ne 'q' ) {
13549 0         0 DEVEL_MODE && print STDERR <<EOM;
13550             STRANGE: started multiline qw at K=$K_start_multiline_qw but didn't see q qw at K=$Kfirst\n";
13551             EOM
13552 0         0 $K_start_multiline_qw = undef;
13553 0         0 next;
13554             }
13555 137         498 my $Kprev = $self->K_previous_nonblank($Kfirst);
13556 137         326 my $Knext = $self->K_next_nonblank($Kfirst);
13557 137 50       426 my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
13558 137 50       344 my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
13559 137 100 66     581 if ( $type_m eq 'q' && $type_p ne 'q' ) {
13560 32         198 $rending_multiline_qw_seqno_by_K->{$Kfirst} = $qw_seqno;
13561 32         192 $rKrange_multiline_qw_by_seqno->{$qw_seqno} =
13562             [ $K_start_multiline_qw, $Kfirst ];
13563 32         85 $K_start_multiline_qw = undef;
13564 32         85 $qw_seqno = undef;
13565             }
13566             }
13567              
13568             # Starting a new a sequence of qw lines ?
13569 243 100 100     1017 if ( !defined($K_start_multiline_qw)
13570             && $rLL->[$Klast]->[_TYPE_] eq 'q' )
13571             {
13572 41         167 my $Kprev = $self->K_previous_nonblank($Klast);
13573 41         202 my $Knext = $self->K_next_nonblank($Klast);
13574 41 50       172 my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
13575 41 50       145 my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
13576 41 100 100     254 if ( $type_m ne 'q' && $type_p eq 'q' ) {
13577 32         79 $num_qw_seqno++;
13578 32         93 $qw_seqno = 'q' . $num_qw_seqno;
13579 32         79 $K_start_multiline_qw = $Klast;
13580 32         157 $rstarting_multiline_qw_seqno_by_K->{$Klast} = $qw_seqno;
13581             }
13582             }
13583             }
13584              
13585             # Give multiline qw lists extra indentation instead of CI. This option
13586             # works well but is currently only activated when the -xci flag is set.
13587             # The reason is to avoid unexpected changes in formatting.
13588 52 100       295 if ($rOpts_extended_continuation_indentation) {
13589 1         3 while ( my ( $qw_seqno_x, $rKrange ) =
13590 2         11 each %{$rKrange_multiline_qw_by_seqno} )
13591             {
13592 1         3 my ( $Kbeg, $Kend ) = @{$rKrange};
  1         4  
13593              
13594             # require isolated closing token
13595 1         4 my $token_end = $rLL->[$Kend]->[_TOKEN_];
13596             next
13597             unless ( length($token_end) == 1
13598 1 50 33     13 && ( $is_closing_token{$token_end} || $token_end eq '>' ) );
      33        
13599              
13600             # require isolated opening token
13601 1         4 my $token_beg = $rLL->[$Kbeg]->[_TOKEN_];
13602              
13603             # allow space(s) after the qw
13604 1 50 33     8 if ( length($token_beg) > 3 && substr( $token_beg, 2, 1 ) =~ m/\s/ )
13605             {
13606 0         0 $token_beg =~ s/\s+//;
13607             }
13608              
13609 1 50       6 next unless ( length($token_beg) == 3 );
13610              
13611 1         6 foreach my $KK ( $Kbeg + 1 .. $Kend - 1 ) {
13612 5         10 $rLL->[$KK]->[_LEVEL_]++;
13613 5         8 $rLL->[$KK]->[_CI_LEVEL_] = 0;
13614             }
13615              
13616             # set flag for -wn option, which will remove the level
13617 1         9 $rmultiline_qw_has_extra_level->{$qw_seqno_x} = 1;
13618             }
13619             }
13620              
13621             # For the -lp option we need to mark all parent containers of
13622             # multiline quotes
13623 52 100 66     313 if ( $rOpts_line_up_parentheses && !$rOpts_extended_line_up_parentheses ) {
13624              
13625 1         3 while ( my ( $qw_seqno_x, $rKrange ) =
13626 1         7 each %{$rKrange_multiline_qw_by_seqno} )
13627             {
13628 0         0 my ( $Kbeg, $Kend ) = @{$rKrange};
  0         0  
13629 0         0 my $parent_seqno = $self->parent_seqno_by_K($Kend);
13630 0 0       0 next unless ($parent_seqno);
13631              
13632             # If the parent container exactly surrounds this qw, then -lp
13633             # formatting seems to work so we will not mark it.
13634 0         0 my $is_tightly_contained;
13635 0         0 my $Kn = $self->K_next_nonblank($Kend);
13636 0 0       0 my $seqno_n = defined($Kn) ? $rLL->[$Kn]->[_TYPE_SEQUENCE_] : undef;
13637 0 0 0     0 if ( defined($seqno_n) && $seqno_n eq $parent_seqno ) {
13638              
13639 0         0 my $Kp = $self->K_previous_nonblank($Kbeg);
13640 0 0       0 my $seqno_p =
13641             defined($Kp) ? $rLL->[$Kp]->[_TYPE_SEQUENCE_] : undef;
13642 0 0 0     0 if ( defined($seqno_p) && $seqno_p eq $parent_seqno ) {
13643 0         0 $is_tightly_contained = 1;
13644             }
13645             }
13646              
13647 0 0       0 $ris_excluded_lp_container->{$parent_seqno} = 1
13648             unless ($is_tightly_contained);
13649              
13650             # continue up the tree marking parent containers
13651 0         0 while (1) {
13652 0         0 $parent_seqno = $self->[_rparent_of_seqno_]->{$parent_seqno};
13653             last
13654 0 0 0     0 unless ( defined($parent_seqno)
13655             && $parent_seqno ne SEQ_ROOT );
13656 0         0 $ris_excluded_lp_container->{$parent_seqno} = 1;
13657             }
13658             }
13659             }
13660              
13661 52         151 $self->[_rstarting_multiline_qw_seqno_by_K_] =
13662             $rstarting_multiline_qw_seqno_by_K;
13663 52         150 $self->[_rending_multiline_qw_seqno_by_K_] =
13664             $rending_multiline_qw_seqno_by_K;
13665 52         140 $self->[_rKrange_multiline_qw_by_seqno_] = $rKrange_multiline_qw_by_seqno;
13666 52         129 $self->[_rmultiline_qw_has_extra_level_] = $rmultiline_qw_has_extra_level;
13667              
13668 52         184 return;
13669             } ## end sub find_multiline_qw
13670              
13671 38     38   432 use constant DEBUG_COLLAPSED_LENGTHS => 0;
  38         142  
  38         2869  
13672              
13673             # Minimum space reserved for contents of a code block. A value of 40 has given
13674             # reasonable results. With a large line length, say -l=120, this will not
13675             # normally be noticeable but it will prevent making a mess in some edge cases.
13676 38     38   360 use constant MIN_BLOCK_LEN => 40;
  38         103  
  38         5341  
13677              
13678             my %is_handle_type;
13679              
13680 0         0 BEGIN {
13681 38     38   260 my @q = qw( w C U G i k => );
13682 38         263 @is_handle_type{@q} = (1) x scalar(@q);
13683              
13684 38         263673 my $i = 0;
13685             use constant {
13686 38         4419 _max_prong_len_ => $i++,
13687             _handle_len_ => $i++,
13688             _seqno_o_ => $i++,
13689             _iline_o_ => $i++,
13690             _K_o_ => $i++,
13691             _K_c_ => $i++,
13692             _interrupted_list_rule_ => $i++,
13693 38     38   386 };
  38         170  
13694             } ## end BEGIN
13695              
13696             sub is_fragile_block_type {
13697 0     0 0 0 my ( $self, $block_type, $seqno ) = @_;
13698              
13699             # Given:
13700             # $block_type = the block type of a token, and
13701             # $seqno = its sequence number
13702              
13703             # Return:
13704             # true if this block type stays broken after being broken,
13705             # false otherwise
13706              
13707             # This sub has been added to isolate a tricky decision needed
13708             # to fix issue b1428.
13709              
13710             # The coding here needs to agree with:
13711             # - sub process_line where variable '$rbrace_follower' is set
13712             # - sub process_line_inner_loop where variable '$is_opening_BLOCK' is set,
13713              
13714 0 0 0     0 if ( $is_sort_map_grep_eval{$block_type}
      0        
13715             || $block_type eq 't'
13716             || $self->[_rshort_nested_]->{$seqno} )
13717             {
13718 0         0 return 0;
13719             }
13720              
13721 0         0 return 1;
13722              
13723             } ## end sub is_fragile_block_type
13724              
13725             { ## closure xlp_collapsed_lengths
13726              
13727             my $max_prong_len;
13728             my $len;
13729             my $last_nonblank_type;
13730             my @stack;
13731              
13732             sub xlp_collapsed_lengths_initialize {
13733              
13734 4     4 0 12 $max_prong_len = 0;
13735 4         8 $len = 0;
13736 4         8 $last_nonblank_type = 'b';
13737 4         13 @stack = ();
13738              
13739 4         20 push @stack, [
13740             0, # $max_prong_len,
13741             0, # $handle_len,
13742             SEQ_ROOT, # $seqno,
13743             undef, # $iline,
13744             undef, # $KK,
13745             undef, # $K_c,
13746             undef, # $interrupted_list_rule
13747             ];
13748              
13749 4         6 return;
13750             } ## end sub xlp_collapsed_lengths_initialize
13751              
13752             sub cumulative_length_to_comma {
13753 24     24 0 43 my ( $self, $KK, $K_comma, $K_closing ) = @_;
13754              
13755             # Given:
13756             # $KK = index of starting token, or blank before start
13757             # $K_comma = index of line-ending comma
13758             # $K_closing = index of the container closing token
13759              
13760             # Return:
13761             # $length = cumulative length of the term
13762              
13763 24         38 my $rLL = $self->[_rLL_];
13764 24 50       71 if ( $rLL->[$KK]->[_TYPE_] eq 'b' ) { $KK++ }
  0         0  
13765 24         31 my $length = 0;
13766 24 100 33     197 if (
      66        
      66        
      66        
13767             $KK < $K_comma
13768             && $rLL->[$K_comma]->[_TYPE_] eq ',' # should be true
13769              
13770             # Ignore if terminal comma, causes instability (b1297,
13771             # b1330)
13772             && (
13773             $K_closing - $K_comma > 2
13774             || ( $K_closing - $K_comma == 2
13775             && $rLL->[ $K_comma + 1 ]->[_TYPE_] ne 'b' )
13776             )
13777              
13778             # The comma should be in this container
13779             && ( $rLL->[$K_comma]->[_LEVEL_] - 1 ==
13780             $rLL->[$K_closing]->[_LEVEL_] )
13781             )
13782             {
13783              
13784             # An additional check: if line ends in ), and the ) has vtc then
13785             # skip this estimate. Otherwise, vtc can give oscillating results.
13786             # Fixes b1448. For example, this could be unstable:
13787              
13788             # ( $os ne 'win' ? ( -selectcolor => "red" ) : () ),
13789             # | |^--K_comma
13790             # | ^-- K_prev
13791             # ^--- KK
13792              
13793             # An alternative, possibly better strategy would be to try to turn
13794             # off -vtc locally, but it turns out to be difficult to locate the
13795             # appropriate closing token when it is not on the same line as its
13796             # opening token.
13797              
13798 18         52 my $K_prev = $self->K_previous_nonblank($K_comma);
13799 18 50 33     99 if ( defined($K_prev)
      33        
13800             && $K_prev >= $KK
13801             && $rLL->[$K_prev]->[_TYPE_SEQUENCE_] )
13802             {
13803 0         0 my $token = $rLL->[$K_prev]->[_TOKEN_];
13804 0         0 my $type = $rLL->[$K_prev]->[_TYPE_];
13805 0 0 0     0 if ( $closing_vertical_tightness{$token} && $type ne 'R' ) {
13806             ## type 'R' does not normally get broken, so ignore
13807             ## skip length calculation
13808 0         0 return 0;
13809             }
13810             }
13811 18 50       50 my $starting_len =
13812             $KK >= 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
13813 18         43 $length = $rLL->[$K_comma]->[_CUMULATIVE_LENGTH_] - $starting_len;
13814             }
13815 24         37 return $length;
13816             } ## end sub cumulative_length_to_comma
13817              
13818             sub xlp_collapsed_lengths {
13819              
13820 4     4 0 9 my $self = shift;
13821              
13822             #----------------------------------------------------------------
13823             # Define the collapsed lengths of containers for -xlp indentation
13824             #----------------------------------------------------------------
13825              
13826             # We need an estimate of the minimum required line length starting at
13827             # any opening container for the -xlp style. This is needed to avoid
13828             # using too much indentation space for lower level containers and
13829             # thereby running out of space for outer container tokens due to the
13830             # maximum line length limit.
13831              
13832             # The basic idea is that at each node in the tree we imagine that we
13833             # have a fork with a handle and collapsible prongs:
13834             #
13835             # |------------
13836             # |--------
13837             # ------------|-------
13838             # handle |------------
13839             # |--------
13840             # prongs
13841             #
13842             # Each prong has a minimum collapsed length. The collapsed length at a
13843             # node is the maximum of these minimum lengths, plus the handle length.
13844             # Each of the prongs may itself be a tree node.
13845              
13846             # This is just a rough calculation to get an approximate starting point
13847             # for indentation. Later routines will be more precise. It is
13848             # important that these estimates be independent of the line breaks of
13849             # the input stream in order to avoid instabilities.
13850              
13851 4         9 my $rLL = $self->[_rLL_];
13852 4         9 my $rlines = $self->[_rlines_];
13853 4         8 my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
13854 4         9 my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
13855              
13856 4         7 my $K_start_multiline_qw;
13857 4         8 my $level_start_multiline_qw = 0;
13858              
13859 4         22 xlp_collapsed_lengths_initialize();
13860              
13861             #--------------------------------
13862             # Loop over all lines in the file
13863             #--------------------------------
13864 4         16 my $iline = -1;
13865 4         10 my $skip_next_line;
13866 4         14 foreach my $line_of_tokens ( @{$rlines} ) {
  4         13  
13867 172         234 $iline++;
13868 172 50       273 if ($skip_next_line) {
13869 0         0 $skip_next_line = 0;
13870 0         0 next;
13871             }
13872 172         312 my $line_type = $line_of_tokens->{_line_type};
13873 172 100       292 next if ( $line_type ne 'CODE' );
13874 170         238 my $CODE_type = $line_of_tokens->{_code_type};
13875              
13876             # Always skip blank lines
13877 170 100       336 next if ( $CODE_type eq 'BL' );
13878              
13879             # Note on other line types:
13880             # 'FS' (Format Skipping) lines may contain opening/closing tokens so
13881             # we have to process them to keep the stack correctly sequenced
13882             # 'VB' (Verbatim) lines could be skipped, but testing shows that
13883             # results look better if we include their lengths.
13884              
13885             # Also note that we could exclude -xlp formatting of containers with
13886             # 'FS' and 'VB' lines, but in testing that was not really beneficial
13887              
13888             # So we process tokens in 'FS' and 'VB' lines like all the rest...
13889              
13890 133         182 my $rK_range = $line_of_tokens->{_rK_range};
13891 133         182 my ( $K_first, $K_last ) = @{$rK_range};
  133         230  
13892 133 50 33     414 next unless ( defined($K_first) && defined($K_last) );
13893              
13894 133         244 my $has_comment = $rLL->[$K_last]->[_TYPE_] eq '#';
13895              
13896             # Always ignore block comments
13897 133 100 100     279 next if ( $has_comment && $K_first == $K_last );
13898              
13899             # Handle an intermediate line of a multiline qw quote. These may
13900             # require including some -ci or -i spaces. See cases c098/x063.
13901             # Updated to check all lines (not just $K_first==$K_last) to fix
13902             # b1316
13903 126         161 my $K_begin_loop = $K_first;
13904 126 50       260 if ( $rLL->[$K_first]->[_TYPE_] eq 'q' ) {
13905              
13906 0         0 my $KK = $K_first;
13907 0         0 my $level = $rLL->[$KK]->[_LEVEL_];
13908 0         0 my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
13909              
13910             # remember the level of the start
13911 0 0       0 if ( !defined($K_start_multiline_qw) ) {
13912 0         0 $K_start_multiline_qw = $K_first;
13913 0         0 $level_start_multiline_qw = $level;
13914             my $seqno_qw =
13915             $self->[_rstarting_multiline_qw_seqno_by_K_]
13916 0         0 ->{$K_start_multiline_qw};
13917 0 0       0 if ( !$seqno_qw ) {
13918 0         0 my $Kp = $self->K_previous_nonblank($K_first);
13919 0 0 0     0 if ( defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' ) {
13920              
13921 0         0 $K_start_multiline_qw = $Kp;
13922 0         0 $level_start_multiline_qw =
13923             $rLL->[$K_start_multiline_qw]->[_LEVEL_];
13924             }
13925             else {
13926              
13927             # Fix for b1319, b1320
13928 0         0 $K_start_multiline_qw = undef;
13929             }
13930             }
13931             }
13932              
13933 0 0       0 if ( defined($K_start_multiline_qw) ) {
13934 0         0 $len = $rLL->[$KK]->[_CUMULATIVE_LENGTH_] -
13935             $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
13936              
13937             # We may have to add the spaces of one level or ci level
13938             # ... it depends depends on the -xci flag, the -wn flag,
13939             # and if the qw uses a container token as the quote
13940             # delimiter.
13941              
13942             # First rule: add ci if there is a $ci_level
13943 0 0       0 if ($ci_level) {
    0          
13944 0         0 $len += $rOpts_continuation_indentation;
13945             }
13946              
13947             # Second rule: otherwise, look for an extra indentation
13948             # level from the start and add one indentation level if
13949             # found.
13950             elsif ( $level > $level_start_multiline_qw ) {
13951 0         0 $len += $rOpts_indent_columns;
13952             }
13953              
13954 0 0       0 if ( $len > $max_prong_len ) { $max_prong_len = $len }
  0         0  
13955              
13956 0         0 $last_nonblank_type = 'q';
13957              
13958 0         0 $K_begin_loop = $K_first + 1;
13959              
13960             # We can skip to the next line if more tokens
13961 0 0       0 next if ( $K_begin_loop > $K_last );
13962             }
13963             }
13964              
13965 126         169 $K_start_multiline_qw = undef;
13966              
13967             # Find the terminal token, before any side comment
13968 126         167 my $K_terminal = $K_last;
13969 126 100       204 if ($has_comment) {
13970 5         12 $K_terminal -= 1;
13971 5 50 33     25 $K_terminal -= 1
13972             if ( $rLL->[$K_terminal]->[_TYPE_] eq 'b'
13973             && $K_terminal > $K_first );
13974             }
13975              
13976             # Use length to terminal comma if interrupted list rule applies
13977 126 100 66     370 if ( @stack && $stack[-1]->[_interrupted_list_rule_] ) {
13978 50         95 my $K_c = $stack[-1]->[_K_c_];
13979 50 50       102 if ( defined($K_c) ) {
13980              
13981             #----------------------------------------------------------
13982             # BEGIN patch for issue b1408: If this line ends in an
13983             # opening token, look for the closing token and comma at
13984             # the end of the next line. If so, combine the two lines to
13985             # get the correct sums. This problem seems to require -xlp
13986             # -vtc=2 and blank lines to occur. Use %is_opening_type to
13987             # fix b1431.
13988             #----------------------------------------------------------
13989 50 100 66     117 if ( $is_opening_type{ $rLL->[$K_terminal]->[_TYPE_] }
13990             && !$has_comment )
13991             {
13992 2         4 my $seqno_end = $rLL->[$K_terminal]->[_TYPE_SEQUENCE_];
13993 2         8 my $Kc_test = $rLL->[$K_terminal]->[_KNEXT_SEQ_ITEM_];
13994              
13995             # We are looking for a short broken remnant on the next
13996             # line; something like the third line here (b1408):
13997              
13998             # parent =>
13999             # Moose::Util::TypeConstraints::find_type_constraint(
14000             # 'RefXX' ),
14001             # or this
14002             #
14003             # Help::WorkSubmitter->_filter_chores_and_maybe_warn_user(
14004             # $story_set_all_chores),
14005             # or this (b1431):
14006             # $issue->{
14007             # 'borrowernumber'}, # borrowernumber
14008 2 50 66     35 if ( defined($Kc_test)
      66        
14009             && $seqno_end == $rLL->[$Kc_test]->[_TYPE_SEQUENCE_]
14010             && $rLL->[$Kc_test]->[_LINE_INDEX_] == $iline + 1 )
14011             {
14012 0         0 my $line_of_tokens_next = $rlines->[ $iline + 1 ];
14013             my $rtype_count =
14014 0         0 $rtype_count_by_seqno->{$seqno_end};
14015             my ( $K_first_next, $K_terminal_next ) =
14016 0         0 @{ $line_of_tokens_next->{_rK_range} };
  0         0  
14017              
14018             # backup at a side comment
14019 0 0 0     0 if ( defined($K_terminal_next)
14020             && $rLL->[$K_terminal_next]->[_TYPE_] eq '#' )
14021             {
14022 0         0 my $Kprev =
14023             $self->K_previous_nonblank($K_terminal_next);
14024 0 0 0     0 if ( defined($Kprev)
14025             && $Kprev >= $K_first_next )
14026             {
14027 0         0 $K_terminal_next = $Kprev;
14028             }
14029             }
14030              
14031 0 0 0     0 if (
      0        
      0        
      0        
      0        
      0        
14032             defined($K_terminal_next)
14033              
14034             # next line ends with a comma
14035             && $rLL->[$K_terminal_next]->[_TYPE_] eq ','
14036              
14037             # which follows the closing container token
14038             && (
14039             $K_terminal_next - $Kc_test == 1
14040             || ( $K_terminal_next - $Kc_test == 2
14041             && $rLL->[ $K_terminal_next - 1 ]
14042             ->[_TYPE_] eq 'b' )
14043             )
14044              
14045             # no commas in the container
14046             && ( !defined($rtype_count)
14047             || !$rtype_count->{','} )
14048              
14049             # for now, restrict this to a container with
14050             # just 1 or two tokens
14051             && $K_terminal_next - $K_terminal <= 5
14052              
14053             )
14054             {
14055              
14056             # combine the next line with the current line
14057 0         0 $K_terminal = $K_terminal_next;
14058 0         0 $skip_next_line = 1;
14059 0         0 if (DEBUG_COLLAPSED_LENGTHS) {
14060             print "Combining lines at line $iline\n";
14061             }
14062             }
14063             }
14064             }
14065              
14066             #--------------------------
14067             # END patch for issue b1408
14068             #--------------------------
14069 50 100       90 if ( $rLL->[$K_terminal]->[_TYPE_] eq ',' ) {
14070              
14071 24         57 my $length =
14072             $self->cumulative_length_to_comma( $K_first,
14073             $K_terminal, $K_c );
14074              
14075             # Fix for b1331: at a broken => item, include the
14076             # length of the previous half of the item plus one for
14077             # the missing space
14078 24 50       54 if ( $last_nonblank_type eq '=>' ) {
14079 0         0 $length += $len + 1;
14080             }
14081 24 100       55 if ( $length > $max_prong_len ) {
14082 17         28 $max_prong_len = $length;
14083             }
14084             }
14085             }
14086             }
14087              
14088             #----------------------------------
14089             # Loop over all tokens on this line
14090             #----------------------------------
14091 126         302 $self->xlp_collapse_lengths_inner_loop( $iline, $K_begin_loop,
14092             $K_terminal, $K_last );
14093              
14094             # Now take care of any side comment;
14095 126 100       295 if ($has_comment) {
14096 5 50       32 if ($rOpts_ignore_side_comment_lengths) {
14097 0         0 $len = 0;
14098             }
14099             else {
14100              
14101             # For a side comment when -iscl is not set, measure length from
14102             # the start of the previous nonblank token
14103 5 50       24 my $len0 =
14104             $K_terminal > 0
14105             ? $rLL->[ $K_terminal - 1 ]->[_CUMULATIVE_LENGTH_]
14106             : 0;
14107 5         7 $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] - $len0;
14108 5 100       14 if ( $len > $max_prong_len ) { $max_prong_len = $len }
  2         4  
14109             }
14110             }
14111              
14112             } ## end loop over lines
14113              
14114 4         12 if (DEBUG_COLLAPSED_LENGTHS) {
14115             print "\nCollapsed lengths--\n";
14116             foreach
14117             my $key ( sort { $a <=> $b } keys %{$rcollapsed_length_by_seqno} )
14118             {
14119             my $clen = $rcollapsed_length_by_seqno->{$key};
14120             print "$key -> $clen\n";
14121             }
14122             }
14123              
14124 4         8 return;
14125             } ## end sub xlp_collapsed_lengths
14126              
14127             sub xlp_collapse_lengths_inner_loop {
14128              
14129 126     126 0 219 my ( $self, $iline, $K_begin_loop, $K_terminal, $K_last ) = @_;
14130              
14131 126         184 my $rLL = $self->[_rLL_];
14132 126         186 my $K_closing_container = $self->[_K_closing_container_];
14133              
14134 126         199 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
14135 126         172 my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
14136 126         160 my $ris_permanently_broken = $self->[_ris_permanently_broken_];
14137 126         175 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
14138 126         161 my $rhas_broken_list = $self->[_rhas_broken_list_];
14139 126         170 my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
14140              
14141             #----------------------------------
14142             # Loop over tokens on this line ...
14143             #----------------------------------
14144 126         231 foreach my $KK ( $K_begin_loop .. $K_terminal ) {
14145              
14146 665         1005 my $type = $rLL->[$KK]->[_TYPE_];
14147 665 100       1133 next if ( $type eq 'b' );
14148              
14149             #------------------------
14150             # Handle sequenced tokens
14151             #------------------------
14152 471         655 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
14153 471 100       732 if ($seqno) {
14154              
14155 120         196 my $token = $rLL->[$KK]->[_TOKEN_];
14156              
14157             #----------------------------
14158             # Entering a new container...
14159             #----------------------------
14160 120 100 66     444 if ( $is_opening_token{$token}
    50 33        
14161             && defined( $K_closing_container->{$seqno} ) )
14162             {
14163              
14164             # save current prong length
14165 60         92 $stack[-1]->[_max_prong_len_] = $max_prong_len;
14166 60         90 $max_prong_len = 0;
14167              
14168             # Start new prong one level deeper
14169 60         82 my $handle_len = 0;
14170 60 100       162 if ( $rblock_type_of_seqno->{$seqno} ) {
    100          
14171              
14172             # code blocks do not use -lp indentation, but behave as
14173             # if they had a handle of one indentation length
14174 10         15 $handle_len = $rOpts_indent_columns;
14175              
14176             }
14177             elsif ( $is_handle_type{$last_nonblank_type} ) {
14178 40         60 $handle_len = $len;
14179 40 100 66     204 $handle_len += 1
14180             if ( $KK > 0 && $rLL->[ $KK - 1 ]->[_TYPE_] eq 'b' );
14181             }
14182              
14183             # Set a flag if the 'Interrupted List Rule' will be applied
14184             # (see sub copy_old_breakpoints).
14185             # - Added check on has_broken_list to fix issue b1298
14186              
14187             my $interrupted_list_rule =
14188             $ris_permanently_broken->{$seqno}
14189             && $ris_list_by_seqno->{$seqno}
14190 60   66     223 && !$rhas_broken_list->{$seqno}
14191             && !$rOpts_ignore_old_breakpoints;
14192              
14193             # NOTES: Since we are looking at old line numbers we have
14194             # to be very careful not to introduce an instability.
14195              
14196             # This following causes instability (b1288-b1296):
14197             # $interrupted_list_rule ||=
14198             # $rOpts_break_at_old_comma_breakpoints;
14199              
14200             # - We could turn off the interrupted list rule if there is
14201             # a broken sublist, to follow 'Compound List Rule 1'.
14202             # - We could use the _rhas_broken_list_ flag for this.
14203             # - But it seems safer not to do this, to avoid
14204             # instability, since the broken sublist could be
14205             # temporary. It seems better to let the formatting
14206             # stabilize by itself after one or two iterations.
14207             # - So, not doing this for now
14208              
14209             # Turn off the interrupted list rule if -vmll is set and a
14210             # list has '=>' characters. This avoids instabilities due
14211             # to dependence on old line breaks; issue b1325.
14212 60 50 66     174 if ( $interrupted_list_rule
14213             && $rOpts_variable_maximum_line_length )
14214             {
14215 0         0 my $rtype_count = $rtype_count_by_seqno->{$seqno};
14216 0 0 0     0 if ( $rtype_count && $rtype_count->{'=>'} ) {
14217 0         0 $interrupted_list_rule = 0;
14218             }
14219             }
14220              
14221 60         90 my $K_c = $K_closing_container->{$seqno};
14222              
14223             # Add length of any terminal list item if interrupted
14224             # so that the result is the same as if the term is
14225             # in the next line (b1446).
14226              
14227 60 50 66     134 if (
      33        
14228             $interrupted_list_rule
14229             && $KK < $K_terminal
14230              
14231             # The line should end in a comma
14232             # NOTE: this currently assumes break after comma.
14233             # As long as the other call to cumulative_length..
14234             # makes the same assumption we should remain stable.
14235             && $rLL->[$K_terminal]->[_TYPE_] eq ','
14236              
14237             )
14238             {
14239 0         0 $max_prong_len =
14240             $self->cumulative_length_to_comma( $KK + 1,
14241             $K_terminal, $K_c );
14242             }
14243              
14244 60         208 push @stack, [
14245              
14246             $max_prong_len,
14247             $handle_len,
14248             $seqno,
14249             $iline,
14250             $KK,
14251             $K_c,
14252             $interrupted_list_rule
14253             ];
14254              
14255             }
14256              
14257             #--------------------
14258             # Exiting a container
14259             #--------------------
14260             elsif ( $is_closing_token{$token} && @stack ) {
14261              
14262             # The current prong ends - get its handle
14263 60         113 my $item = pop @stack;
14264 60         94 my $handle_len = $item->[_handle_len_];
14265 60         90 my $seqno_o = $item->[_seqno_o_];
14266 60         76 my $iline_o = $item->[_iline_o_];
14267 60         83 my $K_o = $item->[_K_o_];
14268 60         83 my $K_c_expect = $item->[_K_c_];
14269 60         79 my $collapsed_len = $max_prong_len;
14270              
14271 60 50       118 if ( $seqno_o ne $seqno ) {
14272              
14273             # This can happen if input file has brace errors.
14274             # Otherwise it shouldn't happen. Not fatal but -lp
14275             # formatting could get messed up.
14276 0         0 if ( DEVEL_MODE && !get_saw_brace_error() ) {
14277             Fault(<<EOM);
14278             sequence numbers differ; at CLOSING line $iline, seq=$seqno, Kc=$KK .. at OPENING line $iline_o, seq=$seqno_o, Ko=$K_o, expecting Kc=$K_c_expect
14279             EOM
14280             }
14281             }
14282              
14283             #------------------------------------------
14284             # Rules to avoid scrunching code blocks ...
14285             #------------------------------------------
14286             # Some test cases:
14287             # c098/x107 x108 x110 x112 x114 x115 x117 x118 x119
14288 60         95 my $block_type = $rblock_type_of_seqno->{$seqno};
14289 60 100       117 if ($block_type) {
14290              
14291 10         20 my $K_c = $KK;
14292 10         17 my $block_length = MIN_BLOCK_LEN;
14293 10         19 my $is_one_line_block;
14294 10         19 my $level = $rLL->[$K_o]->[_LEVEL_];
14295 10 50 33     44 if ( defined($K_o) && defined($K_c) ) {
14296              
14297             # note: fixed 3 May 2022 (removed 'my')
14298 10         19 $block_length =
14299             $rLL->[ $K_c - 1 ]->[_CUMULATIVE_LENGTH_] -
14300             $rLL->[$K_o]->[_CUMULATIVE_LENGTH_];
14301 10         18 $is_one_line_block = $iline == $iline_o;
14302             }
14303              
14304             # Code block rule 1: Use the total block length if
14305             # it is less than the minimum.
14306 10 100 33     45 if ( $block_length < MIN_BLOCK_LEN ) {
    50 33        
    50          
14307 6         13 $collapsed_len = $block_length;
14308             }
14309              
14310             # Code block rule 2: Use the full length of a
14311             # one-line block to avoid breaking it, unless
14312             # extremely long. We do not need to do a precise
14313             # check here, because if it breaks then it will
14314             # stay broken on later iterations.
14315             elsif (
14316             $is_one_line_block
14317             && $block_length <
14318             $maximum_line_length_at_level[$level]
14319              
14320             # But skip this for blocks types which can reform,
14321             # like sort/map/grep/eval blocks, to avoid
14322             # instability (b1345, b1428)
14323             && $self->is_fragile_block_type( $block_type,
14324             $seqno )
14325             )
14326             {
14327 0         0 $collapsed_len = $block_length;
14328             }
14329              
14330             # Code block rule 3: Otherwise the length should be
14331             # at least MIN_BLOCK_LEN to avoid scrunching code
14332             # blocks.
14333             elsif ( $collapsed_len < MIN_BLOCK_LEN ) {
14334 0         0 $collapsed_len = MIN_BLOCK_LEN;
14335             }
14336             }
14337              
14338             # Store the result. Some extra space, '2', allows for
14339             # length of an opening token, inside space, comma, ...
14340             # This constant has been tuned to give good overall
14341             # results.
14342 60         79 $collapsed_len += 2;
14343 60         118 $rcollapsed_length_by_seqno->{$seqno} = $collapsed_len;
14344              
14345             # Restart scanning the lower level prong
14346 60 50       105 if (@stack) {
14347 60         86 $max_prong_len = $stack[-1]->[_max_prong_len_];
14348 60         102 $collapsed_len += $handle_len;
14349 60 100       130 if ( $collapsed_len > $max_prong_len ) {
14350 33         65 $max_prong_len = $collapsed_len;
14351             }
14352             }
14353             }
14354              
14355             # it is a ternary - no special processing for these yet
14356             else {
14357              
14358             }
14359              
14360 120         192 $len = 0;
14361 120         161 $last_nonblank_type = $type;
14362 120         228 next;
14363             }
14364              
14365             #----------------------------
14366             # Handle non-container tokens
14367             #----------------------------
14368 351         461 my $token_length = $rLL->[$KK]->[_TOKEN_LENGTH_];
14369              
14370             # Count lengths of things like 'xx => yy' as a single item
14371 351 100       751 if ( $type eq '=>' ) {
    100          
    100          
14372 11         26 $len += $token_length + 1;
14373 11 100       31 if ( $len > $max_prong_len ) { $max_prong_len = $len }
  3         16  
14374             }
14375             elsif ( $last_nonblank_type eq '=>' ) {
14376 9         20 $len += $token_length;
14377 9 100       24 if ( $len > $max_prong_len ) { $max_prong_len = $len }
  1         9  
14378              
14379             # but only include one => per item
14380 9         12 $len = $token_length;
14381             }
14382              
14383             # include everything to end of line after a here target
14384             elsif ( $type eq 'h' ) {
14385 1         4 $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] -
14386             $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
14387 1 50       4 if ( $len > $max_prong_len ) { $max_prong_len = $len }
  1         11  
14388             }
14389              
14390             # for everything else just use the token length
14391             else {
14392 330         398 $len = $token_length;
14393 330 100       557 if ( $len > $max_prong_len ) { $max_prong_len = $len }
  58         77  
14394             }
14395 351         569 $last_nonblank_type = $type;
14396              
14397             } ## end loop over tokens on this line
14398              
14399 126         216 return;
14400              
14401             } ## end sub xlp_collapse_lengths_inner_loop
14402              
14403             } ## end closure xlp_collapsed_lengths
14404              
14405             sub is_excluded_lp {
14406              
14407             # Decide if this container is excluded by user request:
14408             # returns true if this token is excluded (i.e., may not use -lp)
14409             # returns false otherwise
14410              
14411             # The control hash can either describe:
14412             # what to exclude: $line_up_parentheses_control_is_lxpl = 1, or
14413             # what to include: $line_up_parentheses_control_is_lxpl = 0
14414              
14415             # Input parameter:
14416             # $KK = index of the container opening token
14417              
14418 320     320 0 531 my ( $self, $KK ) = @_;
14419 320         450 my $rLL = $self->[_rLL_];
14420 320         436 my $rtoken_vars = $rLL->[$KK];
14421 320         485 my $token = $rtoken_vars->[_TOKEN_];
14422 320         465 my $rflags = $line_up_parentheses_control_hash{$token};
14423              
14424             #-----------------------------------------------
14425             # TEST #1: check match to listed container types
14426             #-----------------------------------------------
14427 320 100       583 if ( !defined($rflags) ) {
14428              
14429             # There is no entry for this container, so we are done
14430 241         683 return !$line_up_parentheses_control_is_lxpl;
14431             }
14432              
14433 79         110 my ( $flag1, $flag2 ) = @{$rflags};
  79         158  
14434              
14435             #-----------------------------------------------------------
14436             # TEST #2: check match to flag1, the preceding nonblank word
14437             #-----------------------------------------------------------
14438 79   66     214 my $match_flag1 = !defined($flag1) || $flag1 eq '*';
14439 79 100       149 if ( !$match_flag1 ) {
14440              
14441             # Find the previous token
14442 39         59 my ( $is_f, $is_k, $is_w );
14443 39         91 my $Kp = $self->K_previous_nonblank($KK);
14444 39 50       80 if ( defined($Kp) ) {
14445 39         61 my $type_p = $rLL->[$Kp]->[_TYPE_];
14446 39         60 my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
14447              
14448             # keyword?
14449 39         54 $is_k = $type_p eq 'k';
14450              
14451             # function call?
14452 39         67 $is_f = $self->[_ris_function_call_paren_]->{$seqno};
14453              
14454             # either keyword or function call?
14455 39   100     106 $is_w = $is_k || $is_f;
14456             }
14457              
14458             # Check for match based on flag1 and the previous token:
14459 39 50       127 if ( $flag1 eq 'k' ) { $match_flag1 = $is_k }
  0 50       0  
    100          
    100          
    50          
    50          
14460 0         0 elsif ( $flag1 eq 'K' ) { $match_flag1 = !$is_k }
14461 13         20 elsif ( $flag1 eq 'f' ) { $match_flag1 = $is_f }
14462 13         20 elsif ( $flag1 eq 'F' ) { $match_flag1 = !$is_f }
14463 0         0 elsif ( $flag1 eq 'w' ) { $match_flag1 = $is_w }
14464 13         20 elsif ( $flag1 eq 'W' ) { $match_flag1 = !$is_w }
14465             ## else { no match found }
14466             }
14467              
14468             # See if we can exclude this based on the flag1 test...
14469 79 100       120 if ($line_up_parentheses_control_is_lxpl) {
14470 66 100       153 return 1 if ($match_flag1);
14471             }
14472             else {
14473 13 100       30 return 1 if ( !$match_flag1 );
14474             }
14475              
14476             #-------------------------------------------------------------
14477             # TEST #3: exclusion based on flag2 and the container contents
14478             #-------------------------------------------------------------
14479              
14480             # Note that this is an exclusion test for both -lpxl or -lpil input methods
14481             # The options are:
14482             # 0 or blank: ignore container contents
14483             # 1 exclude non-lists or lists with sublists
14484             # 2 same as 1 but also exclude lists with code blocks
14485              
14486 30         37 my $match_flag2;
14487 30 50       56 if ($flag2) {
14488              
14489 30         44 my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
14490              
14491 30         52 my $is_list = $self->[_ris_list_by_seqno_]->{$seqno};
14492 30         46 my $has_list = $self->[_rhas_list_]->{$seqno};
14493 30         48 my $has_code_block = $self->[_rhas_code_block_]->{$seqno};
14494 30         45 my $has_ternary = $self->[_rhas_ternary_]->{$seqno};
14495              
14496 30 100 100     187 if ( !$is_list
      100        
      100        
      100        
14497             || $has_list
14498             || $flag2 eq '2' && ( $has_code_block || $has_ternary ) )
14499             {
14500 13         36 $match_flag2 = 1;
14501             }
14502             }
14503 30         88 return $match_flag2;
14504             } ## end sub is_excluded_lp
14505              
14506             sub set_excluded_lp_containers {
14507              
14508 555     555 0 1575 my ($self) = @_;
14509 555 100       1816 return unless ($rOpts_line_up_parentheses);
14510 31         89 my $rLL = $self->[_rLL_];
14511 31 50 33     157 return unless ( defined($rLL) && @{$rLL} );
  31         143  
14512              
14513 31         107 my $K_opening_container = $self->[_K_opening_container_];
14514 31         93 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
14515 31         78 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
14516              
14517 31         71 foreach my $seqno ( keys %{$K_opening_container} ) {
  31         180  
14518              
14519             # code blocks are always excluded by the -lp coding so we can skip them
14520 363 100       723 next if ( $rblock_type_of_seqno->{$seqno} );
14521              
14522 320         483 my $KK = $K_opening_container->{$seqno};
14523 320 50       1429 next unless defined($KK);
14524              
14525             # see if a user exclusion rule turns off -lp for this container
14526 320 100       649 if ( $self->is_excluded_lp($KK) ) {
14527 71         180 $ris_excluded_lp_container->{$seqno} = 1;
14528             }
14529             }
14530 31         127 return;
14531             } ## end sub set_excluded_lp_containers
14532              
14533             ######################################
14534             # CODE SECTION 6: Process line-by-line
14535             ######################################
14536              
14537             sub process_all_lines {
14538              
14539             #----------------------------------------------------------
14540             # Main loop to format all lines of a file according to type
14541             #----------------------------------------------------------
14542              
14543 555     555 0 1377 my $self = shift;
14544 555         1357 my $rlines = $self->[_rlines_];
14545 555         1557 my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
14546 555         1363 my $file_writer_object = $self->[_file_writer_object_];
14547 555         1363 my $logger_object = $self->[_logger_object_];
14548 555         1281 my $vertical_aligner_object = $self->[_vertical_aligner_object_];
14549 555         1325 my $save_logfile = $self->[_save_logfile_];
14550              
14551             # Flag to prevent blank lines when POD occurs in a format skipping sect.
14552 555         1176 my $in_format_skipping_section;
14553              
14554             # set locations for blanks around long runs of keywords
14555 555         3014 my $rwant_blank_line_after = $self->keyword_group_scan();
14556              
14557 555         1366 my $line_type = EMPTY_STRING;
14558 555         1218 my $i_last_POD_END = -10;
14559 555         1228 my $i = -1;
14560 555         1106 foreach my $line_of_tokens ( @{$rlines} ) {
  555         1650  
14561              
14562             # insert blank lines requested for keyword sequences
14563 7647 100 100     21432 if ( defined( $rwant_blank_line_after->{$i} )
14564             && $rwant_blank_line_after->{$i} == 1 )
14565             {
14566 12         46 $self->want_blank_line();
14567             }
14568              
14569 7647         12235 $i++;
14570              
14571 7647         11885 my $last_line_type = $line_type;
14572 7647         19673 $line_type = $line_of_tokens->{_line_type};
14573 7647         16908 my $input_line = $line_of_tokens->{_line_text};
14574              
14575             # _line_type codes are:
14576             # SYSTEM - system-specific code before hash-bang line
14577             # CODE - line of perl code (including comments)
14578             # POD_START - line starting pod, such as '=head'
14579             # POD - pod documentation text
14580             # POD_END - last line of pod section, '=cut'
14581             # HERE - text of here-document
14582             # HERE_END - last line of here-doc (target word)
14583             # FORMAT - format section
14584             # FORMAT_END - last line of format section, '.'
14585             # SKIP - code skipping section
14586             # SKIP_END - last line of code skipping section, '#>>V'
14587             # DATA_START - __DATA__ line
14588             # DATA - unidentified text following __DATA__
14589             # END_START - __END__ line
14590             # END - unidentified text following __END__
14591             # ERROR - we are in big trouble, probably not a perl script
14592              
14593             # put a blank line after an =cut which comes before __END__ and __DATA__
14594             # (required by podchecker)
14595 7647 100 100     18001 if ( $last_line_type eq 'POD_END' && !$self->[_saw_END_or_DATA_] ) {
14596 13         36 $i_last_POD_END = $i;
14597 13         61 $file_writer_object->reset_consecutive_blank_lines();
14598 13 50 66     146 if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
14599 0         0 $self->want_blank_line();
14600             }
14601             }
14602              
14603             # handle line of code..
14604 7647 100       15214 if ( $line_type eq 'CODE' ) {
14605              
14606 7478         15282 my $CODE_type = $line_of_tokens->{_code_type};
14607 7478         12132 $in_format_skipping_section = $CODE_type eq 'FS';
14608              
14609             # Handle blank lines
14610 7478 100       13864 if ( $CODE_type eq 'BL' ) {
14611              
14612             # Keep this blank? Start with the flag -kbl=n, where
14613             # n=0 ignore all old blank lines
14614             # n=1 stable: keep old blanks, but limited by -mbl=n
14615             # n=2 keep all old blank lines, regardless of -mbl=n
14616             # If n=0 we delete all old blank lines and let blank line
14617             # rules generate any needed blank lines.
14618 807         1885 my $kgb_keep = $rOpts_keep_old_blank_lines;
14619              
14620             # Then delete lines requested by the keyword-group logic if
14621             # allowed
14622 807 100 100     4603 if ( $kgb_keep == 1
      100        
14623             && defined( $rwant_blank_line_after->{$i} )
14624             && $rwant_blank_line_after->{$i} == 2 )
14625             {
14626 3         7 $kgb_keep = 0;
14627             }
14628              
14629             # But always keep a blank line following an =cut
14630 807 50 66     2751 if ( $i - $i_last_POD_END < 3 && !$kgb_keep ) {
14631 0         0 $kgb_keep = 1;
14632             }
14633              
14634 807 100       1907 if ($kgb_keep) {
14635 779         2799 $self->flush($CODE_type);
14636 779         3624 $file_writer_object->write_blank_code_line(
14637             $rOpts_keep_old_blank_lines == 2 );
14638 779         1699 $self->[_last_line_leading_type_] = 'b';
14639             }
14640 807         3013 next;
14641             }
14642             else {
14643              
14644             # Let logger see all non-blank lines of code. This is a slow
14645             # operation so we avoid it if it is not going to be saved.
14646 6671 100 66     15209 if ( $save_logfile && $logger_object ) {
14647 6         29 $logger_object->black_box( $line_of_tokens,
14648             $vertical_aligner_object->get_output_line_number );
14649             }
14650             }
14651              
14652             # Handle Format Skipping (FS) and Verbatim (VB) Lines
14653 6671 100 100     23518 if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
14654 98         413 $self->write_unindented_line("$input_line");
14655 98         386 $file_writer_object->reset_consecutive_blank_lines();
14656 98         221 next;
14657             }
14658              
14659             # Handle all other lines of code
14660 6573         16972 $self->process_line_of_CODE($line_of_tokens);
14661             }
14662              
14663             # handle line of non-code..
14664             else {
14665              
14666             # set special flags
14667 169         288 my $skip_line = 0;
14668 169 100 100     944 if ( substr( $line_type, 0, 3 ) eq 'POD' ) {
    100          
    100          
14669              
14670             # Pod docs should have a preceding blank line. But stay
14671             # out of __END__ and __DATA__ sections, because
14672             # the user may be using this section for any purpose whatsoever
14673 63 100       164 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
  12         19  
14674 63 100       135 if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
  6         40  
14675 63 100 100     354 if ( !$skip_line
      100        
      100        
14676             && !$in_format_skipping_section
14677             && $line_type eq 'POD_START'
14678             && !$self->[_saw_END_or_DATA_] )
14679             {
14680 8         44 $self->want_blank_line();
14681             }
14682             }
14683              
14684             # leave the blank counters in a predictable state
14685             # after __END__ or __DATA__
14686             elsif ( $line_type eq 'END_START' || $line_type eq 'DATA_START' ) {
14687 7         34 $file_writer_object->reset_consecutive_blank_lines();
14688 7         35 $self->[_saw_END_or_DATA_] = 1;
14689             }
14690              
14691             # Patch to avoid losing blank lines after a code-skipping block;
14692             # fixes case c047.
14693             elsif ( $line_type eq 'SKIP_END' ) {
14694 2         15 $file_writer_object->reset_consecutive_blank_lines();
14695             }
14696              
14697             # write unindented non-code line
14698 169 100       405 if ( !$skip_line ) {
14699 157         416 $self->write_unindented_line($input_line);
14700             }
14701             }
14702             }
14703 555         2426 return;
14704              
14705             } ## end sub process_all_lines
14706              
14707             { ## closure keyword_group_scan
14708              
14709             # this is the return var
14710             my $rhash_of_desires;
14711              
14712             # user option variables for -kgb
14713             my (
14714              
14715             $rOpts_kgb_after,
14716             $rOpts_kgb_before,
14717             $rOpts_kgb_delete,
14718             $rOpts_kgb_inside,
14719             $rOpts_kgb_size_max,
14720             $rOpts_kgb_size_min,
14721              
14722             );
14723              
14724             # group variables, initialized by kgb_initialize_group_vars
14725             my ( $ibeg, $iend, $count, $level_beg, $K_closing );
14726             my ( @iblanks, @group, @subgroup );
14727              
14728             # line variables, updated by sub keyword_group_scan
14729             my ( $line_type, $CODE_type, $K_first, $K_last );
14730             my $number_of_groups_seen;
14731              
14732             #------------------------
14733             # -kgb helper subroutines
14734             #------------------------
14735              
14736             sub kgb_initialize_options {
14737              
14738             # check and initialize user options for -kgb
14739             # return error flag:
14740             # true for some input error, do not continue
14741             # false if ok
14742              
14743             # Local copies of the various control parameters
14744 543     543 0 1541 $rOpts_kgb_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba'
14745 543         1344 $rOpts_kgb_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb'
14746 543         1349 $rOpts_kgb_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd'
14747 543         2662 $rOpts_kgb_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi'
14748              
14749             # A range of sizes can be input with decimal notation like 'min.max'
14750             # with any number of dots between the two numbers. Examples:
14751             # string => min max matches
14752             # 1.1 1 1 exactly 1
14753             # 1.3 1 3 1,2, or 3
14754             # 1..3 1 3 1,2, or 3
14755             # 5 5 - 5 or more
14756             # 6. 6 - 6 or more
14757             # .2 - 2 up to 2
14758             # 1.0 1 0 nothing
14759 543         1367 my $rOpts_kgb_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs'
14760 543         3152 ( $rOpts_kgb_size_min, $rOpts_kgb_size_max ) = split /\.+/,
14761             $rOpts_kgb_size;
14762 543 50 33     7990 if ( $rOpts_kgb_size_min && $rOpts_kgb_size_min !~ /^\d+$/
      33        
      33        
14763             || $rOpts_kgb_size_max && $rOpts_kgb_size_max !~ /^\d+$/ )
14764             {
14765 0         0 Warn(<<EOM);
14766             Unexpected value for -kgbs: '$rOpts_kgb_size'; expecting 'min' or 'min.max';
14767             ignoring all -kgb flags
14768             EOM
14769              
14770             # Turn this option off so that this message does not keep repeating
14771             # during iterations and other files.
14772 0         0 $rOpts->{'keyword-group-blanks-size'} = EMPTY_STRING;
14773 0         0 return $rhash_of_desires;
14774             }
14775 543 50       2031 $rOpts_kgb_size_min = 1 unless ($rOpts_kgb_size_min);
14776              
14777 543 50 33     2193 if ( $rOpts_kgb_size_max && $rOpts_kgb_size_max < $rOpts_kgb_size_min )
14778             {
14779 0         0 return $rhash_of_desires;
14780             }
14781              
14782             # check codes for $rOpts_kgb_before and
14783             # $rOpts_kgb_after:
14784             # 0 = never (delete if exist)
14785             # 1 = stable (keep unchanged)
14786             # 2 = always (insert if missing)
14787 543 50 33     8511 return $rhash_of_desires
      33        
14788             unless $rOpts_kgb_size_min > 0
14789             && ( $rOpts_kgb_before != 1
14790             || $rOpts_kgb_after != 1
14791             || $rOpts_kgb_inside
14792             || $rOpts_kgb_delete );
14793              
14794 6         24 return;
14795             } ## end sub kgb_initialize_options
14796              
14797             sub kgb_initialize_group_vars {
14798              
14799             # Definitions:
14800             # $ibeg = first line index of this entire group
14801             # $iend = last line index of this entire group
14802             # $count = total number of keywords seen in this entire group
14803             # $level_beg = indentation level of this group
14804             # @group = [ $i, $token, $count ] =list of all keywords & blanks
14805             # @subgroup = $j, index of group where token changes
14806             # @iblanks = line indexes of blank lines in input stream in this group
14807             # where i=starting line index
14808             # token (the keyword)
14809             # count = number of this token in this subgroup
14810             # j = index in group where token changes
14811 31     31 0 51 $ibeg = -1;
14812 31         42 $iend = undef;
14813 31         47 $level_beg = -1;
14814 31         52 $K_closing = undef;
14815 31         55 $count = 0;
14816 31         78 @group = ();
14817 31         48 @subgroup = ();
14818 31         51 @iblanks = ();
14819 31         46 return;
14820             } ## end sub kgb_initialize_group_vars
14821              
14822             sub kgb_initialize_line_vars {
14823 187     187 0 286 $CODE_type = EMPTY_STRING;
14824 187         232 $K_first = undef;
14825 187         241 $K_last = undef;
14826 187         278 $line_type = EMPTY_STRING;
14827 187         253 return;
14828             } ## end sub kgb_initialize_line_vars
14829              
14830             sub kgb_initialize {
14831              
14832             # initialize all closure variables for -kgb
14833             # return:
14834             # true to cause immediate exit (something is wrong)
14835             # false to continue ... all is okay
14836              
14837             # This is the return variable:
14838 543     543 0 1766 $rhash_of_desires = {};
14839              
14840             # initialize and check user options;
14841 543         2296 my $quit = kgb_initialize_options();
14842 543 100       1977 if ($quit) { return $quit }
  537         1505  
14843              
14844             # initialize variables for the current group and subgroups:
14845 6         27 kgb_initialize_group_vars();
14846              
14847             # initialize variables for the most recently seen line:
14848 6         26 kgb_initialize_line_vars();
14849              
14850 6         14 $number_of_groups_seen = 0;
14851              
14852             # all okay
14853 6         14 return;
14854             } ## end sub kgb_initialize
14855              
14856             sub kgb_insert_blank_after {
14857 12     12 0 22 my ($i) = @_;
14858 12         36 $rhash_of_desires->{$i} = 1;
14859 12         25 my $ip = $i + 1;
14860 12 50 33     40 if ( defined( $rhash_of_desires->{$ip} )
14861             && $rhash_of_desires->{$ip} == 2 )
14862             {
14863 0         0 $rhash_of_desires->{$ip} = 0;
14864             }
14865 12         31 return;
14866             } ## end sub kgb_insert_blank_after
14867              
14868             sub kgb_split_into_sub_groups {
14869              
14870             # place blanks around long sub-groups of keywords
14871             # ...if requested
14872 9 50   9 0 23 return unless ($rOpts_kgb_inside);
14873              
14874             # loop over sub-groups, index k
14875 9         23 push @subgroup, scalar @group;
14876 9         21 my $kbeg = 1;
14877 9         19 my $kend = @subgroup - 1;
14878 9         28 foreach my $k ( $kbeg .. $kend ) {
14879              
14880             # index j runs through all keywords found
14881 23         36 my $j_b = $subgroup[ $k - 1 ];
14882 23         39 my $j_e = $subgroup[$k] - 1;
14883              
14884             # index i is the actual line number of a keyword
14885 23         39 my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] };
  23         43  
14886 23         39 my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] };
  23         41  
14887 23         43 my $num = $count_e - $count_b + 1;
14888              
14889             # This subgroup runs from line $ib to line $ie-1, but may contain
14890             # blank lines
14891 23 100       64 if ( $num >= $rOpts_kgb_size_min ) {
14892              
14893             # if there are blank lines, we require that at least $num lines
14894             # be non-blank up to the boundary with the next subgroup.
14895 5         14 my $nog_b = my $nog_e = 1;
14896 5 50 33     22 if ( @iblanks && !$rOpts_kgb_delete ) {
14897 0         0 my $j_bb = $j_b + $num - 1;
14898 0         0 my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] };
  0         0  
14899 0         0 $nog_b = $count_bb - $count_b + 1 == $num;
14900              
14901 0         0 my $j_ee = $j_e - ( $num - 1 );
14902 0         0 my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] };
  0         0  
14903 0         0 $nog_e = $count_e - $count_ee + 1 == $num;
14904             }
14905 5 100 66     38 if ( $nog_b && $k > $kbeg ) {
14906 3         12 kgb_insert_blank_after( $i_b - 1 );
14907             }
14908 5 100 66     40 if ( $nog_e && $k < $kend ) {
14909             my ( $i_ep, $tok_ep, $count_ep ) =
14910 2         5 @{ $group[ $j_e + 1 ] };
  2         5  
14911 2         10 kgb_insert_blank_after( $i_ep - 1 );
14912             }
14913             }
14914             }
14915 9         23 return;
14916             } ## end sub kgb_split_into_sub_groups
14917              
14918             sub kgb_delete_if_blank {
14919 0     0 0 0 my ( $self, $i ) = @_;
14920              
14921             # delete line $i if it is blank
14922 0         0 my $rlines = $self->[_rlines_];
14923 0 0 0     0 return unless ( $i >= 0 && $i < @{$rlines} );
  0         0  
14924 0 0       0 return if ( $rlines->[$i]->{_line_type} ne 'CODE' );
14925 0         0 my $code_type = $rlines->[$i]->{_code_type};
14926 0 0       0 if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
  0         0  
14927 0         0 return;
14928             } ## end sub kgb_delete_if_blank
14929              
14930             sub kgb_delete_inner_blank_lines {
14931              
14932             # always remove unwanted trailing blank lines from our list
14933 6 100   6 0 22 return unless (@iblanks);
14934 1         6 while ( my $ibl = pop(@iblanks) ) {
14935 1 50       6 if ( $ibl < $iend ) { push @iblanks, $ibl; last }
  1         4  
  1         4  
14936 0         0 $iend = $ibl;
14937             }
14938              
14939             # now mark mark interior blank lines for deletion if requested
14940 1 50       7 return unless ($rOpts_kgb_delete);
14941              
14942 1         5 while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
  3         12  
14943              
14944 1         3 return;
14945             } ## end sub kgb_delete_inner_blank_lines
14946              
14947             sub kgb_end_group {
14948              
14949             # end a group of keywords
14950 25     25 0 53 my ( $self, $bad_ending ) = @_;
14951 25 100 66     97 if ( defined($ibeg) && $ibeg >= 0 ) {
14952              
14953             # then handle sufficiently large groups
14954 9 100       33 if ( $count >= $rOpts_kgb_size_min ) {
14955              
14956 6         20 $number_of_groups_seen++;
14957              
14958             # do any blank deletions regardless of the count
14959 6         23 kgb_delete_inner_blank_lines();
14960              
14961 6         19 my $rlines = $self->[_rlines_];
14962 6 50       29 if ( $ibeg > 0 ) {
14963 6         27 my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
14964              
14965             # patch for hash bang line which is not currently marked as
14966             # a comment; mark it as a comment
14967 6 100 100     33 if ( $ibeg == 1 && !$code_type ) {
14968 2         8 my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
14969 2 100 66     18 $code_type = 'BC'
14970             if ( $line_text && $line_text =~ /^#/ );
14971             }
14972              
14973             # Do not insert a blank after a comment
14974             # (this could be subject to a flag in the future)
14975 6 100       36 if ( $code_type !~ /(BC|SBC|SBCX)/ ) {
14976 4 50       17 if ( $rOpts_kgb_before == INSERT ) {
    0          
14977 4         15 kgb_insert_blank_after( $ibeg - 1 );
14978              
14979             }
14980             elsif ( $rOpts_kgb_before == DELETE ) {
14981 0         0 $self->kgb_delete_if_blank( $ibeg - 1 );
14982             }
14983             }
14984             }
14985              
14986             # We will only put blanks before code lines. We could loosen
14987             # this rule a little, but we have to be very careful because
14988             # for example we certainly don't want to drop a blank line
14989             # after a line like this:
14990             # my $var = <<EOM;
14991 6 100 66     37 if ( $line_type eq 'CODE' && defined($K_first) ) {
14992              
14993             # - Do not put a blank before a line of different level
14994             # - Do not put a blank line if we ended the search badly
14995             # - Do not put a blank at the end of the file
14996             # - Do not put a blank line before a hanging side comment
14997 5         10 my $rLL = $self->[_rLL_];
14998 5         13 my $level = $rLL->[$K_first]->[_LEVEL_];
14999 5         11 my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
15000              
15001 5 50 66     46 if ( $level == $level_beg
      100        
      66        
      66        
15002             && $ci_level == 0
15003             && !$bad_ending
15004 3         18 && $iend < @{$rlines}
15005             && $CODE_type ne 'HSC' )
15006             {
15007 3 50       19 if ( $rOpts_kgb_after == INSERT ) {
    0          
15008 3         13 kgb_insert_blank_after($iend);
15009             }
15010             elsif ( $rOpts_kgb_after == DELETE ) {
15011 0         0 $self->kgb_delete_if_blank( $iend + 1 );
15012             }
15013             }
15014             }
15015             }
15016 9         26 kgb_split_into_sub_groups();
15017             }
15018              
15019             # reset for another group
15020 25         70 kgb_initialize_group_vars();
15021              
15022 25         42 return;
15023             } ## end sub kgb_end_group
15024              
15025             sub kgb_find_container_end {
15026              
15027             # If the keyword line is continued onto subsequent lines, find the
15028             # closing token '$K_closing' so that we can easily skip past the
15029             # contents of the container.
15030              
15031             # We only set this value if we find a simple list, meaning
15032             # -contents only one level deep
15033             # -not welded
15034              
15035 75     75 0 127 my ($self) = @_;
15036              
15037             # First check: skip if next line is not one deeper
15038 75         168 my $Knext_nonblank = $self->K_next_nonblank($K_last);
15039 75 50       154 return if ( !defined($Knext_nonblank) );
15040 75         113 my $rLL = $self->[_rLL_];
15041 75         120 my $level_next = $rLL->[$Knext_nonblank]->[_LEVEL_];
15042 75 100       169 return if ( $level_next != $level_beg + 1 );
15043              
15044             # Find the parent container of the first token on the next line
15045 7         25 my $parent_seqno = $self->parent_seqno_by_K($Knext_nonblank);
15046 7 50       26 return unless ( defined($parent_seqno) );
15047              
15048             # Must not be a weld (can be unstable)
15049             return
15050 7 50 33     23 if ( $total_weld_count
15051             && $self->is_welded_at_seqno($parent_seqno) );
15052              
15053             # Opening container must exist and be on this line
15054 7         15 my $Ko = $self->[_K_opening_container_]->{$parent_seqno};
15055 7 50 33     55 return unless ( defined($Ko) && $Ko > $K_first && $Ko <= $K_last );
      33        
15056              
15057             # Verify that the closing container exists and is on a later line
15058 7         13 my $Kc = $self->[_K_closing_container_]->{$parent_seqno};
15059 7 50 33     29 return unless ( defined($Kc) && $Kc > $K_last );
15060              
15061             # That's it
15062 7         12 $K_closing = $Kc;
15063              
15064 7         14 return;
15065             } ## end sub kgb_find_container_end
15066              
15067             sub kgb_add_to_group {
15068 75     75 0 176 my ( $self, $i, $token, $level ) = @_;
15069              
15070             # End the previous group if we have reached the maximum
15071             # group size
15072 75 50 33     164 if ( $rOpts_kgb_size_max && @group >= $rOpts_kgb_size_max ) {
15073 0         0 $self->kgb_end_group();
15074             }
15075              
15076 75 100       158 if ( @group == 0 ) {
15077 9         16 $ibeg = $i;
15078 9         18 $level_beg = $level;
15079 9         18 $count = 0;
15080             }
15081              
15082 75         103 $count++;
15083 75         101 $iend = $i;
15084              
15085             # New sub-group?
15086 75 100 100     309 if ( !@group || $token ne $group[-1]->[1] ) {
15087 23         46 push @subgroup, scalar(@group);
15088             }
15089 75         221 push @group, [ $i, $token, $count ];
15090              
15091             # remember if this line ends in an open container
15092 75         203 $self->kgb_find_container_end();
15093              
15094 75         110 return;
15095             } ## end sub kgb_add_to_group
15096              
15097             #---------------------
15098             # -kgb main subroutine
15099             #---------------------
15100              
15101             sub keyword_group_scan {
15102 555     555 0 1414 my $self = shift;
15103              
15104             # Called once per file to process --keyword-group-blanks-* parameters.
15105              
15106             # Task:
15107             # Manipulate blank lines around keyword groups (kgb* flags)
15108             # Scan all lines looking for runs of consecutive lines beginning with
15109             # selected keywords. Example keywords are 'my', 'our', 'local', ... but
15110             # they may be anything. We will set flags requesting that blanks be
15111             # inserted around and within them according to input parameters. Note
15112             # that we are scanning the lines as they came in in the input stream, so
15113             # they are not necessarily well formatted.
15114              
15115             # Returns:
15116             # The output of this sub is a return hash ref whose keys are the indexes
15117             # of lines after which we desire a blank line. For line index $i:
15118             # $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
15119             # $rhash_of_desires->{$i} = 2 means we want blank line $i removed
15120              
15121             # Nothing to do if no blanks can be output. This test added to fix
15122             # case b760.
15123 555 100       1956 if ( !$rOpts_maximum_consecutive_blank_lines ) {
15124 12         40 return $rhash_of_desires;
15125             }
15126              
15127             #---------------
15128             # initialization
15129             #---------------
15130 543         2644 my $quit = kgb_initialize();
15131 543 100       1916 if ($quit) { return $rhash_of_desires }
  537         1378  
15132              
15133 6         15 my $rLL = $self->[_rLL_];
15134 6         27 my $rlines = $self->[_rlines_];
15135              
15136 6         35 $self->kgb_end_group();
15137 6         16 my $i = -1;
15138             my $Opt_repeat_count =
15139 6         24 $rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr'
15140              
15141             #----------------------------------
15142             # loop over all lines of the source
15143             #----------------------------------
15144 6         12 foreach my $line_of_tokens ( @{$rlines} ) {
  6         15  
15145              
15146 181         259 $i++;
15147             last
15148 181 50 33     344 if ( $Opt_repeat_count > 0
15149             && $number_of_groups_seen >= $Opt_repeat_count );
15150              
15151 181         373 kgb_initialize_line_vars();
15152              
15153 181         333 $line_type = $line_of_tokens->{_line_type};
15154              
15155             # always end a group at non-CODE
15156 181 100       342 if ( $line_type ne 'CODE' ) { $self->kgb_end_group(); next }
  5         18  
  5         15  
15157              
15158 176         267 $CODE_type = $line_of_tokens->{_code_type};
15159              
15160             # end any group at a format skipping line
15161 176 50 66     342 if ( $CODE_type && $CODE_type eq 'FS' ) {
15162 0         0 $self->kgb_end_group();
15163 0         0 next;
15164             }
15165              
15166             # continue in a verbatim (VB) type; it may be quoted text
15167 176 100       316 if ( $CODE_type eq 'VB' ) {
15168 6 50       32 if ( $ibeg >= 0 ) { $iend = $i; }
  6         10  
15169 6         11 next;
15170             }
15171              
15172             # and continue in blank (BL) types
15173 170 100       271 if ( $CODE_type eq 'BL' ) {
15174 5 100       19 if ( $ibeg >= 0 ) {
15175 3         10 $iend = $i;
15176 3         7 push @{iblanks}, $i;
15177              
15178             # propagate current subgroup token
15179 3         6 my $tok = $group[-1]->[1];
15180 3         9 push @group, [ $i, $tok, $count ];
15181             }
15182 5         11 next;
15183             }
15184              
15185             # examine the first token of this line
15186 165         243 my $rK_range = $line_of_tokens->{_rK_range};
15187 165         201 ( $K_first, $K_last ) = @{$rK_range};
  165         322  
15188 165 50       329 if ( !defined($K_first) ) {
15189              
15190             # Somewhat unexpected blank line..
15191             # $rK_range is normally defined for line type CODE, but this can
15192             # happen for example if the input line was a single semicolon
15193             # which is being deleted. In that case there was code in the
15194             # input file but it is not being retained. So we can silently
15195             # return.
15196 0         0 return $rhash_of_desires;
15197             }
15198              
15199 165         258 my $level = $rLL->[$K_first]->[_LEVEL_];
15200 165         254 my $type = $rLL->[$K_first]->[_TYPE_];
15201 165         263 my $token = $rLL->[$K_first]->[_TOKEN_];
15202 165         246 my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
15203              
15204             # End a group 'badly' at an unexpected level. This will prevent
15205             # blank lines being incorrectly placed after the end of the group.
15206             # We are looking for any deviation from two acceptable patterns:
15207             # PATTERN 1: a simple list; secondary lines are at level+1
15208             # PATTERN 2: a long statement; all secondary lines same level
15209             # This was added as a fix for case b1177, in which a complex
15210             # structure got incorrectly inserted blank lines.
15211 165 100       303 if ( $ibeg >= 0 ) {
15212              
15213             # Check for deviation from PATTERN 1, simple list:
15214 118 100 100     397 if ( defined($K_closing) && $K_first < $K_closing ) {
    100          
15215 19 100       93 $self->kgb_end_group(1) if ( $level != $level_beg + 1 );
15216             }
15217              
15218             # Check for deviation from PATTERN 2, single statement:
15219 1         5 elsif ( $level != $level_beg ) { $self->kgb_end_group(1) }
15220             }
15221              
15222             # Do not look for keywords in lists ( keyword 'my' can occur in
15223             # lists, see case b760); fixed for c048.
15224 165 100       324 if ( $self->is_list_by_K($K_first) ) {
15225 27 100       56 if ( $ibeg >= 0 ) { $iend = $i }
  15         29  
15226 27         52 next;
15227             }
15228              
15229             # see if this is a code type we seek (i.e. comment)
15230 138 50 66     305 if ( $CODE_type
      33        
15231             && $keyword_group_list_comment_pattern
15232             && $CODE_type =~ /$keyword_group_list_comment_pattern/ )
15233             {
15234              
15235 0         0 my $tok = $CODE_type;
15236              
15237             # Continuing a group
15238 0 0 0     0 if ( $ibeg >= 0 && $level == $level_beg ) {
15239 0         0 $self->kgb_add_to_group( $i, $tok, $level );
15240             }
15241              
15242             # Start new group
15243             else {
15244              
15245             # first end old group if any; we might be starting new
15246             # keywords at different level
15247 0 0       0 if ( $ibeg >= 0 ) { $self->kgb_end_group(); }
  0         0  
15248 0         0 $self->kgb_add_to_group( $i, $tok, $level );
15249             }
15250 0         0 next;
15251             }
15252              
15253             # See if it is a keyword we seek, but never start a group in a
15254             # continuation line; the code may be badly formatted.
15255 138 100 100     901 if ( $ci_level == 0
    100 100        
15256             && $type eq 'k'
15257             && $token =~ /$keyword_group_list_pattern/ )
15258             {
15259              
15260             # Continuing a keyword group
15261 75 100 66     279 if ( $ibeg >= 0 && $level == $level_beg ) {
15262 66         165 $self->kgb_add_to_group( $i, $token, $level );
15263             }
15264              
15265             # Start new keyword group
15266             else {
15267              
15268             # first end old group if any; we might be starting new
15269             # keywords at different level
15270 9 50       27 if ( $ibeg >= 0 ) { $self->kgb_end_group(); }
  0         0  
15271 9         36 $self->kgb_add_to_group( $i, $token, $level );
15272             }
15273 75         162 next;
15274             }
15275              
15276             # This is not one of our keywords, but we are in a keyword group
15277             # so see if we should continue or quit
15278             elsif ( $ibeg >= 0 ) {
15279              
15280             # - bail out on a large level change; we may have walked into a
15281             # data structure or anonymous sub code.
15282 35 50 33     168 if ( $level > $level_beg + 1 || $level < $level_beg ) {
15283 0         0 $self->kgb_end_group(1);
15284 0         0 next;
15285             }
15286              
15287             # - keep going on a continuation line of the same level, since
15288             # it is probably a continuation of our previous keyword,
15289             # - and keep going past hanging side comments because we never
15290             # want to interrupt them.
15291 35 100 100     143 if ( ( ( $level == $level_beg ) && $ci_level > 0 )
      100        
15292             || $CODE_type eq 'HSC' )
15293             {
15294 25         41 $iend = $i;
15295 25         47 next;
15296             }
15297              
15298             # - continue if if we are within in a container which started
15299             # with the line of the previous keyword.
15300 10 100 100     52 if ( defined($K_closing) && $K_first <= $K_closing ) {
15301              
15302             # continue if entire line is within container
15303 5 100       12 if ( $K_last <= $K_closing ) { $iend = $i; next }
  3         8  
  3         5  
15304              
15305             # continue at ); or }; or ];
15306 2         6 my $KK = $K_closing + 1;
15307 2 100       8 if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
15308 1 50       4 if ( $KK < $K_last ) {
15309 0 0       0 if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
  0         0  
15310 0 0 0     0 if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' )
15311             {
15312 0         0 $self->kgb_end_group(1);
15313 0         0 next;
15314             }
15315             }
15316 1         2 $iend = $i;
15317 1         2 next;
15318             }
15319              
15320 1         6 $self->kgb_end_group(1);
15321 1         3 next;
15322             }
15323              
15324             # - end the group if none of the above
15325 5         24 $self->kgb_end_group();
15326 5         16 next;
15327             }
15328              
15329             # not in a keyword group; continue
15330 28         59 else { next }
15331             } ## end of loop over all lines
15332              
15333 6         22 $self->kgb_end_group();
15334 6         21 return $rhash_of_desires;
15335              
15336             } ## end sub keyword_group_scan
15337             } ## end closure keyword_group_scan
15338              
15339             #######################################
15340             # CODE SECTION 7: Process lines of code
15341             #######################################
15342              
15343             { ## begin closure process_line_of_CODE
15344              
15345             # The routines in this closure receive lines of code and combine them into
15346             # 'batches' and send them along. A 'batch' is the unit of code which can be
15347             # processed further as a unit. It has the property that it is the largest
15348             # amount of code into which which perltidy is free to place one or more
15349             # line breaks within it without violating any constraints.
15350              
15351             # When a new batch is formed it is sent to sub 'grind_batch_of_code'.
15352              
15353             # flags needed by the store routine
15354             my $line_of_tokens;
15355             my $no_internal_newlines;
15356             my $CODE_type;
15357             my $current_line_starts_in_quote;
15358              
15359             # range of K of tokens for the current line
15360             my ( $K_first, $K_last );
15361              
15362             my ( $rLL, $radjusted_levels, $rparent_of_seqno, $rdepth_of_opening_seqno,
15363             $rblock_type_of_seqno, $ri_starting_one_line_block );
15364              
15365             # past stored nonblank tokens and flags
15366             my (
15367             $K_last_nonblank_code, $looking_for_else,
15368             $is_static_block_comment, $last_CODE_type,
15369             $last_line_had_side_comment, $next_parent_seqno,
15370             $next_slevel,
15371             );
15372              
15373             # Called once at the start of a new file
15374             sub initialize_process_line_of_CODE {
15375 555     555 0 1357 $K_last_nonblank_code = undef;
15376 555         1155 $looking_for_else = 0;
15377 555         1198 $is_static_block_comment = 0;
15378 555         1201 $last_line_had_side_comment = 0;
15379 555         1386 $next_parent_seqno = SEQ_ROOT;
15380 555         1262 $next_slevel = undef;
15381 555         990 return;
15382             } ## end sub initialize_process_line_of_CODE
15383              
15384             # Batch variables: these describe the current batch of code being formed
15385             # and sent down the pipeline. They are initialized in the next
15386             # sub.
15387             my (
15388             $rbrace_follower, $index_start_one_line_block,
15389             $starting_in_quote, $ending_in_quote,
15390             );
15391              
15392             # Called before the start of each new batch
15393             sub initialize_batch_variables {
15394              
15395             # Initialize array values for a new batch. Any changes here must be
15396             # carefully coordinated with sub store_token_to_go.
15397              
15398 5102     5102 0 9098 $max_index_to_go = UNDEFINED_INDEX;
15399 5102         8680 $summed_lengths_to_go[0] = 0;
15400 5102         8320 $nesting_depth_to_go[0] = 0;
15401 5102         9416 $ri_starting_one_line_block = [];
15402              
15403             # Redefine some sparse arrays.
15404             # It is more efficient to redefine these sparse arrays and rely on
15405             # undef's instead of initializing to 0's. Testing showed that using
15406             # @array=() is more efficient than $#array=-1
15407              
15408 5102         9709 @old_breakpoint_to_go = ();
15409 5102         8575 @forced_breakpoint_to_go = ();
15410 5102         8839 @block_type_to_go = ();
15411 5102         8385 @mate_index_to_go = ();
15412 5102         9258 @type_sequence_to_go = ();
15413              
15414             # NOTE: @nobreak_to_go is sparse and could be treated this way, but
15415             # testing showed that there would be very little efficiency gain
15416             # because an 'if' test must be added in store_token_to_go.
15417              
15418             # The initialization code for the remaining batch arrays is as follows
15419             # and can be activated for testing. But profiling shows that it is
15420             # time-consuming to re-initialize the batch arrays and is not necessary
15421             # because the maximum valid token, $max_index_to_go, is carefully
15422             # controlled. This means however that it is not possible to do any
15423             # type of filter or map operation directly on these arrays. And it is
15424             # not possible to use negative indexes. As a precaution against program
15425             # changes which might do this, sub pad_array_to_go adds some undefs at
15426             # the end of the current batch of data.
15427              
15428             ## 0 && do { #<<<
15429             ## @nobreak_to_go = ();
15430             ## @token_lengths_to_go = ();
15431             ## @levels_to_go = ();
15432             ## @ci_levels_to_go = ();
15433             ## @tokens_to_go = ();
15434             ## @K_to_go = ();
15435             ## @types_to_go = ();
15436             ## @leading_spaces_to_go = ();
15437             ## @reduced_spaces_to_go = ();
15438             ## @inext_to_go = ();
15439             ## @parent_seqno_to_go = ();
15440             ## };
15441              
15442 5102         7889 $rbrace_follower = undef;
15443 5102         8077 $ending_in_quote = 0;
15444              
15445 5102         7381 $index_start_one_line_block = undef;
15446              
15447             # initialize forced breakpoint vars associated with each output batch
15448 5102         7605 $forced_breakpoint_count = 0;
15449 5102         7617 $index_max_forced_break = UNDEFINED_INDEX;
15450 5102         7142 $forced_breakpoint_undo_count = 0;
15451              
15452 5102         13832 return;
15453             } ## end sub initialize_batch_variables
15454              
15455             sub leading_spaces_to_go {
15456              
15457             # return the number of indentation spaces for a token in the output
15458             # stream
15459              
15460 5022     5022 0 9390 my ($ii) = @_;
15461 5022 50       10482 return 0 if ( $ii < 0 );
15462 5022         8108 my $indentation = $leading_spaces_to_go[$ii];
15463 5022 100       12925 return ref($indentation) ? $indentation->get_spaces() : $indentation;
15464             } ## end sub leading_spaces_to_go
15465              
15466             sub create_one_line_block {
15467              
15468             # set index starting next one-line block
15469             # call with no args to delete the current one-line block
15470 1318     1318 0 2561 ($index_start_one_line_block) = @_;
15471 1318         2032 return;
15472             } ## end sub create_one_line_block
15473              
15474             # Routine to place the current token into the output stream.
15475             # Called once per output token.
15476              
15477 38     38   409 use constant DEBUG_STORE => 0;
  38         103  
  38         53727  
15478              
15479             sub store_token_to_go {
15480              
15481 54778     54778 0 87203 my ( $self, $Ktoken_vars, $rtoken_vars ) = @_;
15482              
15483             #-------------------------------------------------------
15484             # Token storage utility for sub process_line_of_CODE.
15485             # Add one token to the next batch of '_to_go' variables.
15486             #-------------------------------------------------------
15487              
15488             # Input parameters:
15489             # $Ktoken_vars = the index K in the global token array
15490             # $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values
15491             # unless they are temporarily being overridden
15492              
15493             #------------------------------------------------------------------
15494             # NOTE: called once per token so coding efficiency is critical here.
15495             # All changes need to be benchmarked with Devel::NYTProf.
15496             #------------------------------------------------------------------
15497              
15498             my (
15499              
15500             $type,
15501             $token,
15502             $ci_level,
15503             $level,
15504             $seqno,
15505             $length,
15506              
15507 54778         75560 ) = @{$rtoken_vars}[
  54778         148671  
15508              
15509             _TYPE_,
15510             _TOKEN_,
15511             _CI_LEVEL_,
15512             _LEVEL_,
15513             _TYPE_SEQUENCE_,
15514             _TOKEN_LENGTH_,
15515              
15516             ];
15517              
15518             # Check for emergency flush...
15519             # The K indexes in the batch must always be a continuous sequence of
15520             # the global token array. The batch process programming assumes this.
15521             # If storing this token would cause this relation to fail we must dump
15522             # the current batch before storing the new token. It is extremely rare
15523             # for this to happen. One known example is the following two-line
15524             # snippet when run with parameters
15525             # --noadd-newlines --space-terminal-semicolon:
15526             # if ( $_ =~ /PENCIL/ ) { $pencil_flag= 1 } ; ;
15527             # $yy=1;
15528 54778 100       95466 if ( $max_index_to_go >= 0 ) {
15529 50027 50 66     155224 if ( $Ktoken_vars != $K_to_go[$max_index_to_go] + 1 ) {
    50          
15530 0         0 $self->flush_batch_of_CODE();
15531             }
15532              
15533             # Do not output consecutive blank tokens ... this should not
15534             # happen, but it is worth checking. Later code can then make the
15535             # simplifying assumption that blank tokens are not consecutive.
15536             elsif ( $type eq 'b' && $types_to_go[$max_index_to_go] eq 'b' ) {
15537              
15538 0         0 if (DEVEL_MODE) {
15539              
15540             # if this happens, it is may be that consecutive blanks
15541             # were inserted into the token stream in 'respace_tokens'
15542             my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
15543             Fault("consecutive blanks near line $lno; please fix");
15544             }
15545 0         0 return;
15546             }
15547             }
15548              
15549             # Do not start a batch with a blank token.
15550             # Fixes cases b149 b888 b984 b985 b986 b987
15551             else {
15552 4751 100       12733 if ( $type eq 'b' ) { return }
  201         487  
15553             }
15554              
15555             # Update counter and do initializations if first token of new batch
15556 54577 100       96116 if ( !++$max_index_to_go ) {
15557              
15558             # Reset flag '$starting_in_quote' for a new batch. It must be set
15559             # to the value of '$in_continued_quote', but here for efficiency we
15560             # set it to zero, which is its normal value. Then in coding below
15561             # we will change it if we find we are actually in a continued quote.
15562 4550         6960 $starting_in_quote = 0;
15563              
15564             # Update the next parent sequence number for each new batch.
15565              
15566             #----------------------------------------
15567             # Begin coding from sub parent_seqno_by_K
15568             #----------------------------------------
15569              
15570             # The following is equivalent to this call but much faster:
15571             # $next_parent_seqno = $self->parent_seqno_by_K($Ktoken_vars);
15572              
15573 4550         7851 $next_parent_seqno = SEQ_ROOT;
15574 4550 100       8926 if ($seqno) {
15575 882         2493 $next_parent_seqno = $rparent_of_seqno->{$seqno};
15576             }
15577             else {
15578 3668         6889 my $Kt = $rLL->[$Ktoken_vars]->[_KNEXT_SEQ_ITEM_];
15579 3668 100       8256 if ( defined($Kt) ) {
15580 3376         7591 my $type_sequence_t = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
15581 3376         8003 my $type_t = $rLL->[$Kt]->[_TYPE_];
15582              
15583             # if next container token is closing, it is the parent seqno
15584 3376 100       7516 if ( $is_closing_type{$type_t} ) {
15585 520         1256 $next_parent_seqno = $type_sequence_t;
15586             }
15587              
15588             # otherwise we want its parent container
15589             else {
15590             $next_parent_seqno =
15591 2856         7070 $rparent_of_seqno->{$type_sequence_t};
15592             }
15593             }
15594             }
15595 4550 50       9612 $next_parent_seqno = SEQ_ROOT
15596             unless ( defined($next_parent_seqno) );
15597              
15598             #--------------------------------------
15599             # End coding from sub parent_seqno_by_K
15600             #--------------------------------------
15601              
15602 4550         9494 $next_slevel = $rdepth_of_opening_seqno->[$next_parent_seqno] + 1;
15603             }
15604              
15605             # Clip levels to zero if there are level errors in the file.
15606             # We had to wait until now for reasons explained in sub 'write_line'.
15607 54577 50       94461 if ( $level < 0 ) { $level = 0 }
  0         0  
15608              
15609             # Safety check that length is defined. This is slow and should not be
15610             # needed now, so just do it in DEVEL_MODE to check programming changes.
15611             # Formerly needed for --indent-only, in which the entire set of tokens
15612             # is normally turned into type 'q'. Lengths are now defined in sub
15613             # 'respace_tokens' so this check is no longer needed.
15614 54577         65917 if ( DEVEL_MODE && !defined($length) ) {
15615             my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
15616             $length = length($token);
15617             Fault(<<EOM);
15618             undefined length near line $lno; num chars=$length, token='$token'
15619             EOM
15620             }
15621              
15622             #----------------------------
15623             # add this token to the batch
15624             #----------------------------
15625 54577         85488 $K_to_go[$max_index_to_go] = $Ktoken_vars;
15626 54577         92017 $types_to_go[$max_index_to_go] = $type;
15627 54577         88748 $tokens_to_go[$max_index_to_go] = $token;
15628 54577         75555 $ci_levels_to_go[$max_index_to_go] = $ci_level;
15629 54577         77025 $levels_to_go[$max_index_to_go] = $level;
15630 54577         74946 $nobreak_to_go[$max_index_to_go] = $no_internal_newlines;
15631 54577         73553 $token_lengths_to_go[$max_index_to_go] = $length;
15632              
15633             # Skip point initialization for these sparse arrays - undef's okay;
15634             # See also related code in sub initialize_batch_variables.
15635             ## $old_breakpoint_to_go[$max_index_to_go] = 0;
15636             ## $forced_breakpoint_to_go[$max_index_to_go] = 0;
15637             ## $block_type_to_go[$max_index_to_go] = EMPTY_STRING;
15638             ## $type_sequence_to_go[$max_index_to_go] = $seqno;
15639              
15640             # NOTE: nobreak_to_go can be treated as a sparse array, but testing
15641             # showed that there is almost no efficiency gain because an if test
15642             # would need to be added.
15643              
15644             # We keep a running sum of token lengths from the start of this batch:
15645             # summed_lengths_to_go[$i] = total length to just before token $i
15646             # summed_lengths_to_go[$i+1] = total length to just after token $i
15647 54577         85334 $summed_lengths_to_go[ $max_index_to_go + 1 ] =
15648             $summed_lengths_to_go[$max_index_to_go] + $length;
15649              
15650             # Initialize some sequence-dependent variables to their normal values
15651 54577         82871 $parent_seqno_to_go[$max_index_to_go] = $next_parent_seqno;
15652 54577         77999 $nesting_depth_to_go[$max_index_to_go] = $next_slevel;
15653              
15654             # Then fix them at container tokens:
15655 54577 100       90568 if ($seqno) {
15656              
15657 9112         18160 $type_sequence_to_go[$max_index_to_go] = $seqno;
15658              
15659             $block_type_to_go[$max_index_to_go] =
15660 9112         16276 $rblock_type_of_seqno->{$seqno};
15661              
15662 9112 100       22061 if ( $is_opening_token{$token} ) {
    100          
15663              
15664 4395         8199 my $slevel = $rdepth_of_opening_seqno->[$seqno];
15665 4395         6859 $nesting_depth_to_go[$max_index_to_go] = $slevel;
15666 4395         6516 $next_slevel = $slevel + 1;
15667              
15668 4395         6905 $next_parent_seqno = $seqno;
15669              
15670             }
15671             elsif ( $is_closing_token{$token} ) {
15672              
15673 4345         7829 $next_slevel = $rdepth_of_opening_seqno->[$seqno];
15674 4345         7007 my $slevel = $next_slevel + 1;
15675 4345         6668 $nesting_depth_to_go[$max_index_to_go] = $slevel;
15676              
15677 4345         8831 my $parent_seqno = $rparent_of_seqno->{$seqno};
15678 4345 50       9232 $parent_seqno = SEQ_ROOT unless defined($parent_seqno);
15679 4345         6895 $parent_seqno_to_go[$max_index_to_go] = $parent_seqno;
15680 4345         7200 $next_parent_seqno = $parent_seqno;
15681              
15682             }
15683             else {
15684             # ternary token: nothing to do
15685             }
15686             }
15687              
15688             # Define the indentation that this token will have in two cases:
15689             # Without CI = reduced_spaces_to_go
15690             # With CI = leading_spaces_to_go
15691 54577         98467 $leading_spaces_to_go[$max_index_to_go] =
15692             $reduced_spaces_to_go[$max_index_to_go] =
15693             $rOpts_indent_columns * $radjusted_levels->[$Ktoken_vars];
15694              
15695 54577 100       93826 $leading_spaces_to_go[$max_index_to_go] +=
15696             $rOpts_continuation_indentation
15697             if ($ci_level);
15698             ## NOTE: No longer allowing ci_level > 1, so avoid multiplication
15699             ## $rOpts_continuation_indentation * $ci_level
15700              
15701             # Correct these values if we are starting in a continued quote
15702 54577 100 100     98784 if ( $current_line_starts_in_quote
15703             && $Ktoken_vars == $K_first )
15704             {
15705             # in a continued quote - correct value set above if first token
15706 19 50       95 if ( $max_index_to_go == 0 ) { $starting_in_quote = 1 }
  19         49  
15707              
15708 19         50 $leading_spaces_to_go[$max_index_to_go] = 0;
15709 19         41 $reduced_spaces_to_go[$max_index_to_go] = 0;
15710             }
15711              
15712 54577         66458 DEBUG_STORE && do {
15713             my ( $a, $b, $c ) = caller();
15714             print STDOUT
15715             "STORE: from $a $c: storing token $token type $type lev=$level at $max_index_to_go\n";
15716             };
15717 54577         84969 return;
15718             } ## end sub store_token_to_go
15719              
15720             sub flush_batch_of_CODE {
15721              
15722             # Finish and process the current batch.
15723             # This must be the only call to grind_batch_of_CODE()
15724 5440     5440 0 9815 my ($self) = @_;
15725              
15726             # If a batch has been started ...
15727 5440 100       11549 if ( $max_index_to_go >= 0 ) {
15728              
15729             # Create an array to hold variables for this batch
15730 4547         8597 my $this_batch = [];
15731              
15732 4547 100       9931 $this_batch->[_starting_in_quote_] = 1 if ($starting_in_quote);
15733 4547 100       9307 $this_batch->[_ending_in_quote_] = 1 if ($ending_in_quote);
15734              
15735 4547 100 100     15363 if ( $CODE_type || $last_CODE_type ) {
15736 1190 100       4351 $this_batch->[_batch_CODE_type_] =
15737             $K_to_go[$max_index_to_go] >= $K_first
15738             ? $CODE_type
15739             : $last_CODE_type;
15740             }
15741              
15742             $last_line_had_side_comment =
15743 4547   100     14609 ( $max_index_to_go > 0 && $types_to_go[$max_index_to_go] eq '#' );
15744              
15745             # The flag $is_static_block_comment applies to the line which just
15746             # arrived. So it only applies if we are outputting that line.
15747 4547 100 66     11135 if ( $is_static_block_comment && !$last_line_had_side_comment ) {
15748 13         60 $this_batch->[_is_static_block_comment_] =
15749             $K_to_go[0] == $K_first;
15750             }
15751              
15752 4547         9737 $this_batch->[_ri_starting_one_line_block_] =
15753             $ri_starting_one_line_block;
15754              
15755 4547         8226 $self->[_this_batch_] = $this_batch;
15756              
15757             #-------------------
15758             # process this batch
15759             #-------------------
15760 4547         14356 $self->grind_batch_of_CODE();
15761              
15762             # Done .. this batch is history
15763 4547         8291 $self->[_this_batch_] = undef;
15764              
15765 4547         11217 initialize_batch_variables();
15766             }
15767              
15768 5440         9322 return;
15769             } ## end sub flush_batch_of_CODE
15770              
15771             sub end_batch {
15772              
15773             # End the current batch, EXCEPT for a few special cases
15774 4955     4955 0 9941 my ($self) = @_;
15775              
15776 4955 50       10529 if ( $max_index_to_go < 0 ) {
15777              
15778             # nothing to do .. this is harmless but wastes time.
15779 0         0 if (DEVEL_MODE) {
15780             Fault("sub end_batch called with nothing to do; please fix\n");
15781             }
15782 0         0 return;
15783             }
15784              
15785             # Exceptions when a line does not end with a comment... (fixes c058)
15786 4955 100       11579 if ( $types_to_go[$max_index_to_go] ne '#' ) {
15787              
15788             # Exception 1: Do not end line in a weld
15789             return
15790             if ( $total_weld_count
15791 3959 100 100     10353 && $self->[_rK_weld_right_]->{ $K_to_go[$max_index_to_go] } );
15792              
15793             # Exception 2: just set a tentative breakpoint if we might be in a
15794             # one-line block
15795 3911 100       8535 if ( defined($index_start_one_line_block) ) {
15796 428         1543 $self->set_forced_breakpoint($max_index_to_go);
15797 428         818 return;
15798             }
15799             }
15800              
15801 4479         11499 $self->flush_batch_of_CODE();
15802 4479         8874 return;
15803             } ## end sub end_batch
15804              
15805             sub flush_vertical_aligner {
15806 1806     1806 0 3825 my ($self) = @_;
15807 1806         3594 my $vao = $self->[_vertical_aligner_object_];
15808 1806         7070 $vao->flush();
15809 1806         2962 return;
15810             } ## end sub flush_vertical_aligner
15811              
15812             # flush is called to output any tokens in the pipeline, so that
15813             # an alternate source of lines can be written in the correct order
15814             sub flush {
15815 1740     1740 0 4202 my ( $self, $CODE_type_flush ) = @_;
15816              
15817             # end the current batch with 1 exception
15818              
15819 1740         2936 $index_start_one_line_block = undef;
15820              
15821             # Exception: if we are flushing within the code stream only to insert
15822             # blank line(s), then we can keep the batch intact at a weld. This
15823             # improves formatting of -ce. See test 'ce1.ce'
15824 1740 100 66     5965 if ( $CODE_type_flush && $CODE_type_flush eq 'BL' ) {
15825 779 100       2206 $self->end_batch() if ( $max_index_to_go >= 0 );
15826             }
15827              
15828             # otherwise, we have to shut things down completely.
15829 961         2550 else { $self->flush_batch_of_CODE() }
15830              
15831 1740         5878 $self->flush_vertical_aligner();
15832 1740         2864 return;
15833             } ## end sub flush
15834              
15835             my %is_assignment_or_fat_comma;
15836              
15837             BEGIN {
15838 38     38   883 %is_assignment_or_fat_comma = %is_assignment;
15839 38         138869 $is_assignment_or_fat_comma{'=>'} = 1;
15840             }
15841              
15842             sub process_line_of_CODE {
15843              
15844 6573     6573 0 12603 my ( $self, $my_line_of_tokens ) = @_;
15845              
15846             #----------------------------------------------------------------
15847             # This routine is called once per INPUT line to format all of the
15848             # tokens on that line.
15849             #----------------------------------------------------------------
15850              
15851             # It outputs full-line comments and blank lines immediately.
15852              
15853             # For lines of code:
15854             # - Tokens are copied one-by-one from the global token
15855             # array $rLL to a set of '_to_go' arrays which collect batches of
15856             # tokens. This is done with calls to 'store_token_to_go'.
15857             # - A batch is closed and processed upon reaching a well defined
15858             # structural break point (i.e. code block boundary) or forced
15859             # breakpoint (i.e. side comment or special user controls).
15860             # - Subsequent stages of formatting make additional line breaks
15861             # appropriate for lists and logical structures, and as necessary to
15862             # keep line lengths below the requested maximum line length.
15863              
15864             #-----------------------------------
15865             # begin initialize closure variables
15866             #-----------------------------------
15867 6573         12778 $line_of_tokens = $my_line_of_tokens;
15868 6573         14681 my $rK_range = $line_of_tokens->{_rK_range};
15869 6573 50       16847 if ( !defined( $rK_range->[0] ) ) {
15870              
15871             # Empty line: This can happen if tokens are deleted, for example
15872             # with the -mangle parameter
15873 0         0 return;
15874             }
15875              
15876 6573         9553 ( $K_first, $K_last ) = @{$rK_range};
  6573         14071  
15877 6573         11330 $last_CODE_type = $CODE_type;
15878 6573         10666 $CODE_type = $line_of_tokens->{_code_type};
15879 6573         12635 $current_line_starts_in_quote = $line_of_tokens->{_starting_in_quote};
15880              
15881 6573         71505 $rLL = $self->[_rLL_];
15882 6573         14629 $radjusted_levels = $self->[_radjusted_levels_];
15883 6573         11600 $rparent_of_seqno = $self->[_rparent_of_seqno_];
15884 6573         10901 $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
15885 6573         10867 $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
15886              
15887             #---------------------------------
15888             # end initialize closure variables
15889             #---------------------------------
15890              
15891             # This flag will become nobreak_to_go and should be set to 2 to prevent
15892             # a line break AFTER the current token.
15893 6573         9903 $no_internal_newlines = 0;
15894 6573 100 66     23694 if ( !$rOpts_add_newlines || $CODE_type eq 'NIN' ) {
15895 119         191 $no_internal_newlines = 2;
15896             }
15897              
15898 6573         11094 my $input_line = $line_of_tokens->{_line_text};
15899              
15900 6573         10367 my ( $is_block_comment, $has_side_comment );
15901 6573 100       20235 if ( $rLL->[$K_last]->[_TYPE_] eq '#' ) {
15902 1065 100       2865 if ( $K_last == $K_first ) { $is_block_comment = 1 }
  701         1483  
15903 364         779 else { $has_side_comment = 1 }
15904             }
15905              
15906 6573         12549 my $is_static_block_comment_without_leading_space =
15907             $CODE_type eq 'SBCX';
15908 6573   100     18787 $is_static_block_comment =
15909             $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
15910              
15911             # check for a $VERSION statement
15912 6573 100       12731 if ( $CODE_type eq 'VER' ) {
15913 4         16 $self->[_saw_VERSION_in_this_file_] = 1;
15914 4         19 $no_internal_newlines = 2;
15915             }
15916              
15917             # Add interline blank if any
15918 6573         10148 my $last_old_nonblank_type = "b";
15919 6573         9613 my $first_new_nonblank_token = EMPTY_STRING;
15920 6573         9859 my $K_first_true = $K_first;
15921 6573 100       13557 if ( $max_index_to_go >= 0 ) {
15922 2481         4849 $last_old_nonblank_type = $types_to_go[$max_index_to_go];
15923 2481         6393 $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
15924 2481 100 66     18824 if ( !$is_block_comment
      66        
      66        
15925             && $types_to_go[$max_index_to_go] ne 'b'
15926             && $K_first > 0
15927             && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
15928             {
15929 2305         4052 $K_first -= 1;
15930             }
15931             }
15932              
15933 6573         10787 my $rtok_first = $rLL->[$K_first];
15934              
15935 6573         12087 my $in_quote = $line_of_tokens->{_ending_in_quote};
15936 6573         9964 $ending_in_quote = $in_quote;
15937              
15938             #------------------------------------
15939             # Handle a block (full-line) comment.
15940             #------------------------------------
15941 6573 100       13296 if ($is_block_comment) {
15942              
15943 701 100       2383 if ( $rOpts->{'delete-block-comments'} ) {
15944 21         65 $self->flush();
15945 21         65 return;
15946             }
15947              
15948 680         1414 $index_start_one_line_block = undef;
15949 680 100       2008 $self->end_batch() if ( $max_index_to_go >= 0 );
15950              
15951             # output a blank line before block comments
15952 680 100 100     4092 if (
      66        
      100        
      66        
      100        
      100        
15953             # unless we follow a blank or comment line
15954             $self->[_last_line_leading_type_] ne '#'
15955             && $self->[_last_line_leading_type_] ne 'b'
15956              
15957             # only if allowed
15958             && $rOpts->{'blanks-before-comments'}
15959              
15960             # if this is NOT an empty comment, unless it follows a side
15961             # comment and could become a hanging side comment.
15962             && (
15963             $rtok_first->[_TOKEN_] ne '#'
15964             || ( $last_line_had_side_comment
15965             && $rLL->[$K_first]->[_LEVEL_] > 0 )
15966             )
15967              
15968             # not after a short line ending in an opening token
15969             # because we already have space above this comment.
15970             # Note that the first comment in this if block, after
15971             # the 'if (', does not get a blank line because of this.
15972             && !$self->[_last_output_short_opening_token_]
15973              
15974             # never before static block comments
15975             && !$is_static_block_comment
15976             )
15977             {
15978 50         214 $self->flush(); # switching to new output stream
15979 50         132 my $file_writer_object = $self->[_file_writer_object_];
15980 50         246 $file_writer_object->write_blank_code_line();
15981 50         139 $self->[_last_line_leading_type_] = 'b';
15982             }
15983              
15984 680 100 100     5426 if (
      100        
      100        
15985             $rOpts->{'indent-block-comments'}
15986             && ( !$rOpts->{'indent-spaced-block-comments'}
15987             || $input_line =~ /^\s+/ )
15988             && !$is_static_block_comment_without_leading_space
15989             )
15990             {
15991 632         1312 my $Ktoken_vars = $K_first;
15992 632         1258 my $rtoken_vars = $rLL->[$Ktoken_vars];
15993 632         2580 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
15994 632         2106 $self->end_batch();
15995             }
15996             else {
15997              
15998             # switching to new output stream
15999 48         180 $self->flush();
16000              
16001             # Note that last arg in call here is 'undef' for comments
16002 48         117 my $file_writer_object = $self->[_file_writer_object_];
16003 48         261 $file_writer_object->write_code_line(
16004             $rtok_first->[_TOKEN_] . "\n", undef );
16005 48         139 $self->[_last_line_leading_type_] = '#';
16006             }
16007 680         2441 return;
16008             }
16009              
16010             #--------------------------------------------
16011             # Compare input/output indentation in logfile
16012             #--------------------------------------------
16013 5872 100       14052 if ( $self->[_save_logfile_] ) {
16014              
16015             # Compare input/output indentation except for:
16016             # - hanging side comments
16017             # - continuation lines (have unknown leading blank space)
16018             # - and lines which are quotes (they may have been outdented)
16019             my $guessed_indentation_level =
16020 5         14 $line_of_tokens->{_guessed_indentation_level};
16021              
16022 5 50 66     57 unless ( $CODE_type eq 'HSC'
      33        
      66        
16023             || $rtok_first->[_CI_LEVEL_] > 0
16024             || $guessed_indentation_level == 0
16025             && $rtok_first->[_TYPE_] eq 'Q' )
16026             {
16027 3         9 my $input_line_number = $line_of_tokens->{_line_number};
16028 3         19 $self->compare_indentation_levels( $K_first,
16029             $guessed_indentation_level, $input_line_number );
16030             }
16031             }
16032              
16033             #-----------------------------------------
16034             # Handle a line marked as indentation-only
16035             #-----------------------------------------
16036              
16037 5872 100       12538 if ( $CODE_type eq 'IO' ) {
16038 12         42 $self->flush();
16039 12         24 my $line = $input_line;
16040              
16041             # Fix for rt #125506 Unexpected string formatting
16042             # in which leading space of a terminal quote was removed
16043 12         92 $line =~ s/\s+$//;
16044 12 100       62 $line =~ s/^\s+// unless ( $line_of_tokens->{_starting_in_quote} );
16045              
16046 12         19 my $Ktoken_vars = $K_first;
16047              
16048             # We work with a copy of the token variables and change the
16049             # first token to be the entire line as a quote variable
16050 12         23 my $rtoken_vars = $rLL->[$Ktoken_vars];
16051 12         40 $rtoken_vars = copy_token_as_type( $rtoken_vars, 'q', $line );
16052              
16053             # Patch: length is not really important here but must be defined
16054 12         26 $rtoken_vars->[_TOKEN_LENGTH_] = length($line);
16055              
16056 12         74 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
16057 12         42 $self->end_batch();
16058 12         58 return;
16059             }
16060              
16061             #---------------------------
16062             # Handle all other lines ...
16063             #---------------------------
16064              
16065             # If we just saw the end of an elsif block, write nag message
16066             # if we do not see another elseif or an else.
16067 5860 100       11469 if ($looking_for_else) {
16068              
16069             ## /^(elsif|else)$/
16070 13 50       75 if ( !$is_elsif_else{ $rLL->[$K_first_true]->[_TOKEN_] } ) {
16071 0         0 write_logfile_entry("(No else block)\n");
16072             }
16073 13         40 $looking_for_else = 0;
16074             }
16075              
16076             # This is a good place to kill incomplete one-line blocks
16077 5860 100       12190 if ( $max_index_to_go >= 0 ) {
16078              
16079             # For -iob and -lp, mark essential old breakpoints.
16080             # Fixes b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058
16081             # See related code below.
16082 2440 50 66     6840 if ( $rOpts_ignore_old_breakpoints && $rOpts_line_up_parentheses ) {
16083 0         0 my $type_first = $rLL->[$K_first_true]->[_TYPE_];
16084 0 0       0 if ( $is_assignment_or_fat_comma{$type_first} ) {
16085 0         0 $old_breakpoint_to_go[$max_index_to_go] = 1;
16086             }
16087             }
16088              
16089 2440 100 100     14987 if (
      100        
      100        
      100        
16090              
16091             # this check needed -mangle (for example rt125012)
16092             (
16093             ( !$index_start_one_line_block )
16094             && ( $last_old_nonblank_type eq ';' )
16095             && ( $first_new_nonblank_token ne '}' )
16096             )
16097              
16098             # Patch for RT #98902. Honor request to break at old commas.
16099             || ( $rOpts_break_at_old_comma_breakpoints
16100             && $last_old_nonblank_type eq ',' )
16101             )
16102             {
16103 30 100       102 $forced_breakpoint_to_go[$max_index_to_go] = 1
16104             if ($rOpts_break_at_old_comma_breakpoints);
16105 30         62 $index_start_one_line_block = undef;
16106 30         96 $self->end_batch();
16107             }
16108              
16109             # Keep any requested breaks before this line. Note that we have to
16110             # use the original K_first because it may have been reduced above
16111             # to add a blank. The value of the flag is as follows:
16112             # 1 => hard break, flush the batch
16113             # 2 => soft break, set breakpoint and continue building the batch
16114             # added check on max_index_to_go for c177
16115 2440 100 100     9862 if ( $max_index_to_go >= 0
16116             && $self->[_rbreak_before_Kfirst_]->{$K_first_true} )
16117             {
16118 9         20 $index_start_one_line_block = undef;
16119 9 100       29 if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) {
16120 4         15 $self->set_forced_breakpoint($max_index_to_go);
16121             }
16122             else {
16123 5         21 $self->end_batch();
16124             }
16125             }
16126             }
16127              
16128             #--------------------------------------
16129             # loop to process the tokens one-by-one
16130             #--------------------------------------
16131 5860         17043 $self->process_line_inner_loop($has_side_comment);
16132              
16133             # if there is anything left in the output buffer ...
16134 5860 100       14062 if ( $max_index_to_go >= 0 ) {
16135              
16136 3257         6616 my $type = $rLL->[$K_last]->[_TYPE_];
16137 3257         6413 my $break_flag = $self->[_rbreak_after_Klast_]->{$K_last};
16138              
16139             # we have to flush ..
16140 3257 100 100     39524 if (
      100        
      100        
      100        
      66        
      100        
      100        
      100        
      66        
      33        
      66        
16141              
16142             # if there is a side comment...
16143             $type eq '#'
16144              
16145             # if this line ends in a quote
16146             # NOTE: This is critically important for insuring that quoted
16147             # lines do not get processed by things like -sot and -sct
16148             || $in_quote
16149              
16150             # if this is a VERSION statement
16151             || $CODE_type eq 'VER'
16152              
16153             # to keep a label at the end of a line
16154             || ( $type eq 'J' && $rOpts_break_after_labels != 2 )
16155              
16156             # if we have a hard break request
16157             || $break_flag && $break_flag != 2
16158              
16159             # if we are instructed to keep all old line breaks
16160             || !$rOpts->{'delete-old-newlines'}
16161              
16162             # if this is a line of the form 'use overload'. A break here in
16163             # the input file is a good break because it will allow the
16164             # operators which follow to be formatted well. Without this
16165             # break the formatting with -ci=4 -xci is poor, for example.
16166              
16167             # use overload
16168             # '+' => sub {
16169             # print length $_[2], "\n";
16170             # my ( $x, $y ) = _order(@_);
16171             # Number::Roman->new( int $x + $y );
16172             # },
16173             # '-' => sub {
16174             # my ( $x, $y ) = _order(@_);
16175             # Number::Roman->new( int $x - $y );
16176             # };
16177             || ( $max_index_to_go == 2
16178             && $types_to_go[0] eq 'k'
16179             && $tokens_to_go[0] eq 'use'
16180             && $tokens_to_go[$max_index_to_go] eq 'overload' )
16181             )
16182             {
16183 562         1149 $index_start_one_line_block = undef;
16184 562         1587 $self->end_batch();
16185             }
16186              
16187             else {
16188              
16189             # Check for a soft break request
16190 2695 50 33     7092 if ( $break_flag && $break_flag == 2 ) {
16191 0         0 $self->set_forced_breakpoint($max_index_to_go);
16192             }
16193              
16194             # mark old line breakpoints in current output stream
16195 2695 50 33     6926 if (
      66        
16196             !$rOpts_ignore_old_breakpoints
16197              
16198             # Mark essential old breakpoints if combination -iob -lp is
16199             # used. These two options do not work well together, but
16200             # we can avoid turning -iob off by ignoring -iob at certain
16201             # essential line breaks. See also related code above.
16202             # Fixes b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058
16203             || ( $rOpts_line_up_parentheses
16204             && $is_assignment_or_fat_comma{$type} )
16205             )
16206             {
16207 2685         5962 $old_breakpoint_to_go[$max_index_to_go] = 1;
16208             }
16209             }
16210             }
16211              
16212 5860         18772 return;
16213             } ## end sub process_line_of_CODE
16214              
16215             sub process_line_inner_loop {
16216              
16217 5860     5860 0 10772 my ( $self, $has_side_comment ) = @_;
16218              
16219             #--------------------------------------------------------------------
16220             # Loop to move all tokens from one input line to a newly forming batch
16221             #--------------------------------------------------------------------
16222              
16223             # Do not start a new batch with a blank space
16224 5860 100 100     20502 if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
16225 20         46 $K_first++;
16226             }
16227              
16228 5860         13465 foreach my $Ktoken_vars ( $K_first .. $K_last ) {
16229              
16230 54084         87674 my $rtoken_vars = $rLL->[$Ktoken_vars];
16231              
16232             #--------------
16233             # handle blanks
16234             #--------------
16235 54084 100       120600 if ( $rtoken_vars->[_TYPE_] eq 'b' ) {
16236 19092         42900 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
16237 19092         31804 next;
16238             }
16239              
16240             #------------------
16241             # handle non-blanks
16242             #------------------
16243 34992         50436 my $type = $rtoken_vars->[_TYPE_];
16244              
16245             # If we are continuing after seeing a right curly brace, flush
16246             # buffer unless we see what we are looking for, as in
16247             # } else ...
16248 34992 100       59539 if ($rbrace_follower) {
16249 197         617 my $token = $rtoken_vars->[_TOKEN_];
16250 197 100       824 unless ( $rbrace_follower->{$token} ) {
16251 156 100       688 $self->end_batch() if ( $max_index_to_go >= 0 );
16252             }
16253 197         561 $rbrace_follower = undef;
16254             }
16255              
16256             my (
16257 34992         50580 $block_type, $type_sequence,
16258             $is_opening_BLOCK, $is_closing_BLOCK,
16259             $nobreak_BEFORE_BLOCK
16260             );
16261              
16262 34992 100       65842 if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) {
16263              
16264 9062         16972 my $token = $rtoken_vars->[_TOKEN_];
16265 9062         14210 $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
16266 9062         15682 $block_type = $rblock_type_of_seqno->{$type_sequence};
16267              
16268 9062 100 66     30317 if ( $block_type
      100        
      100        
16269             && $token eq $type
16270             && $block_type ne 't'
16271             && !$self->[_rshort_nested_]->{$type_sequence} )
16272             {
16273              
16274 1924 100       6554 if ( $type eq '{' ) {
    50          
16275 962         1587 $is_opening_BLOCK = 1;
16276 962         1742 $nobreak_BEFORE_BLOCK = $no_internal_newlines;
16277             }
16278             elsif ( $type eq '}' ) {
16279 962         1697 $is_closing_BLOCK = 1;
16280 962         1742 $nobreak_BEFORE_BLOCK = $no_internal_newlines;
16281             }
16282             }
16283             }
16284              
16285             #---------------------
16286             # handle side comments
16287             #---------------------
16288 34992 100       59044 if ($has_side_comment) {
16289              
16290             # if at last token ...
16291 2196 100 100     10347 if ( $Ktoken_vars == $K_last ) {
    100 66        
16292 364         1353 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
16293 364         895 next;
16294             }
16295              
16296             # if before last token ... do not allow breaks which would
16297             # promote a side comment to a block comment
16298             elsif ($Ktoken_vars == $K_last - 1
16299             || $Ktoken_vars == $K_last - 2
16300             && $rLL->[ $K_last - 1 ]->[_TYPE_] eq 'b' )
16301             {
16302 364         686 $no_internal_newlines = 2;
16303             }
16304             }
16305              
16306             # Process non-blank and non-comment tokens ...
16307              
16308             #-----------------
16309             # handle semicolon
16310             #-----------------
16311 34628 100       86312 if ( $type eq ';' ) {
    100          
    100          
    100          
16312              
16313 2537         5704 my $next_nonblank_token_type = 'b';
16314 2537         4658 my $next_nonblank_token = EMPTY_STRING;
16315 2537 100       6300 if ( $Ktoken_vars < $K_last ) {
16316 525         1110 my $Knnb = $Ktoken_vars + 1;
16317 525 100       1650 $Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' );
16318 525         1055 $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
16319 525         993 $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
16320             }
16321              
16322 2537 50 66     6945 if ( $rOpts_break_at_old_semicolon_breakpoints
      66        
      33        
16323             && ( $Ktoken_vars == $K_first )
16324             && $max_index_to_go >= 0
16325             && !defined($index_start_one_line_block) )
16326             {
16327 1         7 $self->end_batch();
16328             }
16329              
16330 2537         6841 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
16331              
16332 2537 100 100     19987 $self->end_batch()
      100        
      100        
16333             unless (
16334             $no_internal_newlines
16335             || ( $rOpts_keep_interior_semicolons
16336             && $Ktoken_vars < $K_last )
16337             || ( $next_nonblank_token eq '}' )
16338             );
16339             }
16340              
16341             #-----------
16342             # handle '{'
16343             #-----------
16344             elsif ($is_opening_BLOCK) {
16345              
16346             # Tentatively output this token. This is required before
16347             # calling starting_one_line_block. We may have to unstore
16348             # it, though, if we have to break before it.
16349 962         2987 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
16350              
16351             # Look ahead to see if we might form a one-line block..
16352 962         4799 my $too_long =
16353             $self->starting_one_line_block( $Ktoken_vars,
16354             $K_last_nonblank_code, $K_last );
16355 962         3620 $self->clear_breakpoint_undo_stack();
16356              
16357             # to simplify the logic below, set a flag to indicate if
16358             # this opening brace is far from the keyword which introduces it
16359 962         1590 my $keyword_on_same_line = 1;
16360 962 0 66     6182 if (
      66        
      0        
      33        
16361             $max_index_to_go >= 0
16362             && defined($K_last_nonblank_code)
16363             && $rLL->[$K_last_nonblank_code]->[_TYPE_] eq ')'
16364             && ( ( $rtoken_vars->[_LEVEL_] < $levels_to_go[0] )
16365             || $too_long )
16366             )
16367             {
16368 0         0 $keyword_on_same_line = 0;
16369             }
16370              
16371             # Break before '{' if requested with -bl or -bli flag
16372 962         2129 my $want_break = $self->[_rbrace_left_]->{$type_sequence};
16373              
16374             # But do not break if this token is welded to the left
16375 962 100 100     2895 if ( $total_weld_count
16376             && defined( $self->[_rK_weld_left_]->{$Ktoken_vars} ) )
16377             {
16378 21         72 $want_break = 0;
16379             }
16380              
16381             # Break BEFORE an opening '{' ...
16382 962 100 100     5380 if (
      33        
      66        
16383              
16384             # if requested
16385             $want_break
16386              
16387             # and we were unable to start looking for a block,
16388             && !defined($index_start_one_line_block)
16389              
16390             # or if it will not be on same line as its keyword, so that
16391             # it will be outdented (eval.t, overload.t), and the user
16392             # has not insisted on keeping it on the right
16393             || ( !$keyword_on_same_line
16394             && !$rOpts_opening_brace_always_on_right )
16395             )
16396             {
16397              
16398             # but only if allowed
16399 50 50       162 unless ($nobreak_BEFORE_BLOCK) {
16400              
16401             # since we already stored this token, we must unstore it
16402 50         201 $self->unstore_token_to_go();
16403              
16404             # then output the line
16405 50 100       203 $self->end_batch() if ( $max_index_to_go >= 0 );
16406              
16407             # and now store this token at the start of a new line
16408 50         1180 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
16409             }
16410             }
16411              
16412             # now output this line
16413             $self->end_batch()
16414 962 100 66     4934 if ( $max_index_to_go >= 0 && !$no_internal_newlines );
16415             }
16416              
16417             #-----------
16418             # handle '}'
16419             #-----------
16420             elsif ($is_closing_BLOCK) {
16421              
16422 962         1961 my $next_nonblank_token_type = 'b';
16423 962         1786 my $next_nonblank_token = EMPTY_STRING;
16424 962         2151 my $Knnb;
16425 962 100       2542 if ( $Ktoken_vars < $K_last ) {
16426 417         962 $Knnb = $Ktoken_vars + 1;
16427 417 100       1490 $Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' );
16428 417         973 $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
16429 417         952 $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
16430             }
16431              
16432             # If there is a pending one-line block ..
16433 962 100       2499 if ( defined($index_start_one_line_block) ) {
16434              
16435             # Fix for b1208: if a side comment follows this closing
16436             # brace then we must include its length in the length test
16437             # ... unless the -issl flag is set (fixes b1307-1309).
16438             # Assume a minimum of 1 blank space to the comment.
16439 351         729 my $added_length = 0;
16440 351 100 100     1320 if ( $has_side_comment
      100        
16441             && !$rOpts_ignore_side_comment_lengths
16442             && $next_nonblank_token_type eq '#' )
16443             {
16444 17         48 $added_length = 1 + $rLL->[$K_last]->[_TOKEN_LENGTH_];
16445             }
16446              
16447             # we have to terminate it if..
16448 351 50       1175 if (
16449              
16450             # it is too long (final length may be different from
16451             # initial estimate). note: must allow 1 space for this
16452             # token
16453             $self->excess_line_length( $index_start_one_line_block,
16454             $max_index_to_go ) + $added_length >= 0
16455             )
16456             {
16457 0         0 $index_start_one_line_block = undef;
16458             }
16459             }
16460              
16461             # put a break before this closing curly brace if appropriate
16462             $self->end_batch()
16463 962 100 100     4397 if ( $max_index_to_go >= 0
      100        
16464             && !$nobreak_BEFORE_BLOCK
16465             && !defined($index_start_one_line_block) );
16466              
16467             # store the closing curly brace
16468 962         3028 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
16469              
16470             # ok, we just stored a closing curly brace. Often, but
16471             # not always, we want to end the line immediately.
16472             # So now we have to check for special cases.
16473              
16474             # if this '}' successfully ends a one-line block..
16475 962         2072 my $one_line_block_type = EMPTY_STRING;
16476 962         1792 my $keep_going;
16477 962 100       2705 if ( defined($index_start_one_line_block) ) {
16478              
16479             # Remember the type of token just before the
16480             # opening brace. It would be more general to use
16481             # a stack, but this will work for one-line blocks.
16482 351         782 $one_line_block_type =
16483             $types_to_go[$index_start_one_line_block];
16484              
16485             # we have to actually make it by removing tentative
16486             # breaks that were set within it
16487 351         1598 $self->undo_forced_breakpoint_stack(0);
16488              
16489             # For -lp, extend the nobreak to include a trailing
16490             # terminal ','. This is because the -lp indentation was
16491             # not known when making one-line blocks, so we may be able
16492             # to move the line back to fit. Otherwise we may create a
16493             # needlessly stranded comma on the next line.
16494 351         820 my $iend_nobreak = $max_index_to_go - 1;
16495 351 100 100     1173 if ( $rOpts_line_up_parentheses
      66        
16496             && $next_nonblank_token_type eq ','
16497             && $Knnb eq $K_last )
16498             {
16499 1         3 my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
16500             my $is_excluded =
16501 1         5 $self->[_ris_excluded_lp_container_]->{$p_seqno};
16502 1 50       5 $iend_nobreak = $max_index_to_go if ( !$is_excluded );
16503             }
16504              
16505 351         1388 $self->set_nobreaks( $index_start_one_line_block,
16506             $iend_nobreak );
16507              
16508             # save starting block indexes so that sub correct_lp can
16509             # check and adjust -lp indentation (c098)
16510 351         533 push @{$ri_starting_one_line_block},
  351         949  
16511             $index_start_one_line_block;
16512              
16513             # then re-initialize for the next one-line block
16514 351         690 $index_start_one_line_block = undef;
16515              
16516             # then decide if we want to break after the '}' ..
16517             # We will keep going to allow certain brace followers as in:
16518             # do { $ifclosed = 1; last } unless $losing;
16519             #
16520             # But make a line break if the curly ends a
16521             # significant block:
16522 351 100 100     2721 if (
      66        
16523             (
16524             $is_block_without_semicolon{$block_type}
16525              
16526             # Follow users break point for
16527             # one line block types U & G, such as a 'try' block
16528             || $one_line_block_type =~ /^[UG]$/
16529             && $Ktoken_vars == $K_last
16530             )
16531              
16532             # if needless semicolon follows we handle it later
16533             && $next_nonblank_token ne ';'
16534             )
16535             {
16536 84 100       347 $self->end_batch()
16537             unless ($no_internal_newlines);
16538             }
16539             }
16540              
16541             # set string indicating what we need to look for brace follower
16542             # tokens
16543 962 100 100     7381 if ( $is_if_unless_elsif_else{$block_type} ) {
    100          
    100          
    100          
16544 184         401 $rbrace_follower = undef;
16545             }
16546             elsif ( $block_type eq 'do' ) {
16547 45         195 $rbrace_follower = \%is_do_follower;
16548 45 100       288 if (
16549             $self->tight_paren_follows( $K_to_go[0], $Ktoken_vars )
16550             )
16551             {
16552 3         8 $rbrace_follower = { ')' => 1 };
16553             }
16554             }
16555              
16556             # added eval for borris.t
16557             elsif ($is_sort_map_grep_eval{$block_type}
16558             || $one_line_block_type eq 'G' )
16559             {
16560 133         295 $rbrace_follower = undef;
16561 133         309 $keep_going = 1;
16562             }
16563              
16564             # anonymous sub
16565             elsif ( $self->[_ris_asub_block_]->{$type_sequence} ) {
16566 173 100       712 if ($one_line_block_type) {
16567              
16568 81         265 $rbrace_follower = \%is_anon_sub_1_brace_follower;
16569              
16570             # Exceptions to help keep -lp intact, see git #74 ...
16571             # Exception 1: followed by '}' on this line
16572 81 100 100     683 if ( $Ktoken_vars < $K_last
    100 100        
16573             && $next_nonblank_token eq '}' )
16574             {
16575 2         13 $rbrace_follower = undef;
16576 2         7 $keep_going = 1;
16577             }
16578              
16579             # Exception 2: followed by '}' on next line if -lp set.
16580             # The -lp requirement allows the formatting to follow
16581             # old breaks when -lp is not used, minimizing changes.
16582             # Fixes issue c087.
16583             elsif ($Ktoken_vars == $K_last
16584             && $rOpts_line_up_parentheses )
16585             {
16586 1         3 my $K_closing_container =
16587             $self->[_K_closing_container_];
16588 1         3 my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
16589 1         3 my $Kc = $K_closing_container->{$p_seqno};
16590             my $is_excluded =
16591 1         4 $self->[_ris_excluded_lp_container_]->{$p_seqno};
16592 1   33     50 $keep_going =
16593             ( defined($Kc)
16594             && $rLL->[$Kc]->[_TOKEN_] eq '}'
16595             && !$is_excluded
16596             && $Kc - $Ktoken_vars <= 2 );
16597 1 50       7 $rbrace_follower = undef if ($keep_going);
16598             }
16599             }
16600             else {
16601 92         288 $rbrace_follower = \%is_anon_sub_brace_follower;
16602             }
16603             }
16604              
16605             # None of the above: specify what can follow a closing
16606             # brace of a block which is not an
16607             # if/elsif/else/do/sort/map/grep/eval
16608             # Testfiles:
16609             # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
16610             else {
16611 427         1024 $rbrace_follower = \%is_other_brace_follower;
16612             }
16613              
16614             # See if an elsif block is followed by another elsif or else;
16615             # complain if not.
16616 962 100       2667 if ( $block_type eq 'elsif' ) {
16617              
16618 25 100       126 if ( $next_nonblank_token_type eq 'b' ) { # end of line?
16619 17         40 $looking_for_else = 1; # ok, check on next line
16620             }
16621             else {
16622             ## /^(elsif|else)$/
16623 8 50       41 if ( !$is_elsif_else{$next_nonblank_token} ) {
16624 0         0 write_logfile_entry("No else block :(\n");
16625             }
16626             }
16627             }
16628              
16629             # keep going after certain block types (map,sort,grep,eval)
16630             # added eval for borris.t
16631 962 100 100     4417 if ($keep_going) {
    100          
    100          
16632              
16633             # keep going
16634 136         278 $rbrace_follower = undef;
16635              
16636             }
16637              
16638             # if no more tokens, postpone decision until re-entering
16639             elsif ( ( $next_nonblank_token_type eq 'b' )
16640             && $rOpts_add_newlines )
16641             {
16642 506 100       1712 unless ($rbrace_follower) {
16643 156 100 66     968 $self->end_batch()
16644             unless ( $no_internal_newlines
16645             || $max_index_to_go < 0 );
16646             }
16647             }
16648             elsif ($rbrace_follower) {
16649              
16650 292 100       964 if ( $rbrace_follower->{$next_nonblank_token} ) {
16651              
16652             # Fix for b1385: keep break after a comma following a
16653             # 'do' block. This could also be used for other block
16654             # types, but that would cause a significant change in
16655             # existing formatting without much benefit.
16656 192 0 100     961 if ( $next_nonblank_token eq ','
      66        
      33        
      33        
16657             && $Knnb eq $K_last
16658             && $block_type eq 'do'
16659             && $rOpts_add_newlines
16660             && $self->is_trailing_comma($Knnb) )
16661             {
16662 0         0 $self->[_rbreak_after_Klast_]->{$K_last} = 1;
16663             }
16664             }
16665             else {
16666 100 100 100     564 $self->end_batch()
16667             unless ( $no_internal_newlines
16668             || $max_index_to_go < 0 );
16669             }
16670              
16671 292         714 $rbrace_follower = undef;
16672             }
16673              
16674             else {
16675 28 100 100     165 $self->end_batch()
16676             unless ( $no_internal_newlines
16677             || $max_index_to_go < 0 );
16678             }
16679              
16680             } ## end treatment of closing block token
16681              
16682             #------------------------------
16683             # handle here_doc target string
16684             #------------------------------
16685             elsif ( $type eq 'h' ) {
16686              
16687             # no newlines after seeing here-target
16688 9         38 $no_internal_newlines = 2;
16689 9         38 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
16690             }
16691              
16692             #-----------------------------
16693             # handle all other token types
16694             #-----------------------------
16695             else {
16696              
16697 30158         67063 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
16698              
16699             # break after a label if requested
16700 30158 100 100     58307 if ( $rOpts_break_after_labels
      100        
16701             && $type eq 'J'
16702             && $rOpts_break_after_labels == 1 )
16703             {
16704 3 50       9 $self->end_batch()
16705             unless ($no_internal_newlines);
16706             }
16707             }
16708              
16709             # remember previous nonblank, non-comment OUTPUT token
16710 34628         59607 $K_last_nonblank_code = $Ktoken_vars;
16711              
16712             } ## end of loop over all tokens in this line
16713 5860         10241 return;
16714             } ## end sub process_line_inner_loop
16715              
16716             } ## end closure process_line_of_CODE
16717              
16718             sub is_trailing_comma {
16719 0     0 0 0 my ( $self, $KK ) = @_;
16720              
16721             # Given:
16722             # $KK - index of a comma in token list
16723             # Return:
16724             # true if the comma at index $KK is a trailing comma
16725             # false if not
16726              
16727 0         0 my $rLL = $self->[_rLL_];
16728 0         0 my $type_KK = $rLL->[$KK]->[_TYPE_];
16729 0 0       0 if ( $type_KK ne ',' ) {
16730 0         0 DEVEL_MODE
16731             && Fault("Bad call: expected type ',' but received '$type_KK'\n");
16732 0         0 return;
16733             }
16734 0         0 my $Knnb = $self->K_next_nonblank($KK);
16735 0 0       0 if ( defined($Knnb) ) {
16736 0         0 my $type_sequence = $rLL->[$Knnb]->[_TYPE_SEQUENCE_];
16737 0         0 my $type_Knnb = $rLL->[$Knnb]->[_TYPE_];
16738 0 0 0     0 if ( $type_sequence && $is_closing_type{$type_Knnb} ) {
16739 0         0 return 1;
16740             }
16741             }
16742 0         0 return;
16743             } ## end sub is_trailing_comma
16744              
16745             sub tight_paren_follows {
16746              
16747 45     45 0 153 my ( $self, $K_to_go_0, $K_ic ) = @_;
16748              
16749             # Input parameters:
16750             # $K_to_go_0 = first token index K of this output batch (=K_to_go[0])
16751             # $K_ic = index of the closing do brace (=K_to_go[$max_index_to_go])
16752             # Return parameter:
16753             # false if we want a break after the closing do brace
16754             # true if we do not want a break after the closing do brace
16755              
16756             # We are at the closing brace of a 'do' block. See if this brace is
16757             # followed by a closing paren, and if so, set a flag which indicates
16758             # that we do not want a line break between the '}' and ')'.
16759              
16760             # xxxxx ( ...... do { ... } ) {
16761             # ^-------looking at this brace, K_ic
16762              
16763             # Subscript notation:
16764             # _i = inner container (braces in this case)
16765             # _o = outer container (parens in this case)
16766             # _io = inner opening = '{'
16767             # _ic = inner closing = '}'
16768             # _oo = outer opening = '('
16769             # _oc = outer closing = ')'
16770              
16771             # |--K_oo |--K_oc = outer container
16772             # xxxxx ( ...... do { ...... } ) {
16773             # |--K_io |--K_ic = inner container
16774              
16775             # In general, the safe thing to do is return a 'false' value
16776             # if the statement appears to be complex. This will have
16777             # the downstream side-effect of opening up outer containers
16778             # to help make complex code readable. But for simpler
16779             # do blocks it can be preferable to keep the code compact
16780             # by returning a 'true' value.
16781              
16782 45 50       165 return unless defined($K_ic);
16783 45         133 my $rLL = $self->[_rLL_];
16784              
16785             # we should only be called at a closing block
16786 45         111 my $seqno_i = $rLL->[$K_ic]->[_TYPE_SEQUENCE_];
16787 45 50       148 return unless ($seqno_i); # shouldn't happen;
16788              
16789             # This only applies if the next nonblank is a ')'
16790 45         184 my $K_oc = $self->K_next_nonblank($K_ic);
16791 45 100       219 return unless defined($K_oc);
16792 44         128 my $token_next = $rLL->[$K_oc]->[_TOKEN_];
16793 44 100       197 return unless ( $token_next eq ')' );
16794              
16795 7         19 my $seqno_o = $rLL->[$K_oc]->[_TYPE_SEQUENCE_];
16796 7         22 my $K_io = $self->[_K_opening_container_]->{$seqno_i};
16797 7         18 my $K_oo = $self->[_K_opening_container_]->{$seqno_o};
16798 7 50 33     49 return unless ( defined($K_io) && defined($K_oo) );
16799              
16800             # RULE 1: Do not break before a closing signature paren
16801             # (regardless of complexity). This is a fix for issue git#22.
16802             # Looking for something like:
16803             # sub xxx ( ... do { ... } ) {
16804             # ^----- next block_type
16805 7         27 my $K_test = $self->K_next_nonblank($K_oc);
16806 7 100 66     46 if ( defined($K_test) && $rLL->[$K_test]->[_TYPE_] eq '{' ) {
16807 3         7 my $seqno_test = $rLL->[$K_test]->[_TYPE_SEQUENCE_];
16808 3 50       8 if ($seqno_test) {
16809 3 50 66     14 if ( $self->[_ris_asub_block_]->{$seqno_test}
16810             || $self->[_ris_sub_block_]->{$seqno_test} )
16811             {
16812 3         12 return 1;
16813             }
16814             }
16815             }
16816              
16817             # RULE 2: Break if the contents within braces appears to be 'complex'. We
16818             # base this decision on the number of tokens between braces.
16819              
16820             # xxxxx ( ... do { ... } ) {
16821             # ^^^^^^
16822              
16823             # Although very simple, it has the advantages of (1) being insensitive to
16824             # changes in lengths of identifier names, (2) easy to understand, implement
16825             # and test. A test case for this is 't/snippets/long_line.in'.
16826              
16827             # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
16828             # if ( do { $2 !~ /&/ } ) { ... }
16829              
16830             # Example: $K_ic - $K_oo = 10 [Pass Rule 2]
16831             # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
16832              
16833             # Example: $K_ic - $K_oo = 20 [Fail Rule 2]
16834             # test_zero_args( "do-returned list slice", do { ( 10, 11 )[ 2, 3 ]; });
16835              
16836 4 50       32 return if ( $K_ic - $K_io > 16 );
16837              
16838             # RULE 3: break if the code between the opening '(' and the '{' is 'complex'
16839             # As with the previous rule, we decide based on the token count
16840              
16841             # xxxxx ( ... do { ... } ) {
16842             # ^^^^^^^^
16843              
16844             # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
16845             # $K_io - $K_oo = 4 [Pass Rule 3]
16846             # if ( do { $2 !~ /&/ } ) { ... }
16847              
16848             # Example: $K_ic - $K_oo = 10 [Pass rule 2]
16849             # $K_io - $K_oo = 9 [Pass rule 3]
16850             # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
16851              
16852 0 0       0 return if ( $K_io - $K_oo > 9 );
16853              
16854             # RULE 4: Break if we have already broken this batch of output tokens
16855 0 0       0 return if ( $K_oo < $K_to_go_0 );
16856              
16857             # RULE 5: Break if input is not on one line
16858             # For example, we will set the flag for the following expression
16859             # written in one line:
16860              
16861             # This has: $K_ic - $K_oo = 10 [Pass rule 2]
16862             # $K_io - $K_oo = 8 [Pass rule 3]
16863             # $self->debug( 'Error: ' . do { local $/; <$err> } );
16864              
16865             # but we break after the brace if it is on multiple lines on input, since
16866             # the user may prefer it on multiple lines:
16867              
16868             # [Fail rule 5]
16869             # $self->debug(
16870             # 'Error: ' . do { local $/; <$err> }
16871             # );
16872              
16873 0 0       0 if ( !$rOpts_ignore_old_breakpoints ) {
16874 0         0 my $iline_oo = $rLL->[$K_oo]->[_LINE_INDEX_];
16875 0         0 my $iline_oc = $rLL->[$K_oc]->[_LINE_INDEX_];
16876 0 0       0 return if ( $iline_oo != $iline_oc );
16877             }
16878              
16879             # OK to keep the paren tight
16880 0         0 return 1;
16881             } ## end sub tight_paren_follows
16882              
16883             my %is_brace_semicolon_colon;
16884              
16885             BEGIN {
16886 38     38   267 my @q = qw( { } ; : );
16887 38         78081 @is_brace_semicolon_colon{@q} = (1) x scalar(@q);
16888             }
16889              
16890             sub starting_one_line_block {
16891              
16892             # After seeing an opening curly brace, look for the closing brace and see
16893             # if the entire block will fit on a line. This routine is not always right
16894             # so a check is made later (at the closing brace) to make sure we really
16895             # have a one-line block. We have to do this preliminary check, though,
16896             # because otherwise we would always break at a semicolon within a one-line
16897             # block if the block contains multiple statements.
16898              
16899             # Given:
16900             # $Kj = index of opening brace
16901             # $K_last_nonblank = index of previous nonblank code token
16902             # $K_last = index of last token of input line
16903              
16904             # Calls 'create_one_line_block' if one-line block might be formed.
16905              
16906             # Also returns a flag '$too_long':
16907             # true = distance from opening keyword to OPENING brace exceeds
16908             # the maximum line length.
16909             # false (simple return) => not too long
16910             # Note that this flag is for distance from the statement start to the
16911             # OPENING brace, not the closing brace.
16912              
16913 962     962 0 2694 my ( $self, $Kj, $K_last_nonblank, $K_last ) = @_;
16914              
16915 962         1936 my $rbreak_container = $self->[_rbreak_container_];
16916 962         1726 my $rshort_nested = $self->[_rshort_nested_];
16917 962         1803 my $rLL = $self->[_rLL_];
16918 962         1670 my $K_opening_container = $self->[_K_opening_container_];
16919 962         1689 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
16920              
16921             # kill any current block - we can only go 1 deep
16922 962         2836 create_one_line_block();
16923              
16924 962         1623 my $i_start = 0;
16925              
16926             # This routine should not have been called if there are no tokens in the
16927             # 'to_go' arrays of previously stored tokens. A previous call to
16928             # 'store_token_to_go' should have stored an opening brace. An error here
16929             # indicates that a programming change may have caused a flush operation to
16930             # clean out the previously stored tokens.
16931 962 50 33     4377 if ( !defined($max_index_to_go) || $max_index_to_go < 0 ) {
16932 0         0 Fault("program bug: store_token_to_go called incorrectly\n")
16933             if (DEVEL_MODE);
16934 0         0 return;
16935             }
16936              
16937             # Return if block should be broken
16938 962         1968 my $type_sequence_j = $rLL->[$Kj]->[_TYPE_SEQUENCE_];
16939 962 100       2436 if ( $rbreak_container->{$type_sequence_j} ) {
16940 20         80 return;
16941             }
16942              
16943 942         1773 my $ris_bli_container = $self->[_ris_bli_container_];
16944 942         1721 my $is_bli = $ris_bli_container->{$type_sequence_j};
16945              
16946 942         2188 my $block_type = $rblock_type_of_seqno->{$type_sequence_j};
16947 942 50       2254 $block_type = EMPTY_STRING unless ( defined($block_type) );
16948              
16949 942         1734 my $previous_nonblank_token = EMPTY_STRING;
16950 942         1599 my $i_last_nonblank = -1;
16951 942 100       2161 if ( defined($K_last_nonblank) ) {
16952 924         1689 $i_last_nonblank = $K_last_nonblank - $K_to_go[0];
16953 924 100       2140 if ( $i_last_nonblank >= 0 ) {
16954 820         1736 $previous_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
16955             }
16956             }
16957              
16958             #---------------------------------------------------------------------
16959             # find the starting keyword for this block (such as 'if', 'else', ...)
16960             #---------------------------------------------------------------------
16961 942 100 100     11614 if (
    100 100        
    100 100        
    50 33        
      33        
16962             $max_index_to_go == 0
16963             ##|| $block_type =~ /^[\{\}\;\:]$/
16964             || $is_brace_semicolon_colon{$block_type}
16965             || substr( $block_type, 0, 7 ) eq 'package'
16966             )
16967             {
16968 148         351 $i_start = $max_index_to_go;
16969             }
16970              
16971             # the previous nonblank token should start these block types
16972             elsif (
16973             $i_last_nonblank >= 0
16974             && ( $previous_nonblank_token eq $block_type
16975             || $self->[_ris_asub_block_]->{$type_sequence_j}
16976             || $self->[_ris_sub_block_]->{$type_sequence_j}
16977             || substr( $block_type, -2, 2 ) eq '()' )
16978             )
16979             {
16980 575         1256 $i_start = $i_last_nonblank;
16981              
16982             # For signatures and extended syntax ...
16983             # If this brace follows a parenthesized list, we should look back to
16984             # find the keyword before the opening paren because otherwise we might
16985             # form a one line block which stays intact, and cause the parenthesized
16986             # expression to break open. That looks bad.
16987 575 100       1861 if ( $tokens_to_go[$i_start] eq ')' ) {
16988              
16989             # Find the opening paren
16990 33         83 my $K_start = $K_to_go[$i_start];
16991 33 50       143 return unless defined($K_start);
16992 33         82 my $seqno = $type_sequence_to_go[$i_start];
16993 33 50       111 return unless ($seqno);
16994 33         95 my $K_opening = $K_opening_container->{$seqno};
16995 33 50       108 return unless defined($K_opening);
16996 33         87 my $i_opening = $i_start + ( $K_opening - $K_start );
16997              
16998             # give up if not on this line
16999 33 50       95 return unless ( $i_opening >= 0 );
17000 33         66 $i_start = $i_opening;
17001              
17002             # go back one token before the opening paren
17003 33 50       99 if ( $i_start > 0 ) { $i_start-- }
  33         59  
17004 33 100 66     177 if ( $types_to_go[$i_start] eq 'b' && $i_start > 0 ) { $i_start--; }
  19         35  
17005 33         67 my $lev = $levels_to_go[$i_start];
17006 33 100       127 if ( $lev > $rLL->[$Kj]->[_LEVEL_] ) { return }
  2         15  
17007             }
17008             }
17009              
17010             elsif ( $previous_nonblank_token eq ')' ) {
17011              
17012             # For something like "if (xxx) {", the keyword "if" will be
17013             # just after the most recent break. This will be 0 unless
17014             # we have just killed a one-line block and are starting another.
17015             # (doif.t)
17016             # Note: cannot use inext_index_to_go[] here because that array
17017             # is still being constructed.
17018 215         523 $i_start = $index_max_forced_break + 1;
17019 215 100       750 if ( $types_to_go[$i_start] eq 'b' ) {
17020 2         5 $i_start++;
17021             }
17022              
17023             # Patch to avoid breaking short blocks defined with extended_syntax:
17024             # Strip off any trailing () which was added in the parser to mark
17025             # the opening keyword. For example, in the following
17026             # create( TypeFoo $e) {$bubba}
17027             # the blocktype would be marked as create()
17028 215         475 my $stripped_block_type = $block_type;
17029 215 50       766 if ( substr( $block_type, -2, 2 ) eq '()' ) {
17030 0         0 $stripped_block_type = substr( $block_type, 0, -2 );
17031             }
17032 215 100       690 unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
17033 9         31 return;
17034             }
17035             }
17036              
17037             # patch for SWITCH/CASE to retain one-line case/when blocks
17038             elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
17039              
17040             # Note: cannot use inext_index_to_go[] here because that array
17041             # is still being constructed.
17042 4         8 $i_start = $index_max_forced_break + 1;
17043 4 50       12 if ( $types_to_go[$i_start] eq 'b' ) {
17044 0         0 $i_start++;
17045             }
17046 4 50       9 unless ( $tokens_to_go[$i_start] eq $block_type ) {
17047 0         0 return;
17048             }
17049             }
17050             else {
17051              
17052             #-------------------------------------------
17053             # Couldn't find start - return too_long flag
17054             #-------------------------------------------
17055 0         0 return 1;
17056             }
17057              
17058 931         3170 my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
17059              
17060 931         2238 my $maximum_line_length =
17061             $maximum_line_length_at_level[ $levels_to_go[$i_start] ];
17062              
17063             # see if distance to the opening container is too great to even start
17064 931 100       2383 if ( $pos > $maximum_line_length ) {
17065              
17066             #------------------------------
17067             # too long to the opening token
17068             #------------------------------
17069 14         49 return 1;
17070             }
17071              
17072             #-----------------------------------------------------------------------
17073             # OK so far: the statement is not to long just to the OPENING token. Now
17074             # see if everything to the closing token will fit on one line
17075             #-----------------------------------------------------------------------
17076              
17077             # This is part of an update to fix cases b562 .. b983
17078 917         2131 my $K_closing = $self->[_K_closing_container_]->{$type_sequence_j};
17079 917 50       2309 return unless ( defined($K_closing) );
17080 917         2402 my $container_length = $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
17081             $rLL->[$Kj]->[_CUMULATIVE_LENGTH_];
17082              
17083 917         1825 my $excess = $pos + 1 + $container_length - $maximum_line_length;
17084              
17085             # Add a small tolerance for welded tokens (case b901)
17086 917 100 100     2644 if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence_j) ) {
17087 24         71 $excess += 2;
17088             }
17089              
17090 917 100       2516 if ( $excess > 0 ) {
17091              
17092             # line is too long... there is no chance of forming a one line block
17093             # if the excess is more than 1 char
17094 273 100       1048 return if ( $excess > 1 );
17095              
17096             # ... and give up if it is not a one-line block on input.
17097             # note: for a one-line block on input, it may be possible to keep
17098             # it as a one-line block (by removing a needless semicolon ).
17099 2         7 my $K_start = $K_to_go[$i_start];
17100 2         11 my $ldiff =
17101             $rLL->[$K_closing]->[_LINE_INDEX_] - $rLL->[$K_start]->[_LINE_INDEX_];
17102 2 50       11 return if ($ldiff);
17103             }
17104              
17105             #------------------------------------------------------------------
17106             # Loop to check contents and length of the potential one-line block
17107             #------------------------------------------------------------------
17108 644         2203 foreach my $Ki ( $Kj + 1 .. $K_last ) {
17109              
17110             # old whitespace could be arbitrarily large, so don't use it
17111 3255 100       7312 if ( $rLL->[$Ki]->[_TYPE_] eq 'b' ) { $pos += 1 }
  1243         1796  
17112 2012         2999 else { $pos += $rLL->[$Ki]->[_TOKEN_LENGTH_] }
17113              
17114             # ignore some small blocks
17115 3255         4915 my $type_sequence_i = $rLL->[$Ki]->[_TYPE_SEQUENCE_];
17116 3255         4347 my $nobreak = $rshort_nested->{$type_sequence_i};
17117              
17118             # Return false result if we exceed the maximum line length,
17119 3255 50 100     12544 if ( $pos > $maximum_line_length ) {
    100 100        
    100 100        
    100 100        
      100        
      100        
17120 0         0 return;
17121             }
17122              
17123             # keep going for non-containers
17124             elsif ( !$type_sequence_i ) {
17125              
17126             }
17127              
17128             # return if we encounter another opening brace before finding the
17129             # closing brace.
17130             elsif ($rLL->[$Ki]->[_TOKEN_] eq '{'
17131             && $rLL->[$Ki]->[_TYPE_] eq '{'
17132             && $rblock_type_of_seqno->{$type_sequence_i}
17133             && !$nobreak )
17134             {
17135 26         84 return;
17136             }
17137              
17138             # if we find our closing brace..
17139             elsif ($rLL->[$Ki]->[_TOKEN_] eq '}'
17140             && $rLL->[$Ki]->[_TYPE_] eq '}'
17141             && $rblock_type_of_seqno->{$type_sequence_i}
17142             && !$nobreak )
17143             {
17144              
17145             # be sure any trailing comment also fits on the line
17146 330         654 my $Ki_nonblank = $Ki;
17147 330 100       1148 if ( $Ki_nonblank < $K_last ) {
17148 183         361 $Ki_nonblank++;
17149 183 100 66     1027 if ( $rLL->[$Ki_nonblank]->[_TYPE_] eq 'b'
17150             && $Ki_nonblank < $K_last )
17151             {
17152 111         231 $Ki_nonblank++;
17153             }
17154             }
17155              
17156             # Patch for one-line sort/map/grep/eval blocks with side comments:
17157             # We will ignore the side comment length for sort/map/grep/eval
17158             # because this can lead to statements which change every time
17159             # perltidy is run. Here is an example from Denis Moskowitz which
17160             # oscillates between these two states without this patch:
17161              
17162             ## --------
17163             ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
17164             ## @baz;
17165             ##
17166             ## grep {
17167             ## $_->foo ne 'bar'
17168             ## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
17169             ## @baz;
17170             ## --------
17171              
17172             # When the first line is input it gets broken apart by the main
17173             # line break logic in sub process_line_of_CODE.
17174             # When the second line is input it gets recombined by
17175             # process_line_of_CODE and passed to the output routines. The
17176             # output routines (break_long_lines) do not break it apart
17177             # because the bond strengths are set to the highest possible value
17178             # for grep/map/eval/sort blocks, so the first version gets output.
17179             # It would be possible to fix this by changing bond strengths,
17180             # but they are high to prevent errors in older versions of perl.
17181             # See c100 for eval test.
17182 330 100 100     1951 if ( $Ki < $K_last
      100        
      100        
      100        
      66        
17183             && $rLL->[$K_last]->[_TYPE_] eq '#'
17184             && $rLL->[$K_last]->[_LEVEL_] == $rLL->[$Ki]->[_LEVEL_]
17185             && !$rOpts_ignore_side_comment_lengths
17186             && !$is_sort_map_grep_eval{$block_type}
17187             && $K_last - $Ki_nonblank <= 2 )
17188             {
17189             # Only include the side comment for if/else/elsif/unless if it
17190             # immediately follows (because the current '$rbrace_follower'
17191             # logic for these will give an immediate brake after these
17192             # closing braces). So for example a line like this
17193             # if (...) { ... } ; # very long comment......
17194             # will already break like this:
17195             # if (...) { ... }
17196             # ; # very long comment......
17197             # so we do not need to include the length of the comment, which
17198             # would break the block. Project 'bioperl' has coding like this.
17199             ## !~ /^(if|else|elsif|unless)$/
17200 19 50 66     116 if ( !$is_if_unless_elsif_else{$block_type}
17201             || $K_last == $Ki_nonblank )
17202             {
17203 19         50 $Ki_nonblank = $K_last;
17204 19         50 $pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_];
17205              
17206 19 50       77 if ( $Ki_nonblank > $Ki + 1 ) {
17207              
17208             # source whitespace could be anything, assume
17209             # at least one space before the hash on output
17210 19 100       84 if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) {
17211 17         40 $pos += 1;
17212             }
17213 2         7 else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] }
17214             }
17215              
17216 19 50       67 if ( $pos >= $maximum_line_length ) {
17217 0         0 return;
17218             }
17219             }
17220             }
17221              
17222             #--------------------------
17223             # ok, it's a one-line block
17224             #--------------------------
17225 330         1035 create_one_line_block($i_start);
17226 330         1002 return;
17227             }
17228              
17229             # just keep going for other characters
17230             else {
17231             }
17232             }
17233              
17234             #--------------------------------------------------
17235             # End Loop to examine tokens in potential one-block
17236             #--------------------------------------------------
17237              
17238             # We haven't hit the closing brace, but there is still space. So the
17239             # question here is, should we keep going to look at more lines in hopes of
17240             # forming a new one-line block, or should we stop right now. The problem
17241             # with continuing is that we will not be able to honor breaks before the
17242             # opening brace if we continue.
17243              
17244             # Typically we will want to keep trying to make one-line blocks for things
17245             # like sort/map/grep/eval. But it is not always a good idea to make as
17246             # many one-line blocks as possible, so other types are not done. The user
17247             # can always use -mangle.
17248              
17249             # If we want to keep going, we will create a new one-line block.
17250             # The blocks which we can keep going are in a hash, but we never want
17251             # to continue if we are at a '-bli' block.
17252 288 100 66     1475 if ( $want_one_line_block{$block_type} && !$is_bli ) {
17253 47         144 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence_j};
17254             my $semicolon_count = $rtype_count
17255 47 100 100     258 && $rtype_count->{';'} ? $rtype_count->{';'} : 0;
17256              
17257             # Ignore a terminal semicolon in the count
17258 47 100       164 if ( $semicolon_count <= 2 ) {
17259 44         101 my $K_closing_container = $self->[_K_closing_container_];
17260 44         94 my $K_closing_j = $K_closing_container->{$type_sequence_j};
17261 44         165 my $Kp = $self->K_previous_nonblank($K_closing_j);
17262 44 100 66     332 if ( defined($Kp)
17263             && $rLL->[$Kp]->[_TYPE_] eq ';' )
17264             {
17265 23         57 $semicolon_count -= 1;
17266             }
17267             }
17268 47 100 66     215 if ( $semicolon_count <= 0 ) {
    100          
17269 26         80 create_one_line_block($i_start);
17270             }
17271             elsif ( $semicolon_count == 1 && $block_type eq 'eval' ) {
17272              
17273             # Mark short broken eval blocks for possible later use in
17274             # avoiding adding spaces before a 'package' line. This is not
17275             # essential but helps keep newer and older formatting the same.
17276 18         49 $self->[_ris_short_broken_eval_block_]->{$type_sequence_j} = 1;
17277             }
17278             }
17279 288         825 return;
17280             } ## end sub starting_one_line_block
17281              
17282             sub unstore_token_to_go {
17283              
17284             # remove most recent token from output stream
17285 50     50 0 106 my $self = shift;
17286 50 100       188 if ( $max_index_to_go > 0 ) {
17287 47         82 $max_index_to_go--;
17288             }
17289             else {
17290 3         7 $max_index_to_go = UNDEFINED_INDEX;
17291             }
17292 50         91 return;
17293             } ## end sub unstore_token_to_go
17294              
17295             sub compare_indentation_levels {
17296              
17297             # Check to see if output line tabbing agrees with input line
17298             # this can be very useful for debugging a script which has an extra
17299             # or missing brace.
17300              
17301 3     3 0 9 my ( $self, $K_first, $guessed_indentation_level, $line_number ) = @_;
17302 3 50       8 return unless ( defined($K_first) );
17303              
17304 3         16 my $rLL = $self->[_rLL_];
17305              
17306             # ignore a line with a leading blank token - issue c195
17307 3         6 my $type = $rLL->[$K_first]->[_TYPE_];
17308 3 50       8 return if ( $type eq 'b' );
17309              
17310 3         6 my $structural_indentation_level = $self->[_radjusted_levels_]->[$K_first];
17311              
17312             # record max structural depth for log file
17313 3 50       9 if ( $structural_indentation_level > $self->[_maximum_BLOCK_level_] ) {
17314 0         0 $self->[_maximum_BLOCK_level_] = $structural_indentation_level;
17315 0         0 $self->[_maximum_BLOCK_level_at_line_] = $line_number;
17316             }
17317              
17318 3         7 my $type_sequence = $rLL->[$K_first]->[_TYPE_SEQUENCE_];
17319             my $is_closing_block =
17320             $type_sequence
17321 3   0     11 && $self->[_rblock_type_of_seqno_]->{$type_sequence}
17322             && $type eq '}';
17323              
17324 3 50       17 if ( $guessed_indentation_level ne $structural_indentation_level ) {
17325 0         0 $self->[_last_tabbing_disagreement_] = $line_number;
17326              
17327 0 0       0 if ($is_closing_block) {
17328              
17329 0 0       0 if ( !$self->[_in_brace_tabbing_disagreement_] ) {
17330 0         0 $self->[_in_brace_tabbing_disagreement_] = $line_number;
17331             }
17332 0 0       0 if ( !$self->[_first_brace_tabbing_disagreement_] ) {
17333 0         0 $self->[_first_brace_tabbing_disagreement_] = $line_number;
17334             }
17335             }
17336              
17337 0 0       0 if ( !$self->[_in_tabbing_disagreement_] ) {
17338 0         0 $self->[_tabbing_disagreement_count_]++;
17339              
17340 0 0       0 if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
17341 0         0 write_logfile_entry(
17342             "Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
17343             );
17344             }
17345 0         0 $self->[_in_tabbing_disagreement_] = $line_number;
17346 0 0       0 $self->[_first_tabbing_disagreement_] = $line_number
17347             unless ( $self->[_first_tabbing_disagreement_] );
17348             }
17349             }
17350             else {
17351              
17352 3 50       9 $self->[_in_brace_tabbing_disagreement_] = 0 if ($is_closing_block);
17353              
17354 3         7 my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
17355 3 50       11 if ($in_tabbing_disagreement) {
17356              
17357 0 0       0 if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
17358 0         0 write_logfile_entry(
17359             "End indentation disagreement from input line $in_tabbing_disagreement\n"
17360             );
17361              
17362 0 0       0 if ( $self->[_tabbing_disagreement_count_] == MAX_NAG_MESSAGES )
17363             {
17364 0         0 write_logfile_entry(
17365             "No further tabbing disagreements will be noted\n");
17366             }
17367             }
17368 0         0 $self->[_in_tabbing_disagreement_] = 0;
17369              
17370             }
17371             }
17372 3         24 return;
17373             } ## end sub compare_indentation_levels
17374              
17375             ###################################################
17376             # CODE SECTION 8: Utilities for setting breakpoints
17377             ###################################################
17378              
17379             { ## begin closure set_forced_breakpoint
17380              
17381             my @forced_breakpoint_undo_stack;
17382              
17383             # These are global vars for efficiency:
17384             # my $forced_breakpoint_count;
17385             # my $forced_breakpoint_undo_count;
17386             # my $index_max_forced_break;
17387              
17388             # Break before or after certain tokens based on user settings
17389             my %break_before_or_after_token;
17390              
17391             BEGIN {
17392              
17393             # Updated to use all operators. This fixes case b1054
17394             # Here is the previous simplified version:
17395             ## my @q = qw( . : ? and or xor && || );
17396 38     38   820 my @q = @all_operators;
17397              
17398 38         196 push @q, ',';
17399 38         3508 @break_before_or_after_token{@q} = (1) x scalar(@q);
17400             } ## end BEGIN
17401              
17402             sub set_fake_breakpoint {
17403              
17404             # Just bump up the breakpoint count as a signal that there are breaks.
17405             # This is useful if we have breaks but may want to postpone deciding
17406             # where to make them.
17407 213     213 0 771 $forced_breakpoint_count++;
17408 213         449 return;
17409             } ## end sub set_fake_breakpoint
17410              
17411 38     38   385 use constant DEBUG_FORCE => 0;
  38         131  
  38         27333  
17412              
17413             sub set_forced_breakpoint {
17414 3938     3938 0 7850 my ( $self, $i ) = @_;
17415              
17416             # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
17417              
17418             # Exceptions:
17419             # - If the token at index $i is a blank, backup to $i-1 to
17420             # get to the previous nonblank token.
17421             # - For certain tokens, the break may be placed BEFORE the token
17422             # at index $i, depending on user break preference settings.
17423             # - If a break is made after an opening token, then a break will
17424             # also be made before the corresponding closing token.
17425              
17426             # Returns '$i_nonblank':
17427             # = index of the token after which the breakpoint was actually placed
17428             # = undef if breakpoint was not set.
17429 3938         5729 my $i_nonblank;
17430              
17431 3938 50 33     13224 if ( !defined($i) || $i < 0 ) {
17432              
17433             # Calls with bad index $i are harmless but waste time and should
17434             # be caught and eliminated during code development.
17435 0         0 if (DEVEL_MODE) {
17436             my ( $a, $b, $c ) = caller();
17437             Fault(
17438             "Bad call to forced breakpoint from $a $b $c ; called with i=$i; please fix\n"
17439             );
17440             }
17441 0         0 return;
17442             }
17443              
17444             # Break after token $i
17445 3938         8907 $i_nonblank = $self->set_forced_breakpoint_AFTER($i);
17446              
17447             # If we break at an opening container..break at the closing
17448 3938         6318 my $set_closing;
17449 3938 100 100     13686 if ( defined($i_nonblank)
17450             && $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } )
17451             {
17452 1576         2830 $set_closing = 1;
17453 1576         4125 $self->set_closing_breakpoint($i_nonblank);
17454             }
17455              
17456 3938         5792 DEBUG_FORCE && do {
17457             my ( $a, $b, $c ) = caller();
17458             my $msg =
17459             "FORCE $forced_breakpoint_count after call from $a $c with i=$i max=$max_index_to_go";
17460             if ( !defined($i_nonblank) ) {
17461             $i = EMPTY_STRING unless defined($i);
17462             $msg .= " but could not set break after i='$i'\n";
17463             }
17464             else {
17465             my $nobr = $nobreak_to_go[$i_nonblank];
17466             $nobr = 0 if ( !defined($nobr) );
17467             $msg .= <<EOM;
17468             set break after $i_nonblank: tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobr
17469             EOM
17470             if ( defined($set_closing) ) {
17471             $msg .=
17472             " Also set closing breakpoint corresponding to this token\n";
17473             }
17474             }
17475             print STDOUT $msg;
17476             };
17477              
17478 3938         7438 return $i_nonblank;
17479             } ## end sub set_forced_breakpoint
17480              
17481             sub set_forced_breakpoint_AFTER {
17482 4492     4492 0 7896 my ( $self, $i ) = @_;
17483              
17484             # This routine is only called by sub set_forced_breakpoint and
17485             # sub set_closing_breakpoint.
17486              
17487             # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
17488              
17489             # Exceptions:
17490             # - If the token at index $i is a blank, backup to $i-1 to
17491             # get to the previous nonblank token.
17492             # - For certain tokens, the break may be placed BEFORE the token
17493             # at index $i, depending on user break preference settings.
17494              
17495             # Returns:
17496             # - the index of the token after which the break was set, or
17497             # - undef if no break was set
17498              
17499 4492 50 33     14692 return unless ( defined($i) && $i >= 0 );
17500              
17501             # Back up at a blank so we have a token to examine.
17502             # This was added to fix for cases like b932 involving an '=' break.
17503 4492 100 100     15338 if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
  752         1385  
17504              
17505             # Never break between welded tokens
17506             return
17507             if ( $total_weld_count
17508 4492 100 100     10414 && $self->[_rK_weld_right_]->{ $K_to_go[$i] } );
17509              
17510 4449         7517 my $token = $tokens_to_go[$i];
17511 4449         6767 my $type = $types_to_go[$i];
17512              
17513             # For certain tokens, use user settings to decide if we break before or
17514             # after it
17515 4449 100 66     20679 if ( $break_before_or_after_token{$token}
    100 66        
      66        
17516             && ( $type eq $token || $type eq 'k' ) )
17517             {
17518 1925 100 66     5689 if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
  238         448  
17519             }
17520              
17521             # breaks are forced before 'if' and 'unless'
17522 12         29 elsif ( $is_if_unless{$token} && $type eq 'k' ) { $i-- }
17523              
17524 4449 100 66     14423 if ( $i >= 0 && $i <= $max_index_to_go ) {
17525 4443 100       9011 my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
17526              
17527 4443 100 66     20231 if ( $i_nonblank >= 0
      100        
17528             && !$nobreak_to_go[$i_nonblank]
17529             && !$forced_breakpoint_to_go[$i_nonblank] )
17530             {
17531 3515         6554 $forced_breakpoint_to_go[$i_nonblank] = 1;
17532              
17533 3515 100       7326 if ( $i_nonblank > $index_max_forced_break ) {
17534 2386         3767 $index_max_forced_break = $i_nonblank;
17535             }
17536 3515         4931 $forced_breakpoint_count++;
17537 3515         6550 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ]
17538             = $i_nonblank;
17539              
17540             # success
17541 3515         9336 return $i_nonblank;
17542             }
17543             }
17544 934         2175 return;
17545             } ## end sub set_forced_breakpoint_AFTER
17546              
17547             sub clear_breakpoint_undo_stack {
17548 962     962 0 1999 my ($self) = @_;
17549 962         1562 $forced_breakpoint_undo_count = 0;
17550 962         1478 return;
17551             }
17552              
17553 38     38   368 use constant DEBUG_UNDOBP => 0;
  38         158  
  38         23319  
17554              
17555             sub undo_forced_breakpoint_stack {
17556              
17557 447     447 0 1111 my ( $self, $i_start ) = @_;
17558              
17559             # Given $i_start, a non-negative index the 'undo stack' of breakpoints,
17560             # remove all breakpoints from the top of the 'undo stack' down to and
17561             # including index $i_start.
17562              
17563             # The 'undo stack' is a stack of all breakpoints made for a batch of
17564             # code.
17565              
17566 447 50       1218 if ( $i_start < 0 ) {
17567 0         0 $i_start = 0;
17568 0         0 my ( $a, $b, $c ) = caller();
17569              
17570             # Bad call, can only be due to a recent programming change.
17571 0         0 Fault(
17572             "Program Bug: undo_forced_breakpoint_stack from $a $c has bad i=$i_start "
17573             ) if (DEVEL_MODE);
17574 0         0 return;
17575             }
17576              
17577 447         1353 while ( $forced_breakpoint_undo_count > $i_start ) {
17578 746         1312 my $i =
17579             $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
17580 746 50 33     3042 if ( $i >= 0 && $i <= $max_index_to_go ) {
17581 746         1368 $forced_breakpoint_to_go[$i] = 0;
17582 746         1155 $forced_breakpoint_count--;
17583              
17584 746         1681 DEBUG_UNDOBP && do {
17585             my ( $a, $b, $c ) = caller();
17586             print STDOUT
17587             "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
17588             };
17589             }
17590              
17591             # shouldn't happen, but not a critical error
17592             else {
17593 0         0 if (DEVEL_MODE) {
17594             my ( $a, $b, $c ) = caller();
17595             Fault(<<EOM);
17596             Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go
17597             EOM
17598             }
17599             }
17600             }
17601 447         845 return;
17602             } ## end sub undo_forced_breakpoint_stack
17603             } ## end closure set_forced_breakpoint
17604              
17605             { ## begin closure set_closing_breakpoint
17606              
17607             my %postponed_breakpoint;
17608              
17609             sub initialize_postponed_breakpoint {
17610 555     555 0 1752 %postponed_breakpoint = ();
17611 555         1104 return;
17612             }
17613              
17614             sub has_postponed_breakpoint {
17615 2984     2984 0 5557 my ($seqno) = @_;
17616 2984         7706 return $postponed_breakpoint{$seqno};
17617             }
17618              
17619             sub set_closing_breakpoint {
17620              
17621             # set a breakpoint at a matching closing token
17622 2243     2243 0 4618 my ( $self, $i_break ) = @_;
17623              
17624 2243 100       5072 if ( defined( $mate_index_to_go[$i_break] ) ) {
17625              
17626             # Don't reduce the '2' in the statement below.
17627             # Test files: attrib.t, BasicLyx.pm.html
17628 561 100       1719 if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
17629              
17630             # break before } ] and ), but sub set_forced_breakpoint will decide
17631             # to break before or after a ? and :
17632 554 100       1840 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
17633 554         1454 $self->set_forced_breakpoint_AFTER(
17634             $mate_index_to_go[$i_break] - $inc );
17635             }
17636             }
17637             else {
17638 1682         3074 my $type_sequence = $type_sequence_to_go[$i_break];
17639 1682 50       3565 if ($type_sequence) {
17640 1682         3856 $postponed_breakpoint{$type_sequence} = 1;
17641             }
17642             }
17643 2243         3970 return;
17644             } ## end sub set_closing_breakpoint
17645             } ## end closure set_closing_breakpoint
17646              
17647             #########################################
17648             # CODE SECTION 9: Process batches of code
17649             #########################################
17650              
17651             { ## begin closure grind_batch_of_CODE
17652              
17653             # The routines in this closure begin the processing of a 'batch' of code.
17654              
17655             # A variable to keep track of consecutive nonblank lines so that we can
17656             # insert occasional blanks
17657             my @nonblank_lines_at_depth;
17658              
17659             # A variable to remember maximum size of previous batches; this is needed
17660             # by the logical padding routine
17661             my $peak_batch_size;
17662             my $batch_count;
17663              
17664             # variables to keep track of indentation of unmatched containers.
17665             my %saved_opening_indentation;
17666              
17667             sub initialize_grind_batch_of_CODE {
17668 555     555 0 1722 @nonblank_lines_at_depth = ();
17669 555         1508 $peak_batch_size = 0;
17670 555         1128 $batch_count = 0;
17671 555         2329 %saved_opening_indentation = ();
17672 555         949 return;
17673             } ## end sub initialize_grind_batch_of_CODE
17674              
17675             # sub grind_batch_of_CODE receives sections of code which are the longest
17676             # possible lines without a break. In other words, it receives what is left
17677             # after applying all breaks forced by blank lines, block comments, side
17678             # comments, pod text, and structural braces. Its job is to break this code
17679             # down into smaller pieces, if necessary, which fit within the maximum
17680             # allowed line length. Then it sends the resulting lines of code on down
17681             # the pipeline to the VerticalAligner package, breaking the code into
17682             # continuation lines as necessary. The batch of tokens are in the "to_go"
17683             # arrays. The name 'grind' is slightly suggestive of a machine continually
17684             # breaking down long lines of code, but mainly it is unique and easy to
17685             # remember and find with an editor search.
17686              
17687             # The two routines 'process_line_of_CODE' and 'grind_batch_of_CODE' work
17688             # together in the following way:
17689              
17690             # - 'process_line_of_CODE' receives the original INPUT lines one-by-one and
17691             # combines them into the largest sequences of tokens which might form a new
17692             # line.
17693             # - 'grind_batch_of_CODE' determines which tokens will form the OUTPUT
17694             # lines.
17695              
17696             # So sub 'process_line_of_CODE' builds up the longest possible continuous
17697             # sequences of tokens, regardless of line length, and then
17698             # grind_batch_of_CODE breaks these sequences back down into the new output
17699             # lines.
17700              
17701             # Sub 'grind_batch_of_CODE' ships its output lines to the vertical aligner.
17702              
17703 38     38   367 use constant DEBUG_GRIND => 0;
  38         121  
  38         14154  
17704              
17705             sub check_grind_input {
17706              
17707             # Check for valid input to sub grind_batch_of_CODE. An error here
17708             # would most likely be due to an error in 'sub store_token_to_go'.
17709 0     0 0 0 my ($self) = @_;
17710              
17711             # Be sure there are tokens in the batch
17712 0 0       0 if ( $max_index_to_go < 0 ) {
17713 0         0 Fault(<<EOM);
17714             sub grind incorrectly called with max_index_to_go=$max_index_to_go
17715             EOM
17716             }
17717 0         0 my $Klimit = $self->[_Klimit_];
17718              
17719             # The local batch tokens must be a continuous part of the global token
17720             # array.
17721 0         0 my $KK;
17722 0         0 foreach my $ii ( 0 .. $max_index_to_go ) {
17723              
17724 0         0 my $Km = $KK;
17725              
17726 0         0 $KK = $K_to_go[$ii];
17727 0 0 0     0 if ( !defined($KK) || $KK < 0 || $KK > $Klimit ) {
      0        
17728 0 0       0 $KK = '(undef)' unless defined($KK);
17729 0         0 Fault(<<EOM);
17730             at batch index at i=$ii, the value of K_to_go[$ii] = '$KK' is out of the valid range (0 - $Klimit)
17731             EOM
17732             }
17733              
17734 0 0 0     0 if ( $ii > 0 && $KK != $Km + 1 ) {
17735 0         0 my $im = $ii - 1;
17736 0         0 Fault(<<EOM);
17737             Non-sequential K indexes: i=$im has Km=$Km; but i=$ii has K=$KK; expecting K = Km+1
17738             EOM
17739             }
17740             }
17741 0         0 return;
17742             } ## end sub check_grind_input
17743              
17744             # This filter speeds up a critical if-test
17745             my %quick_filter;
17746              
17747             BEGIN {
17748 38     38   555 my @q = qw# L { ( [ R ] ) } ? : f => #;
17749 38         162 push @q, ',';
17750 38         189152 @quick_filter{@q} = (1) x scalar(@q);
17751             }
17752              
17753             sub grind_batch_of_CODE {
17754              
17755 4547     4547 0 8164 my ($self) = @_;
17756              
17757             #-----------------------------------------------------------------
17758             # This sub directs the formatting of one complete batch of tokens.
17759             # The tokens of the batch are in the '_to_go' arrays.
17760             #-----------------------------------------------------------------
17761              
17762 4547         7958 my $this_batch = $self->[_this_batch_];
17763 4547         8776 $this_batch->[_peak_batch_size_] = $peak_batch_size;
17764 4547         8426 $this_batch->[_batch_count_] = ++$batch_count;
17765              
17766 4547         6313 $self->check_grind_input() if (DEVEL_MODE);
17767              
17768             # This routine is only called from sub flush_batch_of_code, so that
17769             # routine is a better spot for debugging.
17770 4547         6226 DEBUG_GRIND && do {
17771             my $token = my $type = EMPTY_STRING;
17772             if ( $max_index_to_go >= 0 ) {
17773             $token = $tokens_to_go[$max_index_to_go];
17774             $type = $types_to_go[$max_index_to_go];
17775             }
17776             my $output_str = EMPTY_STRING;
17777             if ( $max_index_to_go > 20 ) {
17778             my $mm = $max_index_to_go - 10;
17779             $output_str =
17780             join( EMPTY_STRING, @tokens_to_go[ 0 .. 10 ] ) . " ... "
17781             . join( EMPTY_STRING,
17782             @tokens_to_go[ $mm .. $max_index_to_go ] );
17783             }
17784             else {
17785             $output_str = join EMPTY_STRING,
17786             @tokens_to_go[ 0 .. $max_index_to_go ];
17787             }
17788             print STDERR <<EOM;
17789             grind got batch number $batch_count with $max_index_to_go tokens, last type '$type' tok='$token', text:
17790             $output_str
17791             EOM
17792             };
17793              
17794             # Remove any trailing blank, which is possible (c192 has example)
17795 4547 100 66     17466 if ( $max_index_to_go >= 0 && $types_to_go[$max_index_to_go] eq 'b' ) {
17796 220         532 $max_index_to_go -= 1;
17797             }
17798              
17799 4547 50       9605 return if ( $max_index_to_go < 0 );
17800              
17801 4547         7059 my $lp_object_count_this_batch;
17802 4547 100       8918 if ($rOpts_line_up_parentheses) {
17803 302         963 $this_batch->[_lp_object_count_this_batch_] =
17804             $lp_object_count_this_batch = $self->set_lp_indentation();
17805             }
17806              
17807             #-----------------------------------------------------------
17808             # Shortcut for block comments. But not for block comments
17809             # with lp because they must use the lp corrector step below.
17810             #-----------------------------------------------------------
17811 4547 100 100     15686 if ( !$max_index_to_go
      100        
17812             && $types_to_go[0] eq '#'
17813             && !$lp_object_count_this_batch )
17814             {
17815 629         1181 my $ibeg = 0;
17816 629         1649 $this_batch->[_ri_first_] = [$ibeg];
17817 629         1503 $this_batch->[_ri_last_] = [$ibeg];
17818              
17819 629         2870 $self->convey_batch_to_vertical_aligner();
17820              
17821 629         1537 my $level = $levels_to_go[$ibeg];
17822 629         1465 $self->[_last_line_leading_type_] = $types_to_go[$ibeg];
17823 629         1323 $self->[_last_line_leading_level_] = $level;
17824 629         1323 $nonblank_lines_at_depth[$level] = 1;
17825 629         1314 return;
17826             }
17827              
17828             #-------------
17829             # Normal route
17830             #-------------
17831              
17832 3918         7485 my $rLL = $self->[_rLL_];
17833              
17834             #-------------------------------------------------------
17835             # Loop over the batch to initialize some batch variables
17836             #-------------------------------------------------------
17837 3918         6320 my $comma_count_in_batch = 0;
17838 3918         9113 my @colon_list;
17839             my @ix_seqno_controlling_ci;
17840 3918         0 my %comma_arrow_count;
17841 3918         5608 my $comma_arrow_count_contained = 0;
17842 3918         9049 my @unmatched_closing_indexes_in_this_batch;
17843             my @unmatched_opening_indexes_in_this_batch;
17844              
17845 3918         0 my @i_for_semicolon;
17846 3918         9087 foreach my $i ( 0 .. $max_index_to_go ) {
17847              
17848 53678 100       93727 if ( $types_to_go[$i] eq 'b' ) {
17849 18671         30427 $inext_to_go[$i] = $inext_to_go[ $i - 1 ] = $i + 1;
17850 18671         26792 next;
17851             }
17852              
17853 35007         49820 $inext_to_go[$i] = $i + 1;
17854              
17855             # This is an optional shortcut to save a bit of time by skipping
17856             # most tokens. Note: the filter may need to be updated if the
17857             # next 'if' tests are ever changed to include more token types.
17858 35007 100       71073 next if ( !$quick_filter{ $types_to_go[$i] } );
17859              
17860 13028         19469 my $type = $types_to_go[$i];
17861              
17862             # gather info needed by sub break_long_lines
17863 13028 100       26708 if ( $type_sequence_to_go[$i] ) {
    100          
    100          
    50          
17864 9062         13437 my $seqno = $type_sequence_to_go[$i];
17865 9062         12911 my $token = $tokens_to_go[$i];
17866              
17867             # remember indexes of any tokens controlling xci
17868             # in this batch. This list is needed by sub undo_ci.
17869 9062 100       17734 if ( $self->[_ris_seqno_controlling_ci_]->{$seqno} ) {
17870 120         216 push @ix_seqno_controlling_ci, $i;
17871             }
17872              
17873 9062 100       22237 if ( $is_opening_sequence_token{$token} ) {
    50          
17874 4531 100       9374 if ( $self->[_rbreak_container_]->{$seqno} ) {
17875 22         122 $self->set_forced_breakpoint($i);
17876             }
17877 4531         7944 push @unmatched_opening_indexes_in_this_batch, $i;
17878 4531 100       10863 if ( $type eq '?' ) {
17879 186         668 push @colon_list, $type;
17880             }
17881             }
17882             elsif ( $is_closing_sequence_token{$token} ) {
17883              
17884 4531 100 100     15518 if ( $i > 0 && $self->[_rbreak_container_]->{$seqno} ) {
17885 3         12 $self->set_forced_breakpoint( $i - 1 );
17886             }
17887              
17888 4531         7320 my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
17889 4531 100 66     14125 if ( defined($i_mate) && $i_mate >= 0 ) {
17890 3717 50       7374 if ( $type_sequence_to_go[$i_mate] ==
17891             $type_sequence_to_go[$i] )
17892             {
17893 3717         6388 $mate_index_to_go[$i] = $i_mate;
17894 3717         5770 $mate_index_to_go[$i_mate] = $i;
17895 3717         5792 my $cac = $comma_arrow_count{$seqno};
17896 3717 100       7624 $comma_arrow_count_contained += $cac if ($cac);
17897             }
17898             else {
17899 0         0 push @unmatched_opening_indexes_in_this_batch,
17900             $i_mate;
17901 0         0 push @unmatched_closing_indexes_in_this_batch, $i;
17902             }
17903             }
17904             else {
17905 814         1915 push @unmatched_closing_indexes_in_this_batch, $i;
17906             }
17907 4531 100       10855 if ( $type eq ':' ) {
17908 186         642 push @colon_list, $type;
17909             }
17910             } ## end elsif ( $is_closing_sequence_token...)
17911              
17912             } ## end if ($seqno)
17913              
17914 2916         4772 elsif ( $type eq ',' ) { $comma_count_in_batch++; }
17915             elsif ( $type eq '=>' ) {
17916 1016 100       2477 if (@unmatched_opening_indexes_in_this_batch) {
17917 948         1496 my $j = $unmatched_opening_indexes_in_this_batch[-1];
17918 948         1564 my $seqno = $type_sequence_to_go[$j];
17919 948         2219 $comma_arrow_count{$seqno}++;
17920             }
17921             }
17922             elsif ( $type eq 'f' ) {
17923 34         78 push @i_for_semicolon, $i;
17924             }
17925              
17926             } ## end for ( my $i = 0 ; $i <=...)
17927              
17928             # Break at a single interior C-style for semicolon in this batch (c154)
17929 3918 100 100     11442 if ( @i_for_semicolon && @i_for_semicolon == 1 ) {
17930 2         8 my $i = $i_for_semicolon[0];
17931 2         11 my $inext = $inext_to_go[$i];
17932 2 50 33     35 if ( $inext <= $max_index_to_go && $types_to_go[$inext] ne '#' ) {
17933 2         20 $self->set_forced_breakpoint($i);
17934             }
17935             }
17936              
17937 3918         7505 my $is_unbalanced_batch = @unmatched_opening_indexes_in_this_batch +
17938             @unmatched_closing_indexes_in_this_batch;
17939              
17940 3918 100       8508 if (@unmatched_opening_indexes_in_this_batch) {
17941 711         2142 $this_batch->[_runmatched_opening_indexes_] =
17942             \@unmatched_opening_indexes_in_this_batch;
17943             }
17944              
17945 3918 100       8109 if (@ix_seqno_controlling_ci) {
17946 40         90 $this_batch->[_rix_seqno_controlling_ci_] =
17947             \@ix_seqno_controlling_ci;
17948             }
17949              
17950             #------------------------
17951             # Set special breakpoints
17952             #------------------------
17953             # If this line ends in a code block brace, set breaks at any
17954             # previous closing code block braces to breakup a chain of code
17955             # blocks on one line. This is very rare but can happen for
17956             # user-defined subs. For example we might be looking at this:
17957             # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
17958 3918         5973 my $saw_good_break; # flag to force breaks even if short line
17959 3918 100 100     13437 if (
      100        
17960              
17961             # looking for opening or closing block brace
17962             $block_type_to_go[$max_index_to_go]
17963              
17964             # never any good breaks if just one token
17965             && $max_index_to_go > 0
17966              
17967             # but not one of these which are never duplicated on a line:
17968             # until|while|for|if|elsif|else
17969             && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go]
17970             }
17971             )
17972             {
17973 354         790 my $lev = $nesting_depth_to_go[$max_index_to_go];
17974              
17975             # Walk backwards from the end and
17976             # set break at any closing block braces at the same level.
17977             # But quit if we are not in a chain of blocks.
17978 354         1414 foreach my $i ( reverse( 0 .. $max_index_to_go - 1 ) ) {
17979 749 100       1734 last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
17980 727 50       1586 next if ( $levels_to_go[$i] > $lev ); # skip past higher level
17981              
17982 727 50       3550 if ( $block_type_to_go[$i] ) {
    100          
17983 0 0       0 if ( $tokens_to_go[$i] eq '}' ) {
17984 0         0 $self->set_forced_breakpoint($i);
17985 0         0 $saw_good_break = 1;
17986             }
17987             }
17988              
17989             # quit if we see anything besides words, function, blanks
17990             # at this level
17991 276         677 elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
17992             }
17993             }
17994              
17995             #-----------------------------------------------
17996             # insertion of any blank lines before this batch
17997             #-----------------------------------------------
17998              
17999 3918         6410 my $imin = 0;
18000 3918         6200 my $imax = $max_index_to_go;
18001              
18002             # trim any blank tokens - for safety, but should not be necessary
18003 3918 50       8683 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
  0         0  
18004 3918 50       8635 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
  0         0  
18005              
18006 3918 50       8421 if ( $imin > $imax ) {
18007 0         0 if (DEVEL_MODE) {
18008             my $K0 = $K_to_go[0];
18009             my $lno = EMPTY_STRING;
18010             if ( defined($K0) ) { $lno = $rLL->[$K0]->[_LINE_INDEX_] + 1 }
18011             Fault(<<EOM);
18012             Strange: received batch containing only blanks near input line $lno: after trimming imin=$imin, imax=$imax
18013             EOM
18014             }
18015 0         0 return;
18016             }
18017              
18018 3918         7203 my $last_line_leading_type = $self->[_last_line_leading_type_];
18019 3918         6408 my $last_line_leading_level = $self->[_last_line_leading_level_];
18020              
18021 3918         6851 my $leading_type = $types_to_go[0];
18022 3918         6840 my $leading_level = $levels_to_go[0];
18023              
18024             # add blank line(s) before certain key types but not after a comment
18025 3918 100       9269 if ( $last_line_leading_type ne '#' ) {
18026 3069         5147 my $blank_count = 0;
18027 3069         5369 my $leading_token = $tokens_to_go[0];
18028              
18029             # break before certain key blocks except one-liners
18030 3069 100       11518 if ( $leading_type eq 'k' ) {
    100          
    100          
18031 1138 100 100     8968 if ( $leading_token eq 'BEGIN' || $leading_token eq 'END' ) {
    100 66        
18032 7 100       26 $blank_count = $rOpts->{'blank-lines-before-subs'}
18033             if ( terminal_type_i( 0, $max_index_to_go ) ne '}' );
18034             }
18035              
18036             # Break before certain block types if we haven't had a
18037             # break at this level for a while. This is the
18038             # difficult decision..
18039             elsif ($last_line_leading_type ne 'b'
18040             && $is_if_unless_while_until_for_foreach{$leading_token} )
18041             {
18042 102         304 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
18043 102 50       398 if ( !defined($lc) ) { $lc = 0 }
  0         0  
18044              
18045             # patch for RT #128216: no blank line inserted at a level
18046             # change
18047 102 100       338 if ( $levels_to_go[0] != $last_line_leading_level ) {
18048 32         95 $lc = 0;
18049             }
18050              
18051 102 50 100     782 if ( $rOpts->{'blanks-before-blocks'}
      66        
      66        
18052             && $lc >= $rOpts->{'long-block-line-count'}
18053             && $self->consecutive_nonblank_lines() >=
18054             $rOpts->{'long-block-line-count'}
18055             && terminal_type_i( 0, $max_index_to_go ) ne '}' )
18056             {
18057 1         2 $blank_count = 1;
18058             }
18059             }
18060             }
18061              
18062             # blank lines before subs except declarations and one-liners
18063             elsif ( $leading_type eq 'i' ) {
18064             my $special_identifier =
18065 511         1451 $self->[_ris_special_identifier_token_]->{$leading_token};
18066 511 100       1498 if ($special_identifier) {
18067             ## $leading_token =~ /$SUB_PATTERN/
18068 73 100       278 if ( $special_identifier eq 'sub' ) {
    50          
18069              
18070 55 100       262 $blank_count = $rOpts->{'blank-lines-before-subs'}
18071             if ( terminal_type_i( 0, $max_index_to_go ) !~
18072             /^[\;\}\,]$/ );
18073             }
18074              
18075             # break before all package declarations
18076             ## substr( $leading_token, 0, 8 ) eq 'package '
18077             elsif ( $special_identifier eq 'package' ) {
18078              
18079             # ... except in a very short eval block
18080 18         35 my $pseqno = $parent_seqno_to_go[0];
18081             $blank_count = $rOpts->{'blank-lines-before-packages'}
18082             if (
18083 18 50       57 !$self->[_ris_short_broken_eval_block_]->{$pseqno}
18084             );
18085             }
18086             }
18087             }
18088              
18089             # Check for blank lines wanted before a closing brace
18090             elsif ( $leading_token eq '}' ) {
18091 601 50 66     2105 if ( $rOpts->{'blank-lines-before-closing-block'}
      33        
18092             && $block_type_to_go[0]
18093             && $block_type_to_go[0] =~
18094             /$blank_lines_before_closing_block_pattern/ )
18095             {
18096 2         7 my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
18097 2 50       6 if ( $nblanks > $blank_count ) {
18098 2         5 $blank_count = $nblanks;
18099             }
18100             }
18101             }
18102              
18103 3069 100       7129 if ($blank_count) {
18104              
18105             # future: send blank line down normal path to VerticalAligner?
18106 43         161 $self->flush_vertical_aligner();
18107 43         114 my $file_writer_object = $self->[_file_writer_object_];
18108 43         208 $file_writer_object->require_blank_code_lines($blank_count);
18109             }
18110             }
18111              
18112             # update blank line variables and count number of consecutive
18113             # non-blank, non-comment lines at this level
18114 3918 100 100     17778 if ( $leading_level == $last_line_leading_level
      100        
18115             && $leading_type ne '#'
18116             && defined( $nonblank_lines_at_depth[$leading_level] ) )
18117             {
18118 2295         4007 $nonblank_lines_at_depth[$leading_level]++;
18119             }
18120             else {
18121 1623         3073 $nonblank_lines_at_depth[$leading_level] = 1;
18122             }
18123              
18124 3918         6921 $self->[_last_line_leading_type_] = $leading_type;
18125 3918         6420 $self->[_last_line_leading_level_] = $leading_level;
18126              
18127             #--------------------------
18128             # scan lists and long lines
18129             #--------------------------
18130              
18131             # Flag to remember if we called sub 'pad_array_to_go'.
18132             # Some routines (break_lists(), break_long_lines() ) need some
18133             # extra tokens added at the end of the batch. Most batches do not
18134             # use these routines, so we will avoid calling 'pad_array_to_go'
18135             # unless it is needed.
18136 3918         9477 my $called_pad_array_to_go;
18137              
18138             # set all forced breakpoints for good list formatting
18139             my $is_long_line;
18140 3918         0 my $multiple_old_lines_in_batch;
18141 3918 100       8894 if ( $max_index_to_go > 0 ) {
    100          
18142 3263         9842 $is_long_line =
18143             $self->excess_line_length( $imin, $max_index_to_go ) > 0;
18144              
18145 3263         6056 my $Kbeg = $K_to_go[0];
18146 3263         5555 my $Kend = $K_to_go[$max_index_to_go];
18147 3263         7750 $multiple_old_lines_in_batch =
18148             $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_];
18149             }
18150              
18151             # Optional optimization: avoid calling break_lists for a single block
18152             # brace. This is done by turning off the flag $is_unbalanced_batch.
18153             elsif ($is_unbalanced_batch) {
18154 493         1077 my $block_type = $block_type_to_go[0];
18155 493 100 100     3258 if ( $block_type
      100        
18156             && !$lp_object_count_this_batch
18157             && $is_block_without_semicolon{$block_type} )
18158             {
18159             # opening blocks can skip break_lists call if no commas in
18160             # container.
18161 191 100       642 if ( $leading_type eq '{' ) {
18162 14         35 my $seqno = $type_sequence_to_go[0];
18163 14         52 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$seqno};
18164 14 50       68 if ($rtype_count) {
18165 14         39 my $comma_count = $rtype_count->{','};
18166 14 50       49 if ( !$comma_count ) {
18167 14         31 $is_unbalanced_batch = 0;
18168             }
18169             }
18170             }
18171              
18172             # closing block braces can be skipped
18173             else {
18174 177         1284 $is_unbalanced_batch = 0;
18175             }
18176              
18177             }
18178             }
18179              
18180 3918         7135 my $rbond_strength_bias = [];
18181 3918 100 100     29059 if (
      100        
      33        
      66        
      66        
      66        
      66        
18182             $is_long_line
18183             || $multiple_old_lines_in_batch
18184              
18185             # must always call break_lists() with unbalanced batches because
18186             # it is maintaining some stacks
18187             || $is_unbalanced_batch
18188              
18189             # call break_lists if we might want to break at commas
18190             || (
18191             $comma_count_in_batch
18192             && ( $rOpts_maximum_fields_per_table > 0
18193             && $rOpts_maximum_fields_per_table <= $comma_count_in_batch
18194             || $rOpts_comma_arrow_breakpoints == 0 )
18195             )
18196              
18197             # call break_lists if user may want to break open some one-line
18198             # hash references
18199             || ( $comma_arrow_count_contained
18200             && $rOpts_comma_arrow_breakpoints != 3 )
18201             )
18202             {
18203             # add a couple of extra terminal blank tokens
18204 1738         6648 $self->pad_array_to_go();
18205 1738         2866 $called_pad_array_to_go = 1;
18206              
18207 1738         6006 my $sgb = $self->break_lists( $is_long_line, $rbond_strength_bias );
18208 1738   66     5926 $saw_good_break ||= $sgb;
18209             }
18210              
18211             # let $ri_first and $ri_last be references to lists of
18212             # first and last tokens of line fragments to output..
18213 3918         7176 my ( $ri_first, $ri_last );
18214              
18215             #-----------------------------
18216             # a single token uses one line
18217             #-----------------------------
18218 3918 100       8278 if ( !$max_index_to_go ) {
18219 655         1506 $ri_first = [$imin];
18220 655         1474 $ri_last = [$imax];
18221             }
18222              
18223             # for multiple tokens
18224             else {
18225              
18226             #-------------------------
18227             # write a single line if..
18228             #-------------------------
18229 3263 100 100     17992 if (
18230             (
18231              
18232             # this line is 'short'
18233             !$is_long_line
18234              
18235             # and we didn't see a good breakpoint
18236             && !$saw_good_break
18237              
18238             # and we don't already have an interior breakpoint
18239             && !$forced_breakpoint_count
18240             )
18241              
18242             # or, we aren't allowed to add any newlines
18243             || !$rOpts_add_newlines
18244              
18245             )
18246             {
18247 2153         4653 $ri_first = [$imin];
18248 2153         4516 $ri_last = [$imax];
18249             }
18250              
18251             #-----------------------------
18252             # otherwise use multiple lines
18253             #-----------------------------
18254             else {
18255              
18256             # add a couple of extra terminal blank tokens if we haven't
18257             # already done so
18258 1110 50       2673 $self->pad_array_to_go() unless ($called_pad_array_to_go);
18259              
18260 1110         5044 ( $ri_first, $ri_last, my $rbond_strength_to_go ) =
18261             $self->break_long_lines( $saw_good_break, \@colon_list,
18262             $rbond_strength_bias );
18263              
18264 1110         4961 $self->break_all_chain_tokens( $ri_first, $ri_last );
18265              
18266             $self->break_equals( $ri_first, $ri_last )
18267 1110 100       1859 if @{$ri_first} >= 3;
  1110         5157  
18268              
18269             # now we do a correction step to clean this up a bit
18270             # (The only time we would not do this is for debugging)
18271             $self->recombine_breakpoints( $ri_first, $ri_last,
18272             $rbond_strength_to_go )
18273 1110 100 100     3784 if ( $rOpts_recombine && @{$ri_first} > 1 );
  1081         5670  
18274              
18275 1110 100       5341 $self->insert_final_ternary_breaks( $ri_first, $ri_last )
18276             if (@colon_list);
18277             }
18278              
18279 3263 100 66     9482 $self->insert_breaks_before_list_opening_containers( $ri_first,
18280             $ri_last )
18281             if ( %break_before_container_types && $max_index_to_go > 0 );
18282              
18283             # Check for a phantom semicolon at the end of the batch
18284 3263 100 66     9359 if ( !$token_lengths_to_go[$imax] && $types_to_go[$imax] eq ';' ) {
18285 18         144 $self->unmask_phantom_token($imax);
18286             }
18287              
18288 3263 100       7438 if ( $rOpts_one_line_block_semicolons == 0 ) {
18289 6         20 $self->delete_one_line_semicolons( $ri_first, $ri_last );
18290             }
18291              
18292             # Remember the largest batch size processed. This is needed by the
18293             # logical padding routine to avoid padding the first nonblank token
18294 3263 100       7698 if ( $max_index_to_go > $peak_batch_size ) {
18295 953         1933 $peak_batch_size = $max_index_to_go;
18296             }
18297             }
18298              
18299             #-------------------
18300             # -lp corrector step
18301             #-------------------
18302 3918 100       8543 if ($lp_object_count_this_batch) {
18303 134         516 $self->correct_lp_indentation( $ri_first, $ri_last );
18304             }
18305              
18306             #--------------------
18307             # ship this batch out
18308             #--------------------
18309 3918         7252 $this_batch->[_ri_first_] = $ri_first;
18310 3918         6247 $this_batch->[_ri_last_] = $ri_last;
18311              
18312 3918         12968 $self->convey_batch_to_vertical_aligner();
18313              
18314             #-------------------------------------------------------------------
18315             # Write requested number of blank lines after an opening block brace
18316             #-------------------------------------------------------------------
18317 3918 100       10266 if ($rOpts_blank_lines_after_opening_block) {
18318 6         9 my $iterm = $imax;
18319 6 50 33     22 if ( $types_to_go[$iterm] eq '#' && $iterm > $imin ) {
18320 0         0 $iterm -= 1;
18321 0 0 0     0 if ( $types_to_go[$iterm] eq 'b' && $iterm > $imin ) {
18322 0         0 $iterm -= 1;
18323             }
18324             }
18325              
18326 6 50 66     50 if ( $types_to_go[$iterm] eq '{'
      33        
18327             && $block_type_to_go[$iterm]
18328             && $block_type_to_go[$iterm] =~
18329             /$blank_lines_after_opening_block_pattern/ )
18330             {
18331 2         7 my $nblanks = $rOpts_blank_lines_after_opening_block;
18332 2         8 $self->flush_vertical_aligner();
18333 2         4 my $file_writer_object = $self->[_file_writer_object_];
18334 2         8 $file_writer_object->require_blank_code_lines($nblanks);
18335             }
18336             }
18337              
18338 3918         13626 return;
18339             } ## end sub grind_batch_of_CODE
18340              
18341             sub iprev_to_go {
18342 4085     4085 0 7535 my ($i) = @_;
18343              
18344             # Given index $i of a token in the '_to_go' arrays, return
18345             # the index of the previous nonblank token.
18346 4085 100 100     16220 return $i - 1 > 0
18347             && $types_to_go[ $i - 1 ] eq 'b' ? $i - 2 : $i - 1;
18348             }
18349              
18350             sub unmask_phantom_token {
18351 18     18 0 70 my ( $self, $iend ) = @_;
18352              
18353             # Turn a phantom token into a real token.
18354              
18355             # Input parameter:
18356             # $iend = the index in the output batch array of this token.
18357              
18358             # Phantom tokens are specially marked token types (such as ';') with
18359             # no token text which only become real tokens if they occur at the end
18360             # of an output line. At one time phantom ',' tokens were handled
18361             # here, but now they are processed elsewhere.
18362              
18363 18         60 my $rLL = $self->[_rLL_];
18364 18         50 my $KK = $K_to_go[$iend];
18365 18         72 my $line_number = 1 + $rLL->[$KK]->[_LINE_INDEX_];
18366              
18367 18         47 my $type = $types_to_go[$iend];
18368 18 50       69 return unless ( $type eq ';' );
18369 18         44 my $tok = $type;
18370 18         45 my $tok_len = length($tok);
18371 18 50       77 if ( $want_left_space{$type} != WS_NO ) {
18372 0         0 $tok = SPACE . $tok;
18373 0         0 $tok_len += 1;
18374             }
18375              
18376 18         61 $tokens_to_go[$iend] = $tok;
18377 18         41 $token_lengths_to_go[$iend] = $tok_len;
18378              
18379 18         43 $rLL->[$KK]->[_TOKEN_] = $tok;
18380 18         56 $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
18381              
18382 18         104 $self->note_added_semicolon($line_number);
18383              
18384             # This changes the summed lengths of the rest of this batch
18385 18         79 foreach ( $iend .. $max_index_to_go ) {
18386 18         68 $summed_lengths_to_go[ $_ + 1 ] += $tok_len;
18387             }
18388 18         40 return;
18389             } ## end sub unmask_phantom_token
18390              
18391             sub save_opening_indentation {
18392              
18393             # This should be called after each batch of tokens is output. It
18394             # saves indentations of lines of all unmatched opening tokens.
18395             # These will be used by sub get_opening_indentation.
18396              
18397 839     839 0 2485 my ( $self, $ri_first, $ri_last, $rindentation_list,
18398             $runmatched_opening_indexes )
18399             = @_;
18400              
18401 839 100       2396 $runmatched_opening_indexes = []
18402             if ( !defined($runmatched_opening_indexes) );
18403              
18404             # QW INDENTATION PATCH 1:
18405             # Also save indentation for multiline qw quotes
18406 839         1631 my @i_qw;
18407             my $seqno_qw_opening;
18408 839 100       2566 if ( $types_to_go[$max_index_to_go] eq 'q' ) {
18409 149         301 my $KK = $K_to_go[$max_index_to_go];
18410             $seqno_qw_opening =
18411 149         335 $self->[_rstarting_multiline_qw_seqno_by_K_]->{$KK};
18412 149 100       401 if ($seqno_qw_opening) {
18413 32         89 push @i_qw, $max_index_to_go;
18414             }
18415             }
18416              
18417             # we need to save indentations of any unmatched opening tokens
18418             # in this batch because we may need them in a subsequent batch.
18419 839         1503 foreach ( @{$runmatched_opening_indexes}, @i_qw ) {
  839         2484  
18420              
18421 846         1928 my $seqno = $type_sequence_to_go[$_];
18422              
18423 846 100       2007 if ( !$seqno ) {
18424 32 50 33     231 if ( $seqno_qw_opening && $_ == $max_index_to_go ) {
18425 32         99 $seqno = $seqno_qw_opening;
18426             }
18427             else {
18428              
18429             # shouldn't happen
18430 0         0 $seqno = 'UNKNOWN';
18431 0         0 DEVEL_MODE && Fault("unable to find sequence number\n");
18432             }
18433             }
18434              
18435 846         2420 $saved_opening_indentation{$seqno} = [
18436             lookup_opening_indentation(
18437             $_, $ri_first, $ri_last, $rindentation_list
18438             )
18439             ];
18440             }
18441 839         1892 return;
18442             } ## end sub save_opening_indentation
18443              
18444             sub get_saved_opening_indentation {
18445 864     864 0 1831 my ($seqno) = @_;
18446 864         2109 my ( $indent, $offset, $is_leading, $exists ) = ( 0, 0, 0, 0 );
18447              
18448 864 50       1998 if ($seqno) {
18449 864 50       2371 if ( $saved_opening_indentation{$seqno} ) {
18450             ( $indent, $offset, $is_leading ) =
18451 864         1373 @{ $saved_opening_indentation{$seqno} };
  864         2243  
18452 864         1531 $exists = 1;
18453             }
18454             }
18455              
18456             # some kind of serious error it doesn't exist
18457             # (example is badfile.t)
18458              
18459 864         3093 return ( $indent, $offset, $is_leading, $exists );
18460             } ## end sub get_saved_opening_indentation
18461             } ## end closure grind_batch_of_CODE
18462              
18463             sub lookup_opening_indentation {
18464              
18465             # get the indentation of the line in the current output batch
18466             # which output a selected opening token
18467             #
18468             # given:
18469             # $i_opening - index of an opening token in the current output batch
18470             # whose line indentation we need
18471             # $ri_first - reference to list of the first index $i for each output
18472             # line in this batch
18473             # $ri_last - reference to list of the last index $i for each output line
18474             # in this batch
18475             # $rindentation_list - reference to a list containing the indentation
18476             # used for each line. (NOTE: the first slot in
18477             # this list is the last returned line number, and this is
18478             # followed by the list of indentations).
18479             #
18480             # return
18481             # -the indentation of the line which contained token $i_opening
18482             # -and its offset (number of columns) from the start of the line
18483              
18484 1396     1396 0 3219 my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
18485              
18486 1396 50       2086 if ( !@{$ri_last} ) {
  1396         3338  
18487              
18488             # An error here implies a bug introduced by a recent program change.
18489             # Every batch of code has lines, so this should never happen.
18490 0         0 if (DEVEL_MODE) {
18491             Fault("Error in opening_indentation: no lines");
18492             }
18493 0         0 return ( 0, 0, 0 );
18494             }
18495              
18496 1396         2707 my $nline = $rindentation_list->[0]; # line number of previous lookup
18497              
18498             # reset line location if necessary
18499 1396 100       3533 $nline = 0 if ( $i_opening < $ri_start->[$nline] );
18500              
18501             # find the correct line
18502 1396 50       3311 unless ( $i_opening > $ri_last->[-1] ) {
18503 1396         3841 while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
  5300         8828  
18504             }
18505              
18506             # Error - token index is out of bounds - shouldn't happen
18507             # A program bug has been introduced in one of the calling routines.
18508             # We better stop here.
18509             else {
18510 0         0 my $i_last_line = $ri_last->[-1];
18511 0         0 if (DEVEL_MODE) {
18512             Fault(<<EOM);
18513             Program bug in call to lookup_opening_indentation - index out of range
18514             called with index i_opening=$i_opening > $i_last_line = max index of last line
18515             This batch has max index = $max_index_to_go,
18516             EOM
18517             }
18518 0         0 $nline = $#{$ri_last};
  0         0  
18519             }
18520              
18521 1396         2501 $rindentation_list->[0] =
18522             $nline; # save line number to start looking next call
18523 1396         2570 my $ibeg = $ri_start->[$nline];
18524 1396         3720 my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
18525 1396         2872 my $is_leading = ( $ibeg == $i_opening );
18526 1396         6346 return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
18527             } ## end sub lookup_opening_indentation
18528              
18529             sub terminal_type_i {
18530              
18531             # returns type of last token on this line (terminal token), as follows:
18532             # returns # for a full-line comment
18533             # returns ' ' for a blank line
18534             # otherwise returns final token type
18535              
18536 69     69 0 184 my ( $ibeg, $iend ) = @_;
18537              
18538             # Start at the end and work backwards
18539 69         135 my $i = $iend;
18540 69         146 my $type_i = $types_to_go[$i];
18541              
18542             # Check for side comment
18543 69 100       219 if ( $type_i eq '#' ) {
18544 8         45 $i--;
18545 8 50       38 if ( $i < $ibeg ) {
18546 0 0       0 return wantarray ? ( $type_i, $ibeg ) : $type_i;
18547             }
18548 8         28 $type_i = $types_to_go[$i];
18549             }
18550              
18551             # Skip past a blank
18552 69 100       225 if ( $type_i eq 'b' ) {
18553 7         15 $i--;
18554 7 50       27 if ( $i < $ibeg ) {
18555 0 0       0 return wantarray ? ( $type_i, $ibeg ) : $type_i;
18556             }
18557 7         18 $type_i = $types_to_go[$i];
18558             }
18559              
18560             # Found it..make sure it is a BLOCK termination,
18561             # but hide a terminal } after sort/map/grep/eval/do because it is not
18562             # necessarily the end of the line. (terminal.t)
18563 69         198 my $block_type = $block_type_to_go[$i];
18564 69 100 66     395 if (
      66        
18565             $type_i eq '}'
18566             && ( !$block_type
18567             || $is_sort_map_grep_eval_do{$block_type} )
18568             )
18569             {
18570 1         3 $type_i = 'b';
18571             }
18572 69 100       571 return wantarray ? ( $type_i, $i ) : $type_i;
18573             } ## end sub terminal_type_i
18574              
18575             sub pad_array_to_go {
18576              
18577             # To simplify coding in break_lists and set_bond_strengths, it helps to
18578             # create some extra blank tokens at the end of the arrays. We also add
18579             # some undef's to help guard against using invalid data.
18580 1738     1738 0 3485 my ($self) = @_;
18581 1738         3657 $K_to_go[ $max_index_to_go + 1 ] = undef;
18582 1738         3541 $tokens_to_go[ $max_index_to_go + 1 ] = EMPTY_STRING;
18583 1738         3368 $tokens_to_go[ $max_index_to_go + 2 ] = EMPTY_STRING;
18584 1738         3137 $tokens_to_go[ $max_index_to_go + 3 ] = undef;
18585 1738         3592 $types_to_go[ $max_index_to_go + 1 ] = 'b';
18586 1738         3577 $types_to_go[ $max_index_to_go + 2 ] = 'b';
18587 1738         3277 $types_to_go[ $max_index_to_go + 3 ] = undef;
18588 1738         3031 $nesting_depth_to_go[ $max_index_to_go + 2 ] = undef;
18589 1738         3717 $nesting_depth_to_go[ $max_index_to_go + 1 ] =
18590             $nesting_depth_to_go[$max_index_to_go];
18591              
18592             # /^[R\}\)\]]$/
18593 1738 100       6509 if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
    100          
18594 223 50       875 if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
18595              
18596             # Nesting depths are set to be >=0 in sub write_line, so it should
18597             # not be possible to get here unless the code has a bracing error
18598             # which leaves a closing brace with zero nesting depth.
18599 0 0       0 unless ( get_saw_brace_error() ) {
18600 0         0 if (DEVEL_MODE) {
18601             Fault(<<EOM);
18602             Program bug in pad_array_to_go: hit nesting error which should have been caught
18603             EOM
18604             }
18605             }
18606             }
18607             else {
18608 223         657 $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
18609             }
18610             }
18611              
18612             # /^[L\{\(\[]$/
18613             elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
18614 559         1267 $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
18615             }
18616 1738         3113 return;
18617             } ## end sub pad_array_to_go
18618              
18619             sub break_all_chain_tokens {
18620              
18621             # scan the current breakpoints looking for breaks at certain "chain
18622             # operators" (. : && || + etc) which often occur repeatedly in a long
18623             # statement. If we see a break at any one, break at all similar tokens
18624             # within the same container.
18625             #
18626 1110     1110 0 2738 my ( $self, $ri_left, $ri_right ) = @_;
18627              
18628 1110         4017 my %saw_chain_type;
18629             my %left_chain_type;
18630 1110         0 my %right_chain_type;
18631 1110         0 my %interior_chain_type;
18632 1110         1719 my $nmax = @{$ri_right} - 1;
  1110         2446  
18633              
18634             # scan the left and right end tokens of all lines
18635 1110         2062 my $count = 0;
18636 1110         2976 for my $n ( 0 .. $nmax ) {
18637 3984         5911 my $il = $ri_left->[$n];
18638 3984         5764 my $ir = $ri_right->[$n];
18639 3984         6159 my $typel = $types_to_go[$il];
18640 3984         5944 my $typer = $types_to_go[$ir];
18641 3984 100       7500 $typel = '+' if ( $typel eq '-' ); # treat + and - the same
18642 3984 100       7130 $typer = '+' if ( $typer eq '-' );
18643 3984 100       7153 $typel = '*' if ( $typel eq '/' ); # treat * and / the same
18644 3984 100       7015 $typer = '*' if ( $typer eq '/' );
18645              
18646 3984 100       7634 my $keyl = $typel eq 'k' ? $tokens_to_go[$il] : $typel;
18647 3984 100       6971 my $keyr = $typer eq 'k' ? $tokens_to_go[$ir] : $typer;
18648 3984 100 100     9185 if ( $is_chain_operator{$keyl} && $want_break_before{$typel} ) {
18649 321 100       897 next if ( $typel eq '?' );
18650 255         455 push @{ $left_chain_type{$keyl} }, $il;
  255         688  
18651 255         584 $saw_chain_type{$keyl} = 1;
18652 255         1246 $count++;
18653             }
18654 3918 100 100     9940 if ( $is_chain_operator{$keyr} && !$want_break_before{$typer} ) {
18655 48 100       154 next if ( $typer eq '?' );
18656 47         87 push @{ $right_chain_type{$keyr} }, $ir;
  47         141  
18657 47         114 $saw_chain_type{$keyr} = 1;
18658 47         94 $count++;
18659             }
18660             }
18661 1110 100       4349 return unless $count;
18662              
18663             # now look for any interior tokens of the same types
18664 124         406 $count = 0;
18665 124         285 my $has_interior_dot_or_plus;
18666 124         455 for my $n ( 0 .. $nmax ) {
18667 779         1193 my $il = $ri_left->[$n];
18668 779         1095 my $ir = $ri_right->[$n];
18669 779         1543 foreach my $i ( $il + 1 .. $ir - 1 ) {
18670 4184         5730 my $type = $types_to_go[$i];
18671 4184 100       6568 my $key = $type eq 'k' ? $tokens_to_go[$i] : $type;
18672 4184 100       6798 $key = '+' if ( $key eq '-' );
18673 4184 100       6708 $key = '*' if ( $key eq '/' );
18674 4184 100       7815 if ( $saw_chain_type{$key} ) {
18675 193         334 push @{ $interior_chain_type{$key} }, $i;
  193         431  
18676 193         327 $count++;
18677 193   100     695 $has_interior_dot_or_plus ||= ( $key eq '.' || $key eq '+' );
      100        
18678             }
18679             }
18680             }
18681 124 100       865 return unless $count;
18682              
18683 33         185 my @keys = keys %saw_chain_type;
18684              
18685             # quit if just ONE continuation line with leading . For example--
18686             # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
18687             # . $contents;
18688             # Fixed for b1399.
18689 33 50 66     223 if ( $has_interior_dot_or_plus && $nmax == 1 && @keys == 1 ) {
      33        
18690 0         0 return;
18691             }
18692              
18693             # now make a list of all new break points
18694 33         91 my @insert_list;
18695              
18696             # loop over all chain types
18697 33         117 foreach my $key (@keys) {
18698              
18699             # loop over all interior chain tokens
18700 41         80 foreach my $itest ( @{ $interior_chain_type{$key} } ) {
  41         122  
18701              
18702             # loop over all left end tokens of same type
18703 193 100       426 if ( $left_chain_type{$key} ) {
18704 71 50       186 next if $nobreak_to_go[ $itest - 1 ];
18705 71         96 foreach my $i ( @{ $left_chain_type{$key} } ) {
  71         148  
18706 146 100       290 next unless $self->in_same_container_i( $i, $itest );
18707 15         37 push @insert_list, $itest - 1;
18708              
18709             # Break at matching ? if this : is at a different level.
18710             # For example, the ? before $THRf_DEAD in the following
18711             # should get a break if its : gets a break.
18712             #
18713             # my $flags =
18714             # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
18715             # : ( $_ & 4 ) ? $THRf_R_DETACHED
18716             # : $THRf_R_JOINABLE;
18717 15 100 66     62 if ( $key eq ':'
18718             && $levels_to_go[$i] != $levels_to_go[$itest] )
18719             {
18720 1         6 my $i_question = $mate_index_to_go[$itest];
18721 1 50 33     8 if ( defined($i_question) && $i_question > 0 ) {
18722 1         3 push @insert_list, $i_question - 1;
18723             }
18724             }
18725 15         28 last;
18726             }
18727             }
18728              
18729             # loop over all right end tokens of same type
18730 193 100       464 if ( $right_chain_type{$key} ) {
18731 122 50       235 next if $nobreak_to_go[$itest];
18732 122         173 foreach my $i ( @{ $right_chain_type{$key} } ) {
  122         227  
18733 227 100       436 next unless $self->in_same_container_i( $i, $itest );
18734 31         86 push @insert_list, $itest;
18735              
18736             # break at matching ? if this : is at a different level
18737 31 50 33     130 if ( $key eq ':'
18738             && $levels_to_go[$i] != $levels_to_go[$itest] )
18739             {
18740 0         0 my $i_question = $mate_index_to_go[$itest];
18741 0 0       0 if ( defined($i_question) ) {
18742 0         0 push @insert_list, $i_question;
18743             }
18744             }
18745 31         67 last;
18746             }
18747             }
18748             }
18749             }
18750              
18751             # insert any new break points
18752 33 100       178 if (@insert_list) {
18753 20         108 $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
18754             }
18755 33         190 return;
18756             } ## end sub break_all_chain_tokens
18757              
18758             sub insert_additional_breaks {
18759              
18760             # this routine will add line breaks at requested locations after
18761             # sub break_long_lines has made preliminary breaks.
18762              
18763 101     101 0 300 my ( $self, $ri_break_list, $ri_first, $ri_last ) = @_;
18764 101         205 my $i_f;
18765             my $i_l;
18766 101         218 my $line_number = 0;
18767 101         199 foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
  210         446  
  101         486  
18768              
18769 216 50       480 next if ( $nobreak_to_go[$i_break_left] );
18770              
18771 216         385 $i_f = $ri_first->[$line_number];
18772 216         347 $i_l = $ri_last->[$line_number];
18773 216         498 while ( $i_break_left >= $i_l ) {
18774 383         524 $line_number++;
18775              
18776             # shouldn't happen unless caller passes bad indexes
18777 383 50       499 if ( $line_number >= @{$ri_last} ) {
  383         774  
18778 0         0 if (DEVEL_MODE) {
18779             Fault(<<EOM);
18780             Non-fatal program bug: couldn't set break at $i_break_left
18781             EOM
18782             }
18783 0         0 return;
18784             }
18785 383         555 $i_f = $ri_first->[$line_number];
18786 383         738 $i_l = $ri_last->[$line_number];
18787             }
18788              
18789             # Do not leave a blank at the end of a line; back up if necessary
18790 216 100       515 if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
  11         16  
18791              
18792 216         359 my $i_break_right = $inext_to_go[$i_break_left];
18793 216 50 66     1929 if ( $i_break_left >= $i_f
      66        
      33        
18794             && $i_break_left < $i_l
18795             && $i_break_right > $i_f
18796             && $i_break_right <= $i_l )
18797             {
18798 101         178 splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
  101         410  
18799 101         203 splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
  101         306  
18800             }
18801             }
18802 101         358 return;
18803             } ## end sub insert_additional_breaks
18804              
18805             { ## begin closure in_same_container_i
18806             my $ris_break_token;
18807             my $ris_comma_token;
18808              
18809             BEGIN {
18810              
18811             # all cases break on seeing commas at same level
18812 38     38   325 my @q = qw( => );
18813 38         160 push @q, ',';
18814 38         164 @{$ris_comma_token}{@q} = (1) x scalar(@q);
  38         243  
18815              
18816             # Non-ternary text also breaks on seeing any of qw(? : || or )
18817             # Example: we would not want to break at any of these .'s
18818             # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
18819 38         184 push @q, qw( or || ? : );
18820 38         101 @{$ris_break_token}{@q} = (1) x scalar(@q);
  38         36803  
18821             } ## end BEGIN
18822              
18823             sub in_same_container_i {
18824              
18825             # Check to see if tokens at i1 and i2 are in the same container, and
18826             # not separated by certain characters: => , ? : || or
18827             # This is an interface between the _to_go arrays to the rLL array
18828 374     374 0 606 my ( $self, $i1, $i2 ) = @_;
18829              
18830             # quick check
18831 374         560 my $parent_seqno_1 = $parent_seqno_to_go[$i1];
18832 374 100       1001 return if ( $parent_seqno_to_go[$i2] ne $parent_seqno_1 );
18833              
18834 58 100       148 if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
  52         135  
18835 58         123 my $K1 = $K_to_go[$i1];
18836 58         106 my $K2 = $K_to_go[$i2];
18837 58         124 my $rLL = $self->[_rLL_];
18838              
18839 58         111 my $depth_1 = $nesting_depth_to_go[$i1];
18840 58 50       168 return if ( $depth_1 < 0 );
18841              
18842             # Shouldn't happen since i1 and i2 have same parent:
18843 58 50       157 return unless ( $nesting_depth_to_go[$i2] == $depth_1 );
18844              
18845             # Select character set to scan for
18846 58         121 my $type_1 = $types_to_go[$i1];
18847 58 100       163 my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
18848              
18849             # Fast preliminary loop to verify that tokens are in the same container
18850 58         102 my $KK = $K1;
18851 58         98 while (1) {
18852 326         555 $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
18853 326 100       597 last if !defined($KK);
18854 323 100       581 last if ( $KK >= $K2 );
18855 268         391 my $ii = $i1 + $KK - $K1;
18856 268         414 my $depth_i = $nesting_depth_to_go[$ii];
18857 268 50       457 return if ( $depth_i < $depth_1 );
18858 268 100       487 next if ( $depth_i > $depth_1 );
18859 51 100       139 if ( $type_1 ne ':' ) {
18860 45         88 my $tok_i = $tokens_to_go[$ii];
18861 45 50 33     213 return if ( $tok_i eq '?' || $tok_i eq ':' );
18862             }
18863             }
18864              
18865             # Slow loop checking for certain characters
18866              
18867             #-----------------------------------------------------
18868             # This is potentially a slow routine and not critical.
18869             # For safety just give up for large differences.
18870             # See test file 'infinite_loop.txt'
18871             #-----------------------------------------------------
18872 58 50       291 return if ( $i2 - $i1 > 200 );
18873              
18874 58         234 foreach my $ii ( $i1 + 1 .. $i2 - 1 ) {
18875              
18876 1668         2168 my $depth_i = $nesting_depth_to_go[$ii];
18877 1668 100       2855 next if ( $depth_i > $depth_1 );
18878 400 50       672 return if ( $depth_i < $depth_1 );
18879 400         571 my $tok_i = $tokens_to_go[$ii];
18880 400 100       906 return if ( $rbreak->{$tok_i} );
18881             }
18882 47         176 return 1;
18883             } ## end sub in_same_container_i
18884             } ## end closure in_same_container_i
18885              
18886             sub break_equals {
18887              
18888             # Look for assignment operators that could use a breakpoint.
18889             # For example, in the following snippet
18890             #
18891             # $HOME = $ENV{HOME}
18892             # || $ENV{LOGDIR}
18893             # || $pw[7]
18894             # || die "no home directory for user $<";
18895             #
18896             # we could break at the = to get this, which is a little nicer:
18897             # $HOME =
18898             # $ENV{HOME}
18899             # || $ENV{LOGDIR}
18900             # || $pw[7]
18901             # || die "no home directory for user $<";
18902             #
18903             # The logic here follows the logic in set_logical_padding, which
18904             # will add the padding in the second line to improve alignment.
18905             #
18906 501     501 0 1541 my ( $self, $ri_left, $ri_right ) = @_;
18907 501         948 my $nmax = @{$ri_right} - 1;
  501         1103  
18908 501 50       1658 return unless ( $nmax >= 2 );
18909              
18910             # scan the left ends of first two lines
18911 501         1002 my $tokbeg = EMPTY_STRING;
18912 501         952 my $depth_beg;
18913 501         1396 for my $n ( 1 .. 2 ) {
18914 532         1222 my $il = $ri_left->[$n];
18915 532         1150 my $typel = $types_to_go[$il];
18916 532         1076 my $tokenl = $tokens_to_go[$il];
18917 532 100       1612 my $keyl = $typel eq 'k' ? $tokenl : $typel;
18918              
18919 532         1160 my $has_leading_op = $is_chain_operator{$keyl};
18920 532 100       2002 return unless ($has_leading_op);
18921 50 100       202 if ( $n > 1 ) {
18922             return
18923 19 100 66     181 unless ( $tokenl eq $tokbeg
18924             && $nesting_depth_to_go[$il] eq $depth_beg );
18925             }
18926 46         106 $tokbeg = $tokenl;
18927 46         138 $depth_beg = $nesting_depth_to_go[$il];
18928             }
18929              
18930             # now look for any interior tokens of the same types
18931 15         57 my $il = $ri_left->[0];
18932 15         49 my $ir = $ri_right->[0];
18933              
18934             # now make a list of all new break points
18935 15         39 my @insert_list;
18936 15         119 foreach my $i ( reverse( $il + 1 .. $ir - 1 ) ) {
18937 132         200 my $type = $types_to_go[$i];
18938 132 100 66     338 if ( $is_assignment{$type}
18939             && $nesting_depth_to_go[$i] eq $depth_beg )
18940             {
18941 1 50       3 if ( $want_break_before{$type} ) {
18942 0         0 push @insert_list, $i - 1;
18943             }
18944             else {
18945 1         3 push @insert_list, $i;
18946             }
18947             }
18948             }
18949              
18950             # Break after a 'return' followed by a chain of operators
18951             # return ( $^O !~ /win32|dos/i )
18952             # && ( $^O ne 'VMS' )
18953             # && ( $^O ne 'OS2' )
18954             # && ( $^O ne 'MacOS' );
18955             # To give:
18956             # return
18957             # ( $^O !~ /win32|dos/i )
18958             # && ( $^O ne 'VMS' )
18959             # && ( $^O ne 'OS2' )
18960             # && ( $^O ne 'MacOS' );
18961 15         94 my $i = 0;
18962 15 100 100     148 if ( $types_to_go[$i] eq 'k'
      66        
      100        
18963             && $tokens_to_go[$i] eq 'return'
18964             && $ir > $il
18965             && $nesting_depth_to_go[$i] eq $depth_beg )
18966             {
18967 4         15 push @insert_list, $i;
18968             }
18969              
18970 15 100       69 return unless (@insert_list);
18971              
18972             # One final check...
18973             # scan second and third lines and be sure there are no assignments
18974             # we want to avoid breaking at an = to make something like this:
18975             # unless ( $icon =
18976             # $html_icons{"$type-$state"}
18977             # or $icon = $html_icons{$type}
18978             # or $icon = $html_icons{$state} )
18979 5         22 for my $n ( 1 .. 2 ) {
18980 10         21 my $il_n = $ri_left->[$n];
18981 10         17 my $ir_n = $ri_right->[$n];
18982 10         38 foreach my $i ( $il_n + 1 .. $ir_n ) {
18983 100         131 my $type = $types_to_go[$i];
18984             return
18985 100 50 33     216 if ( $is_assignment{$type}
18986             && $nesting_depth_to_go[$i] eq $depth_beg );
18987             }
18988             }
18989              
18990             # ok, insert any new break point
18991 5 50       40 if (@insert_list) {
18992 5         28 $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
18993             }
18994 5         22 return;
18995             } ## end sub break_equals
18996              
18997             { ## begin closure recombine_breakpoints
18998              
18999             # This routine is called once per batch to see if it would be better
19000             # to combine some of the lines into which the batch has been broken.
19001              
19002             my %is_amp_amp;
19003             my %is_math_op;
19004             my %is_plus_minus;
19005             my %is_mult_div;
19006              
19007             BEGIN {
19008              
19009 38     38   202 my @q;
19010 38         224 @q = qw( && || );
19011 38         236 @is_amp_amp{@q} = (1) x scalar(@q);
19012              
19013 38         193 @q = qw( + - * / );
19014 38         180 @is_math_op{@q} = (1) x scalar(@q);
19015              
19016 38         136 @q = qw( + - );
19017 38         110 @is_plus_minus{@q} = (1) x scalar(@q);
19018              
19019 38         104 @q = qw( * / );
19020 38         29665 @is_mult_div{@q} = (1) x scalar(@q);
19021             } ## end BEGIN
19022              
19023             sub Debug_dump_breakpoints {
19024              
19025             # Debug routine to dump current breakpoints...not normally called
19026             # We are given indexes to the current lines:
19027             # $ri_beg = ref to array of BEGinning indexes of each line
19028             # $ri_end = ref to array of ENDing indexes of each line
19029 0     0 0 0 my ( $self, $ri_beg, $ri_end, $msg ) = @_;
19030 0         0 print STDERR "----Dumping breakpoints from: $msg----\n";
19031 0         0 for my $n ( 0 .. @{$ri_end} - 1 ) {
  0         0  
19032 0         0 my $ibeg = $ri_beg->[$n];
19033 0         0 my $iend = $ri_end->[$n];
19034 0         0 my $text = EMPTY_STRING;
19035 0         0 foreach my $i ( $ibeg .. $iend ) {
19036 0         0 $text .= $tokens_to_go[$i];
19037             }
19038 0         0 print STDERR "$n ($ibeg:$iend) $text\n";
19039             }
19040 0         0 print STDERR "----\n";
19041 0         0 return;
19042             } ## end sub Debug_dump_breakpoints
19043              
19044             sub delete_one_line_semicolons {
19045              
19046 6     6 0 15 my ( $self, $ri_beg, $ri_end ) = @_;
19047 6         13 my $rLL = $self->[_rLL_];
19048 6         9 my $K_opening_container = $self->[_K_opening_container_];
19049              
19050             # Walk down the lines of this batch and delete any semicolons
19051             # terminating one-line blocks;
19052 6         9 my $nmax = @{$ri_end} - 1;
  6         13  
19053              
19054 6         24 foreach my $n ( 0 .. $nmax ) {
19055 6         11 my $i_beg = $ri_beg->[$n];
19056 6         11 my $i_e = $ri_end->[$n];
19057 6         11 my $K_beg = $K_to_go[$i_beg];
19058 6         10 my $K_e = $K_to_go[$i_e];
19059 6         13 my $K_end = $K_e;
19060 6         14 my $type_end = $rLL->[$K_end]->[_TYPE_];
19061 6 100       17 if ( $type_end eq '#' ) {
19062 2         16 $K_end = $self->K_previous_nonblank($K_end);
19063 2 50       13 if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; }
  2         5  
19064             }
19065              
19066             # we are looking for a line ending in closing brace
19067             next
19068 6 50 33     33 unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' );
19069              
19070             # ...and preceded by a semicolon on the same line
19071 6         16 my $K_semicolon = $self->K_previous_nonblank($K_end);
19072 6 50       16 next unless defined($K_semicolon);
19073 6         11 my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg );
19074 6 50       14 next if ( $i_semicolon <= $i_beg );
19075 6 50       16 next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
19076              
19077             # Safety check - shouldn't happen - not critical
19078             # This is not worth throwing a Fault, except in DEVEL_MODE
19079 6 50       16 if ( $types_to_go[$i_semicolon] ne ';' ) {
19080 0         0 DEVEL_MODE
19081             && Fault("unexpected type looking for semicolon");
19082 0         0 next;
19083             }
19084              
19085             # ... with the corresponding opening brace on the same line
19086 6         13 my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_];
19087 6         13 my $K_opening = $K_opening_container->{$type_sequence};
19088 6 50       17 next unless ( defined($K_opening) );
19089 6         10 my $i_opening = $i_beg + ( $K_opening - $K_beg );
19090 6 50       14 next if ( $i_opening < $i_beg );
19091              
19092             # ... and only one semicolon between these braces
19093 6         11 my $semicolon_count = 0;
19094 6         15 foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) {
19095 22 100       48 if ( $rLL->[$K]->[_TYPE_] eq ';' ) {
19096 2         9 $semicolon_count++;
19097 2         6 last;
19098             }
19099             }
19100 6 100       19 next if ($semicolon_count);
19101              
19102             # ...ok, then make the semicolon invisible
19103 4         6 my $len = $token_lengths_to_go[$i_semicolon];
19104 4         8 $tokens_to_go[$i_semicolon] = EMPTY_STRING;
19105 4         6 $token_lengths_to_go[$i_semicolon] = 0;
19106 4         8 $rLL->[$K_semicolon]->[_TOKEN_] = EMPTY_STRING;
19107 4         6 $rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0;
19108 4         10 foreach ( $i_semicolon .. $max_index_to_go ) {
19109 16         31 $summed_lengths_to_go[ $_ + 1 ] -= $len;
19110             }
19111             }
19112 6         10 return;
19113             } ## end sub delete_one_line_semicolons
19114              
19115 38     38   400 use constant DEBUG_RECOMBINE => 0;
  38         120  
  38         38162  
19116              
19117             sub recombine_breakpoints {
19118              
19119 731     731 0 1998 my ( $self, $ri_beg, $ri_end, $rbond_strength_to_go ) = @_;
19120              
19121             # This sub implements the 'recombine' operation on a batch.
19122             # Its task is to combine some of these lines back together to
19123             # improve formatting. The need for this arises because
19124             # sub 'break_long_lines' is very liberal in setting line breaks
19125             # for long lines, always setting breaks at good breakpoints, even
19126             # when that creates small lines. Sometimes small line fragments
19127             # are produced which would look better if they were combined.
19128              
19129             # Input parameters:
19130             # $ri_beg = ref to array of BEGinning indexes of each line
19131             # $ri_end = ref to array of ENDing indexes of each line
19132             # $rbond_strength_to_go = array of bond strengths pulling
19133             # tokens together, used to decide where best to recombine lines.
19134              
19135             #-------------------------------------------------------------------
19136             # Do nothing under extreme stress; use <= 2 for c171.
19137             # (NOTE: New optimizations make this unnecessary. But removing this
19138             # check is not really useful because this condition only occurs in
19139             # test runs, and another formatting pass will fix things anyway.)
19140             # This routine has a long history of improvements. Some past
19141             # relevant issues are : c118, c167, c171, c186, c187, c193, c200.
19142             #-------------------------------------------------------------------
19143 731 100       2031 return if ( $high_stress_level <= 2 );
19144              
19145 730         1287 my $nmax_start = @{$ri_end} - 1;
  730         1554  
19146 730 50       1921 return if ( $nmax_start <= 0 );
19147              
19148 730         1505 my $iend_max = $ri_end->[$nmax_start];
19149 730 100       2212 if ( $types_to_go[$iend_max] eq '#' ) {
19150 46         212 $iend_max = iprev_to_go($iend_max);
19151             }
19152 730   66     3338 my $has_terminal_semicolon =
19153             $iend_max >= 0 && $types_to_go[$iend_max] eq ';';
19154              
19155             #--------------------------------------------------------------------
19156             # Break into the smallest possible sub-sections to improve efficiency
19157             #--------------------------------------------------------------------
19158              
19159             # Also make a list of all good joining tokens between the lines
19160             # n-1 and n.
19161 730         1246 my @joint;
19162              
19163 730         1503 my $rsections = [];
19164 730         1315 my $nbeg_sec = 0;
19165 730         1241 my $nend_sec;
19166 730         1303 my $nmax_section = 0;
19167 730         1873 foreach my $nn ( 1 .. $nmax_start ) {
19168 2746         4805 my $ibeg_1 = $ri_beg->[ $nn - 1 ];
19169 2746         4377 my $iend_1 = $ri_end->[ $nn - 1 ];
19170 2746         4087 my $iend_2 = $ri_end->[$nn];
19171 2746         3971 my $ibeg_2 = $ri_beg->[$nn];
19172              
19173             # Define certain good joint tokens
19174 2746         4054 my ( $itok, $itokp, $itokm );
19175 2746         4379 foreach my $itest ( $iend_1, $ibeg_2 ) {
19176 5492         8182 my $type = $types_to_go[$itest];
19177 5492 100 100     30098 if ( $is_math_op{$type}
      100        
      100        
19178             || $is_amp_amp{$type}
19179             || $is_assignment{$type}
19180             || $type eq ':' )
19181             {
19182 376         820 $itok = $itest;
19183             }
19184             }
19185              
19186             # joint[$nn] = index of joint character
19187 2746         4975 $joint[$nn] = $itok;
19188              
19189             # Update the section list
19190 2746         5885 my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
19191 2746 100 100     9639 if (
      100        
      100        
19192             $excess <= 1
19193              
19194             # The number 5 here is an arbitrary small number intended
19195             # to keep most small matches in one sub-section.
19196             || ( defined($nend_sec)
19197             && ( $nn < 5 || $nmax_start - $nn < 5 ) )
19198             )
19199             {
19200 2584         5133 $nend_sec = $nn;
19201             }
19202             else {
19203 162 100       605 if ( defined($nend_sec) ) {
19204 29         63 push @{$rsections}, [ $nbeg_sec, $nend_sec ];
  29         134  
19205 29         77 my $num = $nend_sec - $nbeg_sec;
19206 29 100       106 if ( $num > $nmax_section ) { $nmax_section = $num }
  19         44  
19207 29         58 $nbeg_sec = $nn;
19208 29         85 $nend_sec = undef;
19209             }
19210 162         414 $nbeg_sec = $nn;
19211             }
19212             }
19213              
19214 730 100       2695 if ( defined($nend_sec) ) {
19215 656         1249 push @{$rsections}, [ $nbeg_sec, $nend_sec ];
  656         2344  
19216 656         1857 my $num = $nend_sec - $nbeg_sec;
19217 656 100       1831 if ( $num > $nmax_section ) { $nmax_section = $num }
  647         1218  
19218             }
19219              
19220 730         1222 my $num_sections = @{$rsections};
  730         1436  
19221              
19222 730         1175 if ( DEBUG_RECOMBINE > 1 ) {
19223             print STDERR <<EOM;
19224             sections=$num_sections; nmax_sec=$nmax_section
19225             EOM
19226             }
19227              
19228 730         1175 if ( DEBUG_RECOMBINE > 0 ) {
19229             my $max = 0;
19230             print STDERR
19231             "-----\n$num_sections sections found for nmax=$nmax_start\n";
19232             foreach my $sect ( @{$rsections} ) {
19233             my ( $nbeg, $nend ) = @{$sect};
19234             my $num = $nend - $nbeg;
19235             if ( $num > $max ) { $max = $num }
19236             print STDERR "$nbeg $nend\n";
19237             }
19238             print STDERR "max size=$max of $nmax_start lines\n";
19239             }
19240              
19241             # Loop over all sub-sections. Note that we have to work backwards
19242             # from the end of the batch since the sections use original line
19243             # numbers, and the line numbers change as we go.
19244 730         1416 while ( my $section = pop @{$rsections} ) {
  1415         4995  
19245 685         1096 my ( $nbeg, $nend ) = @{$section};
  685         1562  
19246 685         6831 $self->recombine_section_loop(
19247             {
19248             _ri_beg => $ri_beg,
19249             _ri_end => $ri_end,
19250             _nbeg => $nbeg,
19251             _nend => $nend,
19252             _rjoint => \@joint,
19253             _rbond_strength_to_go => $rbond_strength_to_go,
19254             _has_terminal_semicolon => $has_terminal_semicolon,
19255             }
19256             );
19257             }
19258              
19259 730         2061 return;
19260             } ## end sub recombine_breakpoints
19261              
19262             sub recombine_section_loop {
19263 685     685 0 1829 my ( $self, $rhash ) = @_;
19264              
19265             # Recombine breakpoints for one section of lines in the current batch
19266              
19267             # Given:
19268             # $ri_beg, $ri_end = ref to arrays with token indexes of the first
19269             # and last line
19270             # $nbeg, $nend = line numbers bounding this section
19271             # $rjoint = ref to array of good joining tokens per line
19272              
19273             # Update: $ri_beg, $ri_end, $rjoint if lines are joined
19274              
19275             # Returns:
19276             # nothing
19277              
19278             #-------------
19279             # Definitions:
19280             #-------------
19281             # $rhash = {
19282              
19283             # _ri_beg = ref to array with starting token index by line
19284             # _ri_end = ref to array with ending token index by line
19285             # _nbeg = first line number of this section
19286             # _nend = last line number of this section
19287             # _rjoint = ref to array of good joining tokens for each line
19288             # _rbond_strength_to_go = array of bond strengths
19289             # _has_terminal_semicolon = true if last line of batch has ';'
19290              
19291             # _num_freeze = fixed number of lines at end of this batch
19292             # _optimization_on = true during final optimization loop
19293             # _num_compares = total number of line compares made so far
19294             # _pair_list = list of line pairs in optimal search order
19295              
19296             # };
19297              
19298             #-------------
19299             # How it works
19300             #-------------
19301              
19302             # We are working with a sequence of output lines and looking at
19303             # each pair. We must decide if it is better to join each of
19304             # these line pairs.
19305              
19306             # The brute force method is to loop through all line pairs and
19307             # join the best possible pair, as determined by either some
19308             # logical criterion or by the maximum 'bond strength' assigned
19309             # to the joining token. Then keep doing this until there are
19310             # no remaining line pairs to join.
19311              
19312             # This works, but a problem is that it can theoretically take
19313             # on the order of N^2 comparisons in some pathological cases.
19314             # This can require an excessive amount of run time.
19315              
19316             # We can avoid excessive run time by conceptually dividing the
19317             # work into two phases. In the first phase we make any joints
19318             # required by user settings or logic other than the strength of
19319             # joints. In the second phase we make any remaining joints
19320             # based on strengths. To do this optimally, we do a preliminary
19321             # sort on joint strengths and always loop in that order. That
19322             # way, we can stop a search on the first joint strength because
19323             # it will be the maximum.
19324              
19325             # This method is very fast, requiring no more than 3*N line
19326             # comparisons, where N is the number of lines (see below).
19327              
19328 685         1631 my $ri_beg = $rhash->{_ri_beg};
19329 685         1247 my $ri_end = $rhash->{_ri_end};
19330              
19331             # Line index range of this section:
19332 685         1578 my $nbeg = $rhash->{_nbeg}; # stays constant
19333 685         1340 my $nend = $rhash->{_nend}; # will decrease
19334              
19335             # $nmax_batch = starting number of lines in the full batch
19336             # $num_freeze = number of lines following this section to leave alone
19337 685         1132 my $nmax_batch = @{$ri_end} - 1;
  685         1393  
19338 685         2573 $rhash->{_num_freeze} = $nmax_batch - $nend;
19339              
19340             # Setup the list of line pairs to test. This stores the following
19341             # values for each line pair:
19342             # [ $n=index of the second line of the pair, $bs=bond strength]
19343 685         1251 my @pair_list;
19344 685         1437 my $rbond_strength_to_go = $rhash->{_rbond_strength_to_go};
19345 685         2120 foreach my $n ( $nbeg + 1 .. $nend ) {
19346 2584         4288 my $iend_1 = $ri_end->[ $n - 1 ];
19347 2584         3863 my $ibeg_2 = $ri_beg->[$n];
19348 2584         3618 my $bs_tweak = 0;
19349 2584 100       5432 if ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) { $bs_tweak = 0.25 }
  69         102  
19350 2584         4383 my $bs = $rbond_strength_to_go->[$iend_1] + $bs_tweak;
19351 2584         7012 push @pair_list, [ $n, $bs ];
19352             }
19353              
19354             # Any order for testing is possible, but optimization is only possible
19355             # if we sort the line pairs on decreasing joint strength.
19356             @pair_list =
19357 685 50       5548 sort { $b->[1] <=> $a->[1] || $a->[0] <=> $b->[0] } @pair_list;
  4021         10333  
19358 685         2121 $rhash->{_rpair_list} = \@pair_list;
19359              
19360             #----------------
19361             # Iteration limit
19362             #----------------
19363              
19364             # This is now a very fast loop which runs in O(n) time, but a
19365             # check on total number of iterations is retained to guard
19366             # against future programming errors.
19367              
19368             # Most cases require roughly 1 comparison per line pair (1 full pass).
19369             # The upper bound is estimated to be about 3 comparisons per line pair
19370             # unless optimization is deactivated. The approximate breakdown is:
19371             # 1 pass with 1 compare per joint to do any special cases, plus
19372             # 1 pass with up to 2 compares per joint in optimization mode
19373             # The most extreme cases in my collection are:
19374             # camel1.t - needs 2.7 compares per line (12 without optimization)
19375             # ternary.t - needs 2.8 compares per line (12 without optimization)
19376             # c206 - needs 3.3 compares per line, found with random testing
19377             # So a value of MAX_COMPARE_RATIO = 4 looks like an upper bound as
19378             # long as optimization is used. A value of 20 should allow all code to
19379             # pass even if optimization is turned off for testing.
19380 38     38   434 use constant MAX_COMPARE_RATIO => DEVEL_MODE ? 4 : 20;
  38         135  
  38         210683  
19381              
19382 685         1695 my $num_pairs = $nend - $nbeg + 1;
19383 685         1444 my $max_compares = MAX_COMPARE_RATIO * $num_pairs;
19384              
19385             # Always start with optimization off
19386 685         1530 $rhash->{_num_compares} = 0;
19387 685         1566 $rhash->{_optimization_on} = 0;
19388 685         1567 $rhash->{_ix_best_last} = 0;
19389              
19390             #--------------------------------------------
19391             # loop until there are no more recombinations
19392             #--------------------------------------------
19393 685         1404 my $nmax_last = $nmax_batch + 1;
19394 685         1175 while (1) {
19395              
19396             # Stop when the number of lines in the batch does not decrease
19397 1492         2219 $nmax_batch = @{$ri_end} - 1;
  1492         2713  
19398 1492 100       3645 if ( $nmax_batch >= $nmax_last ) {
19399 685         1582 last;
19400             }
19401 807         1418 $nmax_last = $nmax_batch;
19402              
19403             #-----------------------------------------
19404             # inner loop to find next best combination
19405             #-----------------------------------------
19406 807         3157 $self->recombine_inner_loop($rhash);
19407              
19408             # Iteration limit check:
19409 807 50       2247 if ( $rhash->{_num_compares} > $max_compares ) {
19410              
19411             # See note above; should only get here on a programming error
19412 0         0 if (DEVEL_MODE) {
19413             my $ibeg = $ri_beg->[$nbeg];
19414             my $Kbeg = $K_to_go[$ibeg];
19415             my $lno = $self->[_rLL_]->[$Kbeg]->[_LINE_INDEX_];
19416             Fault(<<EOM);
19417             inner loop passes =$rhash->{_num_compares} exceeds max=$max_compares, near line $lno
19418             EOM
19419             }
19420 0         0 last;
19421             }
19422              
19423             } ## end iteration loop
19424              
19425 685         1186 if (DEBUG_RECOMBINE) {
19426             my $ratio = sprintf "%0.3f", $rhash->{_num_compares} / $num_pairs;
19427             print STDERR
19428             "exiting recombine_inner_loop with $nmax_last lines, opt=$rhash->{_optimization_on}, starting pairs=$num_pairs, num_compares=$rhash->{_num_compares}, ratio=$ratio\n";
19429             }
19430              
19431 685         4195 return;
19432             } ## end sub recombine_section_loop
19433              
19434             sub recombine_inner_loop {
19435 807     807 0 1947 my ( $self, $rhash ) = @_;
19436              
19437             # This is the inner loop of the recombine operation. We look at all of
19438             # the remaining joints in this section and select the best joint to be
19439             # recombined. If a recombination is made, the number of lines
19440             # in this section will be reduced by one.
19441              
19442             # Returns: nothing
19443              
19444 807         1620 my $rK_weld_right = $self->[_rK_weld_right_];
19445 807         1404 my $rK_weld_left = $self->[_rK_weld_left_];
19446              
19447 807         1586 my $ri_beg = $rhash->{_ri_beg};
19448 807         1393 my $ri_end = $rhash->{_ri_end};
19449 807         1479 my $nbeg = $rhash->{_nbeg};
19450 807         1380 my $rjoint = $rhash->{_rjoint};
19451 807         2274 my $rbond_strength_to_go = $rhash->{_rbond_strength_to_go};
19452 807         1410 my $rpair_list = $rhash->{_rpair_list};
19453              
19454             # This will remember the best joint:
19455 807         1364 my $n_best = 0;
19456 807         1340 my $bs_best = 0.;
19457 807         1198 my $ix_best = 0;
19458 807         1283 my $num_bs = 0;
19459              
19460             # The range of lines in this group is $nbeg to $nstop
19461 807         1303 my $nmax = @{$ri_end} - 1;
  807         1451  
19462 807         1657 my $nstop = $nmax - $rhash->{_num_freeze};
19463 807         1521 my $num_joints = $nstop - $nbeg;
19464              
19465             # Turn off optimization if just two joints remain to allow
19466             # special two-line logic to be checked (c193)
19467 807 100 100     2856 if ( $rhash->{_optimization_on} && $num_joints <= 2 ) {
19468 42         141 $rhash->{_optimization_on} = 0;
19469             }
19470              
19471             # Start where we ended the last search
19472 807         1552 my $ix_start = $rhash->{_ix_best_last};
19473              
19474             # Keep the starting index in bounds
19475 807         2752 $ix_start = max( 0, $ix_start );
19476              
19477             # Make a search order list which cycles around to visit
19478             # all line pairs.
19479 807         1320 my $ix_max = @{$rpair_list} - 1;
  807         1647  
19480 807         2681 my @ix_list = ( $ix_start .. $ix_max, 0 .. $ix_start - 1 );
19481 807         1607 my $ix_last = $ix_list[-1];
19482              
19483             #-------------------------
19484             # loop over all line pairs
19485             #-------------------------
19486 807         1334 my $incomplete_loop;
19487 807         1827 foreach my $ix (@ix_list) {
19488 2913         4721 my $item = $rpair_list->[$ix];
19489 2913         4107 my ( $n, $bs ) = @{$item};
  2913         5303  
19490              
19491             # This flag will be true if we 'last' out of this loop early.
19492             # We cannot turn on optimization if this is true.
19493 2913         4967 $incomplete_loop = $ix != $ix_last;
19494              
19495             # Update the count of the number of times through this inner loop
19496 2913         4495 $rhash->{_num_compares}++;
19497              
19498             #----------------------------------------------------------
19499             # If we join the current pair of lines,
19500             # line $n-1 will become the left part of the joined line
19501             # line $n will become the right part of the joined line
19502             #
19503             # Here are Indexes of the endpoint tokens of the two lines:
19504             #
19505             # -----line $n-1--- | -----line $n-----
19506             # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
19507             # ^
19508             # |
19509             # We want to decide if we should remove the line break
19510             # between the tokens at $iend_1 and $ibeg_2
19511             #
19512             # We will apply a number of ad-hoc tests to see if joining
19513             # here will look ok. The code will just move to the next
19514             # pair if the join doesn't look good. If we get through
19515             # the gauntlet of tests, the lines will be recombined.
19516             #----------------------------------------------------------
19517             #
19518             # beginning and ending tokens of the lines we are working on
19519 2913         5760 my $ibeg_1 = $ri_beg->[ $n - 1 ];
19520 2913         4474 my $iend_1 = $ri_end->[ $n - 1 ];
19521 2913         4288 my $iend_2 = $ri_end->[$n];
19522 2913         4106 my $ibeg_2 = $ri_beg->[$n];
19523              
19524             # The combined line cannot be too long
19525 2913         6104 my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
19526 2913 100       6501 next if ( $excess > 0 );
19527              
19528 2524         4198 my $type_iend_1 = $types_to_go[$iend_1];
19529 2524         3898 my $type_iend_2 = $types_to_go[$iend_2];
19530 2524         3908 my $type_ibeg_1 = $types_to_go[$ibeg_1];
19531 2524         3737 my $type_ibeg_2 = $types_to_go[$ibeg_2];
19532              
19533 2524         3406 DEBUG_RECOMBINE > 1 && do {
19534             print STDERR
19535             "RECOMBINE: ix=$ix iend1=$iend_1 iend2=$iend_2 n=$n nmax=$nmax if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
19536             };
19537              
19538             # If line $n is the last line, we set some flags and
19539             # do any special checks for it
19540 2524         3433 my $this_line_is_semicolon_terminated;
19541 2524 100       5156 if ( $n == $nmax ) {
19542              
19543 609 100       2130 if ( $type_ibeg_2 eq '{' ) {
19544              
19545             # join isolated ')' and '{' if requested (git #110)
19546 39 50 66     248 if ( $rOpts_cuddled_paren_brace
      66        
      33        
19547             && $type_iend_1 eq '}'
19548             && $iend_1 == $ibeg_1
19549             && $ibeg_2 == $iend_2 )
19550             {
19551 1 50 33     7 if ( $tokens_to_go[$iend_1] eq ')'
19552             && $tokens_to_go[$ibeg_2] eq '{' )
19553             {
19554 1         2 $n_best = $n;
19555 1         3 $ix_best = $ix;
19556 1         3 last;
19557             }
19558             }
19559              
19560             # otherwise, a terminal '{' should stay where it is
19561             # unless preceded by a fat comma
19562 38 50       187 next if ( $type_iend_1 ne '=>' );
19563             }
19564              
19565             $this_line_is_semicolon_terminated =
19566 570         1292 $rhash->{_has_terminal_semicolon};
19567              
19568             }
19569              
19570             #----------------------------------------------------------
19571             # Recombine Section 0:
19572             # Examine the special token joining this line pair, if any.
19573             # Put as many tests in this section to avoid duplicate code
19574             # and to make formatting independent of whether breaks are
19575             # to the left or right of an operator.
19576             #----------------------------------------------------------
19577              
19578 2485         3913 my $itok = $rjoint->[$n];
19579 2485 100       4860 if ($itok) {
19580 339         1088 my $ok_0 = recombine_section_0( $itok, $ri_beg, $ri_end, $n );
19581 339 100       950 next if ( !$ok_0 );
19582             }
19583              
19584             #----------------------------------------------------------
19585             # Recombine Section 1:
19586             # Join welded nested containers immediately
19587             #----------------------------------------------------------
19588              
19589 2318 50 33     4885 if (
      66        
19590             $total_weld_count
19591             && ( $type_sequence_to_go[$iend_1]
19592             && defined( $rK_weld_right->{ $K_to_go[$iend_1] } )
19593             || $type_sequence_to_go[$ibeg_2]
19594             && defined( $rK_weld_left->{ $K_to_go[$ibeg_2] } ) )
19595             )
19596             {
19597 0         0 $n_best = $n;
19598 0         0 $ix_best = $ix;
19599 0         0 last;
19600             }
19601              
19602             #----------------------------------------------------------
19603             # Recombine Section 2:
19604             # Examine token at $iend_1 (right end of first line of pair)
19605             #----------------------------------------------------------
19606              
19607 2318         5210 my ( $ok_2, $skip_Section_3 ) =
19608             recombine_section_2( $ri_beg, $ri_end, $n,
19609             $this_line_is_semicolon_terminated );
19610 2318 100       6201 next if ( !$ok_2 );
19611              
19612             #----------------------------------------------------------
19613             # Recombine Section 3:
19614             # Examine token at $ibeg_2 (left end of second line of pair)
19615             #----------------------------------------------------------
19616              
19617             # Join lines identified above as capable of
19618             # causing an outdented line with leading closing paren.
19619             # Note that we are skipping the rest of this section
19620             # and the rest of the loop to do the join.
19621 618 100       1565 if ($skip_Section_3) {
19622 12         40 $forced_breakpoint_to_go[$iend_1] = 0;
19623 12         33 $n_best = $n;
19624 12         25 $ix_best = $ix;
19625 12         26 $incomplete_loop = 1;
19626 12         32 last;
19627             }
19628              
19629 606         1878 my ( $ok_3, $bs_tweak ) =
19630             recombine_section_3( $ri_beg, $ri_end, $n,
19631             $this_line_is_semicolon_terminated );
19632 606 100       1679 next if ( !$ok_3 );
19633              
19634             #----------------------------------------------------------
19635             # Recombine Section 4:
19636             # Combine the lines if we arrive here and it is possible
19637             #----------------------------------------------------------
19638              
19639             # honor hard breakpoints
19640 376 100       1218 next if ( $forced_breakpoint_to_go[$iend_1] );
19641              
19642 149         252 if (DEVEL_MODE) {
19643              
19644             # This fault can only occur if an array index error has been
19645             # introduced by a recent programming change.
19646             my $bs_check = $rbond_strength_to_go->[$iend_1] + $bs_tweak;
19647             if ( $bs_check != $bs ) {
19648             Fault(<<EOM);
19649             bs=$bs != $bs_check for break after type $type_iend_1 ix=$ix n=$n
19650             EOM
19651             }
19652             }
19653              
19654             # Require a few extra spaces before recombining lines if we
19655             # are at an old breakpoint unless this is a simple list or
19656             # terminal line. The goal is to avoid oscillating between
19657             # two quasi-stable end states. For example this snippet
19658             # caused problems:
19659              
19660             ## my $this =
19661             ## bless {
19662             ## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
19663             ## },
19664             ## $type;
19665             next
19666 149 50 66     524 if ( $old_breakpoint_to_go[$iend_1]
      66        
      33        
      33        
19667             && !$this_line_is_semicolon_terminated
19668             && $n < $nmax
19669             && $excess + 4 > 0
19670             && $type_iend_2 ne ',' );
19671              
19672             # do not recombine if we would skip in indentation levels
19673 149 100       415 if ( $n < $nmax ) {
19674 138         315 my $if_next = $ri_beg->[ $n + 1 ];
19675             next
19676             if (
19677 138 50 66     1503 $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
      0        
      33        
19678             && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
19679              
19680             # but an isolated 'if (' is undesirable
19681             && !(
19682             $n == 1
19683             && $iend_1 - $ibeg_1 <= 2
19684             && $type_ibeg_1 eq 'k'
19685             && $tokens_to_go[$ibeg_1] eq 'if'
19686             && $tokens_to_go[$iend_1] ne '('
19687             )
19688             );
19689             }
19690              
19691             ## OLD: honor no-break's
19692             ## next if ( $bs >= NO_BREAK - 1 ); # removed for b1257
19693              
19694             # remember the pair with the greatest bond strength
19695 149 100       410 if ( !$n_best ) {
19696              
19697             # First good joint ...
19698 109         206 $n_best = $n;
19699 109         196 $ix_best = $ix;
19700 109         188 $bs_best = $bs;
19701 109         194 $num_bs = 1;
19702              
19703             # In optimization mode: stop on the first acceptable joint
19704             # because we already know it has the highest strength
19705 109 100       414 if ( $rhash->{_optimization_on} == 1 ) {
19706 40         104 last;
19707             }
19708             }
19709             else {
19710              
19711             # Second and later joints ..
19712 40         80 $num_bs++;
19713              
19714             # save maximum strength; in case of a tie select min $n
19715 40 50 66     289 if ( $bs > $bs_best || $bs == $bs_best && $n < $n_best ) {
      33        
19716 0         0 $n_best = $n;
19717 0         0 $ix_best = $ix;
19718 0         0 $bs_best = $bs;
19719             }
19720             }
19721              
19722             } ## end loop over all line pairs
19723              
19724             #---------------------------------------------------
19725             # recombine the pair with the greatest bond strength
19726             #---------------------------------------------------
19727 807 100       2579 if ($n_best) {
19728 122         211 DEBUG_RECOMBINE > 1
19729             && print "BEST: nb=$n_best nbeg=$nbeg stop=$nstop bs=$bs_best\n";
19730 122         249 splice @{$ri_beg}, $n_best, 1;
  122         350  
19731 122         241 splice @{$ri_end}, $n_best - 1, 1;
  122         304  
19732 122         218 splice @{$rjoint}, $n_best, 1;
  122         321  
19733              
19734 122         241 splice @{$rpair_list}, $ix_best, 1;
  122         249  
19735              
19736             # Update the line indexes in the pair list:
19737             # Old $n values greater than the best $n decrease by 1
19738             # because of the splice we just did.
19739 122         285 foreach my $item ( @{$rpair_list} ) {
  122         328  
19740 726         1024 my $n_old = $item->[0];
19741 726 100       1357 if ( $n_old > $n_best ) { $item->[0] -= 1 }
  361         625  
19742             }
19743              
19744             # Store the index of this location for starting the next search.
19745             # We must subtract 1 to get an updated index because the splice
19746             # above just removed the best pair.
19747             # BUT CAUTION: if this is the first pair in the pair list, then
19748             # this produces an invalid index. So this index must be tested
19749             # before use in the next pass through the outer loop.
19750 122         416 $rhash->{_ix_best_last} = $ix_best - 1;
19751              
19752             # Turn on optimization if ...
19753 122 100 100     978 if (
      100        
19754              
19755             # it is not already on, and
19756             !$rhash->{_optimization_on}
19757              
19758             # we have not taken a shortcut to get here, and
19759             && !$incomplete_loop
19760              
19761             # we have seen a good break on strength, and
19762             && $num_bs
19763              
19764             )
19765             {
19766              
19767             # To deactivate optimization for testing purposes, the next
19768             # line can be commented out. This will increase run time.
19769 69         195 $rhash->{_optimization_on} = 1;
19770 69         148 if (DEBUG_RECOMBINE) {
19771             my $num_compares = $rhash->{_num_compares};
19772             my $pair_count = @ix_list;
19773             print STDERR
19774             "Entering optimization phase at $num_compares compares, pair count = $pair_count\n";
19775             }
19776             }
19777             }
19778 807         2248 return;
19779             } ## end sub recombine_inner_loop
19780              
19781             sub recombine_section_0 {
19782 339     339 0 815 my ( $itok, $ri_beg, $ri_end, $n ) = @_;
19783              
19784             # Recombine Section 0:
19785             # Examine special candidate joining token $itok
19786              
19787             # Given:
19788             # $itok = index of token at a possible join of lines $n-1 and $n
19789              
19790             # Return:
19791             # true => ok to combine
19792             # false => do not combine lines
19793              
19794             # Here are Indexes of the endpoint tokens of the two lines:
19795             #
19796             # -----line $n-1--- | -----line $n-----
19797             # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
19798             # ^ ^
19799             # | |
19800             # ------------$itok is one of these tokens
19801              
19802             # Put as many tests in this section to avoid duplicate code
19803             # and to make formatting independent of whether breaks are
19804             # to the left or right of an operator.
19805              
19806 339         515 my $nmax = @{$ri_end} - 1;
  339         629  
19807 339         647 my $ibeg_1 = $ri_beg->[ $n - 1 ];
19808 339         594 my $iend_1 = $ri_end->[ $n - 1 ];
19809 339         583 my $ibeg_2 = $ri_beg->[$n];
19810 339         566 my $iend_2 = $ri_end->[$n];
19811              
19812 339 50       793 if ($itok) {
19813              
19814 339         595 my $type = $types_to_go[$itok];
19815              
19816 339 100       1632 if ( $type eq ':' ) {
    100          
    100          
    50          
19817              
19818             # do not join at a colon unless it disobeys the
19819             # break request
19820 103 100       273 if ( $itok eq $iend_1 ) {
19821 1 50       7 return unless $want_break_before{$type};
19822             }
19823             else {
19824 102 50       341 return if $want_break_before{$type};
19825             }
19826             } ## end if ':'
19827              
19828             # handle math operators + - * /
19829             elsif ( $is_math_op{$type} ) {
19830              
19831             # Combine these lines if this line is a single
19832             # number, or if it is a short term with same
19833             # operator as the previous line. For example, in
19834             # the following code we will combine all of the
19835             # short terms $A, $B, $C, $D, $E, $F, together
19836             # instead of leaving them one per line:
19837             # my $time =
19838             # $A * $B * $C * $D * $E * $F *
19839             # ( 2. * $eps * $sigma * $area ) *
19840             # ( 1. / $tcold**3 - 1. / $thot**3 );
19841              
19842             # This can be important in math-intensive code.
19843              
19844 87         132 my $good_combo;
19845              
19846 87         231 my $itokp = min( $inext_to_go[$itok], $iend_2 );
19847 87         186 my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
19848 87         199 my $itokm = max( iprev_to_go($itok), $ibeg_1 );
19849 87         208 my $itokmm = max( iprev_to_go($itokm), $ibeg_1 );
19850              
19851             # check for a number on the right
19852 87 100       256 if ( $types_to_go[$itokp] eq 'n' ) {
19853              
19854             # ok if nothing else on right
19855 26 100       72 if ( $itokp == $iend_2 ) {
19856 2         7 $good_combo = 1;
19857             }
19858             else {
19859              
19860             # look one more token to right..
19861             # okay if math operator or some termination
19862             $good_combo =
19863             ( ( $itokpp == $iend_2 )
19864 24   100     256 && $is_math_op{ $types_to_go[$itokpp] } )
19865             || $types_to_go[$itokpp] =~ /^[#,;]$/;
19866             }
19867             }
19868              
19869             # check for a number on the left
19870 87 100 100     389 if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
19871              
19872             # okay if nothing else to left
19873 15 100       57 if ( $itokm == $ibeg_1 ) {
19874 6         17 $good_combo = 1;
19875             }
19876              
19877             # otherwise look one more token to left
19878             else {
19879              
19880             # okay if math operator, comma, or assignment
19881             $good_combo = ( $itokmm == $ibeg_1 )
19882             && ( $is_math_op{ $types_to_go[$itokmm] }
19883             || $types_to_go[$itokmm] =~ /^[,]$/
19884 9   66     72 || $is_assignment{ $types_to_go[$itokmm] } );
19885             }
19886             }
19887              
19888             # look for a single short token either side of the
19889             # operator
19890 87 100       209 if ( !$good_combo ) {
19891              
19892             # Slight adjustment factor to make results
19893             # independent of break before or after operator
19894             # in long summed lists. (An operator and a
19895             # space make two spaces).
19896 68 100       194 my $two = ( $itok eq $iend_1 ) ? 2 : 0;
19897              
19898             $good_combo =
19899              
19900             # numbers or id's on both sides of this joint
19901             $types_to_go[$itokp] =~ /^[in]$/
19902             && $types_to_go[$itokm] =~ /^[in]$/
19903              
19904             # one of the two lines must be short:
19905             && (
19906             (
19907             # no more than 2 nonblank tokens right
19908             # of joint
19909             $itokpp == $iend_2
19910              
19911             # short
19912             && token_sequence_length( $itokp, $iend_2 ) <
19913             $two + $rOpts_short_concatenation_item_length
19914             )
19915             || (
19916             # no more than 2 nonblank tokens left of
19917             # joint
19918             $itokmm == $ibeg_1
19919              
19920             # short
19921             && token_sequence_length( $ibeg_1, $itokm ) <
19922             2 - $two + $rOpts_short_concatenation_item_length
19923             )
19924              
19925             )
19926              
19927             # keep pure terms; don't mix +- with */
19928             && !(
19929             $is_plus_minus{$type}
19930             && ( $is_mult_div{ $types_to_go[$itokmm] }
19931             || $is_mult_div{ $types_to_go[$itokpp] } )
19932             )
19933             && !(
19934             $is_mult_div{$type}
19935             && ( $is_plus_minus{ $types_to_go[$itokmm] }
19936 68   66     520 || $is_plus_minus{ $types_to_go[$itokpp] } )
19937             )
19938              
19939             ;
19940             }
19941              
19942             # it is also good to combine if we can reduce to 2
19943             # lines
19944 87 100       240 if ( !$good_combo ) {
19945              
19946             # index on other line where same token would be
19947             # in a long chain.
19948 64 100       159 my $iother = ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
19949              
19950 64   33     239 $good_combo =
19951             $n == 2
19952             && $n == $nmax
19953             && $types_to_go[$iother] ne $type;
19954             }
19955              
19956 87 100       246 return unless ($good_combo);
19957              
19958             } ## end math
19959              
19960             elsif ( $is_amp_amp{$type} ) {
19961             ##TBD
19962             } ## end &&, ||
19963              
19964             elsif ( $is_assignment{$type} ) {
19965             ##TBD
19966             } ## end assignment
19967             }
19968              
19969             # ok to combine lines
19970 172         411 return 1;
19971             } ## end sub recombine_section_0
19972              
19973             sub recombine_section_2 {
19974              
19975 2318     2318 0 4572 my ( $ri_beg, $ri_end, $n, $this_line_is_semicolon_terminated ) = @_;
19976              
19977             # Recombine Section 2:
19978             # Examine token at $iend_1 (right end of first line of pair)
19979              
19980             # Here are Indexes of the endpoint tokens of the two lines:
19981             #
19982             # -----line $n-1--- | -----line $n-----
19983             # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
19984             # ^
19985             # |
19986             # -----Section 2 looks at this token
19987              
19988             # Returns:
19989             # (nothing) => do not join lines
19990             # 1, skip_Section_3 => ok to join lines
19991              
19992             # $skip_Section_3 is a flag for skipping the next section
19993 2318         3366 my $skip_Section_3 = 0;
19994              
19995 2318         3216 my $nmax = @{$ri_end} - 1;
  2318         3848  
19996 2318         3900 my $ibeg_1 = $ri_beg->[ $n - 1 ];
19997 2318         3550 my $iend_1 = $ri_end->[ $n - 1 ];
19998 2318         3644 my $iend_2 = $ri_end->[$n];
19999 2318         3561 my $ibeg_2 = $ri_beg->[$n];
20000 2318 100       5107 my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
20001 2318         3351 my $ibeg_nmax = $ri_beg->[$nmax];
20002              
20003 2318         3676 my $type_iend_1 = $types_to_go[$iend_1];
20004 2318         3401 my $type_iend_2 = $types_to_go[$iend_2];
20005 2318         3394 my $type_ibeg_1 = $types_to_go[$ibeg_1];
20006 2318         3620 my $type_ibeg_2 = $types_to_go[$ibeg_2];
20007              
20008             # an isolated '}' may join with a ';' terminated segment
20009 2318 100       11665 if ( $type_iend_1 eq '}' ) {
    100          
    50          
    100          
    100          
    50          
    50          
    100          
    100          
    100          
    50          
20010              
20011             # Check for cases where combining a semicolon terminated
20012             # statement with a previous isolated closing paren will
20013             # allow the combined line to be outdented. This is
20014             # generally a good move. For example, we can join up
20015             # the last two lines here:
20016             # (
20017             # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
20018             # $size, $atime, $mtime, $ctime, $blksize, $blocks
20019             # )
20020             # = stat($file);
20021             #
20022             # to get:
20023             # (
20024             # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
20025             # $size, $atime, $mtime, $ctime, $blksize, $blocks
20026             # ) = stat($file);
20027             #
20028             # which makes the parens line up.
20029             #
20030             # Another example, from Joe Matarazzo, probably looks best
20031             # with the 'or' clause appended to the trailing paren:
20032             # $self->some_method(
20033             # PARAM1 => 'foo',
20034             # PARAM2 => 'bar'
20035             # ) or die "Some_method didn't work";
20036             #
20037             # But we do not want to do this for something like the -lp
20038             # option where the paren is not outdentable because the
20039             # trailing clause will be far to the right.
20040             #
20041             # The logic here is synchronized with the logic in sub
20042             # sub get_final_indentation, which actually does
20043             # the outdenting.
20044             #
20045             my $combine_ok = $this_line_is_semicolon_terminated
20046              
20047             # only one token on last line
20048             && $ibeg_1 == $iend_1
20049              
20050             # must be structural paren
20051             && $tokens_to_go[$iend_1] eq ')'
20052              
20053             # style must allow outdenting,
20054 345   66     2254 && !$closing_token_indentation{')'}
20055              
20056             # but leading colons probably line up with a
20057             # previous colon or question (count could be wrong).
20058             && $type_ibeg_2 ne ':'
20059              
20060             # only one step in depth allowed. this line must not
20061             # begin with a ')' itself.
20062             && ( $nesting_depth_to_go[$iend_1] ==
20063             $nesting_depth_to_go[$iend_2] + 1 );
20064              
20065             # But only combine leading '&&', '||', if no previous && || :
20066             # seen. This count includes these tokens at all levels. The
20067             # idea is that seeing these at any level can make it hard to read
20068             # formatting if we recombine.
20069 345 100       944 if ( $is_amp_amp{$type_ibeg_2} ) {
20070 16         60 foreach my $n_t ( reverse( 0 .. $n - 2 ) ) {
20071 15         34 my $ibeg_t = $ri_beg->[$n_t];
20072 15         25 my $type_t = $types_to_go[$ibeg_t];
20073 15 100 66     105 if ( $is_amp_amp{$type_t} || $type_t eq ':' ) {
20074 5         11 $combine_ok = 0;
20075 5         16 last;
20076             }
20077             }
20078             }
20079              
20080 345   66     1627 $skip_Section_3 ||= $combine_ok;
20081              
20082             # YVES patch 2 of 2:
20083             # Allow cuddled eval chains, like this:
20084             # eval {
20085             # #STUFF;
20086             # 1; # return true
20087             # } or do {
20088             # #handle error
20089             # };
20090             # This patch works together with a patch in
20091             # setting adjusted indentation (where the closing eval
20092             # brace is outdented if possible).
20093             # The problem is that an 'eval' block has continuation
20094             # indentation and it looks better to undo it in some
20095             # cases. If we do not use this patch we would get:
20096             # eval {
20097             # #STUFF;
20098             # 1; # return true
20099             # }
20100             # or do {
20101             # #handle error
20102             # };
20103             # The alternative, for uncuddled style, is to create
20104             # a patch in get_final_indentation which undoes
20105             # the indentation of a leading line like 'or do {'.
20106             # This doesn't work well with -icb through
20107 345 50 100     1862 if (
      100        
      100        
      33        
      66        
20108             $block_type_to_go[$iend_1]
20109             && $rOpts_brace_follower_vertical_tightness > 0
20110             && (
20111              
20112             # -bfvt=1, allow cuddled eval chains [default]
20113             (
20114             $tokens_to_go[$iend_2] eq '{'
20115             && $block_type_to_go[$iend_1] eq 'eval'
20116             && !ref( $leading_spaces_to_go[$iend_1] )
20117             && !$rOpts_indent_closing_brace
20118             )
20119              
20120             # -bfvt=2, allow most brace followers [part of git #110]
20121             || ( $rOpts_brace_follower_vertical_tightness > 1
20122             && $ibeg_1 == $iend_1 )
20123              
20124             )
20125              
20126             && (
20127             ( $type_ibeg_2 =~ /^(\&\&|\|\|)$/ )
20128             || ( $type_ibeg_2 eq 'k'
20129             && $is_and_or{ $tokens_to_go[$ibeg_2] } )
20130             || $is_if_unless{ $tokens_to_go[$ibeg_2] }
20131             )
20132             )
20133             {
20134 8   50     48 $skip_Section_3 ||= 1;
20135             }
20136              
20137             return
20138             unless (
20139 345 100 100     2930 $skip_Section_3
      66        
20140              
20141             # handle '.' and '?' specially below
20142             || ( $type_ibeg_2 =~ /^[\.\?]$/ )
20143              
20144             # fix for c054 (unusual -pbp case)
20145             || $type_ibeg_2 eq '=='
20146              
20147             );
20148             }
20149              
20150             elsif ( $type_iend_1 eq '{' ) {
20151              
20152             # YVES
20153             # honor breaks at opening brace
20154             # Added to prevent recombining something like this:
20155             # } || eval { package main;
20156 596 100       2407 return if ( $forced_breakpoint_to_go[$iend_1] );
20157             }
20158              
20159             # do not recombine lines with ending &&, ||,
20160             elsif ( $is_amp_amp{$type_iend_1} ) {
20161 0 0       0 return unless ( $want_break_before{$type_iend_1} );
20162             }
20163              
20164             # Identify and recombine a broken ?/: chain
20165             elsif ( $type_iend_1 eq '?' ) {
20166              
20167             # Do not recombine different levels
20168             return
20169 1 50       6 if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
20170              
20171             # do not recombine unless next line ends in :
20172 1 50       5 return unless $type_iend_2 eq ':';
20173             }
20174              
20175             # for lines ending in a comma...
20176             elsif ( $type_iend_1 eq ',' ) {
20177              
20178             # Do not recombine at comma which is following the
20179             # input bias.
20180             # NOTE: this could be controlled by a special flag,
20181             # but it seems to work okay.
20182 805 100       2646 return if ( $old_breakpoint_to_go[$iend_1] );
20183              
20184             # An isolated '},' may join with an identifier + ';'
20185             # This is useful for the class of a 'bless' statement
20186             # (bless.t)
20187 140 100 100     524 if ( $type_ibeg_1 eq '}'
20188             && $type_ibeg_2 eq 'i' )
20189             {
20190             return
20191 1 50 33     11 unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
      33        
20192             && ( $iend_2 == ( $ibeg_2 + 1 ) )
20193             && $this_line_is_semicolon_terminated );
20194              
20195             # override breakpoint
20196 0         0 $forced_breakpoint_to_go[$iend_1] = 0;
20197             }
20198              
20199             # but otherwise ..
20200             else {
20201              
20202             # do not recombine after a comma unless this will
20203             # leave just 1 more line
20204 139 100       438 return unless ( $n + 1 >= $nmax );
20205              
20206             # do not recombine if there is a change in
20207             # indentation depth
20208             return
20209 27 100       133 if ( $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
20210              
20211             # do not recombine a "complex expression" after a
20212             # comma. "complex" means no parens.
20213 10         22 my $saw_paren;
20214 10         68 foreach my $ii ( $ibeg_2 .. $iend_2 ) {
20215 29 50       75 if ( $tokens_to_go[$ii] eq '(' ) {
20216 0         0 $saw_paren = 1;
20217 0         0 last;
20218             }
20219             }
20220 10 50       33 return if $saw_paren;
20221             }
20222             }
20223              
20224             # opening paren..
20225             elsif ( $type_iend_1 eq '(' ) {
20226              
20227             # No longer doing this
20228             }
20229              
20230             elsif ( $type_iend_1 eq ')' ) {
20231              
20232             # No longer doing this
20233             }
20234              
20235             # keep a terminal for-semicolon
20236             elsif ( $type_iend_1 eq 'f' ) {
20237 8         22 return;
20238             }
20239              
20240             # if '=' at end of line ...
20241             elsif ( $is_assignment{$type_iend_1} ) {
20242              
20243             # keep break after = if it was in input stream
20244             # this helps prevent 'blinkers'
20245             return
20246             if (
20247 78 100 66     521 $old_breakpoint_to_go[$iend_1]
20248              
20249             # don't strand an isolated '='
20250             && $iend_1 != $ibeg_1
20251             );
20252              
20253 42   66     273 my $is_short_quote =
20254             ( $type_ibeg_2 eq 'Q'
20255             && $ibeg_2 == $iend_2
20256             && token_sequence_length( $ibeg_2, $ibeg_2 ) <
20257             $rOpts_short_concatenation_item_length );
20258 42   33     174 my $is_ternary = (
20259             $type_ibeg_1 eq '?' && ( $ibeg_3 >= 0
20260             && $types_to_go[$ibeg_3] eq ':' )
20261             );
20262              
20263             # always join an isolated '=', a short quote, or if this
20264             # will put ?/: at start of adjacent lines
20265 42 50 33     441 if ( $ibeg_1 != $iend_1
      33        
20266             && !$is_short_quote
20267             && !$is_ternary )
20268             {
20269             return
20270             unless (
20271             (
20272              
20273             # unless we can reduce this to two lines
20274 42 50 66     576 $nmax < $n + 2
      33        
      66        
20275              
20276             # or three lines, the last with a leading
20277             # semicolon
20278             || ( $nmax == $n + 2
20279             && $types_to_go[$ibeg_nmax] eq ';' )
20280              
20281             # or the next line ends with a here doc
20282             || $type_iend_2 eq 'h'
20283              
20284             # or the next line ends in an open paren or
20285             # brace and the break hasn't been forced
20286             # [dima.t]
20287             || ( !$forced_breakpoint_to_go[$iend_1]
20288             && $type_iend_2 eq '{' )
20289             )
20290              
20291             # do not recombine if the two lines might align
20292             # well this is a very approximate test for this
20293             && (
20294              
20295             # RT#127633 - the leading tokens are not
20296             # operators
20297             ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
20298              
20299             # or they are different
20300             || ( $ibeg_3 >= 0
20301             && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
20302             )
20303             );
20304              
20305 21 100 33     126 if (
      66        
20306              
20307             # Recombine if we can make two lines
20308             $nmax >= $n + 2
20309              
20310             # -lp users often prefer this:
20311             # my $title = function($env, $env, $sysarea,
20312             # "bubba Borrower Entry");
20313             # so we will recombine if -lp is used we have
20314             # ending comma
20315             && !(
20316             $ibeg_3 > 0
20317             && ref( $leading_spaces_to_go[$ibeg_3] )
20318             && $type_iend_2 eq ','
20319             )
20320             )
20321             {
20322              
20323             # otherwise, scan the rhs line up to last token for
20324             # complexity. Note that we are not counting the last token
20325             # in case it is an opening paren.
20326 1         5 my $ok = simple_rhs( $ri_end, $n, $nmax, $ibeg_2, $iend_2 );
20327 1 50       4 return if ( !$ok );
20328              
20329             }
20330             }
20331              
20332 21 100       142 unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
20333 19         56 $forced_breakpoint_to_go[$iend_1] = 0;
20334             }
20335             }
20336              
20337             # for keywords..
20338             elsif ( $type_iend_1 eq 'k' ) {
20339              
20340             # make major control keywords stand out
20341             # (recombine.t)
20342             return
20343             if (
20344              
20345             #/^(last|next|redo|return)$/
20346 26 100 100     160 $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
20347              
20348             # but only if followed by multiple lines
20349             && $n < $nmax
20350             );
20351              
20352 15 50       54 if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
20353             return
20354 0 0       0 unless $want_break_before{ $tokens_to_go[$iend_1] };
20355             }
20356             }
20357             elsif ( $type_iend_1 eq '.' ) {
20358              
20359             # NOTE: the logic here should match that of section 3 so that
20360             # line breaks are independent of choice of break before or after.
20361             # It would be nice to combine them in section 0, but the
20362             # special junction case ') .' makes that difficult.
20363             # This section added to fix issues c172, c174.
20364 0         0 my $i_next_nonblank = $ibeg_2;
20365 0         0 my $summed_len_1 = $summed_lengths_to_go[ $iend_1 + 1 ] -
20366             $summed_lengths_to_go[$ibeg_1];
20367 0         0 my $summed_len_2 = $summed_lengths_to_go[ $iend_2 + 1 ] -
20368             $summed_lengths_to_go[$ibeg_2];
20369 0         0 my $iend_1_minus = max( $ibeg_1, iprev_to_go($iend_1) );
20370              
20371             return
20372             unless (
20373              
20374             # ... unless there is just one and we can reduce
20375             # this to two lines if we do. For example, this
20376             #
20377             #
20378             # $bodyA .=
20379             # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
20380             #
20381             # looks better than this:
20382             # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' .
20383             # '$args .= $pat;'
20384              
20385             # check for 2 lines, not in a long broken '.' chain
20386 0 0 0     0 ( $n == 2 && $n == $nmax && $type_iend_1 ne $type_iend_2 )
      0        
      0        
      0        
      0        
      0        
      0        
20387              
20388             # ... or this would strand a short quote , like this
20389             # "some long quote" .
20390             # "\n";
20391             || (
20392             $types_to_go[$i_next_nonblank] eq 'Q'
20393             && $i_next_nonblank >= $iend_2 - 2
20394             && $token_lengths_to_go[$i_next_nonblank] <
20395             $rOpts_short_concatenation_item_length
20396              
20397             # additional constraints to fix c167
20398             && ( $types_to_go[$iend_1_minus] ne 'Q'
20399             || $summed_len_2 < $summed_len_1 )
20400             )
20401             );
20402             }
20403 618         1821 return ( 1, $skip_Section_3 );
20404             } ## end sub recombine_section_2
20405              
20406             sub simple_rhs {
20407              
20408 1     1 0 3 my ( $ri_end, $n, $nmax, $ibeg_2, $iend_2 ) = @_;
20409              
20410             # Scan line ibeg_2 to $iend_2 up to last token for complexity.
20411             # We are not counting the last token in case it is an opening paren.
20412             # Return:
20413             # true if rhs is simple, ok to recombine
20414             # false otherwise
20415              
20416 1         2 my $tv = 0;
20417 1         3 my $depth = $nesting_depth_to_go[$ibeg_2];
20418 1         5 foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
20419 2 50       6 if ( $nesting_depth_to_go[$i] != $depth ) {
20420 0         0 $tv++;
20421 0 0       0 last if ( $tv > 1 );
20422             }
20423 2         5 $depth = $nesting_depth_to_go[$i];
20424             }
20425              
20426             # ok to recombine if no level changes before
20427             # last token
20428 1 50       4 if ( $tv > 0 ) {
20429              
20430             # otherwise, do not recombine if more than
20431             # two level changes.
20432 0 0       0 return if ( $tv > 1 );
20433              
20434             # check total complexity of the two
20435             # adjacent lines that will occur if we do
20436             # this join
20437 0 0       0 my $istop =
20438             ( $n < $nmax )
20439             ? $ri_end->[ $n + 1 ]
20440             : $iend_2;
20441 0         0 foreach my $i ( $iend_2 .. $istop ) {
20442 0 0       0 if ( $nesting_depth_to_go[$i] != $depth ) {
20443 0         0 $tv++;
20444 0 0       0 last if ( $tv > 2 );
20445             }
20446 0         0 $depth = $nesting_depth_to_go[$i];
20447             }
20448              
20449             # do not recombine if total is more than 2
20450             # level changes
20451 0 0       0 return if ( $tv > 2 );
20452             }
20453 1         2 return 1;
20454             } ## end sub simple_rhs
20455              
20456             sub recombine_section_3 {
20457              
20458 606     606 0 1412 my ( $ri_beg, $ri_end, $n, $this_line_is_semicolon_terminated ) = @_;
20459              
20460             # Recombine Section 3:
20461             # Examine token at $ibeg_2 (right end of first line of pair)
20462              
20463             # Here are Indexes of the endpoint tokens of the two lines:
20464             #
20465             # -----line $n-1--- | -----line $n-----
20466             # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
20467             # ^
20468             # |
20469             # -----Section 3 looks at this token
20470              
20471             # Returns:
20472             # (nothing) => do not join lines
20473             # 1, bs_tweak => ok to join lines
20474              
20475             # $bstweak is a small tolerance to add to bond strengths
20476 606         1071 my $bs_tweak = 0;
20477              
20478 606         938 my $nmax = @{$ri_end} - 1;
  606         1193  
20479 606         1165 my $ibeg_1 = $ri_beg->[ $n - 1 ];
20480 606         1113 my $iend_1 = $ri_end->[ $n - 1 ];
20481 606         1206 my $iend_2 = $ri_end->[$n];
20482 606         1088 my $ibeg_2 = $ri_beg->[$n];
20483              
20484 606 100       1563 my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
20485 606 100       1705 my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
20486 606 100       1527 my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
20487 606         1052 my $ibeg_nmax = $ri_beg->[$nmax];
20488              
20489 606         1173 my $type_iend_1 = $types_to_go[$iend_1];
20490 606         1069 my $type_iend_2 = $types_to_go[$iend_2];
20491 606         1042 my $type_ibeg_1 = $types_to_go[$ibeg_1];
20492 606         1002 my $type_ibeg_2 = $types_to_go[$ibeg_2];
20493              
20494             # handle lines with leading &&, ||
20495 606 100       3223 if ( $is_amp_amp{$type_ibeg_2} ) {
    100          
    100          
    100          
    50          
    100          
20496              
20497             # ok to recombine if it follows a ? or :
20498             # and is followed by an open paren..
20499             my $ok =
20500             ( $is_ternary{$type_ibeg_1} && $tokens_to_go[$iend_2] eq '(' )
20501              
20502             # or is followed by a ? or : at same depth
20503             #
20504             # We are looking for something like this. We can
20505             # recombine the && line with the line above to make the
20506             # structure more clear:
20507             # return
20508             # exists $G->{Attr}->{V}
20509             # && exists $G->{Attr}->{V}->{$u}
20510             # ? %{ $G->{Attr}->{V}->{$u} }
20511             # : ();
20512             #
20513             # We should probably leave something like this alone:
20514             # return
20515             # exists $G->{Attr}->{E}
20516             # && exists $G->{Attr}->{E}->{$u}
20517             # && exists $G->{Attr}->{E}->{$u}->{$v}
20518             # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
20519             # : ();
20520             # so that we either have all of the &&'s (or ||'s)
20521             # on one line, as in the first example, or break at
20522             # each one as in the second example. However, it
20523             # sometimes makes things worse to check for this because
20524             # it prevents multiple recombinations. So this is not done.
20525             || ( $ibeg_3 >= 0
20526 44   66     351 && $is_ternary{ $types_to_go[$ibeg_3] }
20527             && $nesting_depth_to_go[$ibeg_3] ==
20528             $nesting_depth_to_go[$ibeg_2] );
20529              
20530             # Combine a trailing && term with an || term: fix for
20531             # c060 This is rare but can happen.
20532 44 50 0     385 $ok ||= 1
      100        
      66        
      33        
20533             if ( $ibeg_3 < 0
20534             && $type_ibeg_2 eq '&&'
20535             && $type_ibeg_1 eq '||'
20536             && $nesting_depth_to_go[$ibeg_2] ==
20537             $nesting_depth_to_go[$ibeg_1] );
20538              
20539 44 50 66     286 return if !$ok && $want_break_before{$type_ibeg_2};
20540 1         3 $forced_breakpoint_to_go[$iend_1] = 0;
20541              
20542             # tweak the bond strength to give this joint priority
20543             # over ? and :
20544 1         7 $bs_tweak = 0.25;
20545             }
20546              
20547             # Identify and recombine a broken ?/: chain
20548             elsif ( $type_ibeg_2 eq '?' ) {
20549              
20550             # Do not recombine different levels
20551 87         167 my $lev = $levels_to_go[$ibeg_2];
20552 87 100       275 return if ( $lev ne $levels_to_go[$ibeg_1] );
20553              
20554             # Do not recombine a '?' if either next line or
20555             # previous line does not start with a ':'. The reasons
20556             # are that (1) no alignment of the ? will be possible
20557             # and (2) the expression is somewhat complex, so the
20558             # '?' is harder to see in the interior of the line.
20559 72   66     308 my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
20560 72   100     313 my $precedes_colon = $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
20561 72 100 100     308 return unless ( $follows_colon || $precedes_colon );
20562              
20563             # we will always combining a ? line following a : line
20564 55 100       166 if ( !$follows_colon ) {
20565              
20566             # ...otherwise recombine only if it looks like a
20567             # chain. we will just look at a few nearby lines
20568             # to see if this looks like a chain.
20569 29         60 my $local_count = 0;
20570 29         67 foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
20571 116 100 100     499 $local_count++
      66        
20572             if $ii >= 0
20573             && $types_to_go[$ii] eq ':'
20574             && $levels_to_go[$ii] == $lev;
20575             }
20576 29 100       142 return unless ( $local_count > 1 );
20577             }
20578 31         80 $forced_breakpoint_to_go[$iend_1] = 0;
20579             }
20580              
20581             # do not recombine lines with leading '.'
20582             elsif ( $type_ibeg_2 eq '.' ) {
20583 144         330 my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
20584 144         238 my $summed_len_1 = $summed_lengths_to_go[ $iend_1 + 1 ] -
20585             $summed_lengths_to_go[$ibeg_1];
20586 144         269 my $summed_len_2 = $summed_lengths_to_go[ $iend_2 + 1 ] -
20587             $summed_lengths_to_go[$ibeg_2];
20588              
20589             return
20590             unless (
20591              
20592             # ... unless there is just one and we can reduce
20593             # this to two lines if we do. For example, this
20594             #
20595             #
20596             # $bodyA .=
20597             # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
20598             #
20599             # looks better than this:
20600             # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
20601             # . '$args .= $pat;'
20602              
20603 144 50 66     1098 ( $n == 2 && $n == $nmax && $type_ibeg_1 ne $type_ibeg_2 )
      33        
      100        
      100        
      66        
      66        
      33        
20604              
20605             # ... or this would strand a short quote , like this
20606             # . "some long quote"
20607             # . "\n";
20608             || (
20609             $types_to_go[$i_next_nonblank] eq 'Q'
20610             && $i_next_nonblank >= $iend_2 - 1
20611             && $token_lengths_to_go[$i_next_nonblank] <
20612             $rOpts_short_concatenation_item_length
20613              
20614             # additional constraints to fix c167
20615             && (
20616             $types_to_go[$iend_1] ne 'Q'
20617              
20618             # allow a term shorter than the previous term
20619             || $summed_len_2 < $summed_len_1
20620              
20621             # or allow a short semicolon-terminated term if this
20622             # makes two lines (see c169)
20623             || ( $n == 2
20624             && $n == $nmax
20625             && $this_line_is_semicolon_terminated )
20626             )
20627             )
20628             );
20629             }
20630              
20631             # handle leading keyword..
20632             elsif ( $type_ibeg_2 eq 'k' ) {
20633              
20634             # handle leading "or"
20635 33 100 66     283 if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
    100          
    100          
20636             return
20637             unless (
20638             $this_line_is_semicolon_terminated
20639             && (
20640             $type_ibeg_1 eq '}'
20641             || (
20642              
20643             # following 'if' or 'unless' or 'or'
20644             $type_ibeg_1 eq 'k'
20645 8 100 100     115 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
      100        
20646              
20647             # important: only combine a very simple
20648             # or statement because the step below
20649             # may have combined a trailing 'and'
20650             # with this or, and we do not want to
20651             # then combine everything together
20652             && ( $iend_2 - $ibeg_2 <= 7 )
20653             )
20654             )
20655             );
20656              
20657             #X: RT #81854
20658 4 100       21 $forced_breakpoint_to_go[$iend_1] = 0
20659             unless ( $old_breakpoint_to_go[$iend_1] );
20660             }
20661              
20662             # handle leading 'and' and 'xor'
20663             elsif ($tokens_to_go[$ibeg_2] eq 'and'
20664             || $tokens_to_go[$ibeg_2] eq 'xor' )
20665             {
20666              
20667             # Decide if we will combine a single terminal 'and'
20668             # after an 'if' or 'unless'.
20669              
20670             # This looks best with the 'and' on the same
20671             # line as the 'if':
20672             #
20673             # $a = 1
20674             # if $seconds and $nu < 2;
20675             #
20676             # But this looks better as shown:
20677             #
20678             # $a = 1
20679             # if !$this->{Parents}{$_}
20680             # or $this->{Parents}{$_} eq $_;
20681             #
20682             return
20683             unless (
20684             $this_line_is_semicolon_terminated
20685             && (
20686              
20687             # following 'if' or 'unless' or 'or'
20688             $type_ibeg_1 eq 'k'
20689 8 100 66     80 && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
      66        
      100        
20690             || $tokens_to_go[$ibeg_1] eq 'or' )
20691             )
20692             );
20693             }
20694              
20695             # handle leading "if" and "unless"
20696             elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
20697              
20698             # Combine something like:
20699             # next
20700             # if ( $lang !~ /${l}$/i );
20701             # into:
20702             # next if ( $lang !~ /${l}$/i );
20703             return
20704             unless (
20705             $this_line_is_semicolon_terminated
20706              
20707             # previous line begins with 'and' or 'or'
20708             && $type_ibeg_1 eq 'k'
20709 8 50 66     49 && $is_and_or{ $tokens_to_go[$ibeg_1] }
      33        
20710              
20711             );
20712             }
20713              
20714             # handle all other leading keywords
20715             else {
20716              
20717             # keywords look best at start of lines,
20718             # but combine things like "1 while"
20719 9 100       55 unless ( $is_assignment{$type_iend_1} ) {
20720             return
20721 8 50 33     80 if ( ( $type_iend_1 ne 'k' )
20722             && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
20723             }
20724             }
20725             }
20726              
20727             # similar treatment of && and || as above for 'and' and
20728             # 'or': NOTE: This block of code is currently bypassed
20729             # because of a previous block but is retained for possible
20730             # future use.
20731             elsif ( $is_amp_amp{$type_ibeg_2} ) {
20732              
20733             # maybe looking at something like:
20734             # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
20735              
20736             return
20737             unless (
20738             $this_line_is_semicolon_terminated
20739              
20740             # previous line begins with an 'if' or 'unless'
20741             # keyword
20742             && $type_ibeg_1 eq 'k'
20743 0 0 0     0 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
      0        
20744              
20745             );
20746             }
20747              
20748             # handle line with leading = or similar
20749             elsif ( $is_assignment{$type_ibeg_2} ) {
20750 11 50 33     60 return unless ( $n == 1 || $n == $nmax );
20751 11 50       51 return if ( $old_breakpoint_to_go[$iend_1] );
20752             return
20753             unless (
20754              
20755             # unless we can reduce this to two lines
20756 11 50 66     162 $nmax == 2
      66        
      33        
      33        
      33        
20757              
20758             # or three lines, the last with a leading semicolon
20759             || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
20760              
20761             # or the next line ends with a here doc
20762             || $type_iend_2 eq 'h'
20763              
20764             # or this is a short line ending in ;
20765             || ( $n == $nmax
20766             && $this_line_is_semicolon_terminated )
20767             );
20768 1         3 $forced_breakpoint_to_go[$iend_1] = 0;
20769             }
20770 376         1081 return ( 1, $bs_tweak );
20771             } ## end sub recombine_section_3
20772              
20773             } ## end closure recombine_breakpoints
20774              
20775             sub insert_final_ternary_breaks {
20776              
20777 81     81 0 1999 my ( $self, $ri_left, $ri_right ) = @_;
20778              
20779             # Called once per batch to look for and do any final line breaks for
20780             # long ternary chains
20781              
20782 81         173 my $nmax = @{$ri_right} - 1;
  81         250  
20783              
20784             # scan the left and right end tokens of all lines
20785 81         255 my $i_first_colon = -1;
20786 81         276 for my $n ( 0 .. $nmax ) {
20787 264         469 my $il = $ri_left->[$n];
20788 264         416 my $ir = $ri_right->[$n];
20789 264         468 my $typel = $types_to_go[$il];
20790 264         480 my $typer = $types_to_go[$ir];
20791 264 100       715 return if ( $typel eq '?' );
20792 229 100       543 return if ( $typer eq '?' );
20793 228 100       757 if ( $typel eq ':' ) { $i_first_colon = $il; last; }
  20 100       74  
  20         65  
20794 1         2 elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
  1         4  
20795             }
20796              
20797             # For long ternary chains,
20798             # if the first : we see has its ? is in the interior
20799             # of a preceding line, then see if there are any good
20800             # breakpoints before the ?.
20801 45 100       302 if ( $i_first_colon > 0 ) {
20802 20         67 my $i_question = $mate_index_to_go[$i_first_colon];
20803 20 100 66     120 if ( defined($i_question) && $i_question > 0 ) {
20804 12         27 my @insert_list;
20805 12         60 foreach my $ii ( reverse( 0 .. $i_question - 1 ) ) {
20806 133         197 my $token = $tokens_to_go[$ii];
20807 133         205 my $type = $types_to_go[$ii];
20808              
20809             # For now, a good break is either a comma or,
20810             # in a long chain, a 'return'.
20811             # Patch for RT #126633: added the $nmax>1 check to avoid
20812             # breaking after a return for a simple ternary. For longer
20813             # chains the break after return allows vertical alignment, so
20814             # it is still done. So perltidy -wba='?' will not break
20815             # immediately after the return in the following statement:
20816             # sub x {
20817             # return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
20818             # 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
20819             # }
20820 133 100 100     468 if (
      66        
20821             (
20822             $type eq ','
20823             || $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
20824             )
20825             && $self->in_same_container_i( $ii, $i_question )
20826             )
20827             {
20828 1         4 push @insert_list, $ii;
20829 1         4 last;
20830             }
20831             }
20832              
20833             # insert any new break points
20834 12 100       110 if (@insert_list) {
20835 1         9 $self->insert_additional_breaks( \@insert_list, $ri_left,
20836             $ri_right );
20837             }
20838             }
20839             }
20840 45         230 return;
20841             } ## end sub insert_final_ternary_breaks
20842              
20843             sub insert_breaks_before_list_opening_containers {
20844              
20845 50     50 0 132 my ( $self, $ri_left, $ri_right ) = @_;
20846              
20847             # This routine is called once per batch to implement the parameters
20848             # --break-before-hash-brace, etc.
20849              
20850             # Nothing to do if none of these parameters has been set
20851 50 50       136 return unless %break_before_container_types;
20852              
20853 50         73 my $nmax = @{$ri_right} - 1;
  50         104  
20854 50 50       119 return unless ( $nmax >= 0 );
20855              
20856 50         89 my $rLL = $self->[_rLL_];
20857              
20858 50         88 my $rbreak_before_container_by_seqno =
20859             $self->[_rbreak_before_container_by_seqno_];
20860 50         88 my $rK_weld_left = $self->[_rK_weld_left_];
20861              
20862             # scan the ends of all lines
20863 50         70 my @insert_list;
20864 50         129 for my $n ( 0 .. $nmax ) {
20865 143         225 my $il = $ri_left->[$n];
20866 143         204 my $ir = $ri_right->[$n];
20867 143 100       284 next unless ( $ir > $il );
20868 122         208 my $Kl = $K_to_go[$il];
20869 122         174 my $Kr = $K_to_go[$ir];
20870 122         167 my $Kend = $Kr;
20871 122         239 my $type_end = $rLL->[$Kr]->[_TYPE_];
20872              
20873             # Backup before any side comment
20874 122 100       252 if ( $type_end eq '#' ) {
20875 4         24 $Kend = $self->K_previous_nonblank($Kr);
20876 4 50       14 next unless defined($Kend);
20877 4         9 $type_end = $rLL->[$Kend]->[_TYPE_];
20878             }
20879              
20880             # Backup to the start of any weld; fix for b1173.
20881 122 50       220 if ($total_weld_count) {
20882 0         0 my $Kend_test = $rK_weld_left->{$Kend};
20883 0 0 0     0 if ( defined($Kend_test) && $Kend_test > $Kl ) {
20884 0         0 $Kend = $Kend_test;
20885 0         0 $Kend_test = $rK_weld_left->{$Kend};
20886             }
20887              
20888             # Do not break if we did not back up to the start of a weld
20889             # (shouldn't happen)
20890 0 0       0 next if ( defined($Kend_test) );
20891             }
20892              
20893 122         186 my $token = $rLL->[$Kend]->[_TOKEN_];
20894 122 100       282 next unless ( $is_opening_token{$token} );
20895 30 50       90 next unless ( $Kl < $Kend - 1 );
20896              
20897 30         60 my $seqno = $rLL->[$Kend]->[_TYPE_SEQUENCE_];
20898 30 50       77 next unless ( defined($seqno) );
20899              
20900             # Use the flag which was previously set
20901 30 100       78 next unless ( $rbreak_before_container_by_seqno->{$seqno} );
20902              
20903             # Install a break before this opening token.
20904 14         45 my $Kbreak = $self->K_previous_nonblank($Kend);
20905 14         33 my $ibreak = $Kbreak - $Kl + $il;
20906 14 50       66 next if ( $ibreak < $il );
20907 14 50       36 next if ( $nobreak_to_go[$ibreak] );
20908 14         37 push @insert_list, $ibreak;
20909             }
20910              
20911             # insert any new break points
20912 50 100       132 if (@insert_list) {
20913 10         49 $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
20914             }
20915 50         116 return;
20916             } ## end sub insert_breaks_before_list_opening_containers
20917              
20918             sub note_added_semicolon {
20919 18     18 0 47 my ( $self, $line_number ) = @_;
20920 18         46 $self->[_last_added_semicolon_at_] = $line_number;
20921 18 100       63 if ( $self->[_added_semicolon_count_] == 0 ) {
20922 15         37 $self->[_first_added_semicolon_at_] = $line_number;
20923             }
20924 18         39 $self->[_added_semicolon_count_]++;
20925 18         84 write_logfile_entry("Added ';' here\n");
20926 18         33 return;
20927             } ## end sub note_added_semicolon
20928              
20929             sub note_deleted_semicolon {
20930 13     13 0 25 my ( $self, $line_number ) = @_;
20931 13         37 $self->[_last_deleted_semicolon_at_] = $line_number;
20932 13 100       43 if ( $self->[_deleted_semicolon_count_] == 0 ) {
20933 2         6 $self->[_first_deleted_semicolon_at_] = $line_number;
20934             }
20935 13         21 $self->[_deleted_semicolon_count_]++;
20936 13         48 write_logfile_entry("Deleted unnecessary ';' at line $line_number\n");
20937 13         21 return;
20938             } ## end sub note_deleted_semicolon
20939              
20940             sub note_embedded_tab {
20941 0     0 0 0 my ( $self, $line_number ) = @_;
20942 0         0 $self->[_embedded_tab_count_]++;
20943 0         0 $self->[_last_embedded_tab_at_] = $line_number;
20944 0 0       0 if ( !$self->[_first_embedded_tab_at_] ) {
20945 0         0 $self->[_first_embedded_tab_at_] = $line_number;
20946             }
20947              
20948 0 0       0 if ( $self->[_embedded_tab_count_] <= MAX_NAG_MESSAGES ) {
20949 0         0 write_logfile_entry("Embedded tabs in quote or pattern\n");
20950             }
20951 0         0 return;
20952             } ## end sub note_embedded_tab
20953              
20954 38     38   429 use constant DEBUG_CORRECT_LP => 0;
  38         150  
  38         66329  
20955              
20956             sub correct_lp_indentation {
20957              
20958             # When the -lp option is used, we need to make a last pass through
20959             # each line to correct the indentation positions in case they differ
20960             # from the predictions. This is necessary because perltidy uses a
20961             # predictor/corrector method for aligning with opening parens. The
20962             # predictor is usually good, but sometimes stumbles. The corrector
20963             # tries to patch things up once the actual opening paren locations
20964             # are known.
20965 134     134 0 324 my ( $self, $ri_first, $ri_last ) = @_;
20966              
20967             # first remove continuation indentation if appropriate
20968 134         223 my $max_line = @{$ri_first} - 1;
  134         267  
20969              
20970             #---------------------------------------------------------------------------
20971             # PASS 1: reduce indentation if necessary at any long one-line blocks (c098)
20972             #---------------------------------------------------------------------------
20973              
20974             # The point is that sub 'starting_one_line_block' made one-line blocks based
20975             # on default indentation, not -lp indentation. So some of the one-line
20976             # blocks may be too long when given -lp indentation. We will fix that now
20977             # if possible, using the list of these closing block indexes.
20978 134         313 my $ri_starting_one_line_block =
20979             $self->[_this_batch_]->[_ri_starting_one_line_block_];
20980 134 100       222 if ( @{$ri_starting_one_line_block} ) {
  134         425  
20981 5         37 $self->correct_lp_indentation_pass_1( $ri_first, $ri_last,
20982             $ri_starting_one_line_block );
20983             }
20984              
20985             #-------------------------------------------------------------------
20986             # PASS 2: look for and fix other problems in each line of this batch
20987             #-------------------------------------------------------------------
20988              
20989             # look at each output line ...
20990 134         347 foreach my $line ( 0 .. $max_line ) {
20991 576         891 my $ibeg = $ri_first->[$line];
20992 576         832 my $iend = $ri_last->[$line];
20993              
20994             # looking at each token in this output line ...
20995 576         1043 foreach my $i ( $ibeg .. $iend ) {
20996              
20997             # How many space characters to place before this token
20998             # for special alignment. Actual padding is done in the
20999             # continue block.
21000              
21001             # looking for next unvisited indentation item ...
21002 3869         5314 my $indentation = $leading_spaces_to_go[$i];
21003              
21004             # This is just for indentation objects (c098)
21005 3869 100       6814 next unless ( ref($indentation) );
21006              
21007             # Visit each indentation object just once
21008 3065 100       5978 next if ( $indentation->get_marked() );
21009              
21010             # Mark first visit
21011 608         1616 $indentation->set_marked(1);
21012              
21013             # Skip indentation objects which do not align with container tokens
21014 608         1249 my $align_seqno = $indentation->get_align_seqno();
21015 608 100       1450 next unless ($align_seqno);
21016              
21017             # Skip a container which is entirely on this line
21018 229         629 my $Ko = $self->[_K_opening_container_]->{$align_seqno};
21019 229         527 my $Kc = $self->[_K_closing_container_]->{$align_seqno};
21020 229 50 33     902 if ( defined($Ko) && defined($Kc) ) {
21021 229 100 100     950 next if ( $Ko >= $K_to_go[$ibeg] && $Kc <= $K_to_go[$iend] );
21022             }
21023              
21024             # Note on flag '$do_not_pad':
21025             # We want to avoid a situation like this, where the aligner
21026             # inserts whitespace before the '=' to align it with a previous
21027             # '=', because otherwise the parens might become mis-aligned in a
21028             # situation like this, where the '=' has become aligned with the
21029             # previous line, pushing the opening '(' forward beyond where we
21030             # want it.
21031             #
21032             # $mkFloor::currentRoom = '';
21033             # $mkFloor::c_entry = $c->Entry(
21034             # -width => '10',
21035             # -relief => 'sunken',
21036             # ...
21037             # );
21038             #
21039             # We leave it to the aligner to decide how to do this.
21040 130 100 66     457 if ( $line == 1 && $i == $ibeg ) {
21041 50         134 $self->[_this_batch_]->[_do_not_pad_] = 1;
21042             }
21043              
21044             #--------------------------------------------
21045             # Now see what the error is and try to fix it
21046             #--------------------------------------------
21047 130         409 my $closing_index = $indentation->get_closed();
21048 130         328 my $predicted_pos = $indentation->get_spaces();
21049              
21050             # Find actual position:
21051 130         205 my $actual_pos;
21052              
21053 130 100       329 if ( $i == $ibeg ) {
21054              
21055             # Case 1: token is first character of of batch - table lookup
21056 118 100       321 if ( $line == 0 ) {
21057              
21058 7         17 $actual_pos = $predicted_pos;
21059              
21060 7         37 my ( $indent, $offset, $is_leading, $exists ) =
21061             get_saved_opening_indentation($align_seqno);
21062 7 50       24 if ( defined($indent) ) {
21063              
21064             # NOTE: we could use '1' here if no space after
21065             # opening and '2' if want space; it is hardwired at 1
21066             # like -gnu-style. But it is probably best to leave
21067             # this alone because changing it would change
21068             # formatting of much existing code without any
21069             # significant benefit.
21070 7         18 $actual_pos = get_spaces($indent) + $offset + 1;
21071             }
21072             }
21073              
21074             # Case 2: token starts a new line - use length of previous line
21075             else {
21076              
21077 111         247 my $ibegm = $ri_first->[ $line - 1 ];
21078 111         221 my $iendm = $ri_last->[ $line - 1 ];
21079 111         280 $actual_pos = total_line_length( $ibegm, $iendm );
21080              
21081             # follow -pt style
21082 111 100       442 ++$actual_pos
21083             if ( $types_to_go[ $iendm + 1 ] eq 'b' );
21084              
21085             }
21086             }
21087              
21088             # Case 3: $i>$ibeg: token is mid-line - use length to previous token
21089             else {
21090              
21091 12         41 $actual_pos = total_line_length( $ibeg, $i - 1 );
21092              
21093             # for mid-line token, we must check to see if all
21094             # additional lines have continuation indentation,
21095             # and remove it if so. Otherwise, we do not get
21096             # good alignment.
21097 12 100       34 if ( $closing_index > $iend ) {
21098 10         20 my $ibeg_next = $ri_first->[ $line + 1 ];
21099 10 100       30 if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
21100 9         25 $self->undo_lp_ci( $line, $i, $closing_index,
21101             $ri_first, $ri_last );
21102             }
21103             }
21104             }
21105              
21106             # By how many spaces (plus or minus) would we need to increase the
21107             # indentation to get alignment with the opening token?
21108 130         265 my $move_right = $actual_pos - $predicted_pos;
21109              
21110 130         215 if (DEBUG_CORRECT_LP) {
21111             my $tok = substr( $tokens_to_go[$i], 0, 8 );
21112             my $avail = $self->get_available_spaces_to_go($ibeg);
21113             print
21114             "CORRECT_LP for seq=$align_seqno, predicted pos=$predicted_pos actual=$actual_pos => move right=$move_right available=$avail i=$i max=$max_index_to_go tok=$tok\n";
21115             }
21116              
21117             # nothing more to do if no error to correct (gnu2.t)
21118 130 100       344 if ( $move_right == 0 ) {
21119 52         203 $indentation->set_recoverable_spaces($move_right);
21120 52         154 next;
21121             }
21122              
21123             # Get any collapsed length defined for -xlp
21124             my $collapsed_length =
21125 78         173 $self->[_rcollapsed_length_by_seqno_]->{$align_seqno};
21126 78 100       224 $collapsed_length = 0 unless ( defined($collapsed_length) );
21127              
21128 78         114 if (DEBUG_CORRECT_LP) {
21129             print
21130             "CORRECT_LP for seq=$align_seqno, collapsed length is $collapsed_length\n";
21131             }
21132              
21133             # if we have not seen closure for this indentation in this batch,
21134             # and do not have a collapsed length estimate, we can only pass on
21135             # a request to the vertical aligner
21136 78 100 100     294 if ( $closing_index < 0 && !$collapsed_length ) {
21137 10         53 $indentation->set_recoverable_spaces($move_right);
21138 10         27 next;
21139             }
21140              
21141             # If necessary, look ahead to see if there is really any leading
21142             # whitespace dependent on this whitespace, and also find the
21143             # longest line using this whitespace. Since it is always safe to
21144             # move left if there are no dependents, we only need to do this if
21145             # we may have dependent nodes or need to move right.
21146              
21147 68         207 my $have_child = $indentation->get_have_child();
21148 68         110 my %saw_indentation;
21149 68         124 my $line_count = 1;
21150 68         234 $saw_indentation{$indentation} = $indentation;
21151              
21152             # How far can we move right before we hit the limit?
21153             # let $right_margen = the number of spaces that we can increase
21154             # the current indentation before hitting the maximum line length.
21155 68         124 my $right_margin = 0;
21156              
21157 68 100 100     262 if ( $have_child || $move_right > 0 ) {
21158 67         115 $have_child = 0;
21159              
21160             # include estimated collapsed length for incomplete containers
21161 67         114 my $max_length = 0;
21162 67 100       183 if ( $Kc > $K_to_go[$max_index_to_go] ) {
21163 3         9 $max_length = $collapsed_length + $predicted_pos;
21164             }
21165              
21166 67 100       173 if ( $i == $ibeg ) {
21167 61         136 my $length = total_line_length( $ibeg, $iend );
21168 61 100       198 if ( $length > $max_length ) { $max_length = $length }
  60         143  
21169             }
21170              
21171             # look ahead at the rest of the lines of this batch..
21172 67         171 foreach my $line_t ( $line + 1 .. $max_line ) {
21173 523         747 my $ibeg_t = $ri_first->[$line_t];
21174 523         700 my $iend_t = $ri_last->[$line_t];
21175 523 100       973 last if ( $closing_index <= $ibeg_t );
21176              
21177             # remember all different indentation objects
21178 463         659 my $indentation_t = $leading_spaces_to_go[$ibeg_t];
21179 463         893 $saw_indentation{$indentation_t} = $indentation_t;
21180 463         596 $line_count++;
21181              
21182             # remember longest line in the group
21183 463         763 my $length_t = total_line_length( $ibeg_t, $iend_t );
21184 463 100       1098 if ( $length_t > $max_length ) {
21185 96         201 $max_length = $length_t;
21186             }
21187             }
21188              
21189             $right_margin =
21190 67         156 $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] -
21191             $max_length;
21192 67 50       171 if ( $right_margin < 0 ) { $right_margin = 0 }
  0         0  
21193             }
21194              
21195             my $first_line_comma_count =
21196 68         243 grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
  541         1041  
21197 68         248 my $comma_count = $indentation->get_comma_count();
21198 68         204 my $arrow_count = $indentation->get_arrow_count();
21199              
21200             # This is a simple approximate test for vertical alignment:
21201             # if we broke just after an opening paren, brace, bracket,
21202             # and there are 2 or more commas in the first line,
21203             # and there are no '=>'s,
21204             # then we are probably vertically aligned. We could set
21205             # an exact flag in sub break_lists, but this is good
21206             # enough.
21207 68         159 my $indentation_count = keys %saw_indentation;
21208 68   66     399 my $is_vertically_aligned =
21209             ( $i == $ibeg
21210             && $first_line_comma_count > 1
21211             && $indentation_count == 1
21212             && ( $arrow_count == 0 || $arrow_count == $line_count ) );
21213              
21214             # Make the move if possible ..
21215 68 100 100     699 if (
      100        
      66        
      100        
      66        
      100        
21216              
21217             # we can always move left
21218             $move_right < 0
21219              
21220             # -xlp
21221              
21222             # incomplete container
21223             || ( $rOpts_extended_line_up_parentheses
21224             && $Kc > $K_to_go[$max_index_to_go] )
21225             || $closing_index < 0
21226              
21227             # but we should only move right if we are sure it will
21228             # not spoil vertical alignment
21229             || ( $comma_count == 0 )
21230             || ( $comma_count > 0 && !$is_vertically_aligned )
21231             )
21232             {
21233 62 100       261 my $move =
21234             ( $move_right <= $right_margin )
21235             ? $move_right
21236             : $right_margin;
21237              
21238 62         93 if (DEBUG_CORRECT_LP) {
21239             print
21240             "CORRECT_LP for seq=$align_seqno, moving $move spaces\n";
21241             }
21242              
21243 62         225 foreach ( keys %saw_indentation ) {
21244 237         752 $saw_indentation{$_}
21245             ->permanently_decrease_available_spaces( -$move );
21246             }
21247             }
21248              
21249             # Otherwise, record what we want and the vertical aligner
21250             # will try to recover it.
21251             else {
21252 6         37 $indentation->set_recoverable_spaces($move_right);
21253             }
21254             } ## end loop over tokens in a line
21255             } ## end loop over lines
21256 134         358 return;
21257             } ## end sub correct_lp_indentation
21258              
21259             sub correct_lp_indentation_pass_1 {
21260 5     5 0 22 my ( $self, $ri_first, $ri_last, $ri_starting_one_line_block ) = @_;
21261              
21262             # So some of the one-line blocks may be too long when given -lp
21263             # indentation. We will fix that now if possible, using the list of these
21264             # closing block indexes.
21265              
21266 5         11 my @ilist = @{$ri_starting_one_line_block};
  5         19  
21267 5 50       16 return unless (@ilist);
21268              
21269 5         13 my $max_line = @{$ri_first} - 1;
  5         13  
21270 5         17 my $inext = shift(@ilist);
21271              
21272             # loop over lines, checking length of each with a one-line block
21273 5         11 my ( $ibeg, $iend );
21274 5         20 foreach my $line ( 0 .. $max_line ) {
21275 15         40 $iend = $ri_last->[$line];
21276 15 100       35 next if ( $inext > $iend );
21277 9         33 $ibeg = $ri_first->[$line];
21278              
21279             # This is just for lines with indentation objects (c098)
21280 9 100       63 my $excess =
21281             ref( $leading_spaces_to_go[$ibeg] )
21282             ? $self->excess_line_length( $ibeg, $iend )
21283             : 0;
21284              
21285 9 50       61 if ( $excess > 0 ) {
21286 0         0 my $available_spaces = $self->get_available_spaces_to_go($ibeg);
21287              
21288 0 0       0 if ( $available_spaces > 0 ) {
21289 0         0 my $delete_want = min( $available_spaces, $excess );
21290 0         0 my $deleted_spaces =
21291             $self->reduce_lp_indentation( $ibeg, $delete_want );
21292 0         0 $available_spaces = $self->get_available_spaces_to_go($ibeg);
21293             }
21294             }
21295              
21296             # skip forward to next one-line block to check
21297 9         31 while (@ilist) {
21298 4         9 $inext = shift @ilist;
21299 4 50       14 next if ( $inext <= $iend );
21300 4 50       9 last if ( $inext > $iend );
21301             }
21302 9 100       28 last if ( $inext <= $iend );
21303             }
21304 5         18 return;
21305             } ## end sub correct_lp_indentation_pass_1
21306              
21307             sub undo_lp_ci {
21308              
21309             # If there is a single, long parameter within parens, like this:
21310             #
21311             # $self->command( "/msg "
21312             # . $infoline->chan
21313             # . " You said $1, but did you know that it's square was "
21314             # . $1 * $1 . " ?" );
21315             #
21316             # we can remove the continuation indentation of the 2nd and higher lines
21317             # to achieve this effect, which is more pleasing:
21318             #
21319             # $self->command("/msg "
21320             # . $infoline->chan
21321             # . " You said $1, but did you know that it's square was "
21322             # . $1 * $1 . " ?");
21323              
21324 9     9 0 21 my ( $self, $line_open, $i_start, $closing_index, $ri_first, $ri_last ) =
21325             @_;
21326 9         10 my $max_line = @{$ri_first} - 1;
  9         16  
21327              
21328             # must be multiple lines
21329 9 50       20 return unless $max_line > $line_open;
21330              
21331 9         20 my $lev_start = $levels_to_go[$i_start];
21332 9         12 my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
21333              
21334             # see if all additional lines in this container have continuation
21335             # indentation
21336 9         28 my $line_1 = 1 + $line_open;
21337 9         13 my $n = $line_open;
21338              
21339 9         22 while ( ++$n <= $max_line ) {
21340 9         24 my $ibeg = $ri_first->[$n];
21341 9         15 my $iend = $ri_last->[$n];
21342 9 50       22 if ( $ibeg eq $closing_index ) { $n--; last }
  0         0  
  0         0  
21343 9 50       24 return if ( $lev_start != $levels_to_go[$ibeg] );
21344 9 50       31 return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
21345 0 0       0 last if ( $closing_index <= $iend );
21346             }
21347              
21348             # we can reduce the indentation of all continuation lines
21349 0         0 my $continuation_line_count = $n - $line_open;
21350 0         0 @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
  0         0  
21351             (0) x ($continuation_line_count);
21352 0         0 @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
21353 0         0 @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
  0         0  
21354 0         0 return;
21355             } ## end sub undo_lp_ci
21356              
21357             ################################################
21358             # CODE SECTION 10: Code to break long statements
21359             ################################################
21360              
21361 38     38   464 use constant DEBUG_BREAK_LINES => 0;
  38         174  
  38         36073  
21362              
21363             sub break_long_lines {
21364              
21365             #-----------------------------------------------------------
21366             # Break a batch of tokens into lines which do not exceed the
21367             # maximum line length.
21368             #-----------------------------------------------------------
21369              
21370 1110     1110 0 2979 my ( $self, $saw_good_break, $rcolon_list, $rbond_strength_bias ) = @_;
21371              
21372             # Input parameters:
21373             # $saw_good_break - a flag set by break_lists
21374             # $rcolon_list - ref to a list of all the ? and : tokens in the batch,
21375             # in order.
21376             # $rbond_strength_bias - small bond strength bias values set by break_lists
21377              
21378             # Output: returns references to the arrays:
21379             # @i_first
21380             # @i_last
21381             # which contain the indexes $i of the first and last tokens on each
21382             # line.
21383              
21384             # In addition, the array:
21385             # $forced_breakpoint_to_go[$i]
21386             # may be updated to be =1 for any index $i after which there must be
21387             # a break. This signals later routines not to undo the breakpoint.
21388              
21389             # Method:
21390             # This routine is called if a statement is longer than the maximum line
21391             # length, or if a preliminary scanning located desirable break points.
21392             # Sub break_lists has already looked at these tokens and set breakpoints
21393             # (in array $forced_breakpoint_to_go[$i]) where it wants breaks (for
21394             # example after commas, after opening parens, and before closing parens).
21395             # This routine will honor these breakpoints and also add additional
21396             # breakpoints as necessary to keep the line length below the maximum
21397             # requested. It bases its decision on where the 'bond strength' is
21398             # lowest.
21399              
21400 1110         2229 my @i_first = (); # the first index to output
21401 1110         1925 my @i_last = (); # the last index to output
21402 1110         2031 my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
21403 1110 100       3029 if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
  1         3  
21404              
21405             # Get the 'bond strengths' between tokens
21406 1110         4711 my $rbond_strength_to_go = $self->set_bond_strengths();
21407              
21408             # Add any comma bias set by break_lists
21409 1110 100       2244 if ( @{$rbond_strength_bias} ) {
  1110         3356  
21410 13         53 foreach my $item ( @{$rbond_strength_bias} ) {
  13         55  
21411 31         62 my ( $ii, $bias ) = @{$item};
  31         78  
21412 31 50 33     138 if ( $ii >= 0 && $ii <= $max_index_to_go ) {
21413 31         88 $rbond_strength_to_go->[$ii] += $bias;
21414             }
21415 0         0 elsif (DEVEL_MODE) {
21416             my $KK = $K_to_go[0];
21417             my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_];
21418             Fault(
21419             "Bad bond strength bias near line $lno: i=$ii must be between 0 and $max_index_to_go\n"
21420             );
21421             }
21422             }
21423             }
21424              
21425 1110         2394 my $imin = 0;
21426 1110         2008 my $imax = $max_index_to_go;
21427 1110 50       3384 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
  0         0  
21428 1110 50       3268 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
  0         0  
21429              
21430 1110         1955 my $i_begin = $imin;
21431 1110         1939 my $last_break_strength = NO_BREAK;
21432 1110         1752 my $i_last_break = -1;
21433 1110         1745 my $line_count = 0;
21434              
21435             # see if any ?/:'s are in order
21436 1110         1818 my $colons_in_order = 1;
21437 1110         1958 my $last_tok = EMPTY_STRING;
21438 1110         1729 foreach ( @{$rcolon_list} ) {
  1110         2968  
21439 205 100       605 if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
  9         38  
  9         25  
21440 196         391 $last_tok = $_;
21441             }
21442              
21443             # This is a sufficient but not necessary condition for colon chain
21444 1110   100     3225 my $is_colon_chain = ( $colons_in_order && @{$rcolon_list} > 2 );
21445              
21446             #------------------------------------------
21447             # BEGINNING of main loop to set breakpoints
21448             # Keep iterating until we reach the end
21449             #------------------------------------------
21450 1110         3268 while ( $i_begin <= $imax ) {
21451              
21452             #------------------------------------------------------------------
21453             # Find the best next breakpoint based on token-token bond strengths
21454             #------------------------------------------------------------------
21455 3950         11374 my ( $i_lowest, $lowest_strength, $leading_alignment_type, $Msg ) =
21456             $self->break_lines_inner_loop(
21457              
21458             $i_begin,
21459             $i_last_break,
21460             $imax,
21461             $last_break_strength,
21462             $line_count,
21463             $rbond_strength_to_go,
21464             $saw_good_break,
21465              
21466             );
21467              
21468             # Now make any adjustments required by ternary breakpoint rules
21469 3950 100       7091 if ( @{$rcolon_list} ) {
  3950         8586  
21470              
21471 439         815 my $i_next_nonblank = $inext_to_go[$i_lowest];
21472              
21473             #-------------------------------------------------------
21474             # ?/: rule 1 : if a break here will separate a '?' on this
21475             # line from its closing ':', then break at the '?' instead.
21476             # But do not break a sequential chain of ?/: statements
21477             #-------------------------------------------------------
21478 439 100       1023 if ( !$is_colon_chain ) {
21479 383         1001 foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
21480 1835 100       3928 next unless ( $tokens_to_go[$i] eq '?' );
21481              
21482             # do not break if statement is broken by side comment
21483             next
21484 66 50 33     454 if ( $tokens_to_go[$max_index_to_go] eq '#'
21485             && terminal_type_i( 0, $max_index_to_go ) !~
21486             /^[\;\}]$/ );
21487              
21488             # no break needed if matching : is also on the line
21489             next
21490 66 100 66     496 if ( defined( $mate_index_to_go[$i] )
21491             && $mate_index_to_go[$i] <= $i_next_nonblank );
21492              
21493 5         16 $i_lowest = $i;
21494 5 100       27 if ( $want_break_before{'?'} ) { $i_lowest-- }
  4         9  
21495 5         11 $i_next_nonblank = $inext_to_go[$i_lowest];
21496 5         14 last;
21497             }
21498             }
21499              
21500 439         881 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
21501              
21502             #-------------------------------------------------------------
21503             # ?/: rule 2 : if we break at a '?', then break at its ':'
21504             #
21505             # Note: this rule is also in sub break_lists to handle a break
21506             # at the start and end of a line (in case breaks are dictated
21507             # by side comments).
21508             #-------------------------------------------------------------
21509 439 100       1494 if ( $next_nonblank_type eq '?' ) {
    100          
21510 32         193 $self->set_closing_breakpoint($i_next_nonblank);
21511             }
21512             elsif ( $types_to_go[$i_lowest] eq '?' ) {
21513 4         20 $self->set_closing_breakpoint($i_lowest);
21514             }
21515              
21516             #--------------------------------------------------------
21517             # ?/: rule 3 : if we break at a ':' then we save
21518             # its location for further work below. We may need to go
21519             # back and break at its '?'.
21520             #--------------------------------------------------------
21521 439 100       1396 if ( $next_nonblank_type eq ':' ) {
    100          
21522 88         250 push @i_colon_breaks, $i_next_nonblank;
21523             }
21524             elsif ( $types_to_go[$i_lowest] eq ':' ) {
21525 4         27 push @i_colon_breaks, $i_lowest;
21526             }
21527              
21528             # here we should set breaks for all '?'/':' pairs which are
21529             # separated by this line
21530             }
21531              
21532             # guard against infinite loop (should never happen)
21533 3950 50       8687 if ( $i_lowest <= $i_last_break ) {
21534 0         0 DEVEL_MODE
21535             && Fault("i_lowest=$i_lowest <= i_last_break=$i_last_break\n");
21536 0         0 $i_lowest = $imax;
21537             }
21538              
21539             DEBUG_BREAK_LINES
21540 3950         5446 && print STDOUT
21541             "BREAK: best is i = $i_lowest strength = $lowest_strength;\nReason>> $Msg\n";
21542              
21543 3950         5757 $line_count++;
21544              
21545             # save this line segment, after trimming blanks at the ends
21546 3950 50       10521 push( @i_first,
21547             ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
21548 3950 100       8716 push( @i_last,
21549             ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
21550              
21551             # set a forced breakpoint at a container opening, if necessary, to
21552             # signal a break at a closing container. Excepting '(' for now.
21553 3950 100 100     15889 if (
      100        
21554             (
21555             $tokens_to_go[$i_lowest] eq '{'
21556             || $tokens_to_go[$i_lowest] eq '['
21557             )
21558             && !$forced_breakpoint_to_go[$i_lowest]
21559             )
21560             {
21561 10         69 $self->set_closing_breakpoint($i_lowest);
21562             }
21563              
21564             # get ready to find the next breakpoint
21565 3950         5970 $last_break_strength = $lowest_strength;
21566 3950         5708 $i_last_break = $i_lowest;
21567 3950         5834 $i_begin = $i_lowest + 1;
21568              
21569             # skip past a blank
21570 3950 100 100     14132 if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
21571 2295         5074 $i_begin++;
21572             }
21573             }
21574              
21575             #-------------------------------------------------
21576             # END of main loop to set continuation breakpoints
21577             #-------------------------------------------------
21578              
21579             #-----------------------------------------------------------
21580             # ?/: rule 4 -- if we broke at a ':', then break at
21581             # corresponding '?' unless this is a chain of ?: expressions
21582             #-----------------------------------------------------------
21583 1110 100       3634 if (@i_colon_breaks) {
21584 49   100     353 my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
21585 49 100       217 if ( !$is_chain ) {
21586 38         241 $self->do_colon_breaks( \@i_colon_breaks, \@i_first, \@i_last );
21587             }
21588             }
21589              
21590 1110         5180 return ( \@i_first, \@i_last, $rbond_strength_to_go );
21591             } ## end sub break_long_lines
21592              
21593             # small bond strength numbers to help break ties
21594 38     38   397 use constant TINY_BIAS => 0.0001;
  38         121  
  38         2699  
21595 38     38   332 use constant MAX_BIAS => 0.001;
  38         140  
  38         73795  
21596              
21597             sub break_lines_inner_loop {
21598              
21599             #-----------------------------------------------------------------
21600             # Find the best next breakpoint in index range ($i_begin .. $imax)
21601             # which, if possible, does not exceed the maximum line length.
21602             #-----------------------------------------------------------------
21603              
21604             my (
21605 3950     3950 0 9279 $self, #
21606              
21607             $i_begin,
21608             $i_last_break,
21609             $imax,
21610             $last_break_strength,
21611             $line_count,
21612             $rbond_strength_to_go,
21613             $saw_good_break,
21614              
21615             ) = @_;
21616              
21617             # Given:
21618             # $i_begin = first index of range
21619             # $i_last_break = index of previous break
21620             # $imax = last index of range
21621             # $last_break_strength = bond strength of last break
21622             # $line_count = number of output lines so far
21623             # $rbond_strength_to_go = ref to array of bond strengths
21624             # $saw_good_break = true if old line had a good breakpoint
21625              
21626             # Returns:
21627             # $i_lowest = index of best breakpoint
21628             # $lowest_strength = 'bond strength' at best breakpoint
21629             # $leading_alignment_type = special token type after break
21630             # $Msg = string of debug info
21631              
21632 3950         6415 my $Msg = EMPTY_STRING;
21633 3950         5705 my $strength = NO_BREAK;
21634 3950         6415 my $i_test = $i_begin - 1;
21635 3950         5626 my $i_lowest = -1;
21636 3950         6196 my $starting_sum = $summed_lengths_to_go[$i_begin];
21637 3950         5759 my $lowest_strength = NO_BREAK;
21638 3950         5936 my $leading_alignment_type = EMPTY_STRING;
21639 3950         8834 my $leading_spaces = leading_spaces_to_go($i_begin);
21640 3950         7953 my $maximum_line_length =
21641             $maximum_line_length_at_level[ $levels_to_go[$i_begin] ];
21642             DEBUG_BREAK_LINES
21643 3950         5439 && do {
21644             $Msg .= "updating leading spaces to be $leading_spaces at i=$i_begin\n";
21645             };
21646              
21647             # Do not separate an isolated bare word from an opening paren.
21648             # Alternate Fix #2 for issue b1299. This waits as long as possible
21649             # to make the decision.
21650 3950 100 100     12878 if ( $types_to_go[$i_begin] eq 'i'
21651             && substr( $tokens_to_go[$i_begin], 0, 1 ) =~ /\w/ )
21652             {
21653 56         188 my $i_next_nonblank = $inext_to_go[$i_begin];
21654 56 100       256 if ( $tokens_to_go[$i_next_nonblank] eq '(' ) {
21655 6         21 $rbond_strength_to_go->[$i_begin] = NO_BREAK;
21656             }
21657             }
21658              
21659             # Avoid a break which would strand a single punctuation
21660             # token. For example, we do not want to strand a leading
21661             # '.' which is followed by a long quoted string.
21662             # But note that we do want to do this with -extrude (l=1)
21663             # so please test any changes to this code on -extrude.
21664 3950 100 100     20296 if (
      100        
      100        
      100        
      100        
21665             ( $i_begin < $imax )
21666             && ( $tokens_to_go[$i_begin] eq $types_to_go[$i_begin] )
21667             && !$forced_breakpoint_to_go[$i_begin]
21668             && !(
21669              
21670             # Allow break after a closing eval brace. This is an
21671             # approximate way to simulate a forced breakpoint made in
21672             # Section B below. No differences have been found, but if
21673             # necessary the full logic of Section B could be used here
21674             # (see c165).
21675             $tokens_to_go[$i_begin] eq '}'
21676             && $block_type_to_go[$i_begin]
21677             && $block_type_to_go[$i_begin] eq 'eval'
21678             )
21679             && (
21680             (
21681             $leading_spaces +
21682             $summed_lengths_to_go[ $i_begin + 1 ] -
21683             $starting_sum
21684             ) < $maximum_line_length
21685             )
21686             )
21687             {
21688 521         1717 $i_test = min( $imax, $inext_to_go[$i_begin] ) - 1;
21689 521         822 DEBUG_BREAK_LINES && do {
21690             $Msg .= " :skip ahead at i=$i_test";
21691             };
21692             }
21693              
21694             #-------------------------------------------------------
21695             # Begin INNER_LOOP over the indexes in the _to_go arrays
21696             #-------------------------------------------------------
21697 3950         8486 while ( ++$i_test <= $imax ) {
21698 33260         47728 my $type = $types_to_go[$i_test];
21699 33260         44683 my $token = $tokens_to_go[$i_test];
21700 33260         43420 my $i_next_nonblank = $inext_to_go[$i_test];
21701 33260         44354 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
21702 33260         45294 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
21703 33260         45275 my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
21704              
21705             #---------------------------------------------------------------
21706             # Section A: Get token-token strength and handle any adjustments
21707             #---------------------------------------------------------------
21708              
21709             # adjustments to the previous bond strength may have been made, and
21710             # we must keep the bond strength of a token and its following blank
21711             # the same;
21712 33260         43819 my $last_strength = $strength;
21713 33260         46177 $strength = $rbond_strength_to_go->[$i_test];
21714 33260 100       57786 if ( $type eq 'b' ) { $strength = $last_strength }
  10914         15359  
21715              
21716             # reduce strength a bit to break ties at an old comma breakpoint ...
21717 33260 100 100     83773 if (
      66        
      100        
      100        
      100        
21718              
21719             $old_breakpoint_to_go[$i_test]
21720              
21721             # Patch: limited to just commas to avoid blinking states
21722             && $type eq ','
21723              
21724             # which is a 'good' breakpoint, meaning ...
21725             # we don't want to break before it
21726             && !$want_break_before{$type}
21727              
21728             # and either we want to break before the next token
21729             # or the next token is not short (i.e. not a '*', '/' etc.)
21730             && $i_next_nonblank <= $imax
21731             && ( $want_break_before{$next_nonblank_type}
21732             || $token_lengths_to_go[$i_next_nonblank] > 2
21733             || $next_nonblank_type eq ','
21734             || $is_opening_type{$next_nonblank_type} )
21735             )
21736             {
21737 503         945 $strength -= TINY_BIAS;
21738 503         722 DEBUG_BREAK_LINES && do { $Msg .= " :-bias at i=$i_test" };
21739             }
21740              
21741             # otherwise increase strength a bit if this token would be at the
21742             # maximum line length. This is necessary to avoid blinking
21743             # in the above example when the -iob flag is added.
21744             else {
21745 32757         50114 my $len =
21746             $leading_spaces +
21747             $summed_lengths_to_go[ $i_test + 1 ] -
21748             $starting_sum;
21749 32757 100       57086 if ( $len >= $maximum_line_length ) {
21750 323         645 $strength += TINY_BIAS;
21751 323         553 DEBUG_BREAK_LINES && do { $Msg .= " :+bias at i=$i_test" };
21752             }
21753             }
21754              
21755             #-------------------------------------
21756             # Section B: Handle forced breakpoints
21757             #-------------------------------------
21758 33260         41657 my $must_break;
21759              
21760             # Force an immediate break at certain operators
21761             # with lower level than the start of the line,
21762             # unless we've already seen a better break.
21763             #
21764             # Note on an issue with a preceding '?' :
21765              
21766             # There may be a break at a previous ? if the line is long. Because
21767             # of this we do not want to force a break if there is a previous ? on
21768             # this line. For now the best way to do this is to not break if we
21769             # have seen a lower strength point, which is probably a ?.
21770             #
21771             # Example of unwanted breaks we are avoiding at a '.' following a ?
21772             # from pod2html using perltidy -gnu:
21773             # )
21774             # ? "\n&lt;A NAME=\""
21775             # . $value
21776             # . "\"&gt;\n$text&lt;/A&gt;\n"
21777             # : "\n$type$pod2.html\#" . $value . "\"&gt;$text&lt;\/A&gt;\n";
21778 33260 100 100     88627 if (
      100        
      100        
21779             ( $strength <= $lowest_strength )
21780             && ( $nesting_depth_to_go[$i_begin] >
21781             $nesting_depth_to_go[$i_next_nonblank] )
21782             && (
21783             $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
21784             || (
21785             $next_nonblank_type eq 'k'
21786              
21787             ## /^(and|or)$/ # note: includes 'xor' now
21788             && $is_and_or{$next_nonblank_token}
21789             )
21790             )
21791             )
21792             {
21793 28         127 $self->set_forced_breakpoint($i_next_nonblank);
21794             DEBUG_BREAK_LINES
21795 28         51 && do { $Msg .= " :Forced break at i=$i_next_nonblank" };
21796             }
21797              
21798 33260 100 100     171202 if (
      100        
      66        
      100        
      66        
      100        
      100        
      100        
      100        
      100        
21799              
21800             # Try to put a break where requested by break_lists
21801             $forced_breakpoint_to_go[$i_test]
21802              
21803             # break between ) { in a continued line so that the '{' can
21804             # be outdented
21805             # See similar logic in break_lists which catches instances
21806             # where a line is just something like ') {'. We have to
21807             # be careful because the corresponding block keyword might
21808             # not be on the first line, such as 'for' here:
21809             #
21810             # eval {
21811             # for ("a") {
21812             # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
21813             # }
21814             # };
21815             #
21816             || (
21817             $line_count
21818             && ( $token eq ')' )
21819             && ( $next_nonblank_type eq '{' )
21820             && ($next_nonblank_block_type)
21821             && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
21822              
21823             # RT #104427: Dont break before opening sub brace because
21824             # sub block breaks handled at higher level, unless
21825             # it looks like the preceding list is long and broken
21826             && !(
21827              
21828             (
21829             $next_nonblank_block_type =~ /$SUB_PATTERN/
21830             || $next_nonblank_block_type =~ /$ASUB_PATTERN/
21831             )
21832             && ( $nesting_depth_to_go[$i_begin] ==
21833             $nesting_depth_to_go[$i_next_nonblank] )
21834             )
21835              
21836             && !$rOpts_opening_brace_always_on_right
21837             )
21838              
21839             # There is an implied forced break at a terminal opening brace
21840             || ( ( $type eq '{' ) && ( $i_test == $imax ) )
21841             )
21842             {
21843              
21844             # Forced breakpoints must sometimes be overridden, for example
21845             # because of a side comment causing a NO_BREAK. It is easier
21846             # to catch this here than when they are set.
21847 2702 50       6506 if ( $strength < NO_BREAK - 1 ) {
21848 2702         4428 $strength = $lowest_strength - TINY_BIAS;
21849 2702         4013 $must_break = 1;
21850             DEBUG_BREAK_LINES
21851 2702         3773 && do { $Msg .= " :set must_break at i=$i_next_nonblank" };
21852             }
21853             }
21854              
21855             # quit if a break here would put a good terminal token on
21856             # the next line and we already have a possible break
21857 33260 100 100     105678 if (
      100        
      100        
21858             ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
21859             && !$must_break
21860             && (
21861             (
21862             $leading_spaces +
21863             $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
21864             $starting_sum
21865             ) > $maximum_line_length
21866             )
21867             )
21868             {
21869 45 100       174 if ( $i_lowest >= 0 ) {
21870 11         22 DEBUG_BREAK_LINES && do {
21871             $Msg .= " :quit at good terminal='$next_nonblank_type'";
21872             };
21873 11         24 last;
21874             }
21875             }
21876              
21877             #------------------------------------------------------------
21878             # Section C: Look for the lowest bond strength between tokens
21879             #------------------------------------------------------------
21880 33249 100 100     77856 if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) ) {
21881              
21882             # break at previous best break if it would have produced
21883             # a leading alignment of certain common tokens, and it
21884             # is different from the latest candidate break
21885 14250 100       24411 if ($leading_alignment_type) {
21886 108         190 DEBUG_BREAK_LINES && do {
21887             $Msg .=
21888             " :last at leading_alignment='$leading_alignment_type'";
21889             };
21890 108         211 last;
21891             }
21892              
21893             # Force at least one breakpoint if old code had good
21894             # break It is only called if a breakpoint is required or
21895             # desired. This will probably need some adjustments
21896             # over time. A goal is to try to be sure that, if a new
21897             # side comment is introduced into formatted text, then
21898             # the same breakpoints will occur. scbreak.t
21899 14142 50 100     30305 if (
      100        
      66        
      100        
      66        
      66        
      33        
21900             $i_test == $imax # we are at the end
21901             && !$forced_breakpoint_count
21902             && $saw_good_break # old line had good break
21903             && $type =~ /^[#;\{]$/ # and this line ends in
21904             # ';' or side comment
21905             && $i_last_break < 0 # and we haven't made a break
21906             && $i_lowest >= 0 # and we saw a possible break
21907             && $i_lowest < $imax - 1 # (but not just before this ;)
21908             && $strength - $lowest_strength < 0.5 * WEAK # and it's good
21909             )
21910             {
21911              
21912 6         12 DEBUG_BREAK_LINES && do {
21913             $Msg .= " :last at good old break\n";
21914             };
21915 6         12 last;
21916             }
21917              
21918             # Do not skip past an important break point in a short final
21919             # segment. For example, without this check we would miss the
21920             # break at the final / in the following code:
21921             #
21922             # $depth_stop =
21923             # ( $tau * $mass_pellet * $q_0 *
21924             # ( 1. - exp( -$t_stop / $tau ) ) -
21925             # 4. * $pi * $factor * $k_ice *
21926             # ( $t_melt - $t_ice ) *
21927             # $r_pellet *
21928             # $t_stop ) /
21929             # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
21930             #
21931 14136 100 100     43545 if (
      66        
      100        
      100        
      66        
21932             $line_count > 2
21933             && $i_lowest >= 0 # and we saw a possible break
21934             && $i_lowest < $i_test
21935             && $i_test > $imax - 2
21936             && $nesting_depth_to_go[$i_begin] >
21937             $nesting_depth_to_go[$i_lowest]
21938             && $lowest_strength < $last_break_strength - .5 * WEAK
21939             )
21940             {
21941             # Make this break for math operators for now
21942 6         18 my $ir = $inext_to_go[$i_lowest];
21943 6         29 my $il = iprev_to_go($ir);
21944 6 100 100     64 if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
21945             || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ )
21946             {
21947 3         6 DEBUG_BREAK_LINES && do {
21948             $Msg .= " :last-noskip_short";
21949             };
21950 3         8 last;
21951             }
21952             }
21953              
21954             # Update the minimum bond strength location
21955 14133         19518 $lowest_strength = $strength;
21956 14133         18246 $i_lowest = $i_test;
21957 14133 100       24559 if ($must_break) {
21958 2702         3746 DEBUG_BREAK_LINES && do {
21959             $Msg .= " :last-must_break";
21960             };
21961 2702         5043 last;
21962             }
21963              
21964             # set flags to remember if a break here will produce a
21965             # leading alignment of certain common tokens
21966 11431 100 100     40184 if ( $line_count > 0
      100        
21967             && $i_test < $imax
21968             && ( $lowest_strength - $last_break_strength <= MAX_BIAS ) )
21969             {
21970 3514         8660 my $i_last_end = iprev_to_go($i_begin);
21971 3514         6481 my $tok_beg = $tokens_to_go[$i_begin];
21972 3514         5159 my $type_beg = $types_to_go[$i_begin];
21973 3514 50 100     15956 if (
      66        
      66        
      66        
      100        
      33        
      66        
      33        
      100        
21974              
21975             # check for leading alignment of certain tokens
21976             (
21977             $tok_beg eq $next_nonblank_token
21978             && $is_chain_operator{$tok_beg}
21979             && ( $type_beg eq 'k'
21980             || $type_beg eq $tok_beg )
21981             && $nesting_depth_to_go[$i_begin] >=
21982             $nesting_depth_to_go[$i_next_nonblank]
21983             )
21984              
21985             || ( $tokens_to_go[$i_last_end] eq $token
21986             && $is_chain_operator{$token}
21987             && ( $type eq 'k' || $type eq $token )
21988             && $nesting_depth_to_go[$i_last_end] >=
21989             $nesting_depth_to_go[$i_test] )
21990             )
21991             {
21992 109         274 $leading_alignment_type = $next_nonblank_type;
21993             }
21994             }
21995             }
21996              
21997             #-----------------------------------------------------------
21998             # Section D: See if the maximum line length will be exceeded
21999             #-----------------------------------------------------------
22000              
22001             # Quit if there are no more tokens to test
22002 30430 100       51422 last if ( $i_test >= $imax );
22003              
22004             # Keep going if we have not reached the limit
22005 29783         46436 my $excess =
22006             $leading_spaces +
22007             $summed_lengths_to_go[ $i_test + 2 ] -
22008             $starting_sum -
22009             $maximum_line_length;
22010              
22011 29783 100       48334 if ( $excess < 0 ) {
    100          
22012 29205         57331 next;
22013             }
22014             elsif ( $excess == 0 ) {
22015              
22016             # To prevent blinkers we will avoid leaving a token exactly at
22017             # the line length limit unless it is the last token or one of
22018             # several "good" types.
22019             #
22020             # The following code was a blinker with -pbp before this
22021             # modification:
22022             # $last_nonblank_token eq '('
22023             # && $is_indirect_object_taker{ $paren_type
22024             # [$paren_depth] }
22025             # The issue causing the problem is that if the
22026             # term [$paren_depth] gets broken across a line then
22027             # the whitespace routine doesn't see both opening and closing
22028             # brackets and will format like '[ $paren_depth ]'. This
22029             # leads to an oscillation in length depending if we break
22030             # before the closing bracket or not.
22031 157 100 100     1469 if ( $i_test + 1 < $imax
      100        
22032             && $next_nonblank_type ne ','
22033             && !$is_closing_type{$next_nonblank_type} )
22034             {
22035             # too long
22036 115         270 DEBUG_BREAK_LINES && do {
22037             $Msg .= " :too_long";
22038             }
22039             }
22040             else {
22041 42         213 next;
22042             }
22043             }
22044             else {
22045             # too long
22046             }
22047              
22048             # a break here makes the line too long ...
22049              
22050 536         872 DEBUG_BREAK_LINES && do {
22051             my $ltok = $token;
22052             my $rtok =
22053             $next_nonblank_token ? $next_nonblank_token : EMPTY_STRING;
22054             my $i_testp2 = $i_test + 2;
22055             if ( $i_testp2 > $max_index_to_go + 1 ) {
22056             $i_testp2 = $max_index_to_go + 1;
22057             }
22058             if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
22059             if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
22060             print STDOUT
22061             "BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] str=$strength $ltok $rtok\n";
22062             };
22063              
22064             # Exception: allow one extra terminal token after exceeding line length
22065             # if it would strand this token.
22066 536 100 100     2343 if ( $i_lowest == $i_test
      100        
      100        
      100        
22067             && $token_lengths_to_go[$i_test] > 1
22068             && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
22069             && $rOpts_fuzzy_line_length )
22070             {
22071 3         12 DEBUG_BREAK_LINES && do {
22072             $Msg .= " :do_not_strand next='$next_nonblank_type'";
22073             };
22074 3         11 next;
22075             }
22076              
22077             # Stop if here if we have a solution and the line will be too long
22078 533 100       1329 if ( $i_lowest >= 0 ) {
22079 473         757 DEBUG_BREAK_LINES && do {
22080             $Msg .=
22081             " :Done-too_long && i_lowest=$i_lowest at itest=$i_test, imax=$imax";
22082             };
22083 473         856 last;
22084             }
22085             }
22086              
22087             #-----------------------------------------------------
22088             # End INNER_LOOP over the indexes in the _to_go arrays
22089             #-----------------------------------------------------
22090              
22091             # Be sure we return an index in the range ($ibegin .. $imax).
22092             # We will break at imax if no other break was found.
22093 3950 50       8674 if ( $i_lowest < 0 ) { $i_lowest = $imax }
  0         0  
22094              
22095 3950         15738 return ( $i_lowest, $lowest_strength, $leading_alignment_type, $Msg );
22096             } ## end sub break_lines_inner_loop
22097              
22098             sub do_colon_breaks {
22099 38     38 0 196 my ( $self, $ri_colon_breaks, $ri_first, $ri_last ) = @_;
22100              
22101             # using a simple method for deciding if we are in a ?/: chain --
22102             # this is a chain if it has multiple ?/: pairs all in order;
22103             # otherwise not.
22104             # Note that if line starts in a ':' we count that above as a break
22105              
22106 38         109 my @insert_list = ();
22107 38         84 foreach ( @{$ri_colon_breaks} ) {
  38         148  
22108 65         149 my $i_question = $mate_index_to_go[$_];
22109 65 100       197 if ( defined($i_question) ) {
22110 57 100       199 if ( $want_break_before{'?'} ) {
22111 56         147 $i_question = iprev_to_go($i_question);
22112             }
22113              
22114 57 50       219 if ( $i_question >= 0 ) {
22115 57         121 push @insert_list, $i_question;
22116             }
22117             }
22118 65         266 $self->insert_additional_breaks( \@insert_list, $ri_first, $ri_last );
22119             }
22120 38         104 return;
22121             } ## end sub do_colon_breaks
22122              
22123             ###########################################
22124             # CODE SECTION 11: Code to break long lists
22125             ###########################################
22126              
22127             { ## begin closure break_lists
22128              
22129             # These routines and variables are involved in finding good
22130             # places to break long lists.
22131              
22132 38     38   415 use constant DEBUG_BREAK_LISTS => 0;
  38         153  
  38         36734  
22133              
22134             my (
22135              
22136             $block_type,
22137             $current_depth,
22138             $depth,
22139             $i,
22140             $i_last_colon,
22141             $i_line_end,
22142             $i_line_start,
22143             $i_last_nonblank_token,
22144             $last_nonblank_block_type,
22145             $last_nonblank_token,
22146             $last_nonblank_type,
22147             $last_old_breakpoint_count,
22148             $minimum_depth,
22149             $next_nonblank_block_type,
22150             $next_nonblank_token,
22151             $next_nonblank_type,
22152             $old_breakpoint_count,
22153             $starting_breakpoint_count,
22154             $starting_depth,
22155             $token,
22156             $type,
22157             $type_sequence,
22158              
22159             );
22160              
22161             my (
22162              
22163             @breakpoint_stack,
22164             @breakpoint_undo_stack,
22165             @comma_index,
22166             @container_type,
22167             @identifier_count_stack,
22168             @index_before_arrow,
22169             @interrupted_list,
22170             @item_count_stack,
22171             @last_comma_index,
22172             @last_dot_index,
22173             @last_nonblank_type,
22174             @old_breakpoint_count_stack,
22175             @opening_structure_index_stack,
22176             @rfor_semicolon_list,
22177             @has_old_logical_breakpoints,
22178             @rand_or_list,
22179             @i_equals,
22180             @override_cab3,
22181             @type_sequence_stack,
22182              
22183             );
22184              
22185             # these arrays must retain values between calls
22186             my ( @has_broken_sublist, @dont_align, @want_comma_break );
22187              
22188             my $length_tol;
22189             my $lp_tol_boost;
22190              
22191             sub initialize_break_lists {
22192 555     555 0 2138 @dont_align = ();
22193 555         1794 @has_broken_sublist = ();
22194 555         1445 @want_comma_break = ();
22195              
22196             #---------------------------------------------------
22197             # Set tolerances to prevent formatting instabilities
22198             #---------------------------------------------------
22199              
22200             # Define tolerances to use when checking if closed
22201             # containers will fit on one line. This is necessary to avoid
22202             # formatting instability. The basic tolerance is based on the
22203             # following:
22204              
22205             # - Always allow for at least one extra space after a closing token so
22206             # that we do not strand a comma or semicolon. (oneline.t).
22207              
22208             # - Use an increased line length tolerance when -ci > -i to avoid
22209             # blinking states (case b923 and others).
22210 555         2531 $length_tol =
22211             1 + max( 0, $rOpts_continuation_indentation - $rOpts_indent_columns );
22212              
22213             # In addition, it may be necessary to use a few extra tolerance spaces
22214             # when -lp is used and/or when -xci is used. The history of this
22215             # so far is as follows:
22216              
22217             # FIX1: At least 3 characters were been found to be required for -lp
22218             # to fixes cases b1059 b1063 b1117.
22219              
22220             # FIX2: Further testing showed that we need a total of 3 extra spaces
22221             # when -lp is set for non-lists, and at least 2 spaces when -lp and
22222             # -xci are set.
22223             # Fixes cases b1063 b1103 b1134 b1135 b1136 b1138 b1140 b1143 b1144
22224             # b1145 b1146 b1147 b1148 b1151 b1152 b1153 b1154 b1156 b1157 b1164
22225             # b1165
22226              
22227             # FIX3: To fix cases b1169 b1170 b1171, an update was made in sub
22228             # 'find_token_starting_list' to go back before an initial blank space.
22229             # This fixed these three cases, and allowed the tolerances to be
22230             # reduced to continue to fix all other known cases of instability.
22231             # This gives the current tolerance formulation.
22232              
22233 555         1369 $lp_tol_boost = 0;
22234              
22235 555 100       2142 if ($rOpts_line_up_parentheses) {
22236              
22237             # boost tol for combination -lp -xci
22238 31 100       236 if ($rOpts_extended_continuation_indentation) {
22239 3         9 $lp_tol_boost = 2;
22240             }
22241              
22242             # boost tol for combination -lp and any -vtc > 0, but only for
22243             # non-list containers
22244             else {
22245 28         138 foreach ( keys %closing_vertical_tightness ) {
22246             next
22247 168 50       440 unless ( $closing_vertical_tightness{$_} );
22248 0         0 $lp_tol_boost = 1; # Fixes B1193;
22249 0         0 last;
22250             }
22251             }
22252             }
22253              
22254             # Define a level where list formatting becomes highly stressed and
22255             # needs to be simplified. Introduced for case b1262.
22256             # $list_stress_level = min($stress_level_alpha, $stress_level_beta + 2);
22257             # This is now '$high_stress_level'.
22258              
22259 555         1234 return;
22260             } ## end sub initialize_break_lists
22261              
22262             # routine to define essential variables when we go 'up' to
22263             # a new depth
22264             sub check_for_new_minimum_depth {
22265 2363     2363 0 5149 my ( $self, $depth_t, $seqno ) = @_;
22266 2363 50       5360 if ( $depth_t < $minimum_depth ) {
22267              
22268 2363         3763 $minimum_depth = $depth_t;
22269              
22270             # these arrays need not retain values between calls
22271 2363         4336 my $old_seqno = $type_sequence_stack[$depth_t];
22272 2363   100     8397 my $changed_seqno = !defined($old_seqno) || $old_seqno != $seqno;
22273 2363         4126 $type_sequence_stack[$depth_t] = $seqno;
22274 2363         4041 $override_cab3[$depth_t] = undef;
22275 2363 50 33     6615 if ( $rOpts_comma_arrow_breakpoints == 3 && $seqno ) {
22276 0         0 $override_cab3[$depth_t] = $self->[_roverride_cab3_]->{$seqno};
22277             }
22278 2363         3953 $breakpoint_stack[$depth_t] = $starting_breakpoint_count;
22279 2363         4351 $container_type[$depth_t] = EMPTY_STRING;
22280 2363         3795 $identifier_count_stack[$depth_t] = 0;
22281 2363         3737 $index_before_arrow[$depth_t] = -1;
22282 2363         3633 $interrupted_list[$depth_t] = 1;
22283 2363         4111 $item_count_stack[$depth_t] = 0;
22284 2363         3937 $last_nonblank_type[$depth_t] = EMPTY_STRING;
22285 2363         3645 $opening_structure_index_stack[$depth_t] = -1;
22286              
22287 2363         3657 $breakpoint_undo_stack[$depth_t] = undef;
22288 2363         3547 $comma_index[$depth_t] = undef;
22289 2363         3555 $last_comma_index[$depth_t] = undef;
22290 2363         3407 $last_dot_index[$depth_t] = undef;
22291 2363         3429 $old_breakpoint_count_stack[$depth_t] = undef;
22292 2363         3588 $has_old_logical_breakpoints[$depth_t] = 0;
22293 2363         5057 $rand_or_list[$depth_t] = [];
22294 2363         4496 $rfor_semicolon_list[$depth_t] = [];
22295 2363         3834 $i_equals[$depth_t] = -1;
22296              
22297             # these arrays must retain values between calls
22298 2363 100 100     9037 if ( $changed_seqno || !defined( $has_broken_sublist[$depth_t] ) ) {
22299 883         1787 $dont_align[$depth_t] = 0;
22300 883         1677 $has_broken_sublist[$depth_t] = 0;
22301 883         1892 $want_comma_break[$depth_t] = 0;
22302             }
22303             }
22304 2363         3930 return;
22305             } ## end sub check_for_new_minimum_depth
22306              
22307             # routine to decide which commas to break at within a container;
22308             # returns:
22309             # $bp_count = number of comma breakpoints set
22310             # $do_not_break_apart = a flag indicating if container need not
22311             # be broken open
22312             sub set_comma_breakpoints {
22313              
22314 543     543 0 1444 my ( $self, $dd, $rbond_strength_bias ) = @_;
22315 543         960 my $bp_count = 0;
22316 543         917 my $do_not_break_apart = 0;
22317              
22318             # anything to do?
22319 543 50       1493 if ( $item_count_stack[$dd] ) {
22320              
22321             # Do not break a list unless there are some non-line-ending commas.
22322             # This avoids getting different results with only non-essential
22323             # commas, and fixes b1192.
22324 543         1063 my $seqno = $type_sequence_stack[$dd];
22325              
22326             my $real_comma_count =
22327 543 50       2113 $seqno ? $self->[_rtype_count_by_seqno_]->{$seqno}->{','} : 1;
22328              
22329             # handle commas not in containers...
22330 543 100       1906 if ( $dont_align[$dd] ) {
    100          
22331 40         229 $self->do_uncontained_comma_breaks( $dd, $rbond_strength_bias );
22332             }
22333              
22334             # handle commas within containers...
22335             elsif ($real_comma_count) {
22336 497         855 my $fbc = $forced_breakpoint_count;
22337              
22338             # always open comma lists not preceded by keywords,
22339             # barewords, identifiers (that is, anything that doesn't
22340             # look like a function call)
22341 497         1837 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
22342              
22343 497         7194 $self->table_maker(
22344             {
22345             depth => $dd,
22346             i_opening_paren => $opening_structure_index_stack[$dd],
22347             i_closing_paren => $i,
22348             item_count => $item_count_stack[$dd],
22349             identifier_count => $identifier_count_stack[$dd],
22350             rcomma_index => $comma_index[$dd],
22351             next_nonblank_type => $next_nonblank_type,
22352             list_type => $container_type[$dd],
22353             interrupted => $interrupted_list[$dd],
22354             rdo_not_break_apart => \$do_not_break_apart,
22355             must_break_open => $must_break_open,
22356             has_broken_sublist => $has_broken_sublist[$dd],
22357             }
22358             );
22359 497         1736 $bp_count = $forced_breakpoint_count - $fbc;
22360 497 100       1592 $do_not_break_apart = 0 if $must_break_open;
22361             }
22362             }
22363 543         1404 return ( $bp_count, $do_not_break_apart );
22364             } ## end sub set_comma_breakpoints
22365              
22366             # These types are excluded at breakpoints to prevent blinking
22367             # Switched from excluded to included as part of fix for b1214
22368             my %is_uncontained_comma_break_included_type;
22369              
22370             BEGIN {
22371              
22372 38     38   665 my @q = qw< k R } ) ] Y Z U w i q Q .
22373             = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=>;
22374 38         26318 @is_uncontained_comma_break_included_type{@q} = (1) x scalar(@q);
22375             } ## end BEGIN
22376              
22377             sub do_uncontained_comma_breaks {
22378              
22379             # Handle commas not in containers...
22380             # This is a catch-all routine for commas that we
22381             # don't know what to do with because the don't fall
22382             # within containers. We will bias the bond strength
22383             # to break at commas which ended lines in the input
22384             # file. This usually works better than just trying
22385             # to put as many items on a line as possible. A
22386             # downside is that if the input file is garbage it
22387             # won't work very well. However, the user can always
22388             # prevent following the old breakpoints with the
22389             # -iob flag.
22390 40     40 0 138 my ( $self, $dd, $rbond_strength_bias ) = @_;
22391              
22392             # Check added for issue c131; an error here would be due to an
22393             # error initializing @comma_index when entering depth $dd.
22394 40         92 if (DEVEL_MODE) {
22395             foreach my $ii ( @{ $comma_index[$dd] } ) {
22396             if ( $ii < 0 || $ii > $max_index_to_go ) {
22397             my $KK = $K_to_go[0];
22398             my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_];
22399             Fault(<<EOM);
22400             Bad comma index near line $lno: i=$ii must be between 0 and $max_index_to_go
22401             EOM
22402             }
22403             }
22404             }
22405              
22406 40         93 my $bias = -.01;
22407 40         87 my $old_comma_break_count = 0;
22408 40         72 foreach my $ii ( @{ $comma_index[$dd] } ) {
  40         135  
22409              
22410 89 100       260 if ( $old_breakpoint_to_go[$ii] ) {
22411 34         61 $old_comma_break_count++;
22412              
22413             # Store the bias info for use by sub set_bond_strength
22414 34         68 push @{$rbond_strength_bias}, [ $ii, $bias ];
  34         94  
22415              
22416             # reduce bias magnitude to force breaks in order
22417 34         84 $bias *= 0.99;
22418             }
22419             }
22420              
22421             # Also put a break before the first comma if
22422             # (1) there was a break there in the input, and
22423             # (2) there was exactly one old break before the first comma break
22424             # (3) OLD: there are multiple old comma breaks
22425             # (3) NEW: there are one or more old comma breaks (see return example)
22426             # (4) the first comma is at the starting level ...
22427             # ... fixes cases b064 b065 b068 b210 b747
22428             # (5) the batch does not start with a ci>0 [ignore a ci change by -xci]
22429             # ... fixes b1220. If ci>0 we are in the middle of a snippet,
22430             # maybe because -boc has been forcing out previous lines.
22431              
22432             # For example, we will follow the user and break after
22433             # 'print' in this snippet:
22434             # print
22435             # "conformability (Not the same dimension)\n",
22436             # "\t", $have, " is ", text_unit($hu), "\n",
22437             # "\t", $want, " is ", text_unit($wu), "\n",
22438             # ;
22439             #
22440             # Another example, just one comma, where we will break after
22441             # the return:
22442             # return
22443             # $x * cos($a) - $y * sin($a),
22444             # $x * sin($a) + $y * cos($a);
22445              
22446             # Breaking a print statement:
22447             # print SAVEOUT
22448             # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
22449             # ( $? & 128 ) ? " -- core dumped" : "", "\n";
22450             #
22451             # But we will not force a break after the opening paren here
22452             # (causes a blinker):
22453             # $heap->{stream}->set_output_filter(
22454             # poe::filter::reference->new('myotherfreezer') ),
22455             # ;
22456             #
22457 40         154 my $i_first_comma = $comma_index[$dd]->[0];
22458 40         103 my $level_comma = $levels_to_go[$i_first_comma];
22459 40         88 my $ci_start = $ci_levels_to_go[0];
22460              
22461             # Here we want to use the value of ci before any -xci adjustment
22462 40 50 66     186 if ( $ci_start && $rOpts_extended_continuation_indentation ) {
22463 0         0 my $K0 = $K_to_go[0];
22464 0 0       0 if ( $self->[_rseqno_controlling_my_ci_]->{$K0} ) { $ci_start = 0 }
  0         0  
22465             }
22466 40 100 100     287 if ( !$ci_start
      100        
22467             && $old_breakpoint_to_go[$i_first_comma]
22468             && $level_comma == $levels_to_go[0] )
22469             {
22470 8         20 my $ibreak = -1;
22471 8         17 my $obp_count = 0;
22472 8         27 foreach my $ii ( reverse( 0 .. $i_first_comma - 1 ) ) {
22473 62 100       121 if ( $old_breakpoint_to_go[$ii] ) {
22474 3         7 $obp_count++;
22475 3 50       11 last if ( $obp_count > 1 );
22476 3 50       12 $ibreak = $ii
22477             if ( $levels_to_go[$ii] == $level_comma );
22478             }
22479             }
22480              
22481             # Changed rule from multiple old commas to just one here:
22482 8 50 66     70 if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
      66        
22483             {
22484 3         7 my $ibreak_m = $ibreak;
22485 3 50       14 $ibreak_m-- if ( $types_to_go[$ibreak_m] eq 'b' );
22486 3 50       13 if ( $ibreak_m >= 0 ) {
22487              
22488             # In order to avoid blinkers we have to be fairly
22489             # restrictive:
22490              
22491             # OLD Rules:
22492             # Rule 1: Do not to break before an opening token
22493             # Rule 2: avoid breaking at ternary operators
22494             # (see b931, which is similar to the above print example)
22495             # Rule 3: Do not break at chain operators to fix case b1119
22496             # - The previous test was '$typem !~ /^[\(\{\[L\?\:]$/'
22497              
22498             # NEW Rule, replaced above rules after case b1214:
22499             # only break at one of the included types
22500              
22501             # Be sure to test any changes to these rules against runs
22502             # with -l=0 such as the 'bbvt' test (perltidyrc_colin)
22503             # series.
22504 3         9 my $type_m = $types_to_go[$ibreak_m];
22505              
22506             # Switched from excluded to included for b1214. If necessary
22507             # the token could also be checked if type_m eq 'k'
22508 3 50       12 if ( $is_uncontained_comma_break_included_type{$type_m} ) {
22509              
22510             # Rule added to fix b1449:
22511             # Do not break before a '?' if -nbot is set
22512             # Otherwise, we may alternately arrive here and
22513             # set the break, or not, depending on the input.
22514 3         8 my $no_break;
22515 3         9 my $ibreak_p = $inext_to_go[$ibreak_m];
22516 3 50 33     15 if ( !$rOpts_break_at_old_ternary_breakpoints
22517             && $ibreak_p <= $max_index_to_go )
22518             {
22519 0         0 my $type_p = $types_to_go[$ibreak_p];
22520 0         0 $no_break = $type_p eq '?';
22521             }
22522              
22523 3 50       19 $self->set_forced_breakpoint($ibreak)
22524             if ( !$no_break );
22525             }
22526             }
22527             }
22528             }
22529 40         115 return;
22530             } ## end sub do_uncontained_comma_breaks
22531              
22532             my %is_logical_container;
22533             my %quick_filter;
22534              
22535             BEGIN {
22536 38     38   487 my @q = qw# if elsif unless while and or err not && | || ? : ! #;
22537 38         548 @is_logical_container{@q} = (1) x scalar(@q);
22538              
22539             # This filter will allow most tokens to skip past a section of code
22540 38         473 %quick_filter = %is_assignment;
22541 38         195 @q = qw# => . ; < > ~ #;
22542 38         125 push @q, ',';
22543 38         111 push @q, 'f'; # added for ';' for issue c154
22544 38         83692 @quick_filter{@q} = (1) x scalar(@q);
22545             } ## end BEGIN
22546              
22547             sub set_for_semicolon_breakpoints {
22548 2530     2530 0 4869 my ( $self, $dd ) = @_;
22549              
22550             # Set breakpoints for semicolons in C-style 'for' containers
22551 2530         3814 foreach ( @{ $rfor_semicolon_list[$dd] } ) {
  2530         6191  
22552 9         26 $self->set_forced_breakpoint($_);
22553             }
22554 2530         4684 return;
22555             } ## end sub set_for_semicolon_breakpoints
22556              
22557             sub set_logical_breakpoints {
22558 69     69 0 217 my ( $self, $dd ) = @_;
22559              
22560             # Set breakpoints at logical operators
22561 69 50 100     527 if (
      66        
22562             $item_count_stack[$dd] == 0
22563             && $is_logical_container{ $container_type[$dd] }
22564              
22565             || $has_old_logical_breakpoints[$dd]
22566             )
22567             {
22568              
22569             # Look for breaks in this order:
22570             # 0 1 2 3
22571             # or and || &&
22572 69         225 foreach my $i ( 0 .. 3 ) {
22573 210 100       495 if ( $rand_or_list[$dd][$i] ) {
22574 42         91 foreach ( @{ $rand_or_list[$dd][$i] } ) {
  42         145  
22575 67         219 $self->set_forced_breakpoint($_);
22576             }
22577              
22578             # break at any 'if' and 'unless' too
22579 42         123 foreach ( @{ $rand_or_list[$dd][4] } ) {
  42         181  
22580 5         18 $self->set_forced_breakpoint($_);
22581             }
22582 42         137 $rand_or_list[$dd] = [];
22583 42         96 last;
22584             }
22585             }
22586             }
22587 69         182 return;
22588             } ## end sub set_logical_breakpoints
22589              
22590             sub is_unbreakable_container {
22591              
22592             # never break a container of one of these types
22593             # because bad things can happen (map1.t)
22594 1233     1233 0 2330 my $dd = shift;
22595 1233         7138 return $is_sort_map_grep{ $container_type[$dd] };
22596             } ## end sub is_unbreakable_container
22597              
22598             sub break_lists {
22599              
22600 1738     1738 0 4111 my ( $self, $is_long_line, $rbond_strength_bias ) = @_;
22601              
22602             #--------------------------------------------------------------------
22603             # This routine is called once per batch, if the batch is a list, to
22604             # set line breaks so that hierarchical structure can be displayed and
22605             # so that list items can be vertically aligned. The output of this
22606             # routine is stored in the array @forced_breakpoint_to_go, which is
22607             # used by sub 'break_long_lines' to set final breakpoints. This is
22608             # probably the most complex routine in perltidy, so I have
22609             # broken it into pieces and over-commented it.
22610             #--------------------------------------------------------------------
22611              
22612 1738         3030 $starting_depth = $nesting_depth_to_go[0];
22613              
22614 1738         3480 $block_type = SPACE;
22615 1738         2887 $current_depth = $starting_depth;
22616 1738         2765 $i = -1;
22617 1738         2649 $i_last_colon = -1;
22618 1738         2660 $i_line_end = -1;
22619 1738         2639 $i_line_start = -1;
22620 1738         3148 $last_nonblank_token = ';';
22621 1738         2935 $last_nonblank_type = ';';
22622 1738         2878 $last_nonblank_block_type = SPACE;
22623 1738         2571 $last_old_breakpoint_count = 0;
22624 1738         2882 $minimum_depth = $current_depth + 1; # forces update in check below
22625 1738         2796 $old_breakpoint_count = 0;
22626 1738         2662 $starting_breakpoint_count = $forced_breakpoint_count;
22627 1738         2962 $token = ';';
22628 1738         2922 $type = ';';
22629 1738         2681 $type_sequence = EMPTY_STRING;
22630              
22631 1738         2642 my $total_depth_variation = 0;
22632 1738         2577 my $i_old_assignment_break;
22633 1738         2840 my $depth_last = $starting_depth;
22634 1738         2902 my $comma_follows_last_closing_token;
22635              
22636 1738 50       8173 $self->check_for_new_minimum_depth( $current_depth,
22637             $parent_seqno_to_go[0] )
22638             if ( $current_depth < $minimum_depth );
22639              
22640 1738         2823 my $i_want_previous_break = -1;
22641              
22642 1738         2768 my $saw_good_breakpoint;
22643              
22644             #----------------------------------------
22645             # Main loop over all tokens in this batch
22646             #----------------------------------------
22647 1738         4679 while ( ++$i <= $max_index_to_go ) {
22648 34835 100       61017 if ( $type ne 'b' ) {
22649 22133         29263 $i_last_nonblank_token = $i - 1;
22650 22133         29994 $last_nonblank_type = $type;
22651 22133         29620 $last_nonblank_token = $token;
22652 22133         29250 $last_nonblank_block_type = $block_type;
22653             }
22654 34835         48925 $type = $types_to_go[$i];
22655 34835         46838 $block_type = $block_type_to_go[$i];
22656 34835         47757 $token = $tokens_to_go[$i];
22657 34835         46297 $type_sequence = $type_sequence_to_go[$i];
22658              
22659 34835         45123 my $i_next_nonblank = $inext_to_go[$i];
22660 34835         47574 $next_nonblank_type = $types_to_go[$i_next_nonblank];
22661 34835         47928 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
22662 34835         45974 $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
22663              
22664             #-------------------------------------------
22665             # Loop Section A: Look for special breakpoints...
22666             #-------------------------------------------
22667              
22668             # set break if flag was set
22669 34835 100       58929 if ( $i_want_previous_break >= 0 ) {
22670 17         101 $self->set_forced_breakpoint($i_want_previous_break);
22671 17         43 $i_want_previous_break = -1;
22672             }
22673              
22674 34835         44176 $last_old_breakpoint_count = $old_breakpoint_count;
22675              
22676             # Check for a good old breakpoint ..
22677 34835 100       59356 if ( $old_breakpoint_to_go[$i] ) {
22678 2490         8442 ( $i_want_previous_break, $i_old_assignment_break ) =
22679             $self->examine_old_breakpoint( $i_next_nonblank,
22680             $i_want_previous_break, $i_old_assignment_break );
22681             }
22682              
22683 34835 100       66975 next if ( $type eq 'b' );
22684              
22685 22133         34413 $depth = $nesting_depth_to_go[ $i + 1 ];
22686              
22687 22133         31793 $total_depth_variation += abs( $depth - $depth_last );
22688 22133         29664 $depth_last = $depth;
22689              
22690             # safety check - be sure we always break after a comment
22691             # Shouldn't happen .. an error here probably means that the
22692             # nobreak flag did not get turned off correctly during
22693             # formatting.
22694 22133 100       39791 if ( $type eq '#' ) {
22695 134 50       601 if ( $i != $max_index_to_go ) {
22696 0         0 if (DEVEL_MODE) {
22697             Fault(<<EOM);
22698             Non-fatal program bug: backup logic required to break after a comment
22699             EOM
22700             }
22701 0         0 $nobreak_to_go[$i] = 0;
22702 0         0 $self->set_forced_breakpoint($i);
22703             } ## end if ( $i != $max_index_to_go)
22704             } ## end if ( $type eq '#' )
22705              
22706             # Force breakpoints at certain tokens in long lines.
22707             # Note that such breakpoints will be undone later if these tokens
22708             # are fully contained within parens on a line.
22709 22133 100 100     47280 if (
      100        
      66        
      66        
      100        
      66        
      66        
      66        
22710              
22711             # break before a keyword within a line
22712             $type eq 'k'
22713             && $i > 0
22714              
22715             # if one of these keywords:
22716             && $is_if_unless_while_until_for_foreach{$token}
22717              
22718             # but do not break at something like '1 while'
22719             && ( $last_nonblank_type ne 'n' || $i > 2 )
22720              
22721             # and let keywords follow a closing 'do' brace
22722             && ( !$last_nonblank_block_type
22723             || $last_nonblank_block_type ne 'do' )
22724              
22725             && (
22726             $is_long_line
22727              
22728             # or container is broken (by side-comment, etc)
22729             || (
22730             $next_nonblank_token eq '('
22731             && ( !defined( $mate_index_to_go[$i_next_nonblank] )
22732             || $mate_index_to_go[$i_next_nonblank] < $i )
22733             )
22734             )
22735             )
22736             {
22737 8         56 $self->set_forced_breakpoint( $i - 1 );
22738             }
22739              
22740             # remember locations of '||' and '&&' for possible breaks if we
22741             # decide this is a long logical expression.
22742 22133 100       72271 if ( $type eq '||' ) {
    100          
    100          
    100          
    100          
22743 61         224 push @{ $rand_or_list[$depth][2] }, $i;
  61         203  
22744 61 100 100     391 ++$has_old_logical_breakpoints[$depth]
      66        
22745             if ( ( $i == $i_line_start || $i == $i_line_end )
22746             && $rOpts_break_at_old_logical_breakpoints );
22747             }
22748             elsif ( $type eq '&&' ) {
22749 55         140 push @{ $rand_or_list[$depth][3] }, $i;
  55         164  
22750 55 100 100     385 ++$has_old_logical_breakpoints[$depth]
      100        
22751             if ( ( $i == $i_line_start || $i == $i_line_end )
22752             && $rOpts_break_at_old_logical_breakpoints );
22753             }
22754             elsif ( $type eq 'f' ) {
22755 28         49 push @{ $rfor_semicolon_list[$depth] }, $i;
  28         68  
22756             }
22757             elsif ( $type eq 'k' ) {
22758 1370 100 100     7227 if ( $token eq 'and' ) {
    100          
    100          
22759 44         444 push @{ $rand_or_list[$depth][1] }, $i;
  44         134  
22760 44 100 66     277 ++$has_old_logical_breakpoints[$depth]
      66        
22761             if ( ( $i == $i_line_start || $i == $i_line_end )
22762             && $rOpts_break_at_old_logical_breakpoints );
22763             }
22764              
22765             # break immediately at 'or's which are probably not in a logical
22766             # block -- but we will break in logical breaks below so that
22767             # they do not add to the forced_breakpoint_count
22768             elsif ( $token eq 'or' ) {
22769 40         132 push @{ $rand_or_list[$depth][0] }, $i;
  40         172  
22770 40 100 100     331 ++$has_old_logical_breakpoints[$depth]
      66        
22771             if ( ( $i == $i_line_start || $i == $i_line_end )
22772             && $rOpts_break_at_old_logical_breakpoints );
22773 40 100       165 if ( $is_logical_container{ $container_type[$depth] } ) {
22774             }
22775             else {
22776 31 100 100     232 if ($is_long_line) { $self->set_forced_breakpoint($i) }
  16 100 66     60  
22777             elsif ( ( $i == $i_line_start || $i == $i_line_end )
22778             && $rOpts_break_at_old_logical_breakpoints )
22779             {
22780 4         11 $saw_good_breakpoint = 1;
22781             }
22782             }
22783             }
22784             elsif ( $token eq 'if' || $token eq 'unless' ) {
22785 120         249 push @{ $rand_or_list[$depth][4] }, $i;
  120         475  
22786 120 100 66     851 if ( ( $i == $i_line_start || $i == $i_line_end )
      66        
22787             && $rOpts_break_at_old_logical_breakpoints )
22788             {
22789 7         39 $self->set_forced_breakpoint($i);
22790             }
22791             }
22792             }
22793             elsif ( $is_assignment{$type} ) {
22794 505         1505 $i_equals[$depth] = $i;
22795             }
22796              
22797             #-----------------------------------------
22798             # Loop Section B: Handle a sequenced token
22799             #-----------------------------------------
22800 22133 100       37755 if ($type_sequence) {
22801 6131         15232 $self->break_lists_type_sequence;
22802             }
22803              
22804             #------------------------------------------
22805             # Loop Section C: Handle Increasing Depth..
22806             #------------------------------------------
22807              
22808             # hardened against bad input syntax: depth jump must be 1 and type
22809             # must be opening..fixes c102
22810 22133 100 66     72447 if ( $depth == $current_depth + 1 && $is_opening_type{$type} ) {
    100 66        
22811 3017         7234 $self->break_lists_increasing_depth();
22812             }
22813              
22814             #------------------------------------------
22815             # Loop Section D: Handle Decreasing Depth..
22816             #------------------------------------------
22817              
22818             # hardened against bad input syntax: depth jump must be 1 and type
22819             # must be closing .. fixes c102
22820             elsif ( $depth == $current_depth - 1 && $is_closing_type{$type} ) {
22821              
22822 2854         8978 $self->break_lists_decreasing_depth();
22823              
22824 2854   100     9113 $comma_follows_last_closing_token =
22825             $next_nonblank_type eq ',' || $next_nonblank_type eq '=>';
22826              
22827             }
22828              
22829             #----------------------------------
22830             # Loop Section E: Handle this token
22831             #----------------------------------
22832              
22833 22133         29914 $current_depth = $depth;
22834              
22835             # most token types can skip the rest of this loop
22836 22133 100       55928 next unless ( $quick_filter{$type} );
22837              
22838             # handle comma-arrow
22839 4949 100 100     21144 if ( $type eq '=>' ) {
    100          
    100          
22840 984 50       2589 next if ( $last_nonblank_type eq '=>' );
22841 984 100       2114 next if $rOpts_break_at_old_comma_breakpoints;
22842             next
22843 978 50 33     2739 if ( $rOpts_comma_arrow_breakpoints == 3
22844             && !defined( $override_cab3[$depth] ) );
22845 978         1575 $want_comma_break[$depth] = 1;
22846 978         1496 $index_before_arrow[$depth] = $i_last_nonblank_token;
22847 978         2016 next;
22848             }
22849              
22850             elsif ( $type eq '.' ) {
22851 116         314 $last_dot_index[$depth] = $i;
22852             }
22853              
22854             # Turn off comma alignment if we are sure that this is not a list
22855             # environment. To be safe, we will do this if we see certain
22856             # non-list tokens, such as ';', '=', and also the environment is
22857             # not a list.
22858             ## $type =~ /^[\;\<\>\~f]$/ || $is_assignment{$type}
22859             elsif ( $is_non_list_type{$type}
22860             && !$self->is_in_list_by_i($i) )
22861             {
22862 1443         2810 $dont_align[$depth] = 1;
22863 1443         2413 $want_comma_break[$depth] = 0;
22864 1443         2431 $index_before_arrow[$depth] = -1;
22865              
22866             # no special comma breaks in C-style 'for' terms (c154)
22867 1443 100       3687 if ( $type eq 'f' ) { $last_comma_index[$depth] = undef }
  28         52  
22868             }
22869              
22870             # now just handle any commas
22871 3965 100       9252 next if ( $type ne ',' );
22872 2396         5604 $self->study_comma($comma_follows_last_closing_token);
22873              
22874             } ## end while ( ++$i <= $max_index_to_go)
22875              
22876             #-------------------------------------------
22877             # END of loop over all tokens in this batch
22878             # Now set breaks for any unfinished lists ..
22879             #-------------------------------------------
22880              
22881 1738         5715 foreach my $dd ( reverse( $minimum_depth .. $current_depth ) ) {
22882              
22883 2526         4337 $interrupted_list[$dd] = 1;
22884 2526 100       5756 $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
22885 2526 100       5631 $self->set_comma_breakpoints( $dd, $rbond_strength_bias )
22886             if ( $item_count_stack[$dd] );
22887 2526 100       5585 $self->set_logical_breakpoints($dd)
22888             if ( $has_old_logical_breakpoints[$dd] );
22889 2526         7484 $self->set_for_semicolon_breakpoints($dd);
22890              
22891             # break open container...
22892 2526         4102 my $i_opening = $opening_structure_index_stack[$dd];
22893 2526 100 66     10571 if ( defined($i_opening) && $i_opening >= 0 ) {
22894 788 50 66     2247 $self->set_forced_breakpoint($i_opening)
      0        
      33        
      66        
22895             unless (
22896             is_unbreakable_container($dd)
22897              
22898             # Avoid a break which would place an isolated ' or "
22899             # on a line
22900             || ( $type eq 'Q'
22901             && $i_opening >= $max_index_to_go - 2
22902             && ( $token eq "'" || $token eq '"' ) )
22903             );
22904             }
22905             } ## end for ( my $dd = $current_depth...)
22906              
22907             #----------------------------------------
22908             # Return the flag '$saw_good_breakpoint'.
22909             #----------------------------------------
22910             # This indicates if the input file had some good breakpoints. This
22911             # flag will be used to force a break in a line shorter than the
22912             # allowed line length.
22913 1738 100 100     8460 if ( $has_old_logical_breakpoints[$current_depth] ) {
    100 100        
      66        
22914 31         82 $saw_good_breakpoint = 1;
22915             }
22916              
22917             # A complex line with one break at an = has a good breakpoint.
22918             # This is not complex ($total_depth_variation=0):
22919             # $res1
22920             # = 10;
22921             #
22922             # This is complex ($total_depth_variation=6):
22923             # $res2 =
22924             # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
22925              
22926             # The check ($i_old_.. < $max_index_to_go) was added to fix b1333
22927             elsif ($i_old_assignment_break
22928             && $total_depth_variation > 4
22929             && $old_breakpoint_count == 1
22930             && $i_old_assignment_break < $max_index_to_go )
22931             {
22932 12         46 $saw_good_breakpoint = 1;
22933             }
22934              
22935 1738         3946 return $saw_good_breakpoint;
22936             } ## end sub break_lists
22937              
22938             sub study_comma {
22939              
22940             # study and store info for a list comma
22941              
22942 2396     2396 0 4682 my ( $self, $comma_follows_last_closing_token ) = @_;
22943              
22944 2396         3854 $last_dot_index[$depth] = undef;
22945 2396         3657 $last_comma_index[$depth] = $i;
22946              
22947             # break here if this comma follows a '=>'
22948             # but not if there is a side comment after the comma
22949 2396 100       5059 if ( $want_comma_break[$depth] ) {
22950              
22951 610 100       2716 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
22952 145 50       531 if ($rOpts_comma_arrow_breakpoints) {
22953 145         324 $want_comma_break[$depth] = 0;
22954 145         420 return;
22955             }
22956             }
22957              
22958 465 50       1947 $self->set_forced_breakpoint($i)
22959             unless ( $next_nonblank_type eq '#' );
22960              
22961             # break before the previous token if it looks safe
22962             # Example of something that we will not try to break before:
22963             # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
22964             # Also we don't want to break at a binary operator (like +):
22965             # $c->createOval(
22966             # $x + $R, $y +
22967             # $R => $x - $R,
22968             # $y - $R, -fill => 'black',
22969             # );
22970 465         1080 my $ibreak = $index_before_arrow[$depth] - 1;
22971 465 100 66     2517 if ( $ibreak > 0
22972             && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
22973             {
22974 460 100       1293 if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
  142         258  
22975 460 100       1116 if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
  451         710  
22976 460 100       1658 if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
22977              
22978             # don't break before a comma, as in the following:
22979             # ( LONGER_THAN,=> 1,
22980             # EIGHTY_CHARACTERS,=> 2,
22981             # CAUSES_FORMATTING,=> 3,
22982             # LIKE_THIS,=> 4,
22983             # );
22984             # This example is for -tso but should be general rule
22985 453 50 33     2036 if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
22986             && $tokens_to_go[ $ibreak + 1 ] ne ',' )
22987             {
22988 453         1258 $self->set_forced_breakpoint($ibreak);
22989             }
22990             }
22991             }
22992              
22993 465         984 $want_comma_break[$depth] = 0;
22994 465         866 $index_before_arrow[$depth] = -1;
22995              
22996             # handle list which mixes '=>'s and ','s:
22997             # treat any list items so far as an interrupted list
22998 465         740 $interrupted_list[$depth] = 1;
22999 465         1190 return;
23000             }
23001              
23002             # Break after all commas above starting depth...
23003             # But only if the last closing token was followed by a comma,
23004             # to avoid breaking a list operator (issue c119)
23005 1786 100 100     4293 if ( $depth < $starting_depth
      100        
23006             && $comma_follows_last_closing_token
23007             && !$dont_align[$depth] )
23008             {
23009 8 50       42 $self->set_forced_breakpoint($i)
23010             unless ( $next_nonblank_type eq '#' );
23011 8         19 return;
23012             }
23013              
23014             # add this comma to the list..
23015 1778         2758 my $item_count = $item_count_stack[$depth];
23016 1778 100       3712 if ( $item_count == 0 ) {
23017              
23018             # but do not form a list with no opening structure
23019             # for example:
23020              
23021             # open INFILE_COPY, ">$input_file_copy"
23022             # or die ("very long message");
23023 543 100 100     2161 if ( ( $opening_structure_index_stack[$depth] < 0 )
23024             && $self->is_in_block_by_i($i) )
23025             {
23026 29         84 $dont_align[$depth] = 1;
23027             }
23028             }
23029              
23030 1778         3466 $comma_index[$depth][$item_count] = $i;
23031 1778         2679 ++$item_count_stack[$depth];
23032 1778 100       5683 if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
23033 411         682 $identifier_count_stack[$depth]++;
23034             }
23035 1778         3886 return;
23036             } ## end sub study_comma
23037              
23038             my %poor_types;
23039             my %poor_keywords;
23040             my %poor_next_types;
23041             my %poor_next_keywords;
23042              
23043             BEGIN {
23044              
23045             # Setup filters for detecting very poor breaks to ignore.
23046             # b1097: old breaks after type 'L' and before 'R' are poor
23047             # b1450: old breaks at 'eq' and related operators are poor
23048 38     38   324 my @q = qw(== <= >= !=);
23049              
23050 38         284 @{poor_types}{@q} = (1) x scalar(@q);
23051 38         213 @{poor_next_types}{@q} = (1) x scalar(@q);
23052 38         182 $poor_types{'L'} = 1;
23053 38         151 $poor_next_types{'R'} = 1;
23054              
23055 38         241 @q = qw(eq ne le ge lt gt);
23056 38         390 @{poor_keywords}{@q} = (1) x scalar(@q);
23057 38         97668 @{poor_next_keywords}{@q} = (1) x scalar(@q);
23058             } ## end BEGIN
23059              
23060             sub examine_old_breakpoint {
23061              
23062 2490     2490 0 5643 my ( $self, $i_next_nonblank, $i_want_previous_break,
23063             $i_old_assignment_break )
23064             = @_;
23065              
23066             # Look at an old breakpoint and set/update certain flags:
23067              
23068             # Given indexes of three tokens in this batch:
23069             # $i_next_nonblank - index of the next nonblank token
23070             # $i_want_previous_break - we want a break before this index
23071             # $i_old_assignment_break - the index of an '=' or equivalent
23072             # Update:
23073             # $old_breakpoint_count - a counter to increment unless poor break
23074             # Update and return:
23075             # $i_want_previous_break
23076             # $i_old_assignment_break
23077              
23078             #-----------------------
23079             # Filter out poor breaks
23080             #-----------------------
23081             # Just return if this is a poor break and pretend it does not exist.
23082             # Otherwise, poor breaks made under stress can cause instability.
23083 2490         3790 my $poor_break;
23084 2490 100 33     5149 if ( $type eq 'k' ) { $poor_break ||= $poor_keywords{$token} }
  28         225  
23085 2462   66     8608 else { $poor_break ||= $poor_types{$type} }
23086              
23087 2490 100       5275 if ( $next_nonblank_type eq 'k' ) {
23088 150   33     699 $poor_break ||= $poor_next_keywords{$next_nonblank_token};
23089             }
23090 2340   66     7321 else { $poor_break ||= $poor_next_types{$next_nonblank_type} }
23091              
23092             # Also ignore any high stress level breaks; fixes b1395
23093 2490   100     10253 $poor_break ||= $levels_to_go[$i] >= $high_stress_level;
23094 2490 100       5201 if ($poor_break) { goto RETURN }
  6         37  
23095              
23096             #--------------------------------------------
23097             # Not a poor break, so continue to examine it
23098             #--------------------------------------------
23099 2484         3680 $old_breakpoint_count++;
23100 2484         3691 $i_line_end = $i;
23101 2484         3596 $i_line_start = $i_next_nonblank;
23102              
23103             #---------------------------------------
23104             # Do we want to break before this token?
23105             #---------------------------------------
23106              
23107             # Break before certain keywords if user broke there and
23108             # this is a 'safe' break point. The idea is to retain
23109             # any preferred breaks for sequential list operations,
23110             # like a schwartzian transform.
23111 2484 100       5349 if ($rOpts_break_at_old_keyword_breakpoints) {
23112 2482 50 100     5993 if (
      66        
      66        
23113             $next_nonblank_type eq 'k'
23114             && $is_keyword_returning_list{$next_nonblank_token}
23115             && ( $type =~ /^[=\)\]\}Riw]$/
23116             || $type eq 'k' && $is_keyword_returning_list{$token} )
23117             )
23118             {
23119              
23120             # we actually have to set this break next time through
23121             # the loop because if we are at a closing token (such
23122             # as '}') which forms a one-line block, this break might
23123             # get undone.
23124              
23125             # But do not do this at an '=' if:
23126             # - the user wants breaks before an equals (b434 b903)
23127             # - or -naws is set (can be unstable, see b1354)
23128             my $skip = $type eq '='
23129 12   66     77 && ( $want_break_before{$type}
23130             || !$rOpts_add_whitespace );
23131              
23132 12 50       44 $i_want_previous_break = $i
23133             unless ($skip);
23134              
23135             }
23136             }
23137              
23138             # Break before attributes if user broke there
23139 2484 100       5160 if ($rOpts_break_at_old_attribute_breakpoints) {
23140 2480 100       5200 if ( $next_nonblank_type eq 'A' ) {
23141 5         10 $i_want_previous_break = $i;
23142             }
23143             }
23144              
23145             #---------------------------------
23146             # Is this an old assignment break?
23147             #---------------------------------
23148 2484 100       6997 if ( $is_assignment{$type} ) {
    50          
23149 73         236 $i_old_assignment_break = $i;
23150             }
23151             elsif ( $is_assignment{$next_nonblank_type} ) {
23152 0         0 $i_old_assignment_break = $i_next_nonblank;
23153             }
23154              
23155             RETURN:
23156 2490         5808 return ( $i_want_previous_break, $i_old_assignment_break );
23157             } ## end sub examine_old_breakpoint
23158              
23159             sub break_lists_type_sequence {
23160              
23161 6131     6131 0 11291 my ($self) = @_;
23162              
23163             # We have encountered a sequenced token while setting list breakpoints
23164              
23165             # if closing type, one of } ) ] :
23166 6131 100       12972 if ( $is_closing_sequence_token{$token} ) {
23167              
23168 2984 100       7629 if ( $type eq ':' ) {
23169 130         378 $i_last_colon = $i;
23170              
23171             # retain break at a ':' line break
23172 130 100 100     1243 if ( ( $i == $i_line_start || $i == $i_line_end )
      100        
      66        
23173             && $rOpts_break_at_old_ternary_breakpoints
23174             && $levels_to_go[$i] < $high_stress_level )
23175             {
23176              
23177 73         1419 $self->set_forced_breakpoint($i);
23178              
23179             # Break at a previous '=', but only if it is before
23180             # the mating '?'. Mate_index test fixes b1287.
23181 73         195 my $ieq = $i_equals[$depth];
23182 73         180 my $mix = $mate_index_to_go[$i];
23183 73 100       247 if ( !defined($mix) ) { $mix = -1 }
  6         13  
23184 73 100 66     318 if ( $ieq > 0 && $ieq < $mix ) {
23185 17         90 $self->set_forced_breakpoint( $i_equals[$depth] );
23186 17         97 $i_equals[$depth] = -1;
23187             }
23188             }
23189             }
23190              
23191             # handle any postponed closing breakpoints
23192 2984 100       7260 if ( has_postponed_breakpoint($type_sequence) ) {
23193 728 100       2298 my $inc = ( $type eq ':' ) ? 0 : 1;
23194 728 100       1898 if ( $i >= $inc ) {
23195 266         1001 $self->set_forced_breakpoint( $i - $inc );
23196             }
23197             }
23198             }
23199              
23200             # must be opening token, one of { ( [ ?
23201             else {
23202              
23203             # set breaks at ?/: if they will get separated (and are
23204             # not a ?/: chain), or if the '?' is at the end of the
23205             # line
23206 3147 100       6682 if ( $token eq '?' ) {
23207 130         477 my $i_colon = $mate_index_to_go[$i];
23208 130 50 66     1016 if (
      66        
23209             !defined($i_colon) # the ':' is not in this batch
23210             || $i == 0 # this '?' is the first token of the line
23211             || $i == $max_index_to_go # or this '?' is the last token
23212             )
23213             {
23214              
23215             # don't break if # this has a side comment, and
23216             # don't break at a '?' if preceded by ':' on
23217             # this line of previous ?/: pair on this line.
23218             # This is an attempt to preserve a chain of ?/:
23219             # expressions (elsif2.t).
23220 12 100 66     105 if (
      100        
23221             (
23222             $i_last_colon < 0
23223             || $parent_seqno_to_go[$i_last_colon] !=
23224             $parent_seqno_to_go[$i]
23225             )
23226             && $tokens_to_go[$max_index_to_go] ne '#'
23227             )
23228             {
23229 8         35 $self->set_forced_breakpoint($i);
23230             }
23231 12         63 $self->set_closing_breakpoint($i);
23232             }
23233             }
23234              
23235             # must be one of { ( [
23236             else {
23237              
23238             # do requested -lp breaks at the OPENING token for BROKEN
23239             # blocks. NOTE: this can be done for both -lp and -xlp,
23240             # but only -xlp can really take advantage of this. So this
23241             # is currently restricted to -xlp to avoid excess changes to
23242             # existing -lp formatting.
23243 3017 100 100     7598 if ( $rOpts_extended_line_up_parentheses
23244             && !defined( $mate_index_to_go[$i] ) )
23245             {
23246             my $lp_object =
23247 26         55 $self->[_rlp_object_by_seqno_]->{$type_sequence};
23248 26 100       55 if ($lp_object) {
23249 13         54 my $K_begin_line = $lp_object->get_K_begin_line();
23250 13         26 my $i_begin_line = $K_begin_line - $K_to_go[0];
23251 13         44 $self->set_forced_lp_break( $i_begin_line, $i );
23252             }
23253             }
23254             }
23255             }
23256 6131         10013 return;
23257             } ## end sub break_lists_type_sequence
23258              
23259             sub break_lists_increasing_depth {
23260              
23261 3017     3017 0 5830 my ($self) = @_;
23262              
23263             #--------------------------------------------
23264             # prepare for a new list when depth increases
23265             # token $i is a '(','{', or '['
23266             #--------------------------------------------
23267              
23268             #----------------------------------------------------------
23269             # BEGIN initialize depth arrays
23270             # ... use the same order as sub check_for_new_minimum_depth
23271             #----------------------------------------------------------
23272 3017         6194 $type_sequence_stack[$depth] = $type_sequence;
23273              
23274 3017         4838 $override_cab3[$depth] = undef;
23275 3017 50 33     7532 if ( $rOpts_comma_arrow_breakpoints == 3 && $type_sequence ) {
23276             $override_cab3[$depth] =
23277 0         0 $self->[_roverride_cab3_]->{$type_sequence};
23278             }
23279              
23280 3017         5022 $breakpoint_stack[$depth] = $forced_breakpoint_count;
23281             $container_type[$depth] =
23282              
23283             # k => && || ? : .
23284 3017 100       8037 $is_container_label_type{$last_nonblank_type}
23285             ? $last_nonblank_token
23286             : EMPTY_STRING;
23287 3017         4924 $identifier_count_stack[$depth] = 0;
23288 3017         4771 $index_before_arrow[$depth] = -1;
23289 3017         4537 $interrupted_list[$depth] = 0;
23290 3017         4512 $item_count_stack[$depth] = 0;
23291 3017         5191 $last_nonblank_type[$depth] = $last_nonblank_type;
23292 3017         4593 $opening_structure_index_stack[$depth] = $i;
23293              
23294 3017         5007 $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
23295 3017         4985 $comma_index[$depth] = undef;
23296 3017         4400 $last_comma_index[$depth] = undef;
23297 3017         4455 $last_dot_index[$depth] = undef;
23298 3017         4473 $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
23299 3017         4486 $has_old_logical_breakpoints[$depth] = 0;
23300 3017         6308 $rand_or_list[$depth] = [];
23301 3017         5752 $rfor_semicolon_list[$depth] = [];
23302 3017         4835 $i_equals[$depth] = -1;
23303              
23304             # if line ends here then signal closing token to break
23305 3017 100 100     10691 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) {
23306 609         2164 $self->set_closing_breakpoint($i);
23307             }
23308              
23309             # Not all lists of values should be vertically aligned..
23310 3017   66     11005 $dont_align[$depth] =
23311              
23312             # code BLOCKS are handled at a higher level
23313             ##( $block_type ne EMPTY_STRING )
23314             $block_type
23315              
23316             # certain paren lists
23317             || ( $type eq '(' ) && (
23318              
23319             # it does not usually look good to align a list of
23320             # identifiers in a parameter list, as in:
23321             # my($var1, $var2, ...)
23322             # (This test should probably be refined, for now I'm just
23323             # testing for any keyword)
23324             ( $last_nonblank_type eq 'k' )
23325              
23326             # a trailing '(' usually indicates a non-list
23327             || ( $next_nonblank_type eq '(' )
23328             );
23329 3017         5013 $has_broken_sublist[$depth] = 0;
23330 3017         4866 $want_comma_break[$depth] = 0;
23331              
23332             #----------------------------
23333             # END initialize depth arrays
23334             #----------------------------
23335              
23336             # patch to outdent opening brace of long if/for/..
23337             # statements (like this one). See similar coding in
23338             # set_continuation breaks. We have also catch it here for
23339             # short line fragments which otherwise will not go through
23340             # break_long_lines.
23341 3017 50 100     9718 if (
      100        
      66        
      66        
      33        
23342             $block_type
23343              
23344             # if we have the ')' but not its '(' in this batch..
23345             && ( $last_nonblank_token eq ')' )
23346             && !defined( $mate_index_to_go[$i_last_nonblank_token] )
23347              
23348             # and user wants brace to left
23349             && !$rOpts_opening_brace_always_on_right
23350              
23351             && ( $type eq '{' ) # should be true
23352             && ( $token eq '{' ) # should be true
23353             )
23354             {
23355 4         17 $self->set_forced_breakpoint( $i - 1 );
23356             }
23357              
23358 3017         5068 return;
23359             } ## end sub break_lists_increasing_depth
23360              
23361             sub break_lists_decreasing_depth {
23362              
23363 2854     2854 0 5892 my ( $self, $rbond_strength_bias ) = @_;
23364              
23365             # We have arrived at a closing container token in sub break_lists:
23366             # the token at index $i is one of these: ')','}', ']'
23367             # A number of important breakpoints for this container can now be set
23368             # based on the information that we have collected. This includes:
23369             # - breaks at commas to format tables
23370             # - breaks at certain logical operators and other good breakpoints
23371             # - breaks at opening and closing containers if needed by selected
23372             # formatting styles
23373             # These breaks are made by calling sub 'set_forced_breakpoint'
23374              
23375 2854 100       7683 $self->check_for_new_minimum_depth( $depth, $parent_seqno_to_go[$i] )
23376             if ( $depth < $minimum_depth );
23377              
23378             # force all outer logical containers to break after we see on
23379             # old breakpoint
23380 2854   100     12140 $has_old_logical_breakpoints[$depth] ||=
23381             $has_old_logical_breakpoints[$current_depth];
23382              
23383             # Patch to break between ') {' if the paren list is broken.
23384             # There is similar logic in break_long_lines for
23385             # non-broken lists.
23386 2854 50 100     10492 if ( $token eq ')'
      100        
      66        
      66        
23387             && $next_nonblank_block_type
23388             && $interrupted_list[$current_depth]
23389             && $next_nonblank_type eq '{'
23390             && !$rOpts_opening_brace_always_on_right )
23391             {
23392 4         19 $self->set_forced_breakpoint($i);
23393             }
23394              
23395             #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
23396              
23397             #-----------------------------------------------------------------
23398             # Set breaks at commas to display a table of values if appropriate
23399             #-----------------------------------------------------------------
23400 2854         5588 my ( $bp_count, $do_not_break_apart ) = ( 0, 0 );
23401 2854 100       7375 ( $bp_count, $do_not_break_apart ) =
23402             $self->set_comma_breakpoints( $current_depth, $rbond_strength_bias )
23403             if ( $item_count_stack[$current_depth] );
23404              
23405             #-----------------------------------------------------------
23406             # Now set flags needed to decide if we should break open the
23407             # container ... This is a long rambling section which has
23408             # grown over time to handle all situations.
23409             #-----------------------------------------------------------
23410 2854         4679 my $i_opening = $opening_structure_index_stack[$current_depth];
23411 2854         5376 my $saw_opening_structure = ( $i_opening >= 0 );
23412 2854         4286 my $lp_object;
23413 2854 100 100     7679 if ( $rOpts_line_up_parentheses && $saw_opening_structure ) {
23414             $lp_object = $self->[_rlp_object_by_seqno_]
23415 279         804 ->{ $type_sequence_to_go[$i_opening] };
23416             }
23417              
23418             # this term is long if we had to break at interior commas..
23419 2854         4814 my $is_long_term = $bp_count > 0;
23420              
23421             # If this is a short container with one or more comma arrows,
23422             # then we will mark it as a long term to open it if requested.
23423             # $rOpts_comma_arrow_breakpoints =
23424             # 0 - open only if comma precedes closing brace
23425             # 1 - stable: except for one line blocks
23426             # 2 - try to form 1 line blocks
23427             # 3 - ignore =>
23428             # 4 - always open up if vt=0
23429             # 5 - stable: even for one line blocks if vt=0
23430              
23431 2854         4496 my $cab_flag = $rOpts_comma_arrow_breakpoints;
23432              
23433             # replace -cab=3 if overriden
23434 2854 50 33     7076 if ( $cab_flag == 3 && $type_sequence ) {
23435 0         0 my $test_cab = $self->[_roverride_cab3_]->{$type_sequence};
23436 0 0       0 if ( defined($test_cab) ) { $cab_flag = $test_cab }
  0         0  
23437             }
23438              
23439             # PATCH: Modify the -cab flag if we are not processing a list:
23440             # We only want the -cab flag to apply to list containers, so
23441             # for non-lists we use the default and stable -cab=5 value.
23442             # Fixes case b939a.
23443 2854 100 66     11924 if ( $type_sequence && !$self->[_ris_list_by_seqno_]->{$type_sequence} )
23444             {
23445 1924         3377 $cab_flag = 5;
23446             }
23447              
23448             # Ignore old breakpoints when under stress.
23449             # Fixes b1203 b1204 as well as b1197-b1200.
23450             # But not if -lp: fixes b1264, b1265. NOTE: rechecked with
23451             # b1264 to see if this check is still required at all, and
23452             # these still require a check, but at higher level beta+3
23453             # instead of beta: b1193 b780
23454 2854 100 100     13383 if ( $saw_opening_structure
      100        
23455             && !$lp_object
23456             && $levels_to_go[$i_opening] >= $high_stress_level )
23457             {
23458 29         43 $cab_flag = 2;
23459              
23460             # Do not break hash braces under stress (fixes b1238)
23461 29   100     124 $do_not_break_apart ||= $types_to_go[$i_opening] eq 'L';
23462              
23463             # This option fixes b1235, b1237, b1240 with old and new
23464             # -lp, but formatting is nicer with next option.
23465             ## $is_long_term ||=
23466             ## $levels_to_go[$i_opening] > $stress_level_beta + 1;
23467              
23468             # This option fixes b1240 but not b1235, b1237 with new -lp,
23469             # but this gives better formatting than the previous option.
23470             # TODO: see if stress_level_alpha should also be considered
23471 29   100     100 $do_not_break_apart ||=
23472             $levels_to_go[$i_opening] > $stress_level_beta;
23473             }
23474              
23475 2854 100 100     19163 if ( !$is_long_term
      66        
      100        
      100        
23476             && $saw_opening_structure
23477             && $is_opening_token{ $tokens_to_go[$i_opening] }
23478             && $index_before_arrow[ $depth + 1 ] > 0
23479             && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] } )
23480             {
23481 430   66     3321 $is_long_term =
23482             $cab_flag == 4
23483             || $cab_flag == 0 && $last_nonblank_token eq ','
23484             || $cab_flag == 5 && $old_breakpoint_to_go[$i_opening];
23485             }
23486              
23487             # mark term as long if the length between opening and closing
23488             # parens exceeds allowed line length
23489 2854 100 100     9683 if ( !$is_long_term && $saw_opening_structure ) {
23490              
23491 1949         5333 my $i_opening_minus = $self->find_token_starting_list($i_opening);
23492              
23493 1949         4980 my $excess = $self->excess_line_length( $i_opening_minus, $i );
23494              
23495             # Use standard spaces for indentation of lists in -lp mode
23496             # if it gives a longer line length. This helps to avoid an
23497             # instability due to forming and breaking one-line blocks.
23498             # This fixes case b1314.
23499 1949         3899 my $indentation = $leading_spaces_to_go[$i_opening_minus];
23500 1949 100 100     4909 if ( ref($indentation)
23501             && $self->[_ris_broken_container_]->{$type_sequence} )
23502             {
23503 25         58 my $lp_spaces = $indentation->get_spaces();
23504 25         54 my $std_spaces = $indentation->get_standard_spaces();
23505 25         37 my $diff = $std_spaces - $lp_spaces;
23506 25 50       67 if ( $diff > 0 ) { $excess += $diff }
  0         0  
23507             }
23508              
23509 1949         3216 my $tol = $length_tol;
23510              
23511             # boost tol for an -lp container
23512 1949 50 100     4871 if (
      33        
      66        
23513             $lp_tol_boost
23514             && $lp_object
23515             && ( $rOpts_extended_continuation_indentation
23516             || !$self->[_ris_list_by_seqno_]->{$type_sequence} )
23517             )
23518             {
23519 25         42 $tol += $lp_tol_boost;
23520             }
23521              
23522             # Patch to avoid blinking with -bbxi=2 and -cab=2
23523             # in which variations in -ci cause unstable formatting
23524             # in edge cases. We just always add one ci level so that
23525             # the formatting is independent of the -BBX results.
23526             # Fixes cases b1137 b1149 b1150 b1155 b1158 b1159 b1160
23527             # b1161 b1166 b1167 b1168
23528 1949 50 66     5868 if ( !$ci_levels_to_go[$i_opening]
23529             && $self->[_rbreak_before_container_by_seqno_]->{$type_sequence}
23530             )
23531             {
23532 0         0 $tol += $rOpts_continuation_indentation;
23533             }
23534              
23535 1949         4200 $is_long_term = $excess + $tol > 0;
23536              
23537             }
23538              
23539             # We've set breaks after all comma-arrows. Now we have to
23540             # undo them if this can be a one-line block
23541             # (the only breakpoints set will be due to comma-arrows)
23542              
23543 2854 100 33     22316 if (
      66        
      66        
      100        
      100        
      100        
23544              
23545             # user doesn't require breaking after all comma-arrows
23546             ( $cab_flag != 0 ) && ( $cab_flag != 4 )
23547              
23548             # and if the opening structure is in this batch
23549             && $saw_opening_structure
23550              
23551             # and either on the same old line
23552             && (
23553             $old_breakpoint_count_stack[$current_depth] ==
23554             $last_old_breakpoint_count
23555              
23556             # or user wants to form long blocks with arrows
23557             || $cab_flag == 2
23558             )
23559              
23560             # and we made breakpoints between the opening and closing
23561             && ( $breakpoint_undo_stack[$current_depth] <
23562             $forced_breakpoint_undo_count )
23563              
23564             # and this block is short enough to fit on one line
23565             # Note: use < because need 1 more space for possible comma
23566             && !$is_long_term
23567              
23568             )
23569             {
23570 96         382 $self->undo_forced_breakpoint_stack(
23571             $breakpoint_undo_stack[$current_depth] );
23572             }
23573              
23574             # now see if we have any comma breakpoints left
23575 2854         5498 my $has_comma_breakpoints =
23576             ( $breakpoint_stack[$current_depth] != $forced_breakpoint_count );
23577              
23578             # update broken-sublist flag of the outer container
23579 2854   100     13471 $has_broken_sublist[$depth] =
23580             $has_broken_sublist[$depth]
23581             || $has_broken_sublist[$current_depth]
23582             || $is_long_term
23583             || $has_comma_breakpoints;
23584              
23585             # Having come to the closing ')', '}', or ']', now we have to decide
23586             # if we should 'open up' the structure by placing breaks at the
23587             # opening and closing containers. This is a tricky decision. Here
23588             # are some of the basic considerations:
23589             #
23590             # -If this is a BLOCK container, then any breakpoints will have
23591             # already been set (and according to user preferences), so we need do
23592             # nothing here.
23593             #
23594             # -If we have a comma-separated list for which we can align the list
23595             # items, then we need to do so because otherwise the vertical aligner
23596             # cannot currently do the alignment.
23597             #
23598             # -If this container does itself contain a container which has been
23599             # broken open, then it should be broken open to properly show the
23600             # structure.
23601             #
23602             # -If there is nothing to align, and no other reason to break apart,
23603             # then do not do it.
23604             #
23605             # We will not break open the parens of a long but 'simple' logical
23606             # expression. For example:
23607             #
23608             # This is an example of a simple logical expression and its formatting:
23609             #
23610             # if ( $bigwasteofspace1 && $bigwasteofspace2
23611             # || $bigwasteofspace3 && $bigwasteofspace4 )
23612             #
23613             # Most people would prefer this than the 'spacey' version:
23614             #
23615             # if (
23616             # $bigwasteofspace1 && $bigwasteofspace2
23617             # || $bigwasteofspace3 && $bigwasteofspace4
23618             # )
23619             #
23620             # To illustrate the rules for breaking logical expressions, consider:
23621             #
23622             # FULLY DENSE:
23623             # if ( $opt_excl
23624             # and ( exists $ids_excl_uc{$id_uc}
23625             # or grep $id_uc =~ /$_/, @ids_excl_uc ))
23626             #
23627             # This is on the verge of being difficult to read. The current
23628             # default is to open it up like this:
23629             #
23630             # DEFAULT:
23631             # if (
23632             # $opt_excl
23633             # and ( exists $ids_excl_uc{$id_uc}
23634             # or grep $id_uc =~ /$_/, @ids_excl_uc )
23635             # )
23636             #
23637             # This is a compromise which tries to avoid being too dense and to
23638             # spacey. A more spaced version would be:
23639             #
23640             # SPACEY:
23641             # if (
23642             # $opt_excl
23643             # and (
23644             # exists $ids_excl_uc{$id_uc}
23645             # or grep $id_uc =~ /$_/, @ids_excl_uc
23646             # )
23647             # )
23648             #
23649             # Some people might prefer the spacey version -- an option could be
23650             # added. The innermost expression contains a long block '( exists
23651             # $ids_... ')'.
23652             #
23653             # Here is how the logic goes: We will force a break at the 'or' that
23654             # the innermost expression contains, but we will not break apart its
23655             # opening and closing containers because (1) it contains no
23656             # multi-line sub-containers itself, and (2) there is no alignment to
23657             # be gained by breaking it open like this
23658             #
23659             # and (
23660             # exists $ids_excl_uc{$id_uc}
23661             # or grep $id_uc =~ /$_/, @ids_excl_uc
23662             # )
23663             #
23664             # (although this looks perfectly ok and might be good for long
23665             # expressions). The outer 'if' container, though, contains a broken
23666             # sub-container, so it will be broken open to avoid too much density.
23667             # Also, since it contains no 'or's, there will be a forced break at
23668             # its 'and'.
23669              
23670             # Handle the experimental flag --break-open-compact-parens
23671             # NOTE: This flag is not currently used and may eventually be removed.
23672             # If this flag is set, we will implement it by
23673             # pretending we did not see the opening structure, since in that case
23674             # parens always get opened up.
23675 2854 50 66     9004 if ( $saw_opening_structure
23676             && $rOpts_break_open_compact_parens )
23677             {
23678              
23679             # This parameter is a one-character flag, as follows:
23680             # '0' matches no parens -> break open NOT OK
23681             # '1' matches all parens -> break open OK
23682             # Other values are same as used by the weld-exclusion-list
23683 0         0 my $flag = $rOpts_break_open_compact_parens;
23684 0 0 0     0 if ( $flag eq '*'
23685             || $flag eq '1' )
23686             {
23687 0         0 $saw_opening_structure = 0;
23688             }
23689             else {
23690              
23691             # NOTE: $seqno will be equal to closure var $type_sequence here
23692 0         0 my $seqno = $type_sequence_to_go[$i_opening];
23693 0         0 $saw_opening_structure =
23694             !$self->match_paren_control_flag( $seqno, $flag );
23695             }
23696             }
23697              
23698             # Set some more flags telling something about this container..
23699 2854         4218 my $is_simple_logical_expression;
23700 2854 100 100     15455 if ( $item_count_stack[$current_depth] == 0
      100        
      100        
23701             && $saw_opening_structure
23702             && $tokens_to_go[$i_opening] eq '('
23703             && $is_logical_container{ $container_type[$current_depth] } )
23704             {
23705              
23706             # This seems to be a simple logical expression with
23707             # no existing breakpoints. Set a flag to prevent
23708             # opening it up.
23709 204 100       702 if ( !$has_comma_breakpoints ) {
23710 191         444 $is_simple_logical_expression = 1;
23711             }
23712              
23713             #---------------------------------------------------
23714             # This seems to be a simple logical expression with
23715             # breakpoints (broken sublists, for example). Break
23716             # at all 'or's and '||'s.
23717             #---------------------------------------------------
23718             else {
23719 13         102 $self->set_logical_breakpoints($current_depth);
23720             }
23721             }
23722              
23723             # break long terms at any C-style for semicolons (c154)
23724 2854 100 100     7166 if ( $is_long_term
23725 550         2050 && @{ $rfor_semicolon_list[$current_depth] } )
23726             {
23727 4         17 $self->set_for_semicolon_breakpoints($current_depth);
23728              
23729             # and open up a long 'for' or 'foreach' container to allow
23730             # leading term alignment unless -lp is used.
23731 4 100       15 $has_comma_breakpoints = 1 unless ($lp_object);
23732             }
23733              
23734             #----------------------------------------------------------------
23735             # FINALLY: Break open container according to the flags which have
23736             # been set.
23737             #----------------------------------------------------------------
23738 2854 100 100     22954 if (
    100 100        
    100 100        
      66        
23739              
23740             # breaks for code BLOCKS are handled at a higher level
23741             !$block_type
23742              
23743             # we do not need to break at the top level of an 'if'
23744             # type expression
23745             && !$is_simple_logical_expression
23746              
23747             ## modification to keep ': (' containers vertically tight;
23748             ## but probably better to let user set -vt=1 to avoid
23749             ## inconsistency with other paren types
23750             ## && ($container_type[$current_depth] ne ':')
23751              
23752             # otherwise, we require one of these reasons for breaking:
23753             && (
23754              
23755             # - this term has forced line breaks
23756             $has_comma_breakpoints
23757              
23758             # - the opening container is separated from this batch
23759             # for some reason (comment, blank line, code block)
23760             # - this is a non-paren container spanning multiple lines
23761             || !$saw_opening_structure
23762              
23763             # - this is a long block contained in another breakable
23764             # container
23765             || $is_long_term && !$self->is_in_block_by_i($i_opening)
23766             )
23767             )
23768             {
23769              
23770             # do special -lp breaks at the CLOSING token for INTACT
23771             # blocks (because we might not do them if the block does
23772             # not break open)
23773 681 100       1884 if ($lp_object) {
23774 96         352 my $K_begin_line = $lp_object->get_K_begin_line();
23775 96         214 my $i_begin_line = $K_begin_line - $K_to_go[0];
23776 96         310 $self->set_forced_lp_break( $i_begin_line, $i_opening );
23777             }
23778              
23779             # break after opening structure.
23780             # note: break before closing structure will be automatic
23781 681 50       1952 if ( $minimum_depth <= $current_depth ) {
23782              
23783 681 100       1907 if ( $i_opening >= 0 ) {
23784 485 50 66     2056 if ( !$do_not_break_apart
23785             && !is_unbreakable_container($current_depth) )
23786             {
23787 445         1506 $self->set_forced_breakpoint($i_opening);
23788              
23789             # Do not let brace types L/R use vertical tightness
23790             # flags to recombine if we have to break on length
23791             # because instability is possible if both vt and vtc
23792             # flags are set ... see issue b1444.
23793 445 0 100     2544 if ( $is_long_term
      66        
      33        
23794             && $types_to_go[$i_opening] eq 'L'
23795             && $opening_vertical_tightness{'{'}
23796             && $closing_vertical_tightness{'}'} )
23797             {
23798 0         0 my $seqno = $type_sequence_to_go[$i_opening];
23799 0 0       0 if ($seqno) {
23800 0         0 $self->[_rbreak_container_]->{$seqno} = 1;
23801             }
23802             }
23803             }
23804             }
23805              
23806             # break at ',' of lower depth level before opening token
23807 681 100       1938 if ( $last_comma_index[$depth] ) {
23808 107         263 $self->set_forced_breakpoint( $last_comma_index[$depth] );
23809             }
23810              
23811             # break at '.' of lower depth level before opening token
23812 681 100       1771 if ( $last_dot_index[$depth] ) {
23813 5         22 $self->set_forced_breakpoint( $last_dot_index[$depth] );
23814             }
23815              
23816             # break before opening structure if preceded by another
23817             # closing structure and a comma. This is normally
23818             # done by the previous closing brace, but not
23819             # if it was a one-line block.
23820 681 100       1780 if ( $i_opening > 2 ) {
23821 427 100       1377 my $i_prev =
23822             ( $types_to_go[ $i_opening - 1 ] eq 'b' )
23823             ? $i_opening - 2
23824             : $i_opening - 1;
23825              
23826 427         853 my $type_prev = $types_to_go[$i_prev];
23827 427         770 my $token_prev = $tokens_to_go[$i_prev];
23828 427 100 66     4077 if (
    100 100        
      66        
23829             $type_prev eq ','
23830             && ( $types_to_go[ $i_prev - 1 ] eq ')'
23831             || $types_to_go[ $i_prev - 1 ] eq '}' )
23832             )
23833             {
23834 11         32 $self->set_forced_breakpoint($i_prev);
23835             }
23836              
23837             # also break before something like ':(' or '?('
23838             # if appropriate.
23839             elsif ($type_prev =~ /^([k\:\?]|&&|\|\|)$/
23840             && $want_break_before{$token_prev} )
23841             {
23842 6         28 $self->set_forced_breakpoint($i_prev);
23843             }
23844             }
23845             }
23846              
23847             # break after comma following closing structure
23848 681 100       2282 if ( $types_to_go[ $i + 1 ] eq ',' ) {
23849 79         235 $self->set_forced_breakpoint( $i + 1 );
23850             }
23851              
23852             # break before an '=' following closing structure
23853 681 50 33     2391 if (
23854             $is_assignment{$next_nonblank_type}
23855             && ( $breakpoint_stack[$current_depth] !=
23856             $forced_breakpoint_count )
23857             )
23858             {
23859 0         0 $self->set_forced_breakpoint($i);
23860             }
23861              
23862             # break at any comma before the opening structure Added
23863             # for -lp, but seems to be good in general. It isn't
23864             # obvious how far back to look; the '5' below seems to
23865             # work well and will catch the comma in something like
23866             # push @list, myfunc( $param, $param, ..
23867              
23868 681         1244 my $icomma = $last_comma_index[$depth];
23869 681 100 100     3274 if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
23870 25 50       74 unless ( $forced_breakpoint_to_go[$icomma] ) {
23871 0         0 $self->set_forced_breakpoint($icomma);
23872             }
23873             }
23874             }
23875              
23876             #-----------------------------------------------------------
23877             # Break open a logical container open if it was already open
23878             #-----------------------------------------------------------
23879             elsif ($is_simple_logical_expression
23880             && $has_old_logical_breakpoints[$current_depth] )
23881             {
23882 10         60 $self->set_logical_breakpoints($current_depth);
23883             }
23884              
23885             # Handle long container which does not get opened up
23886             elsif ($is_long_term) {
23887              
23888             # must set fake breakpoint to alert outer containers that
23889             # they are complex
23890 78         393 set_fake_breakpoint();
23891             }
23892              
23893 2854         5415 return;
23894             } ## end sub break_lists_decreasing_depth
23895             } ## end closure break_lists
23896              
23897             my %is_kwiZ;
23898             my %is_key_type;
23899              
23900             BEGIN {
23901              
23902             # Added 'w' to fix b1172
23903 38     38   3759 my @q = qw(k w i Z ->);
23904 38         318 @is_kwiZ{@q} = (1) x scalar(@q);
23905              
23906             # added = for b1211
23907 38         175 @q = qw<( [ { L R } ] ) = b>;
23908 38         104 push @q, ',';
23909 38         1803 @is_key_type{@q} = (1) x scalar(@q);
23910             } ## end BEGIN
23911              
23912 38     38   390 use constant DEBUG_FIND_START => 0;
  38         138  
  38         16106  
23913              
23914             sub find_token_starting_list {
23915              
23916             # When testing to see if a block will fit on one line, some
23917             # previous token(s) may also need to be on the line; particularly
23918             # if this is a sub call. So we will look back at least one
23919             # token.
23920 2264     2264 0 4503 my ( $self, $i_opening_paren ) = @_;
23921              
23922             # This will be the return index
23923 2264         3639 my $i_opening_minus = $i_opening_paren;
23924              
23925 2264 100       5065 if ( $i_opening_minus <= 0 ) {
23926 22         60 return $i_opening_minus;
23927             }
23928              
23929 2242         3663 my $im1 = $i_opening_paren - 1;
23930 2242         4662 my ( $iprev_nb, $type_prev_nb ) = ( $im1, $types_to_go[$im1] );
23931 2242 100 66     8148 if ( $type_prev_nb eq 'b' && $iprev_nb > 0 ) {
23932 1188         2110 $iprev_nb -= 1;
23933 1188         2355 $type_prev_nb = $types_to_go[$iprev_nb];
23934             }
23935              
23936 2242 100 66     9580 if ( $type_prev_nb eq ',' ) {
    100          
23937              
23938             # a previous comma is a good break point
23939             # $i_opening_minus = $i_opening_paren;
23940             }
23941              
23942             elsif (
23943             $tokens_to_go[$i_opening_paren] eq '('
23944              
23945             # non-parens added here to fix case b1186
23946             || $is_kwiZ{$type_prev_nb}
23947             )
23948             {
23949 1702         2864 $i_opening_minus = $im1;
23950              
23951             # Walk back to improve length estimate...
23952             # FIX for cases b1169 b1170 b1171: start walking back
23953             # at the previous nonblank. This makes the result insensitive
23954             # to the flag --space-function-paren, and similar.
23955             # previous loop: for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
23956 1702         7351 foreach my $j ( reverse( 0 .. $iprev_nb ) ) {
23957 3742 100       8812 if ( $is_key_type{ $types_to_go[$j] } ) {
23958              
23959             # fix for b1211
23960 1401 100       3658 if ( $types_to_go[$j] eq '=' ) { $i_opening_minus = $j }
  106         270  
23961 1401         2534 last;
23962             }
23963 2341         3773 $i_opening_minus = $j;
23964             }
23965 1702 100       5673 if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
  61         153  
23966             }
23967              
23968 2242         3147 DEBUG_FIND_START && print <<EOM;
23969             FIND_START: i=$i_opening_paren tok=$tokens_to_go[$i_opening_paren] => im=$i_opening_minus tok=$tokens_to_go[$i_opening_minus]
23970             EOM
23971              
23972 2242         4604 return $i_opening_minus;
23973             } ## end sub find_token_starting_list
23974              
23975             { ## begin closure table_maker
23976              
23977             my %is_keyword_with_special_leading_term;
23978              
23979             BEGIN {
23980              
23981             # These keywords have prototypes which allow a special leading item
23982             # followed by a list
23983 38     38   302 my @q = qw(
23984             chmod
23985             formline
23986             grep
23987             join
23988             kill
23989             map
23990             pack
23991             printf
23992             push
23993             sprintf
23994             unshift
23995             );
23996 38         1806 @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
23997             } ## end BEGIN
23998              
23999 38     38   332 use constant DEBUG_SPARSE => 0;
  38         111  
  38         234711  
24000              
24001             sub table_maker {
24002              
24003             # Given a list of comma-separated items, set breakpoints at some of
24004             # the commas, if necessary, to make it easy to read.
24005             # This is done by making calls to 'set_forced_breakpoint'.
24006             # This is a complex routine because there are many special cases.
24007              
24008             # Returns: nothing
24009              
24010             # The numerous variables involved are contained three hashes:
24011             # $rhash_IN : For contents see the calling routine
24012             # $rhash_A: For contents see return from sub 'table_layout_A'
24013             # $rhash_B: For contents see return from sub 'table_layout_B'
24014              
24015 497     497 0 1214 my ( $self, $rhash_IN ) = @_;
24016              
24017             # Find lengths of all list items needed for calculating page layout
24018 497         1871 my $rhash_A = table_layout_A($rhash_IN);
24019 497 100       1596 return if ( !defined($rhash_A) );
24020              
24021             # Some variables received from caller...
24022 489         1029 my $i_closing_paren = $rhash_IN->{i_closing_paren};
24023 489         907 my $i_opening_paren = $rhash_IN->{i_opening_paren};
24024 489         962 my $has_broken_sublist = $rhash_IN->{has_broken_sublist};
24025 489         926 my $interrupted = $rhash_IN->{interrupted};
24026              
24027             #-----------------------------------------
24028             # Section A: Handle some special cases ...
24029             #-----------------------------------------
24030              
24031             #-------------------------------------------------------------
24032             # Special Case A1: Compound List Rule 1:
24033             # Break at (almost) every comma for a list containing a broken
24034             # sublist. This has higher priority than the Interrupted List
24035             # Rule.
24036             #-------------------------------------------------------------
24037 489 100       1311 if ($has_broken_sublist) {
24038              
24039 80         325 $self->apply_broken_sublist_rule( $rhash_A, $interrupted );
24040              
24041 80         330 return;
24042             }
24043              
24044             #--------------------------------------------------------------
24045             # Special Case A2: Interrupted List Rule:
24046             # A list is forced to use old breakpoints if it was interrupted
24047             # by side comments or blank lines, or requested by user.
24048             #--------------------------------------------------------------
24049 409 100 100     2670 if ( $rOpts_break_at_old_comma_breakpoints
      66        
24050             || $interrupted
24051             || $i_opening_paren < 0 )
24052             {
24053 94         191 my $i_first_comma = $rhash_A->{_i_first_comma};
24054 94         168 my $i_true_last_comma = $rhash_A->{_i_true_last_comma};
24055 94         455 $self->copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
24056 94         422 return;
24057             }
24058              
24059             #-----------------------------------------------------------------
24060             # Special Case A3: If it fits on one line, return and let the line
24061             # break logic decide if and where to break.
24062             #-----------------------------------------------------------------
24063              
24064             # The -bbxi=2 parameters can add an extra hidden level of indentation
24065             # so they need a tolerance to avoid instability. Fixes b1259, 1260.
24066 315         816 my $opening_token = $tokens_to_go[$i_opening_paren];
24067 315         601 my $tol = 0;
24068 315 0 33     1002 if ( $break_before_container_types{$opening_token}
      0        
24069             && $container_indentation_options{$opening_token}
24070             && $container_indentation_options{$opening_token} == 2 )
24071             {
24072 0         0 $tol = $rOpts_indent_columns;
24073              
24074             # use greater of -ci and -i (fix for case b1334)
24075 0 0       0 if ( $tol < $rOpts_continuation_indentation ) {
24076 0         0 $tol = $rOpts_continuation_indentation;
24077             }
24078             }
24079              
24080             # Increase tol when -atc and -dtc are both used to allow for
24081             # possible loss in length on next pass due to a comma. Fixes b1455.
24082 315 100 100     1093 if ( $rOpts_delete_trailing_commas && $rOpts_add_trailing_commas ) {
24083 20         35 $tol += 1;
24084             }
24085              
24086 315         1059 my $i_opening_minus = $self->find_token_starting_list($i_opening_paren);
24087 315         1127 my $excess =
24088             $self->excess_line_length( $i_opening_minus, $i_closing_paren );
24089 315 100       1744 return if ( $excess + $tol <= 0 );
24090              
24091             #---------------------------------------
24092             # Section B: Handle a multiline list ...
24093             #---------------------------------------
24094              
24095 135         883 $self->break_multiline_list( $rhash_IN, $rhash_A, $i_opening_minus );
24096 135         627 return;
24097              
24098             } ## end sub table_maker
24099              
24100             sub apply_broken_sublist_rule {
24101              
24102 80     80 0 189 my ( $self, $rhash_A, $interrupted ) = @_;
24103              
24104             # Break at (almost) every comma for a list containing a broken
24105             # sublist.
24106              
24107 80         152 my $ritem_lengths = $rhash_A->{_ritem_lengths};
24108 80         153 my $ri_term_begin = $rhash_A->{_ri_term_begin};
24109 80         149 my $ri_term_end = $rhash_A->{_ri_term_end};
24110 80         139 my $ri_term_comma = $rhash_A->{_ri_term_comma};
24111 80         155 my $item_count = $rhash_A->{_item_count_A};
24112 80         135 my $i_first_comma = $rhash_A->{_i_first_comma};
24113 80         145 my $i_true_last_comma = $rhash_A->{_i_true_last_comma};
24114              
24115             # Break at every comma except for a comma between two
24116             # simple, small terms. This prevents long vertical
24117             # columns of, say, just 0's.
24118 80         125 my $small_length = 10; # 2 + actual maximum length wanted
24119              
24120             # We'll insert a break in long runs of small terms to
24121             # allow alignment in uniform tables.
24122 80         122 my $skipped_count = 0;
24123 80         234 my $columns = table_columns_available($i_first_comma);
24124 80         242 my $fields = int( $columns / $small_length );
24125 80 50 33     263 if ( $rOpts_maximum_fields_per_table
24126             && $fields > $rOpts_maximum_fields_per_table )
24127             {
24128 0         0 $fields = $rOpts_maximum_fields_per_table;
24129             }
24130 80         139 my $max_skipped_count = $fields - 1;
24131              
24132 80         139 my $is_simple_last_term = 0;
24133 80         167 my $is_simple_next_term = 0;
24134 80         216 foreach my $j ( 0 .. $item_count ) {
24135 278         385 $is_simple_last_term = $is_simple_next_term;
24136 278         384 $is_simple_next_term = 0;
24137 278 100 100     2013 if ( $j < $item_count
      100        
24138             && $ri_term_end->[$j] == $ri_term_begin->[$j]
24139             && $ritem_lengths->[$j] <= $small_length )
24140             {
24141 25         72 $is_simple_next_term = 1;
24142             }
24143 278 100       568 next if $j == 0;
24144 198 100 100     643 if ( $is_simple_last_term
      66        
24145             && $is_simple_next_term
24146             && $skipped_count < $max_skipped_count )
24147             {
24148 6         16 $skipped_count++;
24149             }
24150             else {
24151 192         325 $skipped_count = 0;
24152 192         335 my $i_tc = $ri_term_comma->[ $j - 1 ];
24153 192 100       444 last unless defined $i_tc;
24154 127         294 $self->set_forced_breakpoint($i_tc);
24155             }
24156             }
24157              
24158             # always break at the last comma if this list is
24159             # interrupted; we wouldn't want to leave a terminal '{', for
24160             # example.
24161 80 100       280 if ($interrupted) {
24162 8         32 $self->set_forced_breakpoint($i_true_last_comma);
24163             }
24164 80         216 return;
24165             } ## end sub apply_broken_sublist_rule
24166              
24167             sub set_emergency_comma_breakpoints {
24168              
24169             my (
24170              
24171 7     7 0 32 $self, #
24172              
24173             $number_of_fields_best,
24174             $rhash_IN,
24175             $comma_count,
24176             $i_first_comma,
24177              
24178             ) = @_;
24179              
24180             # The computed number of table fields is negative, so we have to make
24181             # an emergency fix.
24182              
24183 7         22 my $rcomma_index = $rhash_IN->{rcomma_index};
24184 7         22 my $next_nonblank_type = $rhash_IN->{next_nonblank_type};
24185 7         18 my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart};
24186 7         18 my $must_break_open = $rhash_IN->{must_break_open};
24187              
24188             # are we an item contained in an outer list?
24189 7         33 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
24190              
24191             # In many cases, it may be best to not force a break if there is just
24192             # one comma, because the standard continuation break logic will do a
24193             # better job without it.
24194              
24195             # In the common case that all but one of the terms can fit
24196             # on a single line, it may look better not to break open the
24197             # containing parens. Consider, for example
24198              
24199             # $color =
24200             # join ( '/',
24201             # sort { $color_value{$::a} <=> $color_value{$::b}; }
24202             # keys %colors );
24203              
24204             # which will look like this with the container broken:
24205              
24206             # $color = join (
24207             # '/',
24208             # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
24209             # );
24210              
24211             # Here is an example of this rule for a long last term:
24212              
24213             # log_message( 0, 256, 128,
24214             # "Number of routes in adj-RIB-in to be considered: $peercount" );
24215              
24216             # And here is an example with a long first term:
24217              
24218             # $s = sprintf(
24219             # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
24220             # $r, $pu, $ps, $cu, $cs, $tt
24221             # )
24222             # if $style eq 'all';
24223              
24224 7         25 my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
24225              
24226 7         29 my $long_last_term = $self->excess_line_length( 0, $i_last_comma ) <= 0;
24227 7         45 my $long_first_term =
24228             $self->excess_line_length( $i_first_comma + 1, $max_index_to_go ) <=
24229             0;
24230              
24231             # break at every comma ...
24232 7 100 66     155 if (
    100 0        
    50 33        
      66        
24233              
24234             # if requested by user or is best looking
24235             $number_of_fields_best == 1
24236              
24237             # or if this is a sublist of a larger list
24238             || $in_hierarchical_list
24239              
24240             # or if multiple commas and we don't have a long first or last
24241             # term
24242             || ( $comma_count > 1
24243             && !( $long_last_term || $long_first_term ) )
24244             )
24245             {
24246 2         11 foreach ( 0 .. $comma_count - 1 ) {
24247 3         13 $self->set_forced_breakpoint( $rcomma_index->[$_] );
24248             }
24249             }
24250             elsif ($long_last_term) {
24251              
24252 2         14 $self->set_forced_breakpoint($i_last_comma);
24253 2 100       10 ${$rdo_not_break_apart} = 1 unless $must_break_open;
  1         2  
24254             }
24255             elsif ($long_first_term) {
24256              
24257 3         15 $self->set_forced_breakpoint($i_first_comma);
24258             }
24259             else {
24260              
24261             # let breaks be defined by default bond strength logic
24262             }
24263 7         19 return;
24264             } ## end sub set_emergency_comma_breakpoints
24265              
24266             sub break_multiline_list {
24267 135     135 0 437 my ( $self, $rhash_IN, $rhash_A, $i_opening_minus ) = @_;
24268              
24269             # We have a list spanning multiple lines and are trying
24270             # to decide the best way to set comma breakpoints.
24271              
24272             # Overriden variables
24273 135         354 my $item_count = $rhash_A->{_item_count_A};
24274 135         313 my $identifier_count = $rhash_A->{_identifier_count_A};
24275              
24276             # Derived variables:
24277 135         300 my $ritem_lengths = $rhash_A->{_ritem_lengths};
24278 135         268 my $ri_term_begin = $rhash_A->{_ri_term_begin};
24279 135         275 my $ri_term_end = $rhash_A->{_ri_term_end};
24280 135         292 my $ri_term_comma = $rhash_A->{_ri_term_comma};
24281 135         290 my $rmax_length = $rhash_A->{_rmax_length};
24282 135         280 my $comma_count = $rhash_A->{_comma_count};
24283 135         289 my $i_effective_last_comma = $rhash_A->{_i_effective_last_comma};
24284 135         285 my $first_term_length = $rhash_A->{_first_term_length};
24285 135         305 my $i_first_comma = $rhash_A->{_i_first_comma};
24286 135         257 my $i_last_comma = $rhash_A->{_i_last_comma};
24287 135         268 my $i_true_last_comma = $rhash_A->{_i_true_last_comma};
24288              
24289             # Variables received from caller
24290 135         301 my $i_opening_paren = $rhash_IN->{i_opening_paren};
24291 135         319 my $i_closing_paren = $rhash_IN->{i_closing_paren};
24292 135         289 my $rcomma_index = $rhash_IN->{rcomma_index};
24293 135         297 my $next_nonblank_type = $rhash_IN->{next_nonblank_type};
24294 135         268 my $list_type = $rhash_IN->{list_type};
24295 135         337 my $interrupted = $rhash_IN->{interrupted};
24296 135         278 my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart};
24297 135         281 my $must_break_open = $rhash_IN->{must_break_open};
24298             ## NOTE: these input vars from caller use the values from rhash_A (see above):
24299             ## my $item_count = $rhash_IN->{item_count};
24300             ## my $identifier_count = $rhash_IN->{identifier_count};
24301              
24302             # NOTE: i_opening_paren changes value below so we need to get these here
24303 135         683 my $opening_is_in_block = $self->is_in_block_by_i($i_opening_paren);
24304 135         354 my $opening_token = $tokens_to_go[$i_opening_paren];
24305              
24306             #---------------------------------------------------------------
24307             # Section B1: Determine '$number_of_fields' = the best number of
24308             # fields to use if this is to be formatted as a table.
24309             #---------------------------------------------------------------
24310              
24311             # Now we know that this block spans multiple lines; we have to set
24312             # at least one breakpoint -- real or fake -- as a signal to break
24313             # open any outer containers.
24314 135         615 set_fake_breakpoint();
24315              
24316             # Set a flag indicating if we need to break open to keep -lp
24317             # items aligned. This is necessary if any of the list terms
24318             # exceeds the available space after the '('.
24319 135         287 my $need_lp_break_open = $must_break_open;
24320 135         341 my $is_lp_formatting = ref( $leading_spaces_to_go[$i_first_comma] );
24321 135 100 100     586 if ( $is_lp_formatting && !$must_break_open ) {
24322 18         71 my $columns_if_unbroken =
24323             $maximum_line_length_at_level[ $levels_to_go[$i_opening_minus] ]
24324             - total_line_length( $i_opening_minus, $i_opening_paren );
24325 18   100     147 $need_lp_break_open =
24326             ( $rmax_length->[0] > $columns_if_unbroken )
24327             || ( $rmax_length->[1] > $columns_if_unbroken )
24328             || ( $first_term_length > $columns_if_unbroken );
24329             }
24330              
24331 135         616 my $hash_B =
24332             $self->table_layout_B( $rhash_IN, $rhash_A, $is_lp_formatting );
24333 135 100       609 return if ( !defined($hash_B) );
24334              
24335             # Updated variables
24336 125         304 $i_first_comma = $hash_B->{_i_first_comma_B};
24337 125         266 $i_opening_paren = $hash_B->{_i_opening_paren_B};
24338 125         265 $item_count = $hash_B->{_item_count_B};
24339              
24340             # New variables
24341 125         267 my $columns = $hash_B->{_columns};
24342 125         258 my $formatted_columns = $hash_B->{_formatted_columns};
24343 125         256 my $formatted_lines = $hash_B->{_formatted_lines};
24344 125         261 my $max_width = $hash_B->{_max_width};
24345 125         255 my $new_identifier_count = $hash_B->{_new_identifier_count};
24346 125         244 my $number_of_fields = $hash_B->{_number_of_fields};
24347 125         240 my $odd_or_even = $hash_B->{_odd_or_even};
24348 125         244 my $packed_columns = $hash_B->{_packed_columns};
24349 125         274 my $packed_lines = $hash_B->{_packed_lines};
24350 125         261 my $pair_width = $hash_B->{_pair_width};
24351 125         267 my $ri_ragged_break_list = $hash_B->{_ri_ragged_break_list};
24352 125         248 my $use_separate_first_term = $hash_B->{_use_separate_first_term};
24353              
24354             # are we an item contained in an outer list?
24355 125         426 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
24356              
24357 125         289 my $unused_columns = $formatted_columns - $packed_columns;
24358              
24359             # set some empirical parameters to help decide if we should try to
24360             # align; high sparsity does not look good, especially with few lines
24361 125         286 my $sparsity = ($unused_columns) / ($formatted_columns);
24362 125 100       664 my $max_allowed_sparsity =
    100          
    100          
24363             ( $item_count < 3 ) ? 0.1
24364             : ( $packed_lines == 1 ) ? 0.15
24365             : ( $packed_lines == 2 ) ? 0.4
24366             : 0.7;
24367              
24368 125         207 my $two_line_word_wrap_ok;
24369 125 100       423 if ( $opening_token eq '(' ) {
24370              
24371             # default is to allow wrapping of short paren lists
24372 107         214 $two_line_word_wrap_ok = 1;
24373              
24374             # but turn off word wrap where requested
24375 107 50       384 if ($rOpts_break_open_compact_parens) {
24376              
24377             # This parameter is a one-character flag, as follows:
24378             # '0' matches no parens -> break open NOT OK -> word wrap OK
24379             # '1' matches all parens -> break open OK -> word wrap NOT OK
24380             # Other values are the same as used by the weld-exclusion-list
24381 0         0 my $flag = $rOpts_break_open_compact_parens;
24382 0 0 0     0 if ( $flag eq '*'
    0          
24383             || $flag eq '1' )
24384             {
24385 0         0 $two_line_word_wrap_ok = 0;
24386             }
24387             elsif ( $flag eq '0' ) {
24388 0         0 $two_line_word_wrap_ok = 1;
24389             }
24390             else {
24391 0         0 my $seqno = $type_sequence_to_go[$i_opening_paren];
24392 0         0 $two_line_word_wrap_ok =
24393             !$self->match_paren_control_flag( $seqno, $flag );
24394             }
24395             }
24396             }
24397              
24398             #-------------------------------------------------------------------
24399             # Section B2: Check for shortcut methods, which avoid treating
24400             # a list as a table for relatively small parenthesized lists. These
24401             # are usually easier to read if not formatted as tables.
24402             #-------------------------------------------------------------------
24403 125 100 100     1013 if (
      100        
      100        
24404             $packed_lines <= 2 # probably can fit in 2 lines
24405             && $item_count < 9 # doesn't have too many items
24406             && $opening_is_in_block # not a sub-container
24407             && $two_line_word_wrap_ok # ok to wrap this paren list
24408             )
24409             {
24410              
24411             # Section B2A: Shortcut method 1: for -lp and just one comma:
24412             # This is a no-brainer, just break at the comma.
24413 55 100 100     257 if (
      66        
24414             $is_lp_formatting # -lp
24415             && $item_count == 2 # two items, one comma
24416             && !$must_break_open
24417             )
24418             {
24419 5         24 my $i_break = $rcomma_index->[0];
24420 5         27 $self->set_forced_breakpoint($i_break);
24421 5         8 ${$rdo_not_break_apart} = 1;
  5         11  
24422 5         28 return;
24423              
24424             }
24425              
24426             # Section B2B: Shortcut method 2 is for most small ragged lists
24427             # which might look best if not displayed as a table.
24428 50 100 100     475 if (
      100        
      100        
24429             ( $number_of_fields == 2 && $item_count == 3 )
24430             || (
24431             $new_identifier_count > 0 # isn't all quotes
24432             && $sparsity > 0.15
24433             ) # would be fairly spaced gaps if aligned
24434             )
24435             {
24436              
24437 26         146 my $break_count = $self->set_ragged_breakpoints( $ri_term_comma,
24438             $ri_ragged_break_list );
24439 26 100       81 ++$break_count if ($use_separate_first_term);
24440              
24441             # NOTE: we should really use the true break count here,
24442             # which can be greater if there are large terms and
24443             # little space, but usually this will work well enough.
24444 26 100       96 unless ($must_break_open) {
24445              
24446 23 100 66     94 if ( $break_count <= 1 ) {
    100          
24447 21         37 ${$rdo_not_break_apart} = 1;
  21         57  
24448             }
24449             elsif ( $is_lp_formatting && !$need_lp_break_open ) {
24450 1         3 ${$rdo_not_break_apart} = 1;
  1         3  
24451             }
24452             }
24453 26         144 return;
24454             }
24455              
24456             } ## end shortcut methods
24457              
24458             # debug stuff
24459 94         209 DEBUG_SPARSE && do {
24460              
24461             # How many spaces across the page will we fill?
24462             my $columns_per_line =
24463             ( int $number_of_fields / 2 ) * $pair_width +
24464             ( $number_of_fields % 2 ) * $max_width;
24465              
24466             print STDOUT
24467             "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
24468              
24469             };
24470              
24471             #------------------------------------------------------------------
24472             # Section B3: Compound List Rule 2:
24473             # If this list is too long for one line, and it is an item of a
24474             # larger list, then we must format it, regardless of sparsity
24475             # (ian.t). One reason that we have to do this is to trigger
24476             # Compound List Rule 1, above, which causes breaks at all commas of
24477             # all outer lists. In this way, the structure will be properly
24478             # displayed.
24479             #------------------------------------------------------------------
24480              
24481             # Decide if this list is too long for one line unless broken
24482 94         301 my $total_columns = table_columns_available($i_opening_paren);
24483 94         364 my $too_long = $packed_columns > $total_columns;
24484              
24485             # For a paren list, include the length of the token just before the
24486             # '(' because this is likely a sub call, and we would have to
24487             # include the sub name on the same line as the list. This is still
24488             # imprecise, but not too bad. (steve.t)
24489 94 50 66     485 if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
      66        
24490              
24491 1         5 $too_long = $self->excess_line_length( $i_opening_minus,
24492             $i_effective_last_comma + 1 ) > 0;
24493             }
24494              
24495             # TODO: For an item after a '=>', try to include the length of the
24496             # thing before the '=>'. This is crude and should be improved by
24497             # actually looking back token by token.
24498 94 0 33     372 if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
      33        
24499 0         0 my $i_opening_minus_test = $i_opening_paren - 4;
24500 0 0       0 if ( $i_opening_minus >= 0 ) {
24501 0         0 $too_long = $self->excess_line_length( $i_opening_minus_test,
24502             $i_effective_last_comma + 1 ) > 0;
24503             }
24504             }
24505              
24506             # Always break lists contained in '[' and '{' if too long for 1 line,
24507             # and always break lists which are too long and part of a more complex
24508             # structure.
24509 94   100     508 my $must_break_open_container = $must_break_open
24510             || ( $too_long
24511             && ( $in_hierarchical_list || !$two_line_word_wrap_ok ) );
24512              
24513             #--------------------------------------------------------------------
24514             # Section B4: A table will work here. But do not attempt to align
24515             # columns if this is a tiny table or it would be too spaced. It
24516             # seems that the more packed lines we have, the sparser the list that
24517             # can be allowed and still look ok.
24518             #--------------------------------------------------------------------
24519              
24520 94 100 66     1015 if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
      66        
      100        
24521             || ( $formatted_lines < 2 )
24522             || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
24523             )
24524             {
24525             #----------------------------------------------------------------
24526             # Section B4A: too sparse: would not look good aligned in a table
24527             #----------------------------------------------------------------
24528              
24529             # use old breakpoints if this is a 'big' list
24530 12 50 33     55 if ( $packed_lines > 2 && $item_count > 10 ) {
24531 0         0 write_logfile_entry("List sparse: using old breakpoints\n");
24532 0         0 $self->copy_old_breakpoints( $i_first_comma, $i_last_comma );
24533             }
24534              
24535             # let the continuation logic handle it if 2 lines
24536             else {
24537              
24538 12         215 my $break_count = $self->set_ragged_breakpoints( $ri_term_comma,
24539             $ri_ragged_break_list );
24540 12 50       39 ++$break_count if ($use_separate_first_term);
24541              
24542 12 50       50 unless ($must_break_open_container) {
24543 0 0 0     0 if ( $break_count <= 1 ) {
    0          
24544 0         0 ${$rdo_not_break_apart} = 1;
  0         0  
24545             }
24546             elsif ( $is_lp_formatting && !$need_lp_break_open ) {
24547 0         0 ${$rdo_not_break_apart} = 1;
  0         0  
24548             }
24549             }
24550             }
24551 12         62 return;
24552             }
24553              
24554             #--------------------------------------------
24555             # Section B4B: Go ahead and format as a table
24556             #--------------------------------------------
24557 82         440 $self->write_formatted_table( $number_of_fields, $comma_count,
24558             $rcomma_index, $use_separate_first_term );
24559              
24560 82         459 return;
24561             } ## end sub break_multiline_list
24562              
24563             sub table_layout_A {
24564              
24565 497     497 0 1060 my ($rhash_IN) = @_;
24566              
24567             # Find lengths of all list items needed to calculate page layout
24568              
24569             # Returns:
24570             # - nothing if this list is empty, or
24571             # - a ref to a hash containing some derived parameters
24572              
24573 497         1188 my $i_opening_paren = $rhash_IN->{i_opening_paren};
24574 497         888 my $i_closing_paren = $rhash_IN->{i_closing_paren};
24575 497         922 my $identifier_count = $rhash_IN->{identifier_count};
24576 497         937 my $rcomma_index = $rhash_IN->{rcomma_index};
24577 497         849 my $item_count = $rhash_IN->{item_count};
24578              
24579             # nothing to do if no commas seen
24580 497 50       1428 return if ( $item_count < 1 );
24581              
24582 497         931 my $i_first_comma = $rcomma_index->[0];
24583 497         1142 my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
24584 497         860 my $i_last_comma = $i_true_last_comma;
24585 497 100       1263 if ( $i_last_comma >= $max_index_to_go ) {
24586 21         52 $item_count -= 1;
24587 21 100       100 return if ( $item_count < 1 );
24588 13         43 $i_last_comma = $rcomma_index->[ $item_count - 1 ];
24589             }
24590              
24591 489         889 my $comma_count = $item_count;
24592              
24593 489         1105 my $ritem_lengths = [];
24594 489         1072 my $ri_term_begin = [];
24595 489         945 my $ri_term_end = [];
24596 489         850 my $ri_term_comma = [];
24597              
24598 489         1209 my $rmax_length = [ 0, 0 ];
24599              
24600 489         1289 my $i_prev_plus;
24601             my $first_term_length;
24602 489         877 my $i = $i_opening_paren;
24603 489         856 my $is_odd = 1;
24604              
24605 489         1506 foreach my $j ( 0 .. $comma_count - 1 ) {
24606 1662         2325 $is_odd = 1 - $is_odd;
24607 1662         2449 $i_prev_plus = $i + 1;
24608 1662         2390 $i = $rcomma_index->[$j];
24609              
24610 1662 100 66     5980 my $i_term_end =
24611             ( $i == 0 || $types_to_go[ $i - 1 ] eq 'b' )
24612             ? $i - 2
24613             : $i - 1;
24614 1662 100       3527 my $i_term_begin =
24615             ( $types_to_go[$i_prev_plus] eq 'b' )
24616             ? $i_prev_plus + 1
24617             : $i_prev_plus;
24618 1662         2425 push @{$ri_term_begin}, $i_term_begin;
  1662         3128  
24619 1662         2372 push @{$ri_term_end}, $i_term_end;
  1662         3204  
24620 1662         2341 push @{$ri_term_comma}, $i;
  1662         2633  
24621              
24622             # note: currently adding 2 to all lengths (for comma and space)
24623 1662         3467 my $length =
24624             2 + token_sequence_length( $i_term_begin, $i_term_end );
24625 1662         2496 push @{$ritem_lengths}, $length;
  1662         2846  
24626              
24627 1662 100       3271 if ( $j == 0 ) {
24628 489         1140 $first_term_length = $length;
24629             }
24630             else {
24631              
24632 1173 100       2906 if ( $length > $rmax_length->[$is_odd] ) {
24633 562         1215 $rmax_length->[$is_odd] = $length;
24634             }
24635             }
24636             }
24637              
24638             # now we have to make a distinction between the comma count and item
24639             # count, because the item count will be one greater than the comma
24640             # count if the last item is not terminated with a comma
24641 489 100       1919 my $i_b =
24642             ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
24643             ? $i_last_comma + 1
24644             : $i_last_comma;
24645 489 100       1530 my $i_e =
24646             ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
24647             ? $i_closing_paren - 2
24648             : $i_closing_paren - 1;
24649 489         829 my $i_effective_last_comma = $i_last_comma;
24650              
24651 489         1172 my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
24652              
24653 489 100       1597 if ( $last_item_length > 0 ) {
24654              
24655             # add 2 to length because other lengths include a comma and a blank
24656 416         790 $last_item_length += 2;
24657 416         690 push @{$ritem_lengths}, $last_item_length;
  416         825  
24658 416         707 push @{$ri_term_begin}, $i_b + 1;
  416         826  
24659 416         750 push @{$ri_term_end}, $i_e;
  416         789  
24660 416         748 push @{$ri_term_comma}, undef;
  416         823  
24661              
24662 416         1141 my $i_odd = $item_count % 2;
24663              
24664 416 100       1149 if ( $last_item_length > $rmax_length->[$i_odd] ) {
24665 360         693 $rmax_length->[$i_odd] = $last_item_length;
24666             }
24667              
24668 416         685 $item_count++;
24669 416         764 $i_effective_last_comma = $i_e + 1;
24670              
24671 416 100       1872 if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
24672 144         298 $identifier_count++;
24673             }
24674             }
24675              
24676             # be sure we do not extend beyond the current list length
24677 489 100       1389 if ( $i_effective_last_comma >= $max_index_to_go ) {
24678 50         119 $i_effective_last_comma = $max_index_to_go - 1;
24679             }
24680              
24681             # Return the hash of derived variables.
24682             return {
24683              
24684             # Updated variables
24685 489         5777 _item_count_A => $item_count,
24686             _identifier_count_A => $identifier_count,
24687              
24688             # New variables
24689             _ritem_lengths => $ritem_lengths,
24690             _ri_term_begin => $ri_term_begin,
24691             _ri_term_end => $ri_term_end,
24692             _ri_term_comma => $ri_term_comma,
24693             _rmax_length => $rmax_length,
24694             _comma_count => $comma_count,
24695             _i_effective_last_comma => $i_effective_last_comma,
24696             _first_term_length => $first_term_length,
24697             _i_first_comma => $i_first_comma,
24698             _i_last_comma => $i_last_comma,
24699             _i_true_last_comma => $i_true_last_comma,
24700             };
24701              
24702             } ## end sub table_layout_A
24703              
24704             sub table_layout_B {
24705              
24706 135     135 0 378 my ( $self, $rhash_IN, $rhash_A, $is_lp_formatting ) = @_;
24707              
24708             # Determine variables for the best table layout, including
24709             # the best number of fields.
24710              
24711             # Returns:
24712             # - nothing if nothing more to do
24713             # - a ref to a hash containg some derived parameters
24714              
24715             # Variables from caller
24716 135         316 my $i_opening_paren = $rhash_IN->{i_opening_paren};
24717 135         299 my $list_type = $rhash_IN->{list_type};
24718 135         271 my $next_nonblank_type = $rhash_IN->{next_nonblank_type};
24719 135         301 my $rcomma_index = $rhash_IN->{rcomma_index};
24720 135         277 my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart};
24721              
24722             # Table size variables
24723 135         630 my $comma_count = $rhash_A->{_comma_count};
24724 135         295 my $first_term_length = $rhash_A->{_first_term_length};
24725 135         269 my $i_effective_last_comma = $rhash_A->{_i_effective_last_comma};
24726 135         317 my $i_first_comma = $rhash_A->{_i_first_comma};
24727 135         282 my $identifier_count = $rhash_A->{_identifier_count_A};
24728 135         258 my $item_count = $rhash_A->{_item_count_A};
24729 135         250 my $ri_term_begin = $rhash_A->{_ri_term_begin};
24730 135         259 my $ri_term_comma = $rhash_A->{_ri_term_comma};
24731 135         287 my $ri_term_end = $rhash_A->{_ri_term_end};
24732 135         259 my $ritem_lengths = $rhash_A->{_ritem_lengths};
24733 135         269 my $rmax_length = $rhash_A->{_rmax_length};
24734              
24735             # Specify if the list must have an even number of fields or not.
24736             # It is generally safest to assume an even number, because the
24737             # list items might be a hash list. But if we can be sure that
24738             # it is not a hash, then we can allow an odd number for more
24739             # flexibility.
24740             # 1 = odd field count ok, 2 = want even count
24741 135         232 my $odd_or_even = 2;
24742 135 100 66     1162 if (
      100        
      66        
      66        
24743             $identifier_count >= $item_count - 1
24744             || $is_assignment{$next_nonblank_type}
24745             || ( $list_type
24746             && $list_type ne '=>'
24747             && $list_type !~ /^[\:\?]$/ )
24748             )
24749             {
24750 32         87 $odd_or_even = 1;
24751             }
24752              
24753             # do we have a long first term which should be
24754             # left on a line by itself?
24755 135   33     768 my $use_separate_first_term = (
24756             $odd_or_even == 1 # only if we can use 1 field/line
24757             && $item_count > 3 # need several items
24758             && $first_term_length >
24759             2 * $rmax_length->[0] - 2 # need long first term
24760             && $first_term_length >
24761             2 * $rmax_length->[1] - 2 # need long first term
24762             );
24763              
24764             # or do we know from the type of list that the first term should
24765             # be placed alone?
24766 135 50       431 if ( !$use_separate_first_term ) {
24767 135 100       500 if ( $is_keyword_with_special_leading_term{$list_type} ) {
24768 4         8 $use_separate_first_term = 1;
24769              
24770             # should the container be broken open?
24771 4 100 33     24 if ( $item_count < 3 ) {
    50          
24772 3 50       24 if ( $i_first_comma - $i_opening_paren < 4 ) {
24773 3         8 ${$rdo_not_break_apart} = 1;
  3         22  
24774             }
24775             }
24776             elsif ($first_term_length < 20
24777             && $i_first_comma - $i_opening_paren < 4 )
24778             {
24779 1         5 my $columns = table_columns_available($i_first_comma);
24780 1 50       4 if ( $first_term_length < $columns ) {
24781 1         2 ${$rdo_not_break_apart} = 1;
  1         3  
24782             }
24783             }
24784             }
24785             }
24786              
24787             # if so,
24788 135 100       395 if ($use_separate_first_term) {
24789              
24790             # ..set a break and update starting values
24791 4         20 $self->set_forced_breakpoint($i_first_comma);
24792 4         23 $item_count--;
24793              
24794             #---------------------------------------------------------------
24795             # Section B1A: Stop if one item remains ($i_first_comma = undef)
24796             #---------------------------------------------------------------
24797             # Fix for b1442: use '$item_count' here instead of '$comma_count'
24798             # to make the result independent of any trailing comma.
24799 4 100       32 return if ( $item_count <= 1 );
24800              
24801 1         3 $i_opening_paren = $i_first_comma;
24802 1         3 $i_first_comma = $rcomma_index->[1];
24803 1         2 shift @{$ritem_lengths};
  1         3  
24804 1         2 shift @{$ri_term_begin};
  1         3  
24805 1         3 shift @{$ri_term_end};
  1         2  
24806 1         2 shift @{$ri_term_comma};
  1         2  
24807             }
24808              
24809             # if not, update the metrics to include the first term
24810             else {
24811 131 100       451 if ( $first_term_length > $rmax_length->[0] ) {
24812 44         104 $rmax_length->[0] = $first_term_length;
24813             }
24814             }
24815              
24816             # Field width parameters
24817 132         366 my $pair_width = ( $rmax_length->[0] + $rmax_length->[1] );
24818 132 100       445 my $max_width =
24819             ( $rmax_length->[0] > $rmax_length->[1] )
24820             ? $rmax_length->[0]
24821             : $rmax_length->[1];
24822              
24823             # Number of free columns across the page width for laying out tables
24824 132         522 my $columns = table_columns_available($i_first_comma);
24825              
24826             # Patch for b1210 and b1216-b1218 when -vmll is set. If we are unable
24827             # to break after an opening paren, then the maximum line length for the
24828             # first line could be less than the later lines. So we need to reduce
24829             # the line length. Normally, we will get a break after an opening
24830             # paren, but in some cases we might not.
24831 132 0 33     638 if ( $rOpts_variable_maximum_line_length
      33        
24832             && $tokens_to_go[$i_opening_paren] eq '('
24833 0         0 && @{$ri_term_begin} )
24834             {
24835 0         0 my $ib = $ri_term_begin->[0];
24836 0         0 my $type = $types_to_go[$ib];
24837              
24838             # So far, the only known instance of this problem is when
24839             # a bareword follows an opening paren with -vmll
24840 0 0       0 if ( $type eq 'w' ) {
24841              
24842             # If a line starts with paren+space+terms, then its max length
24843             # could be up to ci+2-i spaces less than if the term went out
24844             # on a line after the paren. So..
24845 0         0 my $tol_w = max( 0,
24846             2 + $rOpts_continuation_indentation -
24847             $rOpts_indent_columns );
24848 0         0 $columns = max( 0, $columns - $tol_w );
24849              
24850             ## Here is the original b1210 fix, but it failed on b1216-b1218
24851             ##my $columns2 = table_columns_available($i_opening_paren);
24852             ##$columns = min( $columns, $columns2 );
24853             }
24854             }
24855              
24856             # Estimated maximum number of fields which fit this space.
24857             # This will be our first guess:
24858 132         502 my $number_of_fields_max =
24859             maximum_number_of_fields( $columns, $odd_or_even, $max_width,
24860             $pair_width );
24861 132         314 my $number_of_fields = $number_of_fields_max;
24862              
24863             # Find the best-looking number of fields.
24864             # This will be our second guess, if possible.
24865 132         613 my ( $number_of_fields_best, $ri_ragged_break_list,
24866             $new_identifier_count )
24867             = $self->study_list_complexity( $ri_term_begin, $ri_term_end,
24868             $ritem_lengths, $max_width );
24869              
24870 132 100 100     1045 if ( $number_of_fields_best != 0
    50 33        
24871             && $number_of_fields_best < $number_of_fields_max )
24872             {
24873 18         50 $number_of_fields = $number_of_fields_best;
24874             }
24875              
24876             # fix b1427
24877             elsif ($number_of_fields_best > 1
24878             && $number_of_fields_best > $number_of_fields_max )
24879             {
24880 0         0 $number_of_fields_best = $number_of_fields_max;
24881             }
24882              
24883             # If we are crowded and the -lp option is being used, try
24884             # to undo some indentation
24885 132 100 100     606 if (
      100        
24886             $is_lp_formatting
24887             && (
24888             $number_of_fields == 0
24889             || ( $number_of_fields == 1
24890             && $number_of_fields != $number_of_fields_best )
24891             )
24892             )
24893             {
24894 16         90 ( $number_of_fields, $number_of_fields_best, $columns ) =
24895             $self->lp_table_fix(
24896              
24897             $columns,
24898             $i_first_comma,
24899             $max_width,
24900             $number_of_fields,
24901             $number_of_fields_best,
24902             $odd_or_even,
24903             $pair_width,
24904             $ritem_lengths,
24905              
24906             );
24907             }
24908              
24909             # try for one column if two won't work
24910 132 100       435 if ( $number_of_fields <= 0 ) {
24911 46         127 $number_of_fields = int( $columns / $max_width );
24912             }
24913              
24914             # The user can place an upper bound on the number of fields,
24915             # which can be useful for doing maintenance on tables
24916 132 50 33     485 if ( $rOpts_maximum_fields_per_table
24917             && $number_of_fields > $rOpts_maximum_fields_per_table )
24918             {
24919 0         0 $number_of_fields = $rOpts_maximum_fields_per_table;
24920             }
24921              
24922             # How many columns (characters) and lines would this container take
24923             # if no additional whitespace were added?
24924 132         459 my $packed_columns = token_sequence_length( $i_opening_paren + 1,
24925             $i_effective_last_comma + 1 );
24926 132 50       562 if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
  0         0  
24927 132         433 my $packed_lines = 1 + int( $packed_columns / $columns );
24928              
24929             #-----------------------------------------------------------------
24930             # Section B1B: Stop here if we did not compute a positive number of
24931             # fields. In this case we just have to bail out.
24932             #-----------------------------------------------------------------
24933 132 100       442 if ( $number_of_fields <= 0 ) {
24934              
24935 7         54 $self->set_emergency_comma_breakpoints(
24936              
24937             $number_of_fields_best,
24938             $rhash_IN,
24939             $comma_count,
24940             $i_first_comma,
24941              
24942             );
24943 7         41 return;
24944             }
24945              
24946             #------------------------------------------------------------------
24947             # Section B1B: We have a tentative field count that seems to work.
24948             # Now we must look more closely to determine if a table layout will
24949             # actually look okay.
24950             #------------------------------------------------------------------
24951              
24952             # How many lines will this require?
24953 125         307 my $formatted_lines = $item_count / ($number_of_fields);
24954 125 100       424 if ( $formatted_lines != int $formatted_lines ) {
24955 38         117 $formatted_lines = 1 + int $formatted_lines;
24956             }
24957              
24958             # So far we've been trying to fill out to the right margin. But
24959             # compact tables are easier to read, so let's see if we can use fewer
24960             # fields without increasing the number of lines.
24961 125         566 $number_of_fields = compactify_table( $item_count, $number_of_fields,
24962             $formatted_lines, $odd_or_even );
24963              
24964 125         254 my $formatted_columns;
24965              
24966 125 100       414 if ( $number_of_fields > 1 ) {
24967 61         258 $formatted_columns =
24968             ( $pair_width * ( int( $item_count / 2 ) ) +
24969             ( $item_count % 2 ) * $max_width );
24970             }
24971             else {
24972 64         148 $formatted_columns = $max_width * $item_count;
24973             }
24974 125 100       430 if ( $formatted_columns < $packed_columns ) {
24975 7         18 $formatted_columns = $packed_columns;
24976             }
24977              
24978             # Construce hash_B:
24979             return {
24980              
24981             # Updated variables
24982 125         1803 _i_first_comma_B => $i_first_comma,
24983             _i_opening_paren_B => $i_opening_paren,
24984             _item_count_B => $item_count,
24985              
24986             # New variables
24987             _columns => $columns,
24988             _formatted_columns => $formatted_columns,
24989             _formatted_lines => $formatted_lines,
24990             _max_width => $max_width,
24991             _new_identifier_count => $new_identifier_count,
24992             _number_of_fields => $number_of_fields,
24993             _odd_or_even => $odd_or_even,
24994             _packed_columns => $packed_columns,
24995             _packed_lines => $packed_lines,
24996             _pair_width => $pair_width,
24997             _ri_ragged_break_list => $ri_ragged_break_list,
24998             _use_separate_first_term => $use_separate_first_term,
24999             };
25000             } ## end sub table_layout_B
25001              
25002             sub lp_table_fix {
25003              
25004             # try to undo some -lp indentation to improve table formatting
25005              
25006             my (
25007              
25008 16     16 0 71 $self, #
25009              
25010             $columns,
25011             $i_first_comma,
25012             $max_width,
25013             $number_of_fields,
25014             $number_of_fields_best,
25015             $odd_or_even,
25016             $pair_width,
25017             $ritem_lengths,
25018              
25019             ) = @_;
25020              
25021 16         66 my $available_spaces =
25022             $self->get_available_spaces_to_go($i_first_comma);
25023 16 100       70 if ( $available_spaces > 0 ) {
25024              
25025 9         28 my $spaces_wanted = $max_width - $columns; # for 1 field
25026              
25027 9 100       34 if ( $number_of_fields_best == 0 ) {
25028 5         18 $number_of_fields_best =
25029             get_maximum_fields_wanted($ritem_lengths);
25030             }
25031              
25032 9 100       39 if ( $number_of_fields_best != 1 ) {
25033 3         10 my $spaces_wanted_2 = 1 + $pair_width - $columns; # for 2 fields
25034 3 50       7 if ( $available_spaces > $spaces_wanted_2 ) {
25035 3         8 $spaces_wanted = $spaces_wanted_2;
25036             }
25037             }
25038              
25039 9 100       40 if ( $spaces_wanted > 0 ) {
25040 6         35 my $deleted_spaces =
25041             $self->reduce_lp_indentation( $i_first_comma,
25042             $spaces_wanted );
25043              
25044             # redo the math
25045 6 100       26 if ( $deleted_spaces > 0 ) {
25046 5         18 $columns = table_columns_available($i_first_comma);
25047 5         29 $number_of_fields =
25048             maximum_number_of_fields( $columns, $odd_or_even,
25049             $max_width, $pair_width );
25050              
25051 5 50 66     33 if ( $number_of_fields_best == 1
25052             && $number_of_fields >= 1 )
25053             {
25054 0         0 $number_of_fields = $number_of_fields_best;
25055             }
25056             }
25057             }
25058             }
25059 16         61 return ( $number_of_fields, $number_of_fields_best, $columns );
25060             } ## end sub lp_table_fix
25061              
25062             sub write_formatted_table {
25063              
25064             # Write a table of comma separated items with fixed number of fields
25065 82     82 0 287 my ( $self, $number_of_fields, $comma_count, $rcomma_index,
25066             $use_separate_first_term )
25067             = @_;
25068              
25069 82         527 write_logfile_entry(
25070             "List: auto formatting with $number_of_fields fields/row\n");
25071              
25072 82 50       354 my $j_first_break =
25073             $use_separate_first_term
25074             ? $number_of_fields
25075             : $number_of_fields - 1;
25076              
25077 82         178 my $j = $j_first_break;
25078 82         338 while ( $j < $comma_count ) {
25079 245         449 my $i_comma = $rcomma_index->[$j];
25080 245         743 $self->set_forced_breakpoint($i_comma);
25081 245         602 $j += $number_of_fields;
25082             }
25083 82         240 return;
25084             } ## end sub write_formatted_table
25085              
25086             } ## end closure set_comma_breakpoint_final
25087              
25088             sub study_list_complexity {
25089              
25090             # Look for complex tables which should be formatted with one term per line.
25091             # Returns the following:
25092             #
25093             # \@i_ragged_break_list = list of good breakpoints to avoid lines
25094             # which are hard to read
25095             # $number_of_fields_best = suggested number of fields based on
25096             # complexity; = 0 if any number may be used.
25097             #
25098 132     132 0 448 my ( $self, $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
25099 132         230 my $item_count = @{$ri_term_begin};
  132         275  
25100 132         272 my $complex_item_count = 0;
25101 132         268 my $number_of_fields_best = $rOpts_maximum_fields_per_table;
25102 132         273 my $i_max = @{$ritem_lengths} - 1;
  132         287  
25103             ##my @item_complexity;
25104              
25105 132         244 my $i_last_last_break = -3;
25106 132         238 my $i_last_break = -2;
25107 132         249 my @i_ragged_break_list;
25108              
25109 132         257 my $definitely_complex = 30;
25110 132         261 my $definitely_simple = 12;
25111 132         241 my $quote_count = 0;
25112              
25113 132         368 for my $i ( 0 .. $i_max ) {
25114 938         1442 my $ib = $ri_term_begin->[$i];
25115 938         1326 my $ie = $ri_term_end->[$i];
25116              
25117             # define complexity: start with the actual term length
25118 938         1350 my $weighted_length = ( $ritem_lengths->[$i] - 2 );
25119              
25120             ##TBD: join types here and check for variations
25121             ##my $str=join "", @tokens_to_go[$ib..$ie];
25122              
25123 938         1238 my $is_quote = 0;
25124 938 100       2962 if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
    100          
25125 298         432 $is_quote = 1;
25126 298         434 $quote_count++;
25127             }
25128             elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
25129 36         81 $quote_count++;
25130             }
25131              
25132 938 100       1875 if ( $ib eq $ie ) {
25133 727 100 100     2139 if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
25134 50         90 $complex_item_count++;
25135 50         87 $weighted_length *= 2;
25136             }
25137             else {
25138             }
25139             }
25140             else {
25141 211 100       705 if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
  2525         4603  
25142 181         351 $complex_item_count++;
25143 181         309 $weighted_length *= 2;
25144             }
25145 211 100       518 if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
  2525         4164  
25146 24         85 $weighted_length += 4;
25147             }
25148             }
25149              
25150             # add weight for extra tokens.
25151 938         1638 $weighted_length += 2 * ( $ie - $ib );
25152              
25153             ## my $BUB = join '', @tokens_to_go[$ib..$ie];
25154             ## print "# COMPLEXITY:$weighted_length $BUB\n";
25155              
25156             ##push @item_complexity, $weighted_length;
25157              
25158             # now mark a ragged break after this item it if it is 'long and
25159             # complex':
25160 938 100 100     2996 if ( $weighted_length >= $definitely_complex ) {
    100 66        
25161              
25162             # if we broke after the previous term
25163             # then break before it too
25164 239 100 100     1168 if ( $i_last_break == $i - 1
      100        
25165             && $i > 1
25166             && $i_last_last_break != $i - 2 )
25167             {
25168              
25169             ## TODO: don't strand a small term
25170 21         41 pop @i_ragged_break_list;
25171 21         43 push @i_ragged_break_list, $i - 2;
25172 21         41 push @i_ragged_break_list, $i - 1;
25173             }
25174              
25175 239         487 push @i_ragged_break_list, $i;
25176 239         384 $i_last_last_break = $i_last_break;
25177 239         498 $i_last_break = $i;
25178             }
25179              
25180             # don't break before a small last term -- it will
25181             # not look good on a line by itself.
25182             elsif ($i == $i_max
25183             && $i_last_break == $i - 1
25184             && $weighted_length <= $definitely_simple )
25185             {
25186 11         36 pop @i_ragged_break_list;
25187             }
25188             }
25189              
25190 132         420 my $identifier_count = $i_max + 1 - $quote_count;
25191              
25192             # Need more tuning here..
25193 132 100 100     945 if ( $max_width > 12
      66        
25194             && $complex_item_count > $item_count / 2
25195             && $number_of_fields_best != 2 )
25196             {
25197 49         124 $number_of_fields_best = 1;
25198             }
25199              
25200 132         624 return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
25201             } ## end sub study_list_complexity
25202              
25203             sub get_maximum_fields_wanted {
25204              
25205             # Not all tables look good with more than one field of items.
25206             # This routine looks at a table and decides if it should be
25207             # formatted with just one field or not.
25208             # This coding is still under development.
25209 5     5 0 13 my ($ritem_lengths) = @_;
25210              
25211 5         11 my $number_of_fields_best = 0;
25212              
25213             # For just a few items, we tentatively assume just 1 field.
25214 5         13 my $item_count = @{$ritem_lengths};
  5         13  
25215 5 100       33 if ( $item_count <= 5 ) {
25216 2         8 $number_of_fields_best = 1;
25217             }
25218              
25219             # For larger tables, look at it both ways and see what looks best
25220             else {
25221              
25222 3         8 my $is_odd = 1;
25223 3         7 my @max_length = ( 0, 0 );
25224 3         9 my @last_length_2 = ( undef, undef );
25225 3         9 my @first_length_2 = ( undef, undef );
25226 3         6 my $last_length = undef;
25227 3         6 my $total_variation_1 = 0;
25228 3         5 my $total_variation_2 = 0;
25229 3         10 my @total_variation_2 = ( 0, 0 );
25230              
25231 3         15 foreach my $j ( 0 .. $item_count - 1 ) {
25232              
25233 24         39 $is_odd = 1 - $is_odd;
25234 24         40 my $length = $ritem_lengths->[$j];
25235 24 100       45 if ( $length > $max_length[$is_odd] ) {
25236 9         16 $max_length[$is_odd] = $length;
25237             }
25238              
25239 24 100       43 if ( defined($last_length) ) {
25240 21         32 my $dl = abs( $length - $last_length );
25241 21         31 $total_variation_1 += $dl;
25242             }
25243 24         30 $last_length = $length;
25244              
25245 24         34 my $ll = $last_length_2[$is_odd];
25246 24 100       38 if ( defined($ll) ) {
25247 18         33 my $dl = abs( $length - $ll );
25248 18         25 $total_variation_2[$is_odd] += $dl;
25249             }
25250             else {
25251 6         11 $first_length_2[$is_odd] = $length;
25252             }
25253 24         47 $last_length_2[$is_odd] = $length;
25254             }
25255 3         8 $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
25256              
25257 3 50       9 my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
    50          
25258 3 50       28 unless ( $total_variation_2 < $factor * $total_variation_1 ) {
25259 0         0 $number_of_fields_best = 1;
25260             }
25261             }
25262 5         17 return ($number_of_fields_best);
25263             } ## end sub get_maximum_fields_wanted
25264              
25265             sub table_columns_available {
25266 312     312 0 624 my $i_first_comma = shift;
25267 312         1150 my $columns =
25268             $maximum_line_length_at_level[ $levels_to_go[$i_first_comma] ] -
25269             leading_spaces_to_go($i_first_comma);
25270              
25271             # Patch: the vertical formatter does not line up lines whose lengths
25272             # exactly equal the available line length because of allowances
25273             # that must be made for side comments. Therefore, the number of
25274             # available columns is reduced by 1 character.
25275 312         644 $columns -= 1;
25276 312         666 return $columns;
25277             } ## end sub table_columns_available
25278              
25279             sub maximum_number_of_fields {
25280              
25281             # how many fields will fit in the available space?
25282 137     137 0 408 my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
25283 137         439 my $max_pairs = int( $columns / $pair_width );
25284 137         329 my $number_of_fields = $max_pairs * 2;
25285 137 100 100     551 if ( $odd_or_even == 1
25286             && $max_pairs * $pair_width + $max_width <= $columns )
25287             {
25288 7         19 $number_of_fields++;
25289             }
25290 137         341 return $number_of_fields;
25291             } ## end sub maximum_number_of_fields
25292              
25293             sub compactify_table {
25294              
25295             # given a table with a certain number of fields and a certain number
25296             # of lines, see if reducing the number of fields will make it look
25297             # better.
25298 125     125 0 380 my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
25299 125 100 66     669 if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
25300              
25301 43         112 my $min_fields = $number_of_fields;
25302              
25303 43   66     235 while ($min_fields >= $odd_or_even
25304             && $min_fields * $formatted_lines >= $item_count )
25305             {
25306 53         92 $number_of_fields = $min_fields;
25307 53         207 $min_fields -= $odd_or_even;
25308             }
25309             }
25310 125         318 return $number_of_fields;
25311             } ## end sub compactify_table
25312              
25313             sub set_ragged_breakpoints {
25314              
25315             # Set breakpoints in a list that cannot be formatted nicely as a
25316             # table.
25317 38     38 0 126 my ( $self, $ri_term_comma, $ri_ragged_break_list ) = @_;
25318              
25319 38         139 my $break_count = 0;
25320 38         87 foreach ( @{$ri_ragged_break_list} ) {
  38         132  
25321 70         136 my $j = $ri_term_comma->[$_];
25322 70 100       204 if ($j) {
25323 38         118 $self->set_forced_breakpoint($j);
25324 38         82 $break_count++;
25325             }
25326             }
25327 38         113 return $break_count;
25328             } ## end sub set_ragged_breakpoints
25329              
25330             sub copy_old_breakpoints {
25331 94     94 0 302 my ( $self, $i_first_comma, $i_last_comma ) = @_;
25332              
25333             # We are formatting a list and have decided to make comma breaks
25334             # the same as in the input file.
25335 94         275 for my $i ( $i_first_comma .. $i_last_comma ) {
25336 1177 100       2077 if ( $old_breakpoint_to_go[$i] ) {
25337              
25338             # If the comma style is under certain controls, and if this is a
25339             # comma breakpoint with the comma is at the beginning of the next
25340             # line, then we must pass that index instead. This will allow sub
25341             # set_forced_breakpoints to check and follow the user settings. This
25342             # produces a uniform style and can prevent instability (b1422).
25343             #
25344             # The flag '$controlled_comma_style' will be set if the user
25345             # entered any of -wbb=',' -wba=',' -kbb=',' -kba=','. It is not
25346             # needed or set for the -boc flag.
25347 121         207 my $ibreak = $i;
25348 121 50 33     429 if ( $types_to_go[$ibreak] ne ',' && $controlled_comma_style ) {
25349 0         0 my $index = $inext_to_go[$ibreak];
25350 0 0 0     0 if ( $index > $ibreak && $types_to_go[$index] eq ',' ) {
25351 0         0 $ibreak = $index;
25352             }
25353             }
25354 121         316 $self->set_forced_breakpoint($ibreak);
25355             }
25356             }
25357 94         221 return;
25358             } ## end sub copy_old_breakpoints
25359              
25360             sub set_nobreaks {
25361 351     351 0 855 my ( $self, $i, $j ) = @_;
25362 351 50 33     2791 if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
      33        
25363              
25364 351         587 0 && do {
25365             my ( $a, $b, $c ) = caller();
25366             print STDOUT
25367             "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
25368             };
25369              
25370 351         2049 @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
25371             }
25372              
25373             # shouldn't happen; non-critical error
25374             else {
25375 0         0 if (DEVEL_MODE) {
25376             my ( $a, $b, $c ) = caller();
25377             Fault(<<EOM);
25378             NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go
25379             EOM
25380             }
25381             }
25382 351         863 return;
25383             } ## end sub set_nobreaks
25384              
25385             ###############################################
25386             # CODE SECTION 12: Code for setting indentation
25387             ###############################################
25388              
25389             sub token_sequence_length {
25390              
25391             # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
25392 3938     3938 0 7334 my ( $ibeg, $iend ) = @_;
25393              
25394             # fix possible negative starting index
25395 3938 50       7975 if ( $ibeg < 0 ) { $ibeg = 0 }
  0         0  
25396              
25397             # returns 0 if index range is empty (some subs assume this)
25398 3938 100       7623 if ( $ibeg > $iend ) {
25399 74         202 return 0;
25400             }
25401              
25402 3864         8777 return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
25403             } ## end sub token_sequence_length
25404              
25405             sub total_line_length {
25406              
25407             # return length of a line of tokens ($ibeg .. $iend)
25408 1855     1855 0 3760 my ( $ibeg, $iend ) = @_;
25409              
25410             # get the leading spaces on this line ...
25411 1855         3067 my $spaces = $leading_spaces_to_go[$ibeg];
25412 1855 100       4012 if ( ref($spaces) ) { $spaces = $spaces->get_spaces() }
  603         1172  
25413              
25414             # ... then add the net token length
25415 1855         4627 return $spaces + $summed_lengths_to_go[ $iend + 1 ] -
25416             $summed_lengths_to_go[$ibeg];
25417              
25418             } ## end sub total_line_length
25419              
25420             sub excess_line_length {
25421              
25422             # return number of characters by which a line of tokens ($ibeg..$iend)
25423             # exceeds the allowable line length.
25424             # NOTE: profiling shows that efficiency of this routine is essential.
25425              
25426 11559     11559 0 23320 my ( $self, $ibeg, $iend, $ignore_right_weld ) = @_;
25427              
25428             # Start with the leading spaces on this line ...
25429 11559         17457 my $excess = $leading_spaces_to_go[$ibeg];
25430 11559 100       22499 if ( ref($excess) ) { $excess = $excess->get_spaces() }
  871         2119  
25431              
25432             # ... and include right weld lengths unless requested not to
25433 11559 100 100     23259 if ( $total_weld_count
      100        
25434             && $type_sequence_to_go[$iend]
25435             && !$ignore_right_weld )
25436             {
25437 231         583 my $wr = $self->[_rweld_len_right_at_K_]->{ $K_to_go[$iend] };
25438 231 100       595 $excess += $wr if defined($wr);
25439             }
25440              
25441             # ... then add the net token length, minus the maximum length
25442 11559         30428 return $excess +
25443             $summed_lengths_to_go[ $iend + 1 ] -
25444             $summed_lengths_to_go[$ibeg] -
25445             $maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
25446              
25447             } ## end sub excess_line_length
25448              
25449             sub get_spaces {
25450              
25451             # return the number of leading spaces associated with an indentation
25452             # variable $indentation is either a constant number of spaces or an object
25453             # with a get_spaces method.
25454 1953     1953 0 3001 my $indentation = shift;
25455 1953 100       5654 return ref($indentation) ? $indentation->get_spaces() : $indentation;
25456             } ## end sub get_spaces
25457              
25458             sub get_recoverable_spaces {
25459              
25460             # return the number of spaces (+ means shift right, - means shift left)
25461             # that we would like to shift a group of lines with the same indentation
25462             # to get them to line up with their opening parens
25463 38     38 0 89 my $indentation = shift;
25464 38 50       176 return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
25465             } ## end sub get_recoverable_spaces
25466              
25467             sub get_available_spaces_to_go {
25468              
25469 16     16 0 42 my ( $self, $ii ) = @_;
25470 16         39 my $item = $leading_spaces_to_go[$ii];
25471              
25472             # return the number of available leading spaces associated with an
25473             # indentation variable. $indentation is either a constant number of
25474             # spaces or an object with a get_available_spaces method.
25475 16 50       81 return ref($item) ? $item->get_available_spaces() : 0;
25476             } ## end sub get_available_spaces_to_go
25477              
25478             { ## begin closure set_lp_indentation
25479              
25480 38     38   372 use constant DEBUG_LP => 0;
  38         111  
  38         4530  
25481              
25482             # Stack of -lp index objects which survives between batches.
25483             my $rLP;
25484             my $max_lp_stack;
25485              
25486             # The predicted position of the next opening container which may start
25487             # an -lp indentation level. This survives between batches.
25488             my $lp_position_predictor;
25489              
25490 0         0 BEGIN {
25491              
25492             # Index names for the -lp stack variables.
25493             # Do not combine with other BEGIN blocks (c101).
25494              
25495 38     38   11207 my $i = 0;
25496             use constant {
25497 38         3314 _lp_ci_level_ => $i++,
25498             _lp_level_ => $i++,
25499             _lp_object_ => $i++,
25500             _lp_container_seqno_ => $i++,
25501             _lp_space_count_ => $i++,
25502 38     38   376 };
  38         113  
25503             } ## end BEGIN
25504              
25505             sub initialize_lp_vars {
25506              
25507             # initialize gnu variables for a new file;
25508             # must be called once at the start of a new file.
25509              
25510 555     555 0 1353 $lp_position_predictor = 0;
25511 555         1331 $max_lp_stack = 0;
25512              
25513             # we can turn off -lp if all levels will be at or above the cutoff
25514 555 100       2100 if ( $high_stress_level <= 1 ) {
25515 6         12 $rOpts_line_up_parentheses = 0;
25516 6         16 $rOpts_extended_line_up_parentheses = 0;
25517             }
25518              
25519 555         2374 $rLP = [];
25520              
25521             # initialize the leading whitespace stack to negative levels
25522             # so that we can never run off the end of the stack
25523 555         1885 $rLP->[$max_lp_stack]->[_lp_ci_level_] = -1;
25524 555         1555 $rLP->[$max_lp_stack]->[_lp_level_] = -1;
25525 555         1456 $rLP->[$max_lp_stack]->[_lp_object_] = undef;
25526 555         1615 $rLP->[$max_lp_stack]->[_lp_container_seqno_] = SEQ_ROOT;
25527 555         1452 $rLP->[$max_lp_stack]->[_lp_space_count_] = 0;
25528              
25529 555         1129 return;
25530             } ## end sub initialize_lp_vars
25531              
25532             # hashes for efficient testing
25533             my %hash_test1;
25534             my %hash_test2;
25535             my %hash_test3;
25536              
25537             BEGIN {
25538 38     38   277 my @q = qw< } ) ] >;
25539 38         268 @hash_test1{@q} = (1) x scalar(@q);
25540 38         166 @q = qw(: ? f);
25541 38         121 push @q, ',';
25542 38         248 @hash_test2{@q} = (1) x scalar(@q);
25543 38         158 @q = qw( . || && );
25544 38         264496 @hash_test3{@q} = (1) x scalar(@q);
25545             } ## end BEGIN
25546              
25547             # shared variables, re-initialized for each batch
25548             my $rlp_object_list;
25549             my $max_lp_object_list;
25550             my %lp_comma_count;
25551             my %lp_arrow_count;
25552             my $space_count;
25553             my $current_level;
25554             my $current_ci_level;
25555             my $ii_begin_line;
25556             my $in_lp_mode;
25557             my $stack_changed;
25558             my $K_last_nonblank;
25559             my $last_nonblank_token;
25560             my $last_nonblank_type;
25561             my $last_last_nonblank_type;
25562              
25563             sub set_lp_indentation {
25564              
25565 302     302 0 564 my ($self) = @_;
25566              
25567             #------------------------------------------------------------------
25568             # Define the leading whitespace for all tokens in the current batch
25569             # when the -lp formatting is selected.
25570             #------------------------------------------------------------------
25571              
25572             # Returns number of tokens in this batch which have leading spaces
25573             # defined by an lp object:
25574 302         547 my $lp_object_count_this_batch = 0;
25575              
25576             # Safety check, should not be needed:
25577 302 50 33     1810 if ( !$rOpts_line_up_parentheses
      33        
25578             || !defined($max_index_to_go)
25579             || $max_index_to_go < 0 )
25580             {
25581 0         0 return $lp_object_count_this_batch;
25582             }
25583              
25584             # List of -lp indentation objects created in this batch
25585 302         798 $rlp_object_list = [];
25586 302         534 $max_lp_object_list = -1;
25587              
25588 302         696 %lp_comma_count = ();
25589 302         530 %lp_arrow_count = ();
25590 302         475 $space_count = undef;
25591 302         441 $current_level = undef;
25592 302         444 $current_ci_level = undef;
25593 302         478 $ii_begin_line = 0;
25594 302         476 $in_lp_mode = 0;
25595 302         468 $stack_changed = 1;
25596 302         444 $K_last_nonblank = undef;
25597 302         488 $last_nonblank_token = EMPTY_STRING;
25598 302         471 $last_nonblank_type = EMPTY_STRING;
25599 302         531 $last_last_nonblank_type = EMPTY_STRING;
25600              
25601 302         499 my %last_lp_equals = ();
25602              
25603 302         539 my $rLL = $self->[_rLL_];
25604 302         570 my $starting_in_quote = $self->[_this_batch_]->[_starting_in_quote_];
25605              
25606 302         480 my $imin = 0;
25607              
25608             # The 'starting_in_quote' flag means that the first token is the first
25609             # token of a line and it is also the continuation of some kind of
25610             # multi-line quote or pattern. It must have no added leading
25611             # whitespace, so we can skip it.
25612 302 100       711 if ($starting_in_quote) {
25613 2         20 $imin += 1;
25614             }
25615              
25616 302         590 my $Kpnb = $K_to_go[0] - 1;
25617 302 100 100     1564 if ( $Kpnb > 0 && $rLL->[$Kpnb]->[_TYPE_] eq 'b' ) {
25618 210         424 $Kpnb -= 1;
25619             }
25620 302 100 66     1238 if ( $Kpnb >= 0 && $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) {
25621 272         440 $K_last_nonblank = $Kpnb;
25622             }
25623              
25624 302 100       713 if ( defined($K_last_nonblank) ) {
25625 272         533 $last_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
25626 272         520 $last_nonblank_type = $rLL->[$K_last_nonblank]->[_TYPE_];
25627             }
25628              
25629             #-----------------------------------
25630             # Loop over all tokens in this batch
25631             #-----------------------------------
25632 302         810 foreach my $ii ( $imin .. $max_index_to_go ) {
25633              
25634 5767         8841 my $type = $types_to_go[$ii];
25635 5767         8322 my $token = $tokens_to_go[$ii];
25636 5767         7996 my $level = $levels_to_go[$ii];
25637 5767         7613 my $ci_level = $ci_levels_to_go[$ii];
25638 5767         7907 my $total_depth = $nesting_depth_to_go[$ii];
25639              
25640             # get the top state from the stack if it has changed
25641 5767 100       10218 if ($stack_changed) {
25642 1757         2555 my $rLP_top = $rLP->[$max_lp_stack];
25643 1757         2451 my $lp_object = $rLP_top->[_lp_object_];
25644 1757 100       3091 if ($lp_object) {
25645             ( $space_count, $current_level, $current_ci_level ) =
25646 808         1306 @{ $lp_object->get_spaces_level_ci() };
  808         2170  
25647             }
25648             else {
25649 949         1323 $current_ci_level = $rLP_top->[_lp_ci_level_];
25650 949         1417 $current_level = $rLP_top->[_lp_level_];
25651 949         1348 $space_count = $rLP_top->[_lp_space_count_];
25652             }
25653 1757         3216 $stack_changed = 0;
25654             }
25655              
25656             #------------------------------------------------------------
25657             # Break at a previous '=' if necessary to control line length
25658             #------------------------------------------------------------
25659 5767 100 66     16979 if ( $type eq '{' || $type eq '(' ) {
25660 335         931 $lp_comma_count{ $total_depth + 1 } = 0;
25661 335         697 $lp_arrow_count{ $total_depth + 1 } = 0;
25662              
25663             # If we come to an opening token after an '=' token of some
25664             # type, see if it would be helpful to 'break' after the '=' to
25665             # save space
25666 335         650 my $ii_last_equals = $last_lp_equals{$total_depth};
25667 335 100       769 if ($ii_last_equals) {
25668 141         480 $self->lp_equals_break_check( $ii, $ii_last_equals );
25669             }
25670             }
25671              
25672             #------------------------
25673             # Handle decreasing depth
25674             #------------------------
25675             # Note that one token may have both decreasing and then increasing
25676             # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
25677             # in this example we would first go back to (1,0) then up to (2,0)
25678             # in a single call.
25679 5767 100 100     15813 if ( $level < $current_level || $ci_level < $current_ci_level ) {
25680 935         2411 $self->lp_decreasing_depth($ii);
25681             }
25682              
25683             #------------------------
25684             # handle increasing depth
25685             #------------------------
25686 5767 100 100     16600 if ( $level > $current_level || $ci_level > $current_ci_level ) {
25687 1485         3323 $self->lp_increasing_depth($ii);
25688             }
25689              
25690             #------------------
25691             # Handle all tokens
25692             #------------------
25693 5767 100       10593 if ( $type ne 'b' ) {
25694              
25695             # Count commas and look for non-list characters. Once we see a
25696             # non-list character, we give up and don't look for any more
25697             # commas.
25698 3772 100       10242 if ( $type eq '=>' ) {
    100          
    100          
25699 227         461 $lp_arrow_count{$total_depth}++;
25700              
25701             # remember '=>' like '=' for estimating breaks (but see
25702             # above note for b1035)
25703 227         462 $last_lp_equals{$total_depth} = $ii;
25704             }
25705              
25706             elsif ( $type eq ',' ) {
25707 615         1178 $lp_comma_count{$total_depth}++;
25708             }
25709              
25710             elsif ( $is_assignment{$type} ) {
25711 85         330 $last_lp_equals{$total_depth} = $ii;
25712             }
25713              
25714             # this token might start a new line if ..
25715 3772 100 66     44154 if (
      66        
25716             $ii > $ii_begin_line
25717              
25718             && (
25719              
25720             # this is the first nonblank token of the line
25721             $ii == 1 && $types_to_go[0] eq 'b'
25722              
25723             # or previous character was one of these:
25724             # /^([\:\?\,f])$/
25725             || $hash_test2{$last_nonblank_type}
25726              
25727             # or previous character was opening and this is not
25728             # closing
25729             || ( $last_nonblank_type eq '{' && $type ne '}' )
25730             || ( $last_nonblank_type eq '(' and $type ne ')' )
25731              
25732             # or this token is one of these:
25733             # /^([\.]|\|\||\&\&)$/
25734             || $hash_test3{$type}
25735              
25736             # or this is a closing structure
25737             || ( $last_nonblank_type eq '}'
25738             && $last_nonblank_token eq $last_nonblank_type )
25739              
25740             # or previous token was keyword 'return'
25741             || (
25742             $last_nonblank_type eq 'k'
25743             && ( $last_nonblank_token eq 'return'
25744             && $type ne '{' )
25745             )
25746              
25747             # or starting a new line at certain keywords is fine
25748             || ( $type eq 'k'
25749             && $is_if_unless_and_or_last_next_redo_return{
25750             $token} )
25751              
25752             # or this is after an assignment after a closing
25753             # structure
25754             || (
25755             $is_assignment{$last_nonblank_type}
25756             && (
25757             # /^[\}\)\]]$/
25758             $hash_test1{$last_last_nonblank_type}
25759              
25760             # and it is significantly to the right
25761             || $lp_position_predictor > (
25762             $maximum_line_length_at_level[$level] -
25763             $rOpts_maximum_line_length / 2
25764             )
25765             )
25766             )
25767             )
25768             )
25769             {
25770 1057         2902 check_for_long_gnu_style_lines($ii);
25771 1057         1533 $ii_begin_line = $ii;
25772              
25773             # back up 1 token if we want to break before that type
25774             # otherwise, we may strand tokens like '?' or ':' on a line
25775 1057 50       2104 if ( $ii_begin_line > 0 ) {
25776             my $wbb =
25777             $last_nonblank_type eq 'k'
25778             ? $want_break_before{$last_nonblank_token}
25779 1057 100       2402 : $want_break_before{$last_nonblank_type};
25780 1057 100       2083 $ii_begin_line-- if ($wbb);
25781             }
25782             }
25783              
25784 3772         6190 $K_last_nonblank = $K_to_go[$ii];
25785 3772         5429 $last_last_nonblank_type = $last_nonblank_type;
25786 3772         5092 $last_nonblank_type = $type;
25787 3772         5348 $last_nonblank_token = $token;
25788              
25789             } ## end if ( $type ne 'b' )
25790              
25791             # remember the predicted position of this token on the output line
25792 5767 100       9543 if ( $ii > $ii_begin_line ) {
25793              
25794             ## NOTE: this is a critical loop - the following call has been
25795             ## expanded for about 2x speedup:
25796             ## $lp_position_predictor =
25797             ## total_line_length( $ii_begin_line, $ii );
25798              
25799 4414         6193 my $indentation = $leading_spaces_to_go[$ii_begin_line];
25800 4414 100       8386 if ( ref($indentation) ) {
25801 2746         6539 $indentation = $indentation->get_spaces();
25802             }
25803             $lp_position_predictor =
25804 4414         8029 $indentation +
25805             $summed_lengths_to_go[ $ii + 1 ] -
25806             $summed_lengths_to_go[$ii_begin_line];
25807             }
25808             else {
25809 1353         2044 $lp_position_predictor =
25810             $space_count + $token_lengths_to_go[$ii];
25811             }
25812              
25813             # Store the indentation object for this token.
25814             # This allows us to manipulate the leading whitespace
25815             # (in case we have to reduce indentation to fit a line) without
25816             # having to change any token values.
25817              
25818             #---------------------------------------------------------------
25819             # replace leading whitespace with indentation objects where used
25820             #---------------------------------------------------------------
25821 5767 100       11730 if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
25822 3398         4537 $lp_object_count_this_batch++;
25823 3398         4596 my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_];
25824 3398         4745 $leading_spaces_to_go[$ii] = $lp_object;
25825 3398 100 66     12325 if ( $max_lp_stack > 0
      100        
25826             && $ci_level
25827             && $rLP->[ $max_lp_stack - 1 ]->[_lp_object_] )
25828             {
25829 1379         2918 $reduced_spaces_to_go[$ii] =
25830             $rLP->[ $max_lp_stack - 1 ]->[_lp_object_];
25831             }
25832             else {
25833 2019         3669 $reduced_spaces_to_go[$ii] = $lp_object;
25834             }
25835             }
25836             } ## end loop over all tokens in this batch
25837              
25838             undo_incomplete_lp_indentation()
25839 302 100       1117 if ( !$rOpts_extended_line_up_parentheses );
25840              
25841 302         1165 return $lp_object_count_this_batch;
25842             } ## end sub set_lp_indentation
25843              
25844             sub lp_equals_break_check {
25845              
25846 141     141 0 363 my ( $self, $ii, $ii_last_equals ) = @_;
25847              
25848             # If we come to an opening token after an '=' token of some
25849             # type, see if it would be helpful to 'break' after the '=' to
25850             # save space.
25851              
25852             # Given:
25853             # $ii = index of an opening token in the output batch
25854             # $ii_begin_line = index of token starting next output line
25855             # Update:
25856             # $lp_position_predictor - updated position predictor
25857             # $ii_begin_line = updated starting token index
25858              
25859             # Skip an empty set of parens, such as after channel():
25860             # my $exchange = $self->_channel()->exchange(
25861             # This fixes issues b1318 b1322 b1323 b1328
25862 141         244 my $is_empty_container;
25863 141 100 66     625 if ( $ii_last_equals && $ii < $max_index_to_go ) {
25864 132         326 my $seqno = $type_sequence_to_go[$ii];
25865 132         264 my $inext_nb = $ii + 1;
25866 132 100       391 $inext_nb++
25867             if ( $types_to_go[$inext_nb] eq 'b' );
25868 132         229 my $seqno_nb = $type_sequence_to_go[$inext_nb];
25869 132   100     671 $is_empty_container = $seqno && $seqno_nb && $seqno_nb == $seqno;
25870             }
25871              
25872 141 100 66     789 if ( $ii_last_equals
      66        
25873             && $ii_last_equals > $ii_begin_line
25874             && !$is_empty_container )
25875             {
25876              
25877 104         243 my $seqno = $type_sequence_to_go[$ii];
25878              
25879             # find the position if we break at the '='
25880 104         188 my $i_test = $ii_last_equals;
25881              
25882             # Fix for issue b1229, check if want break before this token
25883             # Fix for issue b1356, if i_test is a blank, the leading spaces may
25884             # be incorrect (if it was an interline blank).
25885             # Fix for issue b1357 .. b1370, i_test must be prev nonblank
25886             # ( the ci value for blanks can vary )
25887             # See also case b223
25888             # Fix for issue b1371-b1374 : all of these and the above are fixed
25889             # by simply backing up one index and setting the leading spaces of
25890             # a blank equal to that of the equals.
25891 104 50       524 if ( $want_break_before{ $types_to_go[$i_test] } ) {
    50          
25892 0         0 $i_test -= 1;
25893 0 0       0 $leading_spaces_to_go[$i_test] =
25894             $leading_spaces_to_go[$ii_last_equals]
25895             if ( $types_to_go[$i_test] eq 'b' );
25896             }
25897 104         192 elsif ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
25898              
25899 104         340 my $test_position = total_line_length( $i_test, $ii );
25900 104         250 my $mll = $maximum_line_length_at_level[ $levels_to_go[$i_test] ];
25901              
25902             #------------------------------------------------------
25903             # Break if structure will reach the maximum line length
25904             #------------------------------------------------------
25905              
25906             # Historically, -lp just used one-half line length here
25907 104         240 my $len_increase = $rOpts_maximum_line_length / 2;
25908              
25909             # For -xlp, we can also use the pre-computed lengths
25910 104         240 my $min_len = $self->[_rcollapsed_length_by_seqno_]->{$seqno};
25911 104 100 100     392 if ( $min_len && $min_len > $len_increase ) {
25912 2         6 $len_increase = $min_len;
25913             }
25914              
25915 104 100 66     1418 if (
      100        
      100        
      33        
      66        
      66        
25916              
25917             # if we might exceed the maximum line length
25918             $lp_position_predictor + $len_increase > $mll
25919              
25920             # if a -bbx flag WANTS a break before this opening token
25921             || ( $seqno
25922             && $self->[_rbreak_before_container_by_seqno_]->{$seqno} )
25923              
25924             # or we are beyond the 1/4 point and there was an old
25925             # break at an assignment (not '=>') [fix for b1035]
25926             || (
25927             $lp_position_predictor >
25928             $mll - $rOpts_maximum_line_length * 3 / 4
25929             && $types_to_go[$ii_last_equals] ne '=>'
25930             && (
25931             $old_breakpoint_to_go[$ii_last_equals]
25932             || ( $ii_last_equals > 0
25933             && $old_breakpoint_to_go[ $ii_last_equals - 1 ] )
25934             || ( $ii_last_equals > 1
25935             && $types_to_go[ $ii_last_equals - 1 ] eq 'b'
25936             && $old_breakpoint_to_go[ $ii_last_equals - 2 ] )
25937             )
25938             )
25939             )
25940             {
25941              
25942             # then make the switch -- note that we do not set a
25943             # real breakpoint here because we may not really need
25944             # one; sub break_lists will do that if necessary.
25945              
25946 16         47 my $Kc = $self->[_K_closing_container_]->{$seqno};
25947 16 100 66     107 if (
      100        
25948              
25949             # For -lp, only if the closing token is in this
25950             # batch (c117). Otherwise it cannot be done by sub
25951             # break_lists.
25952             defined($Kc) && $Kc <= $K_to_go[$max_index_to_go]
25953              
25954             # For -xlp, we only need one nonblank token after
25955             # the opening token.
25956             || $rOpts_extended_line_up_parentheses
25957             )
25958             {
25959 15         34 $ii_begin_line = $i_test + 1;
25960 15         28 $lp_position_predictor = $test_position;
25961              
25962             #--------------------------------------------------
25963             # Fix for an opening container terminating a batch:
25964             #--------------------------------------------------
25965             # To get alignment of a -lp container with its
25966             # contents, we have to put a break after $i_test.
25967             # For $ii<$max_index_to_go, this will be done by
25968             # sub break_lists based on the indentation object.
25969             # But for $ii=$max_index_to_go, the indentation
25970             # object for this seqno will not be created until
25971             # the next batch, so we have to set a break at
25972             # $i_test right now in order to get one.
25973 15 0 66     109 if ( $ii == $max_index_to_go
      33        
      33        
      0        
25974             && !$block_type_to_go[$ii]
25975             && $types_to_go[$ii] eq '{'
25976             && $seqno
25977             && !$self->[_ris_excluded_lp_container_]->{$seqno} )
25978             {
25979 0         0 $self->set_forced_lp_break( $ii_begin_line, $ii );
25980             }
25981             }
25982             }
25983             }
25984 141         307 return;
25985             } ## end sub lp_equals_break_check
25986              
25987             sub lp_decreasing_depth {
25988 935     935 0 1826 my ( $self, $ii ) = @_;
25989              
25990 935         1481 my $rLL = $self->[_rLL_];
25991              
25992 935         1408 my $level = $levels_to_go[$ii];
25993 935         1364 my $ci_level = $ci_levels_to_go[$ii];
25994              
25995             # loop to find the first entry at or completely below this level
25996 935         1351 while (1) {
25997              
25998             # Be sure we have not hit the stack bottom - should never
25999             # happen because only negative levels can get here, and
26000             # $level was forced to be positive above.
26001 1064 50       2278 if ( !$max_lp_stack ) {
26002              
26003             # non-fatal, just keep going except in DEVEL_MODE
26004 0         0 if (DEVEL_MODE) {
26005             Fault(<<EOM);
26006             program bug with -lp: stack_error. level=$level; ci_level=$ci_level; rerun with -nlp
26007             EOM
26008             }
26009 0         0 last;
26010             }
26011              
26012             # save index of token which closes this level
26013 1064 100       2278 if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
26014 608         1050 my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_];
26015              
26016 608         1875 $lp_object->set_closed($ii);
26017              
26018 608         1027 my $comma_count = 0;
26019 608         874 my $arrow_count = 0;
26020 608         1021 my $type = $types_to_go[$ii];
26021 608 100 66     1879 if ( $type eq '}' || $type eq ')' ) {
26022 340         581 my $total_depth = $nesting_depth_to_go[$ii];
26023 340         612 $comma_count = $lp_comma_count{$total_depth};
26024 340         526 $arrow_count = $lp_arrow_count{$total_depth};
26025 340 100       762 $comma_count = 0 unless $comma_count;
26026 340 100       733 $arrow_count = 0 unless $arrow_count;
26027             }
26028              
26029 608         1789 $lp_object->set_comma_count($comma_count);
26030 608         1570 $lp_object->set_arrow_count($arrow_count);
26031              
26032             # Undo any extra indentation if we saw no commas
26033 608         1564 my $available_spaces = $lp_object->get_available_spaces();
26034 608         1451 my $K_start = $lp_object->get_K_begin_line();
26035              
26036 608 100 100     2442 if ( $available_spaces > 0
      100        
      100        
26037             && $K_start >= $K_to_go[0]
26038             && ( $comma_count <= 0 || $arrow_count > 0 ) )
26039             {
26040              
26041 62         247 my $i = $lp_object->get_lp_item_index();
26042              
26043             # Safety check for a valid stack index. It
26044             # should be ok because we just checked that the
26045             # index K of the token associated with this
26046             # indentation is in this batch.
26047 62 50 33     301 if ( $i < 0 || $i > $max_lp_object_list ) {
26048 0         0 my $KK = $K_to_go[$ii];
26049 0         0 my $lno = $rLL->[$KK]->[_LINE_INDEX_];
26050 0         0 DEVEL_MODE && Fault(<<EOM);
26051             Program bug with -lp near line $lno. Stack index i=$i should be >=0 and <= max=$max_lp_object_list
26052             EOM
26053 0         0 last;
26054             }
26055              
26056 62 100       192 if ( $arrow_count == 0 ) {
26057 36         141 $rlp_object_list->[$i]
26058             ->permanently_decrease_available_spaces(
26059             $available_spaces);
26060             }
26061             else {
26062 26         109 $rlp_object_list->[$i]
26063             ->tentatively_decrease_available_spaces(
26064             $available_spaces);
26065             }
26066 62         302 foreach my $j ( $i + 1 .. $max_lp_object_list ) {
26067 310         584 $rlp_object_list->[$j]
26068             ->decrease_SPACES($available_spaces);
26069             }
26070             }
26071             }
26072              
26073             # go down one level
26074 1064         1612 --$max_lp_stack;
26075              
26076 1064         1682 my $rLP_top = $rLP->[$max_lp_stack];
26077 1064         1599 my $ci_lev = $rLP_top->[_lp_ci_level_];
26078 1064         1944 my $lev = $rLP_top->[_lp_level_];
26079 1064         1519 my $spaces = $rLP_top->[_lp_space_count_];
26080 1064 100       2176 if ( $rLP_top->[_lp_object_] ) {
26081 498         800 my $lp_obj = $rLP_top->[_lp_object_];
26082             ( $spaces, $lev, $ci_lev ) =
26083 498         728 @{ $lp_obj->get_spaces_level_ci() };
  498         1156  
26084             }
26085              
26086             # stop when we reach a level at or below the current
26087             # level
26088 1064 100 66     3751 if ( $lev <= $level && $ci_lev <= $ci_level ) {
26089 935         1429 $space_count = $spaces;
26090 935         1502 $current_level = $lev;
26091 935         1329 $current_ci_level = $ci_lev;
26092 935         1678 last;
26093             }
26094             }
26095 935         1688 return;
26096             } ## end sub lp_decreasing_depth
26097              
26098             sub lp_increasing_depth {
26099 1485     1485 0 2570 my ( $self, $ii ) = @_;
26100              
26101 1485         2343 my $rLL = $self->[_rLL_];
26102              
26103 1485         2347 my $type = $types_to_go[$ii];
26104 1485         2240 my $level = $levels_to_go[$ii];
26105 1485         2067 my $ci_level = $ci_levels_to_go[$ii];
26106              
26107 1485         2025 $stack_changed = 1;
26108              
26109             # Compute the standard incremental whitespace. This will be
26110             # the minimum incremental whitespace that will be used. This
26111             # choice results in a smooth transition between the gnu-style
26112             # and the standard style.
26113 1485         2582 my $standard_increment =
26114             ( $level - $current_level ) * $rOpts_indent_columns +
26115             ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
26116              
26117             # Now we have to define how much extra incremental space
26118             # ("$available_space") we want. This extra space will be
26119             # reduced as necessary when long lines are encountered or when
26120             # it becomes clear that we do not have a good list.
26121 1485         1991 my $available_spaces = 0;
26122 1485         1978 my $align_seqno = 0;
26123 1485         3212 my $K_extra_space;
26124              
26125             my $last_nonblank_seqno;
26126 1485         0 my $last_nonblank_block_type;
26127 1485 100       2950 if ( defined($K_last_nonblank) ) {
26128 1455         3050 $last_nonblank_seqno = $rLL->[$K_last_nonblank]->[_TYPE_SEQUENCE_];
26129             $last_nonblank_block_type =
26130             $last_nonblank_seqno
26131 1455 100       3139 ? $self->[_rblock_type_of_seqno_]->{$last_nonblank_seqno}
26132             : undef;
26133             }
26134              
26135 1485         2349 $in_lp_mode = $rLP->[$max_lp_stack]->[_lp_object_];
26136              
26137             #-----------------------------------------------
26138             # Initialize indentation spaces on empty stack..
26139             #-----------------------------------------------
26140 1485 100 100     9078 if ( $max_lp_stack == 0 ) {
    100 66        
      100        
      100        
      66        
      66        
26141 31         81 $space_count = $level * $rOpts_indent_columns;
26142             }
26143              
26144             #----------------------------------------
26145             # Add the standard space increment if ...
26146             #----------------------------------------
26147             elsif (
26148              
26149             # if this is a BLOCK, add the standard increment
26150             $last_nonblank_block_type
26151              
26152             # or if this is not a sequenced item
26153             || !$last_nonblank_seqno
26154              
26155             # or this container is excluded by user rules
26156             # or contains here-docs or multiline qw text
26157             || defined($last_nonblank_seqno)
26158             && $self->[_ris_excluded_lp_container_]->{$last_nonblank_seqno}
26159              
26160             # or if last nonblank token was not structural indentation
26161             || $last_nonblank_type ne '{'
26162              
26163             # and do not start -lp under stress .. fixes b1244, b1255
26164             || !$in_lp_mode && $level >= $high_stress_level
26165              
26166             )
26167             {
26168              
26169             # If we have entered lp mode, use the top lp object to get
26170             # the current indentation spaces because it may have
26171             # changed. Fixes b1285, b1286.
26172 1189 100       2356 if ($in_lp_mode) {
26173 509         1419 $space_count = $in_lp_mode->get_spaces();
26174             }
26175 1189         1805 $space_count += $standard_increment;
26176             }
26177              
26178             #---------------------------------------------------------------
26179             # -lp mode: try to use space to the first non-blank level change
26180             #---------------------------------------------------------------
26181             else {
26182              
26183             # see how much space we have available
26184 265         472 my $test_space_count = $lp_position_predictor;
26185 265         412 my $excess = 0;
26186             my $min_len =
26187 265         469 $self->[_rcollapsed_length_by_seqno_]->{$last_nonblank_seqno};
26188 265         394 my $next_opening_too_far;
26189              
26190 265 100       592 if ( defined($min_len) ) {
26191 54         107 $excess =
26192             $test_space_count +
26193             $min_len -
26194             $maximum_line_length_at_level[$level];
26195 54 100       126 if ( $excess > 0 ) {
26196 3         9 $test_space_count -= $excess;
26197              
26198             # will the next opening token be a long way out?
26199 3         8 $next_opening_too_far =
26200             $lp_position_predictor + $excess >
26201             $maximum_line_length_at_level[$level];
26202             }
26203             }
26204              
26205 265         460 my $rLP_top = $rLP->[$max_lp_stack];
26206 265         412 my $min_gnu_indentation = $rLP_top->[_lp_space_count_];
26207 265 100       605 if ( $rLP_top->[_lp_object_] ) {
26208 148         459 $min_gnu_indentation = $rLP_top->[_lp_object_]->get_spaces();
26209             }
26210 265         432 $available_spaces = $test_space_count - $min_gnu_indentation;
26211              
26212             # Do not startup -lp indentation mode if no space ...
26213             # ... or if it puts the opening far to the right
26214 265 50 33     996 if ( !$in_lp_mode
      66        
26215             && ( $available_spaces <= 0 || $next_opening_too_far ) )
26216             {
26217 0         0 $space_count += $standard_increment;
26218 0         0 $available_spaces = 0;
26219             }
26220              
26221             # Use -lp mode
26222             else {
26223 265         439 $space_count = $test_space_count;
26224              
26225 265         412 $in_lp_mode = 1;
26226 265 100       643 if ( $available_spaces >= $standard_increment ) {
    100          
    50          
26227 202         332 $min_gnu_indentation += $standard_increment;
26228             }
26229             elsif ( $available_spaces > 1 ) {
26230 41         95 $min_gnu_indentation += $available_spaces + 1;
26231              
26232             # The "+1" space can cause mis-alignment if there is no
26233             # blank space between the opening paren and the next
26234             # nonblank token (i.e., -pt=2) and the container does not
26235             # get broken open. So we will mark this token for later
26236             # space removal by sub 'xlp_tweak' if this container
26237             # remains intact (issue git #106).
26238 41 100 66     466 if (
      66        
      33        
      66        
26239             $type ne 'b'
26240              
26241             # Skip if the maximum line length is exceeded here
26242             && $excess <= 0
26243              
26244             # This is only for level changes, not ci level changes.
26245             # But note: this test is here out of caution but I have
26246             # not found a case where it is actually necessary.
26247             && $is_opening_token{$last_nonblank_token}
26248              
26249             # Be sure we are at consecutive nonblanks. This test
26250             # should be true, but it guards against future coding
26251             # changes to level values assigned to blank spaces.
26252             && $ii > 0
26253             && $types_to_go[ $ii - 1 ] ne 'b'
26254              
26255             )
26256             {
26257 8         24 $K_extra_space = $K_to_go[$ii];
26258             }
26259             }
26260             elsif ( $is_opening_token{$last_nonblank_token} ) {
26261 22 100       90 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
26262 13         32 $min_gnu_indentation += 2;
26263             }
26264             else {
26265 9         15 $min_gnu_indentation += 1;
26266             }
26267             }
26268             else {
26269 0         0 $min_gnu_indentation += $standard_increment;
26270             }
26271 265         402 $available_spaces = $space_count - $min_gnu_indentation;
26272              
26273 265 100       625 if ( $available_spaces < 0 ) {
26274 54         82 $space_count = $min_gnu_indentation;
26275 54         85 $available_spaces = 0;
26276             }
26277 265         566 $align_seqno = $last_nonblank_seqno;
26278             }
26279             }
26280              
26281             #-------------------------------------------
26282             # update the state, but not on a blank token
26283             #-------------------------------------------
26284 1485 100       3186 if ( $type ne 'b' ) {
26285              
26286 1122 100       2473 if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
26287 498         1616 $rLP->[$max_lp_stack]->[_lp_object_]->set_have_child(1);
26288 498         689 $in_lp_mode = 1;
26289             }
26290              
26291             #----------------------------------------
26292             # Create indentation object if in lp-mode
26293             #----------------------------------------
26294 1122         1636 ++$max_lp_stack;
26295 1122         1589 my $lp_object;
26296 1122 100       2169 if ($in_lp_mode) {
26297              
26298             # A negative level implies not to store the item in the
26299             # item_list
26300 608         1015 my $lp_item_index = 0;
26301 608 50       1296 if ( $level >= 0 ) {
26302 608         943 $lp_item_index = ++$max_lp_object_list;
26303             }
26304              
26305 608         933 my $K_begin_line = 0;
26306 608 50 33     2156 if ( $ii_begin_line >= 0
26307             && $ii_begin_line <= $max_index_to_go )
26308             {
26309 608         1061 $K_begin_line = $K_to_go[$ii_begin_line];
26310             }
26311              
26312             # Minor Fix: when creating indentation at a side
26313             # comment we don't know what the space to the actual
26314             # next code token will be. We will allow a space for
26315             # sub correct_lp to move it in if necessary.
26316 608 100 100     1657 if ( $type eq '#'
      66        
26317             && $max_index_to_go > 0
26318             && $align_seqno )
26319             {
26320 2         3 $available_spaces += 1;
26321             }
26322              
26323 608         966 my $standard_spaces = $leading_spaces_to_go[$ii];
26324 608         2294 $lp_object = Perl::Tidy::IndentationItem->new(
26325             spaces => $space_count,
26326             level => $level,
26327             ci_level => $ci_level,
26328             available_spaces => $available_spaces,
26329             lp_item_index => $lp_item_index,
26330             align_seqno => $align_seqno,
26331             stack_depth => $max_lp_stack,
26332             K_begin_line => $K_begin_line,
26333             standard_spaces => $standard_spaces,
26334             K_extra_space => $K_extra_space,
26335             );
26336              
26337 608         931 DEBUG_LP && do {
26338             my $tok_beg = $rLL->[$K_begin_line]->[_TOKEN_];
26339             my $token = $tokens_to_go[$ii];
26340             print STDERR <<EOM;
26341             DEBUG_LP: Created object at tok=$token type=$type for seqno $align_seqno level=$level ci=$ci_level spaces=$space_count avail=$available_spaces kbeg=$K_begin_line tokbeg=$tok_beg lp=$lp_position_predictor
26342             EOM
26343             };
26344              
26345 608 50       1360 if ( $level >= 0 ) {
26346 608         1126 $rlp_object_list->[$max_lp_object_list] = $lp_object;
26347             }
26348              
26349 608 100 66     2220 if ( $is_opening_token{$last_nonblank_token}
26350             && $last_nonblank_seqno )
26351             {
26352 259         703 $self->[_rlp_object_by_seqno_]->{$last_nonblank_seqno} =
26353             $lp_object;
26354             }
26355             }
26356              
26357             #------------------------------------
26358             # Store this indentation on the stack
26359             #------------------------------------
26360 1122         1991 $rLP->[$max_lp_stack]->[_lp_ci_level_] = $ci_level;
26361 1122         1727 $rLP->[$max_lp_stack]->[_lp_level_] = $level;
26362 1122         1804 $rLP->[$max_lp_stack]->[_lp_object_] = $lp_object;
26363 1122         1809 $rLP->[$max_lp_stack]->[_lp_container_seqno_] =
26364             $last_nonblank_seqno;
26365 1122         1657 $rLP->[$max_lp_stack]->[_lp_space_count_] = $space_count;
26366              
26367             # If the opening paren is beyond the half-line length, then
26368             # we will use the minimum (standard) indentation. This will
26369             # help avoid problems associated with running out of space
26370             # near the end of a line. As a result, in deeply nested
26371             # lists, there will be some indentations which are limited
26372             # to this minimum standard indentation. But the most deeply
26373             # nested container will still probably be able to shift its
26374             # parameters to the right for proper alignment, so in most
26375             # cases this will not be noticeable.
26376 1122 100 66     3146 if ( $available_spaces > 0 && $lp_object ) {
26377 169         511 my $halfway =
26378             $maximum_line_length_at_level[$level] -
26379             $rOpts_maximum_line_length / 2;
26380 169 100       558 $lp_object->tentatively_decrease_available_spaces(
26381             $available_spaces)
26382             if ( $space_count > $halfway );
26383             }
26384             }
26385 1485         2673 return;
26386             } ## end sub lp_increasing_depth
26387              
26388             sub check_for_long_gnu_style_lines {
26389              
26390             # look at the current estimated maximum line length, and
26391             # remove some whitespace if it exceeds the desired maximum
26392 1057     1057 0 1854 my ($ii_to_go) = @_;
26393              
26394             # nothing can be done if no stack items defined for this line
26395 1057 100       2132 return if ( $max_lp_object_list < 0 );
26396              
26397             # See if we have exceeded the maximum desired line length ..
26398             # keep 2 extra free because they are needed in some cases
26399             # (result of trial-and-error testing)
26400 815         1131 my $tol = 2;
26401              
26402             # But reduce tol to 0 at a terminal comma; fixes b1432
26403 815 100 66     2385 if ( $tokens_to_go[$ii_to_go] eq ','
26404             && $ii_to_go < $max_index_to_go )
26405             {
26406 32         72 my $in = $ii_to_go + 1;
26407 32 50 33     160 if ( $types_to_go[$in] eq 'b' && $in < $max_index_to_go ) { $in++ }
  32         55  
26408 32 100       124 if ( $is_closing_token{ $tokens_to_go[$in] } ) {
26409 7         28 $tol = 0;
26410             }
26411             }
26412              
26413 815         1498 my $spaces_needed =
26414             $lp_position_predictor -
26415             $maximum_line_length_at_level[ $levels_to_go[$ii_to_go] ] +
26416             $tol;
26417              
26418 815 100       1825 return if ( $spaces_needed <= 0 );
26419              
26420             # We are over the limit, so try to remove a requested number of
26421             # spaces from leading whitespace. We are only allowed to remove
26422             # from whitespace items created on this batch, since others have
26423             # already been used and cannot be undone.
26424 2         6 my @candidates = ();
26425              
26426             # loop over all whitespace items created for the current batch
26427 2         7 foreach my $i ( 0 .. $max_lp_object_list ) {
26428 200         311 my $item = $rlp_object_list->[$i];
26429              
26430             # item must still be open to be a candidate (otherwise it
26431             # cannot influence the current token)
26432 200 100       326 next if ( $item->get_closed() >= 0 );
26433              
26434 13         29 my $available_spaces = $item->get_available_spaces();
26435              
26436 13 100       30 if ( $available_spaces > 0 ) {
26437 8         24 push( @candidates, [ $i, $available_spaces ] );
26438             }
26439             }
26440              
26441 2 50       15 return unless (@candidates);
26442              
26443             # sort by available whitespace so that we can remove whitespace
26444             # from the maximum available first.
26445             @candidates =
26446 2 50       19 sort { $b->[1] <=> $a->[1] || $a->[0] <=> $b->[0] } @candidates;
  10         32  
26447              
26448             # keep removing whitespace until we are done or have no more
26449 2         6 foreach my $candidate (@candidates) {
26450 2         5 my ( $i, $available_spaces ) = @{$candidate};
  2         7  
26451 2 50       6 my $deleted_spaces =
26452             ( $available_spaces > $spaces_needed )
26453             ? $spaces_needed
26454             : $available_spaces;
26455              
26456             # remove the incremental space from this item
26457 2         10 $rlp_object_list->[$i]->decrease_available_spaces($deleted_spaces);
26458              
26459 2         7 my $i_debug = $i;
26460              
26461             # update the leading whitespace of this item and all items
26462             # that came after it
26463 2         4 $i -= 1;
26464 2         9 while ( ++$i <= $max_lp_object_list ) {
26465              
26466 200         371 my $old_spaces = $rlp_object_list->[$i]->get_spaces();
26467 200 50       331 if ( $old_spaces >= $deleted_spaces ) {
26468 200         336 $rlp_object_list->[$i]->decrease_SPACES($deleted_spaces);
26469             }
26470              
26471             # shouldn't happen except for code bug:
26472             else {
26473             # non-fatal, keep going except in DEVEL_MODE
26474 0         0 if (DEVEL_MODE) {
26475             my $level = $rlp_object_list->[$i_debug]->get_level();
26476             my $ci_level =
26477             $rlp_object_list->[$i_debug]->get_ci_level();
26478             my $old_level = $rlp_object_list->[$i]->get_level();
26479             my $old_ci_level =
26480             $rlp_object_list->[$i]->get_ci_level();
26481             Fault(<<EOM);
26482             program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level
26483             EOM
26484             }
26485             }
26486             }
26487 2         10 $lp_position_predictor -= $deleted_spaces;
26488 2         4 $spaces_needed -= $deleted_spaces;
26489 2 50       12 last unless ( $spaces_needed > 0 );
26490             }
26491 2         9 return;
26492             } ## end sub check_for_long_gnu_style_lines
26493              
26494             sub undo_incomplete_lp_indentation {
26495              
26496             #------------------------------------------------------------------
26497             # Undo indentation for all incomplete -lp indentation levels of the
26498             # current batch unless -xlp is set.
26499             #------------------------------------------------------------------
26500              
26501             # This routine is called once after each output stream batch is
26502             # finished to undo indentation for all incomplete -lp indentation
26503             # levels. If this routine is called then comments and blank lines will
26504             # disrupt this indentation style. In older versions of perltidy this
26505             # was always done because it could cause problems otherwise, but recent
26506             # improvements allow fairly good results to be obtained by skipping
26507             # this step with the -xlp flag.
26508              
26509             # nothing to do if no stack items defined for this line
26510 229 100   229 0 596 return if ( $max_lp_object_list < 0 );
26511              
26512             # loop over all whitespace items created for the current batch
26513 83         278 foreach my $i ( 0 .. $max_lp_object_list ) {
26514 527         910 my $item = $rlp_object_list->[$i];
26515              
26516             # only look for open items
26517 527 100       1034 next if ( $item->get_closed() >= 0 );
26518              
26519             # Tentatively remove all of the available space
26520             # (The vertical aligner will try to get it back later)
26521 19         62 my $available_spaces = $item->get_available_spaces();
26522 19 100       67 if ( $available_spaces > 0 ) {
26523              
26524             # delete incremental space for this item
26525 9         54 $rlp_object_list->[$i]
26526             ->tentatively_decrease_available_spaces($available_spaces);
26527              
26528             # Reduce the total indentation space of any nodes that follow
26529             # Note that any such nodes must necessarily be dependents
26530             # of this node.
26531 9         34 foreach ( $i + 1 .. $max_lp_object_list ) {
26532 17         55 $rlp_object_list->[$_]->decrease_SPACES($available_spaces);
26533             }
26534             }
26535             }
26536 83         174 return;
26537             } ## end sub undo_incomplete_lp_indentation
26538             } ## end closure set_lp_indentation
26539              
26540             #----------------------------------------------------------------------
26541             # sub to set a requested break before an opening container in -lp mode.
26542             #----------------------------------------------------------------------
26543             sub set_forced_lp_break {
26544              
26545 109     109 0 301 my ( $self, $i_begin_line, $i_opening ) = @_;
26546              
26547             # Given:
26548             # $i_begin_line = index of break in the _to_go arrays
26549             # $i_opening = index of the opening container
26550              
26551             # Set any requested break at a token before this opening container
26552             # token. This is often an '=' or '=>' but can also be things like
26553             # '.', ',', 'return'. It was defined by sub set_lp_indentation.
26554              
26555             # Important:
26556             # For intact containers, call this at the closing token.
26557             # For broken containers, call this at the opening token.
26558             # This will avoid needless breaks when it turns out that the
26559             # container does not actually get broken. This isn't known until
26560             # the closing container for intact blocks.
26561              
26562             return
26563 109 50 33     503 if ( $i_begin_line < 0
26564             || $i_begin_line > $max_index_to_go );
26565              
26566             # Handle request to put a break break immediately before this token.
26567             # We may not want to do that since we are also breaking after it.
26568 109 100       302 if ( $i_begin_line == $i_opening ) {
26569              
26570             # The following rules should be reviewed. We may want to always
26571             # allow the break. If we do not do the break, the indentation
26572             # may be off.
26573              
26574             # RULE: don't break before it unless it is welded to a qw.
26575             # This works well, but we may want to relax this to allow
26576             # breaks in additional cases.
26577             return
26578 18 50       95 if ( !$self->[_rK_weld_right_]->{ $K_to_go[$i_opening] } );
26579 0 0       0 return unless ( $types_to_go[$max_index_to_go] eq 'q' );
26580             }
26581              
26582             # Only break for breakpoints at the same
26583             # indentation level as the opening paren
26584 91         213 my $test1 = $nesting_depth_to_go[$i_opening];
26585 91         184 my $test2 = $nesting_depth_to_go[$i_begin_line];
26586 91 100       235 return if ( $test2 != $test1 );
26587              
26588             # Back up at a blank (fixes case b932)
26589 90         153 my $ibr = $i_begin_line - 1;
26590 90 100 66     358 if ( $ibr > 0
26591             && $types_to_go[$ibr] eq 'b' )
26592             {
26593 44         72 $ibr--;
26594             }
26595 90 100       232 if ( $ibr >= 0 ) {
26596 44         101 my $i_nonblank = $self->set_forced_breakpoint($ibr);
26597              
26598             # Crude patch to prevent sub recombine_breakpoints from undoing
26599             # this break, especially after an '='. It will leave old
26600             # breakpoints alone. See c098/x045 for some examples.
26601 44 100       163 if ( defined($i_nonblank) ) {
26602 33         59 $old_breakpoint_to_go[$i_nonblank] = 1;
26603             }
26604             }
26605 90         191 return;
26606             } ## end sub set_forced_lp_break
26607              
26608             sub reduce_lp_indentation {
26609              
26610             # reduce the leading whitespace at token $i if possible by $spaces_needed
26611             # (a large value of $spaces_needed will remove all excess space)
26612             # NOTE: to be called from break_lists only for a sequence of tokens
26613             # contained between opening and closing parens/braces/brackets
26614              
26615 6     6 0 20 my ( $self, $i, $spaces_wanted ) = @_;
26616 6         12 my $deleted_spaces = 0;
26617              
26618 6         16 my $item = $leading_spaces_to_go[$i];
26619 6         18 my $available_spaces = $item->get_available_spaces();
26620              
26621 6 100 66     73 if (
      33        
26622             $available_spaces > 0
26623             && ( ( $spaces_wanted <= $available_spaces )
26624             || !$item->get_have_child() )
26625             )
26626             {
26627              
26628             # we'll remove these spaces, but mark them as recoverable
26629 5         28 $deleted_spaces =
26630             $item->tentatively_decrease_available_spaces($spaces_wanted);
26631             }
26632              
26633 6         16 return $deleted_spaces;
26634             } ## end sub reduce_lp_indentation
26635              
26636             ###########################################################
26637             # CODE SECTION 13: Preparing batches for vertical alignment
26638             ###########################################################
26639              
26640             sub check_convey_batch_input {
26641              
26642             # Check for valid input to sub convey_batch_to_vertical_aligner. An
26643             # error here would most likely be due to an error in the calling
26644             # routine 'sub grind_batch_of_CODE'.
26645 0     0 0 0 my ( $self, $ri_first, $ri_last ) = @_;
26646              
26647 0 0 0     0 if ( !defined($ri_first) || !defined($ri_last) ) {
26648 0         0 Fault(<<EOM);
26649             Undefined line ranges ri_first and/r ri_last
26650             EOM
26651             }
26652              
26653 0         0 my $nmax = @{$ri_first} - 1;
  0         0  
26654 0         0 my $nmax_check = @{$ri_last} - 1;
  0         0  
26655 0 0 0     0 if ( $nmax < 0 || $nmax_check < 0 || $nmax != $nmax_check ) {
      0        
26656 0         0 Fault(<<EOM);
26657             Line range index error: nmax=$nmax but nmax_check=$nmax_check
26658             These should be equal and >=0
26659             EOM
26660             }
26661 0         0 my ( $ibeg, $iend );
26662 0         0 foreach my $n ( 0 .. $nmax ) {
26663 0         0 my $ibeg_m = $ibeg;
26664 0         0 my $iend_m = $iend;
26665 0         0 $ibeg = $ri_first->[$n];
26666 0         0 $iend = $ri_last->[$n];
26667 0 0 0     0 if ( $ibeg < 0 || $iend < $ibeg || $iend > $max_index_to_go ) {
      0        
26668 0         0 Fault(<<EOM);
26669             Bad line range at line index $n of $nmax: ibeg=$ibeg, iend=$iend
26670             These should have iend >= ibeg and be in the range (0..$max_index_to_go)
26671             EOM
26672             }
26673 0 0       0 next if ( $n == 0 );
26674 0 0       0 if ( $ibeg <= $iend_m ) {
26675 0         0 Fault(<<EOM);
26676             Line ranges overlap: iend=$iend_m at line $n-1 but ibeg=$ibeg for line $n
26677             EOM
26678             }
26679             }
26680 0         0 return;
26681             } ## end sub check_convey_batch_input
26682              
26683             sub convey_batch_to_vertical_aligner {
26684              
26685 4547     4547 0 8709 my ($self) = @_;
26686              
26687             # This routine receives a batch of code for which the final line breaks
26688             # have been defined. Here we prepare the lines for passing to the vertical
26689             # aligner. We do the following tasks:
26690             # - mark certain vertical alignment tokens, such as '=', in each line
26691             # - make final indentation adjustments
26692             # - do logical padding: insert extra blank spaces to help display certain
26693             # logical constructions
26694             # - send the line to the vertical aligner
26695              
26696 4547         8262 my $rLL = $self->[_rLL_];
26697 4547         7441 my $Klimit = $self->[_Klimit_];
26698 4547         7224 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
26699 4547         6852 my $this_batch = $self->[_this_batch_];
26700              
26701 4547         7406 my $do_not_pad = $this_batch->[_do_not_pad_];
26702 4547         7242 my $starting_in_quote = $this_batch->[_starting_in_quote_];
26703 4547         6933 my $ending_in_quote = $this_batch->[_ending_in_quote_];
26704 4547         7178 my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
26705 4547         7023 my $batch_CODE_type = $this_batch->[_batch_CODE_type_];
26706 4547         6771 my $ri_first = $this_batch->[_ri_first_];
26707 4547         6746 my $ri_last = $this_batch->[_ri_last_];
26708              
26709 4547         6758 $self->check_convey_batch_input( $ri_first, $ri_last ) if (DEVEL_MODE);
26710              
26711 4547         6420 my $n_last_line = @{$ri_first} - 1;
  4547         8875  
26712              
26713 4547         7346 my $ibeg_next = $ri_first->[0];
26714 4547         7240 my $iend_next = $ri_last->[0];
26715              
26716 4547         7647 my $type_beg_next = $types_to_go[$ibeg_next];
26717 4547         7228 my $type_end_next = $types_to_go[$iend_next];
26718 4547         7482 my $token_beg_next = $tokens_to_go[$ibeg_next];
26719              
26720 4547         8880 my $rindentation_list = [0]; # ref to indentations for each line
26721 4547         7685 my ( $cscw_block_comment, $closing_side_comment, $is_block_comment );
26722              
26723 4547 100 100     14158 if ( !$max_index_to_go && $type_beg_next eq '#' ) {
26724 632         1181 $is_block_comment = 1;
26725             }
26726              
26727 4547 100       9326 if ($rOpts_closing_side_comments) {
26728 61         188 ( $closing_side_comment, $cscw_block_comment ) =
26729             $self->add_closing_side_comment( $ri_first, $ri_last );
26730             }
26731              
26732 4547 100 100     15966 if ( $n_last_line > 0 || $rOpts_extended_continuation_indentation ) {
26733 828         4382 $self->undo_ci( $ri_first, $ri_last,
26734             $this_batch->[_rix_seqno_controlling_ci_] );
26735             }
26736              
26737             # for multi-line batches ...
26738 4547 100       10535 if ( $n_last_line > 0 ) {
26739              
26740             # flush before a long if statement to avoid unwanted alignment
26741             $self->flush_vertical_aligner()
26742             if ( $type_beg_next eq 'k'
26743 753 100 100     3310 && $is_if_unless{$token_beg_next} );
26744              
26745 753 100       4264 $self->set_logical_padding( $ri_first, $ri_last, $starting_in_quote )
26746             if ($rOpts_logical_padding);
26747              
26748 753 100       2308 $self->xlp_tweak( $ri_first, $ri_last )
26749             if ($rOpts_extended_line_up_parentheses);
26750             }
26751              
26752 4547         6391 if (DEVEL_MODE) { $self->check_batch_summed_lengths() }
26753              
26754             # ----------------------------------------------------------
26755             # define the vertical alignments for all lines of this batch
26756             # ----------------------------------------------------------
26757 4547         6693 my $rline_alignments;
26758              
26759 4547 100       9138 if ( !$max_index_to_go ) {
26760              
26761             # Optional shortcut for single token ...
26762             # = [ [ $rtokens, $rfields, $rpatterns, $rfield_lengths ] ];
26763 1284         5993 $rline_alignments = [
26764             [
26765             [],
26766             [ $tokens_to_go[0] ],
26767             [ $types_to_go[0] ],
26768             [ $summed_lengths_to_go[1] - $summed_lengths_to_go[0] ],
26769             ]
26770             ];
26771             }
26772             else {
26773 3263         10473 $rline_alignments =
26774             $self->make_vertical_alignments( $ri_first, $ri_last );
26775             }
26776              
26777             # ----------------------------------------------
26778             # loop to send each line to the vertical aligner
26779             # ----------------------------------------------
26780 4547         9196 my ( $type_beg, $type_end, $token_beg, $ljump );
26781              
26782 4547         10241 for my $n ( 0 .. $n_last_line ) {
26783              
26784             # ----------------------------------------------------------------
26785             # This hash will hold the args for vertical alignment of this line
26786             # We will populate it as we go.
26787             # ----------------------------------------------------------------
26788 7366         13188 my $rvao_args = {};
26789              
26790 7366         12274 my $type_beg_last = $type_beg;
26791 7366         10949 my $type_end_last = $type_end;
26792              
26793 7366         10648 my $ibeg = $ibeg_next;
26794 7366         10561 my $iend = $iend_next;
26795 7366         12397 my $Kbeg = $K_to_go[$ibeg];
26796 7366         11281 my $Kend = $K_to_go[$iend];
26797              
26798 7366         11083 $type_beg = $type_beg_next;
26799 7366         10510 $type_end = $type_end_next;
26800 7366         11310 $token_beg = $token_beg_next;
26801              
26802             # ---------------------------------------------------
26803             # Define the check value 'Kend' to send for this line
26804             # ---------------------------------------------------
26805             # The 'Kend' value is an integer for checking that lines come out of
26806             # the far end of the pipeline in the right order. It increases
26807             # linearly along the token stream. But we only send ending K values of
26808             # non-comments down the pipeline. This is equivalent to checking that
26809             # the last CODE_type is blank or equal to 'VER'. See also sub
26810             # resync_lines_and_tokens for related coding. Note that
26811             # '$batch_CODE_type' is the code type of the line to which the ending
26812             # token belongs.
26813 7366 100 100     20393 my $Kend_code =
26814             $batch_CODE_type && $batch_CODE_type ne 'VER' ? undef : $Kend;
26815              
26816             # Get some vars on line [n+1], if any,
26817             # and define $ljump = level jump needed by 'sub get_final_indentation'
26818 7366 100 100     25875 if ( $n < $n_last_line ) {
    100          
26819 2819         5716 $ibeg_next = $ri_first->[ $n + 1 ];
26820 2819         5156 $iend_next = $ri_last->[ $n + 1 ];
26821              
26822 2819         4785 $type_beg_next = $types_to_go[$ibeg_next];
26823 2819         4478 $type_end_next = $types_to_go[$iend_next];
26824 2819         4432 $token_beg_next = $tokens_to_go[$ibeg_next];
26825              
26826 2819         4477 my $Kbeg_next = $K_to_go[$ibeg_next];
26827 2819         9784 $ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
26828             }
26829             elsif ( !$is_block_comment && $Kend < $Klimit ) {
26830              
26831             # Patch for git #51, a bare closing qw paren was not outdented
26832             # if the flag '-nodelete-old-newlines is set
26833             # Note that we are just looking ahead for the next nonblank
26834             # character. We could scan past an arbitrary number of block
26835             # comments or hanging side comments by calling K_next_code, but it
26836             # could add significant run time with very little to be gained.
26837 3376         5846 my $Kbeg_next = $Kend + 1;
26838 3376 100 100     16875 if ( $Kbeg_next < $Klimit
26839             && $rLL->[$Kbeg_next]->[_TYPE_] eq 'b' )
26840             {
26841 2850         4517 $Kbeg_next += 1;
26842             }
26843             $ljump =
26844 3376         8072 $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
26845             }
26846             else {
26847 1171         2812 $ljump = 0;
26848             }
26849              
26850             # ---------------------------------------------
26851             # get the vertical alignment info for this line
26852             # ---------------------------------------------
26853              
26854             # The lines are broken into fields which can be spaced by the vertical
26855             # to achieve vertical alignment. These fields are the actual text
26856             # which will be output, so from here on no more changes can be made to
26857             # the text.
26858 7366         11835 my $rline_alignment = $rline_alignments->[$n];
26859             my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
26860 7366         10927 @{$rline_alignment};
  7366         16034  
26861              
26862             # Programming check: (shouldn't happen)
26863             # The number of tokens which separate the fields must always be
26864             # one less than the number of fields. If this is not true then
26865             # an error has been introduced in sub make_alignment_patterns.
26866 7366         10677 if (DEVEL_MODE) {
26867             if ( @{$rfields} && ( @{$rtokens} != ( @{$rfields} - 1 ) ) ) {
26868             my $nt = @{$rtokens};
26869             my $nf = @{$rfields};
26870             my $msg = <<EOM;
26871             Program bug in Perl::Tidy::Formatter, probably in sub 'make_alignment_patterns':
26872             The number of tokens = $nt should be one less than number of fields: $nf
26873             EOM
26874             Fault($msg);
26875             }
26876             }
26877              
26878             # --------------------------------------
26879             # get the final indentation of this line
26880             # --------------------------------------
26881             my (
26882              
26883 7366         21802 $indentation,
26884             $lev,
26885             $level_end,
26886             $i_terminal,
26887             $is_outdented_line,
26888              
26889             ) = $self->get_final_indentation(
26890              
26891             $ibeg,
26892             $iend,
26893             $rfields,
26894             $rpatterns,
26895             $ri_first,
26896             $ri_last,
26897             $rindentation_list,
26898             $ljump,
26899             $starting_in_quote,
26900             $is_static_block_comment,
26901              
26902             );
26903              
26904             # --------------------------------
26905             # define flag 'outdent_long_lines'
26906             # --------------------------------
26907 7366 100 100     35941 if (
      100        
      100        
      100        
26908             # we will allow outdenting of long lines..
26909             # which are long quotes, if allowed
26910             ( $type_beg eq 'Q' && $rOpts_outdent_long_quotes )
26911              
26912             # which are long block comments, if allowed
26913             || (
26914             $type_beg eq '#'
26915             && $rOpts_outdent_long_comments
26916              
26917             # but not if this is a static block comment
26918             && !$is_static_block_comment
26919             )
26920             )
26921             {
26922 884         2458 $rvao_args->{outdent_long_lines} = 1;
26923              
26924             # convert -lp indentation objects to spaces to allow outdenting
26925 884 100       2524 if ( ref($indentation) ) {
26926 14         63 $indentation = $indentation->get_spaces();
26927             }
26928             }
26929              
26930             # --------------------------------------------------
26931             # define flags 'break_alignment_before' and '_after'
26932             # --------------------------------------------------
26933              
26934             # These flags tell the vertical aligner to stop alignment before or
26935             # after this line.
26936 7366 100 100     29593 if ($is_outdented_line) {
    100          
    100          
26937 26         81 $rvao_args->{break_alignment_before} = 1;
26938 26         88 $rvao_args->{break_alignment_after} = 1;
26939             }
26940             elsif ($do_not_pad) {
26941 50         217 $rvao_args->{break_alignment_before} = 1;
26942             }
26943              
26944             # flush at an 'if' which follows a line with (1) terminal semicolon
26945             # or (2) terminal block_type which is not an 'if'. This prevents
26946             # unwanted alignment between the lines.
26947             elsif ( $type_beg eq 'k' && $token_beg eq 'if' ) {
26948 134         383 my $type_m = 'b';
26949 134         262 my $block_type_m;
26950              
26951 134 100       498 if ( $Kbeg > 0 ) {
26952 105         255 my $Km = $Kbeg - 1;
26953 105         286 $type_m = $rLL->[$Km]->[_TYPE_];
26954 105 100 66     579 if ( $type_m eq 'b' && $Km > 0 ) {
26955 93         201 $Km -= 1;
26956 93         232 $type_m = $rLL->[$Km]->[_TYPE_];
26957             }
26958 105 100 100     513 if ( $type_m eq '#' && $Km > 0 ) {
26959 23         52 $Km -= 1;
26960 23         64 $type_m = $rLL->[$Km]->[_TYPE_];
26961 23 100 66     112 if ( $type_m eq 'b' && $Km > 0 ) {
26962 9         17 $Km -= 1;
26963 9         24 $type_m = $rLL->[$Km]->[_TYPE_];
26964             }
26965             }
26966              
26967 105         248 my $seqno_m = $rLL->[$Km]->[_TYPE_SEQUENCE_];
26968 105 100       346 if ($seqno_m) {
26969 44         156 $block_type_m = $self->[_rblock_type_of_seqno_]->{$seqno_m};
26970             }
26971             }
26972              
26973             # break after anything that is not if-like
26974 134 50 100     966 if (
      100        
      66        
      66        
      33        
      100        
26975             $type_m eq ';'
26976             || ( $type_m eq '}'
26977             && $block_type_m
26978             && $block_type_m ne 'if'
26979             && $block_type_m ne 'unless'
26980             && $block_type_m ne 'elsif'
26981             && $block_type_m ne 'else' )
26982             )
26983             {
26984 35         122 $rvao_args->{break_alignment_before} = 1;
26985             }
26986             }
26987              
26988             # ----------------------------------
26989             # define 'rvertical_tightness_flags'
26990             # ----------------------------------
26991             # These flags tell the vertical aligner if/when to combine consecutive
26992             # lines, based on the user input parameters.
26993             $rvao_args->{rvertical_tightness_flags} =
26994 7366 100 100     27810 $self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
26995             $ri_first, $ri_last, $ending_in_quote, $closing_side_comment )
26996             unless ( $is_block_comment
26997             || $self->[_no_vertical_tightness_flags_] );
26998              
26999             # ----------------------------------
27000             # define 'is_terminal_ternary' flag
27001             # ----------------------------------
27002              
27003             # This flag is set at the final ':' of a ternary chain to request
27004             # vertical alignment of the final term. Here is a slightly complex
27005             # example:
27006             #
27007             # $self->{_text} = (
27008             # !$section ? ''
27009             # : $type eq 'item' ? "the $section entry"
27010             # : "the section on $section"
27011             # )
27012             # . (
27013             # $page
27014             # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
27015             # : ' elsewhere in this document'
27016             # );
27017             #
27018 7366 100 100     31193 if ( $type_beg eq ':' || $n > 0 && $type_end_last eq ':' ) {
      100        
27019              
27020 97         241 my $is_terminal_ternary = 0;
27021 97 100       401 my $last_leading_type = $n > 0 ? $type_beg_last : ':';
27022 97         236 my $terminal_type = $types_to_go[$i_terminal];
27023 97 100 100     574 if ( $terminal_type ne ';'
      66        
27024             && $n_last_line > $n
27025             && $level_end == $lev )
27026             {
27027 61         143 my $Kbeg_next = $K_to_go[$ibeg_next];
27028 61         137 $level_end = $rLL->[$Kbeg_next]->[_LEVEL_];
27029 61         153 $terminal_type = $rLL->[$Kbeg_next]->[_TYPE_];
27030             }
27031 97 100 100     711 if (
      100        
27032             $last_leading_type eq ':'
27033             && ( ( $terminal_type eq ';' && $level_end <= $lev )
27034             || ( $terminal_type ne ':' && $level_end < $lev ) )
27035             )
27036             {
27037              
27038             # the terminal term must not contain any ternary terms, as in
27039             # my $ECHO = (
27040             # $Is_MSWin32 ? ".\\echo$$"
27041             # : $Is_MacOS ? ":echo$$"
27042             # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
27043             # );
27044 16         59 $is_terminal_ternary = 1;
27045              
27046 16         65 my $KP = $rLL->[$Kbeg]->[_KNEXT_SEQ_ITEM_];
27047 16   100     140 while ( defined($KP) && $KP <= $Kend ) {
27048 6         14 my $type_KP = $rLL->[$KP]->[_TYPE_];
27049 6 50 33     28 if ( $type_KP eq '?' || $type_KP eq ':' ) {
27050 0         0 $is_terminal_ternary = 0;
27051 0         0 last;
27052             }
27053 6         19 $KP = $rLL->[$KP]->[_KNEXT_SEQ_ITEM_];
27054             }
27055             }
27056 97         267 $rvao_args->{is_terminal_ternary} = $is_terminal_ternary;
27057             }
27058              
27059             # -------------------------------------------------
27060             # add any new closing side comment to the last line
27061             # -------------------------------------------------
27062 7366 50 66     16433 if ( $closing_side_comment && $n == $n_last_line && @{$rfields} ) {
  9   66     43  
27063              
27064 9         35 $rfields->[-1] .= " $closing_side_comment";
27065              
27066             # NOTE: Patch for csc. We can just use 1 for the length of the csc
27067             # because its length should not be a limiting factor from here on.
27068 9         22 $rfield_lengths->[-1] += 2;
27069              
27070             # repack
27071 9         29 $rline_alignment =
27072             [ $rtokens, $rfields, $rpatterns, $rfield_lengths ];
27073             }
27074              
27075             # ------------------------
27076             # define flag 'list_seqno'
27077             # ------------------------
27078              
27079             # This flag indicates if this line is contained in a multi-line list
27080 7366 100       14859 if ( !$is_block_comment ) {
27081 6734         12087 my $parent_seqno = $parent_seqno_to_go[$ibeg];
27082 6734         17923 $rvao_args->{list_seqno} = $ris_list_by_seqno->{$parent_seqno};
27083             }
27084              
27085             # The alignment tokens have been marked with nesting_depths, so we need
27086             # to pass nesting depths to the vertical aligner. They remain invariant
27087             # under all formatting operations. Previously, level values were sent
27088             # to the aligner. But they can be altered in welding and other
27089             # operations, and this can lead to alignment errors.
27090 7366         12119 my $nesting_depth_beg = $nesting_depth_to_go[$ibeg];
27091 7366         11224 my $nesting_depth_end = $nesting_depth_to_go[$iend];
27092              
27093             # A quirk in the definition of nesting depths is that the closing token
27094             # has the same depth as internal tokens. The vertical aligner is
27095             # programmed to expect them to have the lower depth, so we fix this.
27096 7366 100       18014 if ( $is_closing_type{ $types_to_go[$ibeg] } ) { $nesting_depth_beg-- }
  1234         2271  
27097 7366 100       17196 if ( $is_closing_type{ $types_to_go[$iend] } ) { $nesting_depth_end-- }
  1011         1803  
27098              
27099             # Adjust nesting depths to keep -lp indentation for qw lists. This is
27100             # required because qw lists contained in brackets do not get nesting
27101             # depths, but the vertical aligner is watching nesting depth changes to
27102             # decide if a -lp block is intact. Without this patch, qw lists
27103             # enclosed in angle brackets will not get the correct -lp indentation.
27104              
27105             # Looking for line with isolated qw ...
27106 7366 50 100     18208 if ( $rOpts_line_up_parentheses
      66        
27107             && $type_beg eq 'q'
27108             && $ibeg == $iend )
27109             {
27110              
27111             # ... which is part of a multiline qw
27112 0         0 my $Km = $self->K_previous_nonblank($Kbeg);
27113 0         0 my $Kp = $self->K_next_nonblank($Kbeg);
27114 0 0 0     0 if ( defined($Km) && $rLL->[$Km]->[_TYPE_] eq 'q'
      0        
      0        
27115             || defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' )
27116             {
27117 0         0 $nesting_depth_beg++;
27118 0         0 $nesting_depth_end++;
27119             }
27120             }
27121              
27122             # ---------------------------------
27123             # define flag 'forget_side_comment'
27124             # ---------------------------------
27125              
27126             # This flag tells the vertical aligner to reset the side comment
27127             # location if we are entering a new block from level 0. This is
27128             # intended to keep side comments from drifting too far to the right.
27129 7366 100 100     19227 if ( $block_type_to_go[$i_terminal]
27130             && $nesting_depth_end > $nesting_depth_beg )
27131             {
27132             $rvao_args->{forget_side_comment} =
27133 59         261 !$self->[_radjusted_levels_]->[$Kbeg];
27134             }
27135              
27136             # -----------------------------------
27137             # Store the remaining non-flag values
27138             # -----------------------------------
27139 7366         13889 $rvao_args->{Kend} = $Kend_code;
27140 7366         13568 $rvao_args->{ci_level} = $ci_levels_to_go[$ibeg];
27141 7366         13472 $rvao_args->{indentation} = $indentation;
27142 7366         13878 $rvao_args->{level_end} = $nesting_depth_end;
27143 7366         13952 $rvao_args->{level} = $nesting_depth_beg;
27144 7366         13729 $rvao_args->{rline_alignment} = $rline_alignment;
27145             $rvao_args->{maximum_line_length} =
27146 7366         20919 $maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
27147              
27148             # --------------------------------------
27149             # send this line to the vertical aligner
27150             # --------------------------------------
27151 7366         12526 my $vao = $self->[_vertical_aligner_object_];
27152 7366         32883 $vao->valign_input($rvao_args);
27153              
27154 7366         31585 $do_not_pad = 0;
27155              
27156             } ## end of loop to output each line
27157              
27158             # Set flag indicating if the last line ends in an opening
27159             # token and is very short, so that a blank line is not
27160             # needed if the subsequent line is a comment.
27161             # Examples of what we are looking for:
27162             # {
27163             # && (
27164             # BEGIN {
27165             # default {
27166             # sub {
27167             $self->[_last_output_short_opening_token_]
27168              
27169             # line ends in opening token
27170             # /^[\{\(\[L]$/
27171 4547   66     19094 = $is_opening_type{$type_end}
27172              
27173             # and either
27174             && (
27175             # line has either single opening token
27176             $iend_next == $ibeg_next
27177              
27178             # or is a single token followed by opening token.
27179             # Note that sub identifiers have blanks like 'sub doit'
27180             # $token_beg !~ /\s+/
27181             || ( $iend_next - $ibeg_next <= 2 && index( $token_beg, SPACE ) < 0 )
27182             )
27183              
27184             # and limit total to 10 character widths
27185             && token_sequence_length( $ibeg_next, $iend_next ) <= 10;
27186              
27187             # remember indentation of lines containing opening containers for
27188             # later use by sub get_final_indentation
27189 4547 100 100     22106 $self->save_opening_indentation( $ri_first, $ri_last,
27190             $rindentation_list, $this_batch->[_runmatched_opening_indexes_] )
27191             if ( $this_batch->[_runmatched_opening_indexes_]
27192             || $types_to_go[$max_index_to_go] eq 'q' );
27193              
27194             # output any new -cscw block comment
27195 4547 50       9576 if ($cscw_block_comment) {
27196 0         0 $self->flush_vertical_aligner();
27197 0         0 my $file_writer_object = $self->[_file_writer_object_];
27198 0         0 $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
27199             }
27200 4547         19951 return;
27201             } ## end sub convey_batch_to_vertical_aligner
27202              
27203             sub check_batch_summed_lengths {
27204              
27205 0     0 0 0 my ( $self, $msg ) = @_;
27206 0 0       0 $msg = EMPTY_STRING unless defined($msg);
27207 0         0 my $rLL = $self->[_rLL_];
27208              
27209             # Verify that the summed lengths are correct. We want to be sure that
27210             # errors have not been introduced by programming changes. Summed lengths
27211             # are defined in sub store_token. Operations like padding and unmasking
27212             # semicolons can change token lengths, but those operations are expected to
27213             # update the summed lengths when they make changes. So the summed lengths
27214             # should always be correct.
27215 0         0 foreach my $i ( 0 .. $max_index_to_go ) {
27216 0         0 my $len_by_sum =
27217             $summed_lengths_to_go[ $i + 1 ] - $summed_lengths_to_go[$i];
27218 0         0 my $len_tok_i = $token_lengths_to_go[$i];
27219 0         0 my $KK = $K_to_go[$i];
27220 0         0 my $len_tok_K;
27221              
27222             # For --indent-only, there is not always agreement between
27223             # token lengths in _rLL_ and token_lengths_to_go, so skip that check.
27224 0 0 0     0 if ( defined($KK) && !$rOpts_indent_only ) {
27225 0         0 $len_tok_K = $rLL->[$KK]->[_TOKEN_LENGTH_];
27226             }
27227 0 0 0     0 if ( $len_by_sum != $len_tok_i
      0        
27228             || defined($len_tok_K) && $len_by_sum != $len_tok_K )
27229             {
27230 0 0       0 my $lno = defined($KK) ? $rLL->[$KK]->[_LINE_INDEX_] + 1 : "undef";
27231 0 0       0 $KK = 'undef' unless defined($KK);
27232 0         0 my $tok = $tokens_to_go[$i];
27233 0         0 my $type = $types_to_go[$i];
27234 0         0 Fault(<<EOM);
27235             Summed lengths are appear to be incorrect. $msg
27236             lengths disagree: token length by sum=$len_by_sum but token_length_to_go[$i] = $len_tok_i and rLL->[$KK]->[_TOKEN_LENGTH_]=$len_tok_K
27237             near line $lno starting with '$tokens_to_go[0]..' at token i=$i K=$KK token_type='$type' token='$tok'
27238             EOM
27239             }
27240             }
27241 0         0 return;
27242             } ## end sub check_batch_summed_lengths
27243              
27244             { ## begin closure set_vertical_alignment_markers
27245             my %is_vertical_alignment_type;
27246             my %is_not_vertical_alignment_token;
27247             my %is_vertical_alignment_keyword;
27248             my %is_terminal_alignment_type;
27249             my %is_low_level_alignment_token;
27250              
27251             BEGIN {
27252              
27253 38     38   224 my @q;
27254              
27255             # Replaced =~ and // in the list. // had been removed in RT 119588
27256 38         282 @q = qw#
27257             = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
27258             { ? : => && || ~~ !~~ =~ !~ // <=> ->
27259             #;
27260 38         899 @is_vertical_alignment_type{@q} = (1) x scalar(@q);
27261              
27262             # These 'tokens' are not aligned. We need this to remove [
27263             # from the above list because it has type ='{'
27264 38         191 @q = qw([);
27265 38         117 @is_not_vertical_alignment_token{@q} = (1) x scalar(@q);
27266              
27267             # these are the only types aligned at a line end
27268 38         110 @q = qw(&& || =>);
27269 38         135 @is_terminal_alignment_type{@q} = (1) x scalar(@q);
27270              
27271             # these tokens only align at line level
27272 38         95 @q = ( '{', '(' );
27273 38         108 @is_low_level_alignment_token{@q} = (1) x scalar(@q);
27274              
27275             # eq and ne were removed from this list to improve alignment chances
27276 38         121 @q = qw(if unless and or err for foreach while until);
27277 38         115484 @is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
27278             } ## end BEGIN
27279              
27280             my $ralignment_type_to_go;
27281             my $ralignment_counts;
27282             my $ralignment_hash_by_line;
27283              
27284             sub set_vertical_alignment_markers {
27285              
27286 3260     3260 0 6142 my ( $self, $ri_first, $ri_last ) = @_;
27287              
27288             #----------------------------------------------------------------------
27289             # This routine looks at output lines for certain tokens which can serve
27290             # as vertical alignment markers (such as an '=').
27291             #----------------------------------------------------------------------
27292              
27293             # Input parameters:
27294             # $ri_first = ref to list of starting line indexes in _to_go arrays
27295             # $ri_last = ref to list of ending line indexes in _to_go arrays
27296              
27297             # Method: We look at each token $i in this output batch and set
27298             # $ralignment_type_to_go->[$i] equal to those tokens at which we would
27299             # accept vertical alignment.
27300              
27301             # Initialize closure (and return) variables:
27302 3260         8508 $ralignment_type_to_go = [];
27303 3260         6396 $ralignment_counts = [];
27304 3260         9090 $ralignment_hash_by_line = [];
27305              
27306             # NOTE: closing side comments can insert up to 2 additional tokens
27307             # beyond the original $max_index_to_go, so we need to check ri_last for
27308             # the last index.
27309 3260         4893 my $max_line = @{$ri_first} - 1;
  3260         6143  
27310 3260         5974 my $max_i = $ri_last->[$max_line];
27311 3260 50       7678 if ( $max_i < $max_index_to_go ) { $max_i = $max_index_to_go }
  0         0  
27312              
27313             # -----------------------------------------------------------------
27314             # Shortcut:
27315             # - no alignments if there is only 1 token.
27316             # - and nothing to do if we aren't allowed to change whitespace.
27317             # -----------------------------------------------------------------
27318 3260 100 66     12650 if ( $max_i <= 0 || !$rOpts_add_whitespace ) {
27319 87         320 goto RETURN;
27320             }
27321              
27322             # -------------------------------
27323             # First handle any side comment.
27324             # -------------------------------
27325 3173         5418 my $i_terminal = $max_i;
27326 3173 100       7553 if ( $types_to_go[$max_i] eq '#' ) {
27327              
27328             # We know $max_i > 0 if we get here.
27329 343         683 $i_terminal -= 1;
27330 343 50 33     1657 if ( $i_terminal > 0 && $types_to_go[$i_terminal] eq 'b' ) {
27331 343         596 $i_terminal -= 1;
27332             }
27333              
27334 343         676 my $token = $tokens_to_go[$max_i];
27335 343         617 my $KK = $K_to_go[$max_i];
27336              
27337             # Do not align various special side comments
27338             my $do_not_align = (
27339              
27340             # it is any specially marked side comment
27341             ( defined($KK) && $self->[_rspecial_side_comment_type_]->{$KK} )
27342              
27343             # or it is a static side comment
27344 343   100     4088 || ( $rOpts->{'static-side-comments'}
27345             && $token =~ /$static_side_comment_pattern/ )
27346              
27347             # or a closing side comment
27348             || ( $types_to_go[$i_terminal] eq '}'
27349             && $tokens_to_go[$i_terminal] eq '}'
27350             && $token =~ /$closing_side_comment_prefix_pattern/ )
27351             );
27352              
27353             # - For the specific combination -vc -nvsc, we put all side comments
27354             # at fixed locations. Note that we will lose hanging side comment
27355             # alignments. Otherwise, hsc's can move to strange locations.
27356             # - For -nvc -nvsc we make all side comments vertical alignments
27357             # because the vertical aligner will check for -nvsc and be able
27358             # to reduce the final padding to the side comments for long lines.
27359             # and keep hanging side comments aligned.
27360 343 100 100     1834 if ( !$do_not_align
      100        
27361             && !$rOpts_valign_side_comments
27362             && $rOpts_valign_code )
27363             {
27364              
27365 8         17 $do_not_align = 1;
27366 8         12 my $ipad = $max_i - 1;
27367 8 50       19 if ( $types_to_go[$ipad] eq 'b' ) {
27368             my $pad_spaces =
27369 8         16 $rOpts->{'minimum-space-to-comment'} -
27370             $token_lengths_to_go[$ipad];
27371 8         22 $self->pad_token( $ipad, $pad_spaces );
27372             }
27373             }
27374              
27375 343 100       890 if ( !$do_not_align ) {
27376 325         806 $ralignment_type_to_go->[$max_i] = '#';
27377 325         1069 $ralignment_hash_by_line->[$max_line]->{$max_i} = '#';
27378 325         857 $ralignment_counts->[$max_line]++;
27379             }
27380             }
27381              
27382             # ----------------------------------------------
27383             # Nothing more to do on this line if -nvc is set
27384             # ----------------------------------------------
27385 3173 100       7166 if ( !$rOpts_valign_code ) {
27386 17         58 goto RETURN;
27387             }
27388              
27389             # -------------------------------------
27390             # Loop over each line of this batch ...
27391             # -------------------------------------
27392              
27393 3156         7370 foreach my $line ( 0 .. $max_line ) {
27394              
27395 5797         8960 my $ibeg = $ri_first->[$line];
27396 5797         8506 my $iend = $ri_last->[$line];
27397              
27398 5797 100       11455 next if ( $iend <= $ibeg );
27399              
27400             # back up before any side comment
27401 5386 100       10111 if ( $iend > $i_terminal ) { $iend = $i_terminal }
  326         600  
27402              
27403             #----------------------------------
27404             # Loop over all tokens on this line
27405             #----------------------------------
27406 5386         12160 $self->set_vertical_alignment_markers_token_loop( $line, $ibeg,
27407             $iend );
27408             }
27409              
27410             RETURN:
27411 3260         9555 return ( $ralignment_type_to_go, $ralignment_counts,
27412             $ralignment_hash_by_line );
27413             } ## end sub set_vertical_alignment_markers
27414              
27415             sub set_vertical_alignment_markers_token_loop {
27416 5386     5386 0 11463 my ( $self, $line, $ibeg, $iend ) = @_;
27417              
27418             # Set vertical alignment markers for the tokens on one line
27419             # of the current output batch. This is done by updating the
27420             # three closure variables:
27421             # $ralignment_type_to_go
27422             # $ralignment_counts
27423             # $ralignment_hash_by_line
27424              
27425             # Input parameters:
27426             # $line = index of this line in the current batch
27427             # $ibeg, $iend = index range of tokens to check in the _to_go arrays
27428              
27429 5386         9022 my $level_beg = $levels_to_go[$ibeg];
27430 5386         8682 my $token_beg = $tokens_to_go[$ibeg];
27431 5386         8269 my $type_beg = $types_to_go[$ibeg];
27432 5386   100     22496 my $type_beg_special_char =
27433             ( $type_beg eq '.' || $type_beg eq ':' || $type_beg eq '?' );
27434              
27435 5386         8546 my $last_vertical_alignment_BEFORE_index = -1;
27436 5386         8037 my $vert_last_nonblank_type = $type_beg;
27437 5386         7811 my $vert_last_nonblank_token = $token_beg;
27438              
27439             # ----------------------------------------------------------------
27440             # Initialization code merged from 'sub delete_needless_alignments'
27441             # ----------------------------------------------------------------
27442 5386         7495 my $i_good_paren = -1;
27443 5386         7966 my $i_elsif_close = $ibeg - 1;
27444 5386         7980 my $i_elsif_open = $iend + 1;
27445 5386         7683 my @imatch_list;
27446 5386 100       11387 if ( $type_beg eq 'k' ) {
27447              
27448             # Initialization for paren patch: mark a location of a paren we
27449             # should keep, such as one following something like a leading
27450             # 'if', 'elsif',
27451 1645         2856 $i_good_paren = $ibeg + 1;
27452 1645 100       4401 if ( $types_to_go[$i_good_paren] eq 'b' ) {
27453 1510         2469 $i_good_paren++;
27454             }
27455              
27456             # Initialization for 'elsif' patch: remember the paren range of
27457             # an elsif, and do not make alignments within them because this
27458             # can cause loss of padding and overall brace alignment in the
27459             # vertical aligner.
27460 1645 50 66     4885 if ( $token_beg eq 'elsif'
      66        
27461             && $i_good_paren < $iend
27462             && $tokens_to_go[$i_good_paren] eq '(' )
27463             {
27464 21         48 $i_elsif_open = $i_good_paren;
27465 21         49 $i_elsif_close = $mate_index_to_go[$i_good_paren];
27466 21 50       78 if ( !defined($i_elsif_close) ) { $i_elsif_close = -1 }
  0         0  
27467             }
27468             } ## end if ( $type_beg eq 'k' )
27469              
27470             # --------------------------------------------
27471             # Loop over each token in this output line ...
27472             # --------------------------------------------
27473 5386         11586 foreach my $i ( $ibeg + 1 .. $iend ) {
27474              
27475 43174 100       78710 next if ( $types_to_go[$i] eq 'b' );
27476              
27477 27505         37625 my $type = $types_to_go[$i];
27478 27505         37888 my $token = $tokens_to_go[$i];
27479 27505         36248 my $alignment_type = EMPTY_STRING;
27480              
27481             # ----------------------------------------------
27482             # Check for 'paren patch' : Remove excess parens
27483             # ----------------------------------------------
27484              
27485             # Excess alignment of parens can prevent other good alignments.
27486             # For example, note the parens in the first two rows of the
27487             # following snippet. They would normally get marked for
27488             # alignment and aligned as follows:
27489              
27490             # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
27491             # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
27492             # my $img = new Gimp::Image( $w, $h, RGB );
27493              
27494             # This causes unnecessary paren alignment and prevents the
27495             # third equals from aligning. If we remove the unwanted
27496             # alignments we get:
27497              
27498             # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
27499             # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
27500             # my $img = new Gimp::Image( $w, $h, RGB );
27501              
27502             # A rule for doing this which works well is to remove alignment
27503             # of parens whose containers do not contain other aligning
27504             # tokens, with the exception that we always keep alignment of
27505             # the first opening paren on a line (for things like 'if' and
27506             # 'elsif' statements).
27507 27505 100 100     54681 if ( $token eq ')' && @imatch_list ) {
27508              
27509             # undo the corresponding opening paren if:
27510             # - it is at the top of the stack
27511             # - and not the first overall opening paren
27512             # - does not follow a leading keyword on this line
27513 972         2139 my $imate = $mate_index_to_go[$i];
27514 972 50       2621 if ( !defined($imate) ) { $imate = -1 }
  0         0  
27515 972 100 100     4138 if ( $imatch_list[-1] eq $imate
      100        
      100        
27516             && ( $ibeg > 1 || @imatch_list > 1 )
27517             && $imate > $i_good_paren )
27518             {
27519 54 50       185 if ( $ralignment_type_to_go->[$imate] ) {
27520 54         119 $ralignment_type_to_go->[$imate] = EMPTY_STRING;
27521 54         116 $ralignment_counts->[$line]--;
27522 54         146 delete $ralignment_hash_by_line->[$line]->{$imate};
27523             }
27524 54         96 pop @imatch_list;
27525             }
27526             }
27527              
27528             # do not align tokens at lower level than start of line
27529             # except for side comments
27530 27505 100       48995 if ( $levels_to_go[$i] < $level_beg ) {
27531 157         403 next;
27532             }
27533              
27534             #--------------------------------------------------------
27535             # First see if we want to align BEFORE this token
27536             #--------------------------------------------------------
27537              
27538             # The first possible token that we can align before
27539             # is index 2 because: 1) it doesn't normally make sense to
27540             # align before the first token and 2) the second
27541             # token must be a blank if we are to align before
27542             # the third
27543 27348 100 100     97246 if ( $i < $ibeg + 2 ) { }
    100          
    100          
    100          
    100          
    100          
27544              
27545             # must follow a blank token
27546             elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
27547              
27548             # otherwise, do not align two in a row to create a
27549             # blank field
27550             elsif ( $last_vertical_alignment_BEFORE_index == $i - 2 ) { }
27551              
27552             # align before one of these keywords
27553             # (within a line, since $i>1)
27554             elsif ( $type eq 'k' ) {
27555              
27556             # /^(if|unless|and|or|eq|ne)$/
27557 627 100       2516 if ( $is_vertical_alignment_keyword{$token} ) {
27558 135         298 $alignment_type = $token;
27559              
27560             # Align postfix 'unless' and 'if' if requested (git #116)
27561             # These are the only equivalent keywords. For equivalent
27562             # token types see '%operator_map'.
27563 135 100 100     610 if ( $token eq 'unless' && $rOpts_valign_if_unless ) {
27564 2         4 $alignment_type = 'if';
27565             }
27566             }
27567             }
27568              
27569             # align qw in a 'use' statement (issue git #93)
27570             elsif ( $type eq 'q' ) {
27571 68 100 100     424 if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] eq 'use' ) {
27572 34         72 $alignment_type = $type;
27573             }
27574             }
27575              
27576             # align before one of these types..
27577             elsif ( $is_vertical_alignment_type{$type}
27578             && !$is_not_vertical_alignment_token{$token} )
27579             {
27580 4003         6805 $alignment_type = $token;
27581              
27582             # Do not align a terminal token. Although it might
27583             # occasionally look ok to do this, this has been found to be
27584             # a good general rule. The main problems are:
27585             # (1) that the terminal token (such as an = or :) might get
27586             # moved far to the right where it is hard to see because
27587             # nothing follows it, and
27588             # (2) doing so may prevent other good alignments.
27589             # Current exceptions are && and || and =>
27590 4003 100       8355 if ( $i == $iend ) {
27591             $alignment_type = EMPTY_STRING
27592 593 100       2368 unless ( $is_terminal_alignment_type{$type} );
27593             }
27594              
27595             # Do not align leading ': (' or '. ('. This would prevent
27596             # alignment in something like the following:
27597             # $extra_space .=
27598             # ( $input_line_number < 10 ) ? " "
27599             # : ( $input_line_number < 100 ) ? " "
27600             # : "";
27601             # or
27602             # $code =
27603             # ( $case_matters ? $accessor : " lc($accessor) " )
27604             # . ( $yesno ? " eq " : " ne " )
27605              
27606             # Also, do not align a ( following a leading ? so we can
27607             # align something like this:
27608             # $converter{$_}->{ushortok} =
27609             # $PDL::IO::Pic::biggrays
27610             # ? ( m/GIF/ ? 0 : 1 )
27611             # : ( m/GIF|RAST|IFF/ ? 0 : 1 );
27612 4003 100 100     9222 if ( $type_beg_special_char
      66        
27613             && $i == $ibeg + 2
27614             && $types_to_go[ $i - 1 ] eq 'b' )
27615             {
27616 36         70 $alignment_type = EMPTY_STRING;
27617             }
27618              
27619             # Certain tokens only align at the same level as the
27620             # initial line level
27621 4003 100 100     12036 if ( $is_low_level_alignment_token{$token}
27622             && $levels_to_go[$i] != $level_beg )
27623             {
27624 124         283 $alignment_type = EMPTY_STRING;
27625             }
27626              
27627 4003 100       8435 if ( $token eq '(' ) {
27628              
27629             # For a paren after keyword, only align if-like parens,
27630             # such as:
27631             # if ( $a ) { &a }
27632             # elsif ( $b ) { &b }
27633             # ^-------------------aligned parens
27634 569 100 100     2854 if ( $vert_last_nonblank_type eq 'k'
27635             && !$is_if_unless_elsif{$vert_last_nonblank_token} )
27636             {
27637 171         377 $alignment_type = EMPTY_STRING;
27638             }
27639              
27640             # Do not align a spaced-function-paren if requested.
27641             # Issue git #53, #73.
27642 569 100       1547 if ( !$rOpts_function_paren_vertical_alignment ) {
27643 7         13 my $seqno = $type_sequence_to_go[$i];
27644             $alignment_type = EMPTY_STRING
27645 7 50       22 if ( $self->[_ris_function_call_paren_]->{$seqno} );
27646             }
27647              
27648             # make () align with qw in a 'use' statement (git #93)
27649 569 100 66     2160 if ( $tokens_to_go[0] eq 'use'
      66        
      66        
27650             && $types_to_go[0] eq 'k'
27651             && defined( $mate_index_to_go[$i] )
27652             && $mate_index_to_go[$i] == $i + 1 )
27653             {
27654 15         45 $alignment_type = 'q';
27655              
27656             ## Note on discussion git #101. We could make this
27657             ## a separate type '()' to separate it from qw's:
27658             ## $alignment_type =
27659             ## $rOpts_valign_empty_parens_with_qw ? 'q' : '()';
27660             }
27661             }
27662              
27663             # be sure the alignment tokens are unique
27664             # This experiment didn't work well: reason not determined
27665             # if ($token ne $type) {$alignment_type .= $type}
27666             }
27667              
27668             # NOTE: This is deactivated because it causes the previous
27669             # if/elsif alignment to fail
27670             #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
27671             #{ $alignment_type = $type; }
27672              
27673 27348 100 100     97151 if ($alignment_type) {
    100 100        
      100        
27674 3289         4994 $last_vertical_alignment_BEFORE_index = $i;
27675             }
27676              
27677             #--------------------------------------------------------
27678             # Next see if we want to align AFTER the previous nonblank
27679             #--------------------------------------------------------
27680              
27681             # We want to line up ',' and interior ';' tokens, with the added
27682             # space AFTER these tokens. (Note: interior ';' is included
27683             # because it may occur in short blocks).
27684             elsif (
27685              
27686             # previous token IS one of these:
27687             (
27688             $vert_last_nonblank_type eq ','
27689             || $vert_last_nonblank_type eq ';'
27690             )
27691              
27692             # and it follows a blank
27693             && $types_to_go[ $i - 1 ] eq 'b'
27694              
27695             # and it's NOT one of these
27696             && !$is_closing_token{$type}
27697              
27698             # then go ahead and align
27699             )
27700              
27701             {
27702 1802         3029 $alignment_type = $vert_last_nonblank_type;
27703             }
27704              
27705             #-----------------------
27706             # Set the alignment type
27707             #-----------------------
27708 27348 100       45037 if ($alignment_type) {
27709              
27710             # but do not align the opening brace of an anonymous sub
27711 5091 100 100     25135 if ( $token eq '{'
    100 100        
    100 100        
27712             && $block_type_to_go[$i]
27713             && $block_type_to_go[$i] =~ /$ASUB_PATTERN/ )
27714             {
27715              
27716             }
27717              
27718             # and do not make alignments within 'elsif' parens
27719             elsif ( $i > $i_elsif_open && $i < $i_elsif_close ) {
27720              
27721             }
27722              
27723             # and ignore any tokens which have leading padded spaces
27724             # example: perl527/lop.t
27725             elsif ( substr( $alignment_type, 0, 1 ) eq SPACE ) {
27726              
27727             }
27728              
27729             else {
27730 5006         13144 $ralignment_type_to_go->[$i] = $alignment_type;
27731 5006         15893 $ralignment_hash_by_line->[$line]->{$i} = $alignment_type;
27732 5006         8597 $ralignment_counts->[$line]++;
27733 5006         9897 push @imatch_list, $i;
27734             }
27735             }
27736              
27737 27348         36323 $vert_last_nonblank_type = $type;
27738 27348         44257 $vert_last_nonblank_token = $token;
27739             }
27740 5386         12949 return;
27741             } ## end sub set_vertical_alignment_markers_token_loop
27742              
27743             } ## end closure set_vertical_alignment_markers
27744              
27745             sub make_vertical_alignments {
27746 3263     3263 0 7183 my ( $self, $ri_first, $ri_last ) = @_;
27747              
27748             #----------------------------
27749             # Shortcut for a single token
27750             #----------------------------
27751 3263 50       7213 if ( $max_index_to_go == 0 ) {
27752 0 0 0     0 if ( @{$ri_first} == 1 && $ri_last->[0] == 0 ) {
  0         0  
27753 0         0 my $rtokens = [];
27754 0         0 my $rfields = [ $tokens_to_go[0] ];
27755 0         0 my $rpatterns = [ $types_to_go[0] ];
27756 0         0 my $rfield_lengths =
27757             [ $summed_lengths_to_go[1] - $summed_lengths_to_go[0] ];
27758 0         0 return [ [ $rtokens, $rfields, $rpatterns, $rfield_lengths ] ];
27759             }
27760              
27761             # Strange line packing, not fatal but should not happen
27762 0         0 elsif (DEVEL_MODE) {
27763             my $max_line = @{$ri_first} - 1;
27764             my $ibeg = $ri_first->[0];
27765             my $iend = $ri_last->[0];
27766             my $tok_b = $tokens_to_go[$ibeg];
27767             my $tok_e = $tokens_to_go[$iend];
27768             my $type_b = $types_to_go[$ibeg];
27769             my $type_e = $types_to_go[$iend];
27770             Fault(
27771             "Strange..max_index=0 but nlines=$max_line ibeg=$ibeg tok=$tok_b type=$type_b iend=$iend tok=$tok_e type=$type_e; please check\n"
27772             );
27773             }
27774             }
27775              
27776             #---------------------------------------------------------
27777             # Step 1: Define the alignment tokens for the entire batch
27778             #---------------------------------------------------------
27779 3263         5851 my ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line );
27780              
27781             # We only need to make this call if vertical alignment of code is
27782             # requested or if a line might have a side comment.
27783 3263 100 100     8652 if ( $rOpts_valign_code
27784             || $types_to_go[$max_index_to_go] eq '#' )
27785             {
27786 3260         9269 ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line )
27787             = $self->set_vertical_alignment_markers( $ri_first, $ri_last );
27788             }
27789              
27790             #----------------------------------------------
27791             # Step 2: Break each line into alignment fields
27792             #----------------------------------------------
27793 3263         7092 my $rline_alignments = [];
27794 3263         4850 my $max_line = @{$ri_first} - 1;
  3263         6435  
27795 3263         6812 foreach my $line ( 0 .. $max_line ) {
27796              
27797 6082         9909 my $ibeg = $ri_first->[$line];
27798 6082         9194 my $iend = $ri_last->[$line];
27799              
27800 6082         19561 my $rtok_fld_pat_len = $self->make_alignment_patterns(
27801             $ibeg, $iend, $ralignment_type_to_go,
27802             $ralignment_counts->[$line],
27803             $ralignment_hash_by_line->[$line]
27804             );
27805 6082         11306 push @{$rline_alignments}, $rtok_fld_pat_len;
  6082         14589  
27806             }
27807 3263         7677 return $rline_alignments;
27808             } ## end sub make_vertical_alignments
27809              
27810             sub get_seqno {
27811              
27812             # get opening and closing sequence numbers of a token for the vertical
27813             # aligner. Assign qw quotes a value to allow qw opening and closing tokens
27814             # to be treated somewhat like opening and closing tokens for stacking
27815             # tokens by the vertical aligner.
27816 18     18 0 42 my ( $self, $ii, $ending_in_quote ) = @_;
27817              
27818 18         32 my $rLL = $self->[_rLL_];
27819              
27820 18         29 my $KK = $K_to_go[$ii];
27821 18         35 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
27822              
27823 18 50       43 if ( $rLL->[$KK]->[_TYPE_] eq 'q' ) {
27824 18         29 my $SEQ_QW = -1;
27825 18         31 my $token = $rLL->[$KK]->[_TOKEN_];
27826 18 100       44 if ( $ii > 0 ) {
27827 2 50       14 $seqno = $SEQ_QW if ( $token =~ /^qw\s*[\(\{\[]/ );
27828             }
27829             else {
27830 16 100       33 if ( !$ending_in_quote ) {
27831 6 100       32 $seqno = $SEQ_QW if ( $token =~ /[\)\}\]]$/ );
27832             }
27833             }
27834             }
27835 18         47 return ($seqno);
27836             } ## end sub get_seqno
27837              
27838             {
27839             my %undo_extended_ci;
27840              
27841             sub initialize_undo_ci {
27842 555     555 0 1611 %undo_extended_ci = ();
27843 555         1079 return;
27844             }
27845              
27846             sub undo_ci {
27847              
27848             # Undo continuation indentation in certain sequences
27849 828     828 0 3449 my ( $self, $ri_first, $ri_last, $rix_seqno_controlling_ci ) = @_;
27850 828         1853 my ( $line_1, $line_2, $lev_last );
27851 828         1367 my $max_line = @{$ri_first} - 1;
  828         1715  
27852              
27853 828         1710 my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
27854              
27855             # Prepare a list of controlling indexes for each line if required.
27856             # This is used for efficient processing below. Note: this is
27857             # critical for speed. In the initial implementation I just looped
27858             # through the @$rix_seqno_controlling_ci list below. Using NYT_prof, I
27859             # found that this routine was causing a huge run time in large lists.
27860             # On a very large list test case, this new coding dropped the run time
27861             # of this routine from 30 seconds to 169 milliseconds.
27862 828         1396 my @i_controlling_ci;
27863 828 100 66     2611 if ( $rix_seqno_controlling_ci && @{$rix_seqno_controlling_ci} ) {
  40         175  
27864 40         67 my @tmp = reverse @{$rix_seqno_controlling_ci};
  40         119  
27865 40         100 my $ix_next = pop @tmp;
27866 40         107 foreach my $line ( 0 .. $max_line ) {
27867 98         156 my $iend = $ri_last->[$line];
27868 98   100     318 while ( defined($ix_next) && $ix_next <= $iend ) {
27869 120         188 push @{ $i_controlling_ci[$line] }, $ix_next;
  120         240  
27870 120         371 $ix_next = pop @tmp;
27871             }
27872             }
27873             }
27874              
27875             # Loop over all lines of the batch ...
27876              
27877             # Workaround originally created for problem c007, in which the
27878             # combination -lp -xci could produce a "Program bug" message in unusual
27879             # circumstances.
27880 828         1540 my $skip_SECTION_1;
27881 828 100 100     2784 if ( $rOpts_line_up_parentheses
27882             && $rOpts_extended_continuation_indentation )
27883             {
27884              
27885             # Only set this flag if -lp is actually used here
27886 71         184 foreach my $line ( 0 .. $max_line ) {
27887 85         177 my $ibeg = $ri_first->[$line];
27888 85 100       237 if ( ref( $leading_spaces_to_go[$ibeg] ) ) {
27889 19         30 $skip_SECTION_1 = 1;
27890 19         38 last;
27891             }
27892             }
27893             }
27894              
27895 828         2415 foreach my $line ( 0 .. $max_line ) {
27896              
27897 3647         6165 my $ibeg = $ri_first->[$line];
27898 3647         5077 my $iend = $ri_last->[$line];
27899 3647         5511 my $lev = $levels_to_go[$ibeg];
27900              
27901             #-----------------------------------
27902             # SECTION 1: Undo needless common CI
27903             #-----------------------------------
27904              
27905             # We are looking at leading tokens and looking for a sequence all
27906             # at the same level and all at a higher level than enclosing lines.
27907              
27908             # For example, we can undo continuation indentation in sort/map/grep
27909             # chains
27910              
27911             # my $dat1 = pack( "n*",
27912             # map { $_, $lookup->{$_} }
27913             # sort { $a <=> $b }
27914             # grep { $lookup->{$_} ne $default } keys %$lookup );
27915              
27916             # to become
27917              
27918             # my $dat1 = pack( "n*",
27919             # map { $_, $lookup->{$_} }
27920             # sort { $a <=> $b }
27921             # grep { $lookup->{$_} ne $default } keys %$lookup );
27922              
27923 3647 100 100     10540 if ( $line > 0 && !$skip_SECTION_1 ) {
27924              
27925             # if we have started a chain..
27926 2803 100       4927 if ($line_1) {
27927              
27928             # see if it continues..
27929 11 100       40 if ( $lev == $lev_last ) {
    50          
    0          
27930 8 100 66     55 if ( $types_to_go[$ibeg] eq 'k'
27931             && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
27932             {
27933              
27934             # chain continues...
27935             # check for chain ending at end of a statement
27936 6   33     21 my $is_semicolon_terminated = (
27937             $line == $max_line
27938             && (
27939             $types_to_go[$iend] eq ';'
27940              
27941             # with possible side comment
27942             || ( $types_to_go[$iend] eq '#'
27943             && $iend - $ibeg >= 2
27944             && $types_to_go[ $iend - 2 ] eq ';'
27945             && $types_to_go[ $iend - 1 ] eq 'b' )
27946             )
27947             );
27948              
27949 6 50       13 $line_2 = $line
27950             if ($is_semicolon_terminated);
27951             }
27952             else {
27953              
27954             # kill chain
27955 2         6 $line_1 = undef;
27956             }
27957             }
27958             elsif ( $lev < $lev_last ) {
27959              
27960             # chain ends with previous line
27961 3         9 $line_2 = $line - 1;
27962             }
27963             elsif ( $lev > $lev_last ) {
27964              
27965             # kill chain
27966 0         0 $line_1 = undef;
27967             }
27968              
27969             # undo the continuation indentation if a chain ends
27970 11 100 66     42 if ( defined($line_2) && defined($line_1) ) {
27971 3         13 my $continuation_line_count = $line_2 - $line_1 + 1;
27972 3 50       12 @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
  3         8  
27973             = (0) x ($continuation_line_count)
27974             if ( $continuation_line_count >= 0 );
27975 3         9 @leading_spaces_to_go[ @{$ri_first}
27976             [ $line_1 .. $line_2 ] ] =
27977 3         9 @reduced_spaces_to_go[ @{$ri_first}
  3         10  
27978             [ $line_1 .. $line_2 ] ];
27979 3         7 $line_1 = undef;
27980             }
27981             }
27982              
27983             # not in a chain yet..
27984             else {
27985              
27986             # look for start of a new sort/map/grep chain
27987 2792 100       5752 if ( $lev > $lev_last ) {
27988 686 100 100     2691 if ( $types_to_go[$ibeg] eq 'k'
27989             && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
27990             {
27991 10         42 $line_1 = $line;
27992             }
27993             }
27994             }
27995             }
27996              
27997             #-------------------------------------
27998             # SECTION 2: Undo ci at cuddled blocks
27999             #-------------------------------------
28000              
28001             # Note that sub get_final_indentation will be called later to
28002             # actually do this, but for now we will tentatively mark cuddled
28003             # lines with ci=0 so that the the -xci loop which follows will be
28004             # correct at cuddles.
28005 3647 100 100     10449 if (
28006             $types_to_go[$ibeg] eq '}'
28007             && ( $nesting_depth_to_go[$iend] + 1 ==
28008             $nesting_depth_to_go[$ibeg] )
28009             )
28010             {
28011 450         1221 my $terminal_type = $types_to_go[$iend];
28012 450 100 66     1773 if ( $terminal_type eq '#' && $iend > $ibeg ) {
28013 6         18 $terminal_type = $types_to_go[ $iend - 1 ];
28014 6 50 33     32 if ( $terminal_type eq '#' && $iend - 1 > $ibeg ) {
28015 0         0 $terminal_type = $types_to_go[ $iend - 2 ];
28016             }
28017             }
28018              
28019             # Patch for rt144979, part 2. Coordinated with part 1.
28020             # Skip cuddled braces.
28021 450         1008 my $seqno_beg = $type_sequence_to_go[$ibeg];
28022             my $is_cuddled_closing_brace = $seqno_beg
28023 450   66     1850 && $self->[_ris_cuddled_closing_brace_]->{$seqno_beg};
28024              
28025 450 100 100     1692 if ( $terminal_type eq '{' && !$is_cuddled_closing_brace ) {
28026 13         46 $ci_levels_to_go[$ibeg] = 0;
28027             }
28028             }
28029              
28030             #--------------------------------------------------------
28031             # SECTION 3: Undo ci set by sub extended_ci if not needed
28032             #--------------------------------------------------------
28033              
28034             # Undo the ci of the leading token if its controlling token
28035             # went out on a previous line without ci
28036 3647 100       7212 if ( $ci_levels_to_go[$ibeg] ) {
28037 1316         2943 my $Kbeg = $K_to_go[$ibeg];
28038 1316         2525 my $seqno = $rseqno_controlling_my_ci->{$Kbeg};
28039 1316 100 100     3111 if ( $seqno && $undo_extended_ci{$seqno} ) {
28040              
28041             # but do not undo ci set by the -lp flag
28042 50 100       148 if ( !ref( $reduced_spaces_to_go[$ibeg] ) ) {
28043 36         58 $ci_levels_to_go[$ibeg] = 0;
28044 36         61 $leading_spaces_to_go[$ibeg] =
28045             $reduced_spaces_to_go[$ibeg];
28046             }
28047             }
28048             }
28049              
28050             # Flag any controlling opening tokens in lines without ci. This
28051             # will be used later in the above if statement to undo the ci which
28052             # they added. The array i_controlling_ci[$line] was prepared at
28053             # the top of this routine.
28054 3647 100 100     10064 if ( !$ci_levels_to_go[$ibeg]
28055             && defined( $i_controlling_ci[$line] ) )
28056             {
28057 27         47 foreach my $i ( @{ $i_controlling_ci[$line] } ) {
  27         65  
28058 60         109 my $seqno = $type_sequence_to_go[$i];
28059 60         133 $undo_extended_ci{$seqno} = 1;
28060             }
28061             }
28062              
28063 3647         6515 $lev_last = $lev;
28064             }
28065              
28066 828         2589 return;
28067             } ## end sub undo_ci
28068             }
28069              
28070             { ## begin closure set_logical_padding
28071             my %is_math_op;
28072              
28073             BEGIN {
28074              
28075 38     38   285 my @q = qw( + - * / );
28076 38         91770 @is_math_op{@q} = (1) x scalar(@q);
28077             }
28078              
28079             sub set_logical_padding {
28080              
28081             # Look at a batch of lines and see if extra padding can improve the
28082             # alignment when there are certain leading operators. Here is an
28083             # example, in which some extra space is introduced before
28084             # '( $year' to make it line up with the subsequent lines:
28085             #
28086             # if ( ( $Year < 1601 )
28087             # || ( $Year > 2899 )
28088             # || ( $EndYear < 1601 )
28089             # || ( $EndYear > 2899 ) )
28090             # {
28091             # &Error_OutOfRange;
28092             # }
28093             #
28094 749     749 0 2127 my ( $self, $ri_first, $ri_last, $starting_in_quote ) = @_;
28095 749         1246 my $max_line = @{$ri_first} - 1;
  749         1615  
28096              
28097 749         1764 my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
28098             $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
28099              
28100             # Patch to produce padding in the first line of short code blocks.
28101             # This is part of an update to fix cases b562 .. b983.
28102             # This is needed to compensate for a change which was made in 'sub
28103             # starting_one_line_block' to prevent blinkers. Previously, that sub
28104             # would not look at the total block size and rely on sub
28105             # break_long_lines to break up long blocks. Consequently, the
28106             # first line of those batches would end in the opening block brace of a
28107             # sort/map/grep/eval block. When this was changed to immediately check
28108             # for blocks which were too long, the opening block brace would go out
28109             # in a single batch, and the block contents would go out as the next
28110             # batch. This caused the logic in this routine which decides if the
28111             # first line should be padded to be incorrect. To fix this, we set a
28112             # flag if the previous batch ended in an opening sort/map/grep/eval
28113             # block brace, and use it to adjust the logic to compensate.
28114              
28115             # For example, the following would have previously been a single batch
28116             # but now is two batches. We want to pad the line starting in '$dir':
28117             # my (@indices) = # batch n-1 (prev batch n)
28118             # sort { # batch n-1 (prev batch n)
28119             # $dir eq 'left' # batch n
28120             # ? $cells[$a] <=> $cells[$b] # batch n
28121             # : $cells[$b] <=> $cells[$a]; # batch n
28122             # } ( 0 .. $#cells ); # batch n
28123              
28124 749         1493 my $rLL = $self->[_rLL_];
28125 749         1419 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
28126              
28127 749         1216 my $is_short_block;
28128 749 100       2341 if ( $K_to_go[0] > 0 ) {
28129 632         1334 my $Kp = $K_to_go[0] - 1;
28130 632 100 100     3473 if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) {
28131 589         1220 $Kp -= 1;
28132             }
28133 632 100 100     3203 if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
28134 194         411 $Kp -= 1;
28135 194 100 100     1141 if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) {
28136 25         63 $Kp -= 1;
28137             }
28138             }
28139 632         1455 my $seqno = $rLL->[$Kp]->[_TYPE_SEQUENCE_];
28140 632 100       1765 if ($seqno) {
28141 124         425 my $block_type = $rblock_type_of_seqno->{$seqno};
28142 124 100       455 if ($block_type) {
28143 93         237 $is_short_block = $is_sort_map_grep_eval{$block_type};
28144 93   66     440 $is_short_block ||= $want_one_line_block{$block_type};
28145             }
28146             }
28147             }
28148              
28149             # looking at each line of this batch..
28150 749         2511 foreach my $line ( 0 .. $max_line - 1 ) {
28151              
28152             # see if the next line begins with a logical operator
28153 2807         4249 $ibeg = $ri_first->[$line];
28154 2807         4042 $iend = $ri_last->[$line];
28155 2807         4500 $ibeg_next = $ri_first->[ $line + 1 ];
28156 2807         4558 $tok_next = $tokens_to_go[$ibeg_next];
28157 2807         4222 $type_next = $types_to_go[$ibeg_next];
28158              
28159             $has_leading_op_next = ( $tok_next =~ /^\w/ )
28160             ? $is_chain_operator{$tok_next} # + - * / : ? && ||
28161 2807 100       8828 : $is_chain_operator{$type_next}; # and, or
28162              
28163 2807 100       5848 next unless ($has_leading_op_next);
28164              
28165             # next line must not be at lesser depth
28166             next
28167 322 100       1009 if ( $nesting_depth_to_go[$ibeg] >
28168             $nesting_depth_to_go[$ibeg_next] );
28169              
28170             # identify the token in this line to be padded on the left
28171 287         519 $ipad = undef;
28172              
28173             # handle lines at same depth...
28174 287 100       757 if ( $nesting_depth_to_go[$ibeg] ==
28175             $nesting_depth_to_go[$ibeg_next] )
28176             {
28177              
28178             # if this is not first line of the batch ...
28179 265 100       649 if ( $line > 0 ) {
28180              
28181             # and we have leading operator..
28182 237 100       630 next if $has_leading_op;
28183              
28184             # Introduce padding if..
28185             # 1. the previous line is at lesser depth, or
28186             # 2. the previous line ends in an assignment
28187             # 3. the previous line ends in a 'return'
28188             # 4. the previous line ends in a comma
28189             # Example 1: previous line at lesser depth
28190             # if ( ( $Year < 1601 ) # <- we are here but
28191             # || ( $Year > 2899 ) # list has not yet
28192             # || ( $EndYear < 1601 ) # collapsed vertically
28193             # || ( $EndYear > 2899 ) )
28194             # {
28195             #
28196             # Example 2: previous line ending in assignment:
28197             # $leapyear =
28198             # $year % 4 ? 0 # <- We are here
28199             # : $year % 100 ? 1
28200             # : $year % 400 ? 0
28201             # : 1;
28202             #
28203             # Example 3: previous line ending in comma:
28204             # push @expr,
28205             # /test/ ? undef
28206             # : eval($_) ? 1
28207             # : eval($_) ? 1
28208             # : 0;
28209              
28210             # be sure levels agree (never indent after an indented 'if')
28211             next
28212 78 50       349 if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
28213              
28214             # allow padding on first line after a comma but only if:
28215             # (1) this is line 2 and
28216             # (2) there are at more than three lines and
28217             # (3) lines 3 and 4 have the same leading operator
28218             # These rules try to prevent padding within a long
28219             # comma-separated list.
28220 78         167 my $ok_comma;
28221 78 50 66     388 if ( $types_to_go[$iendm] eq ','
      33        
28222             && $line == 1
28223             && $max_line > 2 )
28224             {
28225 0         0 my $ibeg_next_next = $ri_first->[ $line + 2 ];
28226 0         0 my $tok_next_next = $tokens_to_go[$ibeg_next_next];
28227 0         0 $ok_comma = $tok_next_next eq $tok_next;
28228             }
28229              
28230             next
28231             unless (
28232 78 100 66     818 $is_assignment{ $types_to_go[$iendm] }
      100        
      100        
      100        
28233             || $ok_comma
28234             || ( $nesting_depth_to_go[$ibegm] <
28235             $nesting_depth_to_go[$ibeg] )
28236             || ( $types_to_go[$iendm] eq 'k'
28237             && $tokens_to_go[$iendm] eq 'return' )
28238             );
28239              
28240             # we will add padding before the first token
28241 56         166 $ipad = $ibeg;
28242             }
28243              
28244             # for first line of the batch..
28245             else {
28246              
28247             # WARNING: Never indent if first line is starting in a
28248             # continued quote, which would change the quote.
28249 28 50       123 next if $starting_in_quote;
28250              
28251             # if this is text after closing '}'
28252             # then look for an interior token to pad
28253 28 50       143 if ( $types_to_go[$ibeg] eq '}' ) {
    100          
28254              
28255             }
28256              
28257             # otherwise, we might pad if it looks really good
28258             elsif ($is_short_block) {
28259 2         5 $ipad = $ibeg;
28260             }
28261             else {
28262              
28263             # we might pad token $ibeg, so be sure that it
28264             # is at the same depth as the next line.
28265             next
28266 26 50       95 if ( $nesting_depth_to_go[$ibeg] !=
28267             $nesting_depth_to_go[$ibeg_next] );
28268              
28269             # We can pad on line 1 of a statement if at least 3
28270             # lines will be aligned. Otherwise, it
28271             # can look very confusing.
28272              
28273             # We have to be careful not to pad if there are too few
28274             # lines. The current rule is:
28275             # (1) in general we require at least 3 consecutive lines
28276             # with the same leading chain operator token,
28277             # (2) but an exception is that we only require two lines
28278             # with leading colons if there are no more lines. For example,
28279             # the first $i in the following snippet would get padding
28280             # by the second rule:
28281             #
28282             # $i == 1 ? ( "First", "Color" )
28283             # : $i == 2 ? ( "Then", "Rarity" )
28284             # : ( "Then", "Name" );
28285              
28286 26 100       97 next if ( $max_line <= 1 );
28287              
28288 10         32 my $leading_token = $tokens_to_go[$ibeg_next];
28289 10         23 my $tokens_differ;
28290              
28291             # never indent line 1 of a '.' series because
28292             # previous line is most likely at same level.
28293             # TODO: we should also look at the leading_spaces
28294             # of the last output line and skip if it is same
28295             # as this line.
28296 10 100       45 next if ( $leading_token eq '.' );
28297              
28298 7         17 my $count = 1;
28299 7         39 foreach my $l ( 2 .. 3 ) {
28300 11 50       34 last if ( $line + $l > $max_line );
28301 11         19 $count++;
28302 11         30 my $ibeg_next_next = $ri_first->[ $line + $l ];
28303             next
28304 11 100       46 if ( $tokens_to_go[$ibeg_next_next] eq
28305             $leading_token );
28306 4         12 $tokens_differ = 1;
28307 4         12 last;
28308             }
28309 7 100       34 next if ($tokens_differ);
28310 3 50 33     20 next if ( $count < 3 && $leading_token ne ':' );
28311 3         8 $ipad = $ibeg;
28312             }
28313             }
28314             }
28315              
28316             # find interior token to pad if necessary
28317 83 100       1162 if ( !defined($ipad) ) {
28318              
28319 22         88 foreach my $i ( $ibeg .. $iend - 1 ) {
28320              
28321             # find any unclosed container
28322             next
28323 61 50 66     301 unless ( $type_sequence_to_go[$i]
      66        
28324             && defined( $mate_index_to_go[$i] )
28325             && $mate_index_to_go[$i] > $iend );
28326              
28327             # find next nonblank token to pad
28328 22         48 $ipad = $inext_to_go[$i];
28329 22 50       86 last if $ipad;
28330             }
28331 22 50 33     179 last if ( !$ipad || $ipad > $iend );
28332             }
28333              
28334             # We cannot pad the first leading token of a file because
28335             # it could cause a bug in which the starting indentation
28336             # level is guessed incorrectly each time the code is run
28337             # though perltidy, thus causing the code to march off to
28338             # the right. For example, the following snippet would have
28339             # this problem:
28340              
28341             ## ov_method mycan( $package, '(""' ), $package
28342             ## or ov_method mycan( $package, '(0+' ), $package
28343             ## or ov_method mycan( $package, '(bool' ), $package
28344             ## or ov_method mycan( $package, '(nomethod' ), $package;
28345              
28346             # If this snippet is within a block this won't happen
28347             # unless the user just processes the snippet alone within
28348             # an editor. In that case either the user will see and
28349             # fix the problem or it will be corrected next time the
28350             # entire file is processed with perltidy.
28351 83         242 my $this_batch = $self->[_this_batch_];
28352 83         200 my $peak_batch_size = $this_batch->[_peak_batch_size_];
28353 83 50 66     338 next if ( $ipad == 0 && $peak_batch_size <= 1 );
28354              
28355             # next line must not be at greater depth
28356 83         216 my $iend_next = $ri_last->[ $line + 1 ];
28357             next
28358 83 100       341 if ( $nesting_depth_to_go[ $iend_next + 1 ] >
28359             $nesting_depth_to_go[$ipad] );
28360              
28361             # lines must be somewhat similar to be padded..
28362 77         198 my $inext_next = $inext_to_go[$ibeg_next];
28363 77         189 my $type = $types_to_go[$ipad];
28364              
28365             # see if there are multiple continuation lines
28366 77         191 my $logical_continuation_lines = 1;
28367 77 100       272 if ( $line + 2 <= $max_line ) {
28368 71         162 my $leading_token = $tokens_to_go[$ibeg_next];
28369 71         186 my $ibeg_next_next = $ri_first->[ $line + 2 ];
28370 71 100 66     410 if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
28371             && $nesting_depth_to_go[$ibeg_next] eq
28372             $nesting_depth_to_go[$ibeg_next_next] )
28373             {
28374 42         107 $logical_continuation_lines++;
28375             }
28376             }
28377              
28378             # see if leading types match
28379 77         244 my $types_match = $types_to_go[$inext_next] eq $type;
28380 77         172 my $matches_without_bang;
28381              
28382             # if first line has leading ! then compare the following token
28383 77 100 100     406 if ( !$types_match && $type eq '!' ) {
28384 4         17 $types_match = $matches_without_bang =
28385             $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
28386             }
28387 77 100 100     830 if (
      100        
      100        
      100        
      100        
28388              
28389             # either we have multiple continuation lines to follow
28390             # and we are not padding the first token
28391             (
28392             $logical_continuation_lines > 1
28393             && ( $ipad > 0 || $is_short_block )
28394             )
28395              
28396             # or..
28397             || (
28398              
28399             # types must match
28400             $types_match
28401              
28402             # and keywords must match if keyword
28403             && !(
28404             $type eq 'k'
28405             && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
28406             )
28407             )
28408             )
28409             {
28410              
28411             #----------------------begin special checks--------------
28412             #
28413             # SPECIAL CHECK 1:
28414             # A check is needed before we can make the pad.
28415             # If we are in a list with some long items, we want each
28416             # item to stand out. So in the following example, the
28417             # first line beginning with '$casefold->' would look good
28418             # padded to align with the next line, but then it
28419             # would be indented more than the last line, so we
28420             # won't do it.
28421             #
28422             # ok(
28423             # $casefold->{code} eq '0041'
28424             # && $casefold->{status} eq 'C'
28425             # && $casefold->{mapping} eq '0061',
28426             # 'casefold 0x41'
28427             # );
28428             #
28429             # Note:
28430             # It would be faster, and almost as good, to use a comma
28431             # count, and not pad if comma_count > 1 and the previous
28432             # line did not end with a comma.
28433             #
28434 56         145 my $ok_to_pad = 1;
28435              
28436 56         177 my $ibg = $ri_first->[ $line + 1 ];
28437 56         136 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
28438              
28439             # just use simplified formula for leading spaces to avoid
28440             # needless sub calls
28441 56         134 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
28442              
28443             # look at each line beyond the next ..
28444 56         124 my $l = $line + 1;
28445 56         201 foreach my $ltest ( $line + 2 .. $max_line ) {
28446 171         260 $l = $ltest;
28447 171         292 my $ibeg_t = $ri_first->[$l];
28448              
28449             # quit looking at the end of this container
28450             last
28451 171 100 100     643 if ( $nesting_depth_to_go[ $ibeg_t + 1 ] < $depth )
28452             || ( $nesting_depth_to_go[$ibeg_t] < $depth );
28453              
28454             # cannot do the pad if a later line would be
28455             # outdented more
28456 152 100       417 if ( $levels_to_go[$ibeg_t] + $ci_levels_to_go[$ibeg_t] <
28457             $lsp )
28458             {
28459 2         17 $ok_to_pad = 0;
28460 2         5 last;
28461             }
28462             }
28463              
28464             # don't pad if we end in a broken list
28465 56 100       268 if ( $l == $max_line ) {
28466 41         95 my $i2 = $ri_last->[$l];
28467 41 100       166 if ( $types_to_go[$i2] eq '#' ) {
28468 1         3 my $i1 = $ri_first->[$l];
28469 1 50       6 next if terminal_type_i( $i1, $i2 ) eq ',';
28470             }
28471             }
28472              
28473             # SPECIAL CHECK 2:
28474             # a minus may introduce a quoted variable, and we will
28475             # add the pad only if this line begins with a bare word,
28476             # such as for the word 'Button' here:
28477             # [
28478             # Button => "Print letter \"~$_\"",
28479             # -command => [ sub { print "$_[0]\n" }, $_ ],
28480             # -accelerator => "Meta+$_"
28481             # ];
28482             #
28483             # On the other hand, if 'Button' is quoted, it looks best
28484             # not to pad:
28485             # [
28486             # 'Button' => "Print letter \"~$_\"",
28487             # -command => [ sub { print "$_[0]\n" }, $_ ],
28488             # -accelerator => "Meta+$_"
28489             # ];
28490 56 50       222 if ( $types_to_go[$ibeg_next] eq 'm' ) {
28491 0 0       0 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
28492             }
28493              
28494 56 100       170 next unless $ok_to_pad;
28495              
28496             #----------------------end special check---------------
28497              
28498 54         235 my $length_1 = total_line_length( $ibeg, $ipad - 1 );
28499 54         259 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
28500 54         142 $pad_spaces = $length_2 - $length_1;
28501              
28502             # If the first line has a leading ! and the second does
28503             # not, then remove one space to try to align the next
28504             # leading characters, which are often the same. For example:
28505             # if ( !$ts
28506             # || $ts == $self->Holder
28507             # || $self->Holder->Type eq "Arena" )
28508             #
28509             # This usually helps readability, but if there are subsequent
28510             # ! operators things will still get messed up. For example:
28511             #
28512             # if ( !exists $Net::DNS::typesbyname{$qtype}
28513             # && exists $Net::DNS::classesbyname{$qtype}
28514             # && !exists $Net::DNS::classesbyname{$qclass}
28515             # && exists $Net::DNS::typesbyname{$qclass} )
28516             # We can't fix that.
28517 54 100       173 if ($matches_without_bang) { $pad_spaces-- }
  4         13  
28518              
28519             # make sure this won't change if -lp is used
28520 54         144 my $indentation_1 = $leading_spaces_to_go[$ibeg];
28521 54 50 33     234 if ( ref($indentation_1)
28522             && $indentation_1->get_recoverable_spaces() == 0 )
28523             {
28524 0         0 my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
28525 0 0 0     0 if ( ref($indentation_2)
28526             && $indentation_2->get_recoverable_spaces() != 0 )
28527             {
28528 0         0 $pad_spaces = 0;
28529             }
28530             }
28531              
28532             # we might be able to handle a pad of -1 by removing a blank
28533             # token
28534 54 100       218 if ( $pad_spaces < 0 ) {
28535              
28536             # Deactivated for -kpit due to conflict. This block deletes
28537             # a space in an attempt to improve alignment in some cases,
28538             # but it may conflict with user spacing requests. For now
28539             # it is just deactivated if the -kpit option is used.
28540 5 100       30 if ( $pad_spaces == -1 ) {
28541 3 100 33     61 if ( $ipad > $ibeg
      66        
28542             && $types_to_go[ $ipad - 1 ] eq 'b'
28543             && !%keyword_paren_inner_tightness )
28544             {
28545 2         15 $self->pad_token( $ipad - 1, $pad_spaces );
28546             }
28547             }
28548 5         15 $pad_spaces = 0;
28549             }
28550              
28551             # now apply any padding for alignment
28552 54 100 66     327 if ( $ipad >= 0 && $pad_spaces ) {
28553              
28554 47         143 my $length_t = total_line_length( $ibeg, $iend );
28555 47 50       291 if ( $pad_spaces + $length_t <=
28556             $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] )
28557             {
28558 47         247 $self->pad_token( $ipad, $pad_spaces );
28559             }
28560             }
28561             }
28562             }
28563             continue {
28564 2807         3982 $iendm = $iend;
28565 2807         3773 $ibegm = $ibeg;
28566 2807         4611 $has_leading_op = $has_leading_op_next;
28567             } ## end of loop over lines
28568 749         1985 return;
28569             } ## end sub set_logical_padding
28570             } ## end closure set_logical_padding
28571              
28572             sub pad_token {
28573              
28574             # insert $pad_spaces before token number $ipad
28575 57     57 0 196 my ( $self, $ipad, $pad_spaces ) = @_;
28576 57         137 my $rLL = $self->[_rLL_];
28577 57         117 my $KK = $K_to_go[$ipad];
28578 57         153 my $tok = $rLL->[$KK]->[_TOKEN_];
28579 57         135 my $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_];
28580              
28581 57 100 33     217 if ( $pad_spaces > 0 ) {
    50          
    50          
28582 55         234 $tok = SPACE x $pad_spaces . $tok;
28583 55         140 $tok_len += $pad_spaces;
28584             }
28585             elsif ( $pad_spaces == 0 ) {
28586 0         0 return;
28587             }
28588             elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq SPACE ) {
28589 2         5 $tok = EMPTY_STRING;
28590 2         6 $tok_len = 0;
28591             }
28592             else {
28593              
28594             # shouldn't happen
28595 0         0 DEVEL_MODE
28596             && Fault("unexpected request for pad spaces = $pad_spaces\n");
28597 0         0 return;
28598             }
28599              
28600 57         195 $tok = $rLL->[$KK]->[_TOKEN_] = $tok;
28601 57         180 $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
28602              
28603 57         121 $token_lengths_to_go[$ipad] += $pad_spaces;
28604 57         145 $tokens_to_go[$ipad] = $tok;
28605              
28606 57         190 foreach my $i ( $ipad .. $max_index_to_go ) {
28607 3019         4144 $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
28608             }
28609 57         213 return;
28610             } ## end sub pad_token
28611              
28612             sub xlp_tweak {
28613              
28614             # Remove one indentation space from unbroken containers marked with
28615             # 'K_extra_space'. These are mostly two-line lists with short names
28616             # formatted with -xlp -pt=2.
28617             #
28618             # Before this fix (extra space in line 2):
28619             # is($module->VERSION, $expected,
28620             # "$main_module->VERSION matches $module->VERSION ($expected)");
28621             #
28622             # After this fix:
28623             # is($module->VERSION, $expected,
28624             # "$main_module->VERSION matches $module->VERSION ($expected)");
28625             #
28626             # Notes:
28627             # - This fixes issue git #106
28628             # - This must be called after 'set_logical_padding'.
28629             # - This is currently only applied to -xlp. It would also work for -lp
28630             # but that style is essentially frozen.
28631              
28632 33     33 0 62 my ( $self, $ri_first, $ri_last ) = @_;
28633              
28634             # Must be 2 or more lines
28635 33 50       47 return unless ( @{$ri_first} > 1 );
  33         76  
28636              
28637             # Pull indentation object from start of second line
28638 33         51 my $ibeg_1 = $ri_first->[1];
28639 33         56 my $lp_object = $leading_spaces_to_go[$ibeg_1];
28640 33 100       77 return if ( !ref($lp_object) );
28641              
28642             # This only applies to an indentation object with a marked token
28643 28         82 my $K_extra_space = $lp_object->get_K_extra_space();
28644 28 100       76 return unless ($K_extra_space);
28645              
28646             # Look for the marked token within the first line of this batch
28647 3         7 my $ibeg_0 = $ri_first->[0];
28648 3         5 my $iend_0 = $ri_last->[0];
28649 3         6 my $ii = $ibeg_0 + $K_extra_space - $K_to_go[$ibeg_0];
28650 3 50 33     13 return if ( $ii <= $ibeg_0 || $ii > $iend_0 );
28651              
28652             # Skip padded tokens, they have already been aligned
28653 3         7 my $tok = $tokens_to_go[$ii];
28654 3 100       21 return if ( substr( $tok, 0, 1 ) eq SPACE );
28655              
28656             # Skip 'if'-like statements, this does not improve them
28657             return
28658             if ( $types_to_go[$ibeg_0] eq 'k'
28659 2 50 66     14 && $is_if_unless_elsif{ $tokens_to_go[$ibeg_0] } );
28660              
28661             # Looks okay, reduce indentation by 1 space if possible
28662 2         7 my $spaces = $lp_object->get_spaces();
28663 2 50       6 if ( $spaces > 0 ) {
28664 2         6 $lp_object->decrease_SPACES(1);
28665             }
28666              
28667 2         19 return;
28668             } ## end sub xlp_tweak
28669              
28670             { ## begin closure make_alignment_patterns
28671              
28672             my %keyword_map;
28673             my %operator_map;
28674             my %is_w_n_C;
28675             my %is_my_local_our;
28676             my %is_kwU;
28677             my %is_use_like;
28678             my %is_binary_type;
28679             my %is_binary_keyword;
28680             my %name_map;
28681              
28682             BEGIN {
28683              
28684             # Note: %block_type_map is now global to enable the -gal=s option
28685              
28686             # Map certain keywords to the same 'if' class to align
28687             # long if/elsif sequences. [elsif.pl]. But note that this is
28688             # only for purposes of making the patterns, not alignment tokens.
28689             # The only possible equivalent alignment tokens are 'if' and 'unless',
28690             # and this is handled earlier under control of $rOpts_valign_if_unless
28691             # to avoid making this a global hash.
28692 38     38   493 %keyword_map = (
28693             'unless' => 'if',
28694             'else' => 'if',
28695             'elsif' => 'if',
28696             'when' => 'given',
28697             'default' => 'given',
28698             'case' => 'switch',
28699              
28700             # treat an 'undef' similar to numbers and quotes
28701             'undef' => 'Q',
28702             );
28703              
28704             # Map certain operators to the same class for alignment.
28705             # Note that this map is for the alignment tokens, not the patterns.
28706             # We could have placed 'unless' => 'if' here, but since that is
28707             # under control of $rOpts_valign_if_unless, it is handled elsewhere.
28708 38         212 %operator_map = (
28709             '!~' => '=~',
28710             '+=' => '+=',
28711             '-=' => '+=',
28712             '*=' => '+=',
28713             '/=' => '+=',
28714             );
28715              
28716 38         156 %is_w_n_C = (
28717             'w' => 1,
28718             'n' => 1,
28719             'C' => 1,
28720             );
28721              
28722             # leading keywords which to skip for efficiency when making parenless
28723             # container names
28724 38         146 my @q = qw( my local our return );
28725 38         213 @{is_my_local_our}{@q} = (1) x scalar(@q);
28726              
28727             # leading keywords where we should just join one token to form
28728             # parenless name
28729 38         105 @q = qw( use );
28730 38         144 @{is_use_like}{@q} = (1) x scalar(@q);
28731              
28732             # leading token types which may be used to make a container name
28733 38         120 @q = qw( k w U );
28734 38         151 @{is_kwU}{@q} = (1) x scalar(@q);
28735              
28736             # token types which prevent using leading word as a container name
28737 38         645 @q = qw(
28738             x / : % . | ^ < = > || >= != *= => !~ == && |= .= -= =~ += <=
28739             %= ^= x= ~~ ** << /= &= // >> ~. &. |. ^.
28740             **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~
28741             );
28742 38         137 push @q, ',';
28743 38         810 @{is_binary_type}{@q} = (1) x scalar(@q);
28744              
28745             # token keywords which prevent using leading word as a container name
28746 38         184 @q = qw(and or err eq ne cmp);
28747 38         255 @is_binary_keyword{@q} = (1) x scalar(@q);
28748              
28749             # Some common function calls whose args can be aligned. These do not
28750             # give good alignments if the lengths differ significantly.
28751 38         311827 %name_map = (
28752             'unlike' => 'like',
28753             'isnt' => 'is',
28754             ##'is_deeply' => 'is', # poor; names lengths too different
28755             );
28756              
28757             } ## end BEGIN
28758              
28759             sub make_alignment_patterns {
28760              
28761 6082     6082 0 17404 my ( $self, $ibeg, $iend, $ralignment_type_to_go, $alignment_count,
28762             $ralignment_hash )
28763             = @_;
28764              
28765             #------------------------------------------------------------------
28766             # This sub creates arrays of vertical alignment info for one output
28767             # line.
28768             #------------------------------------------------------------------
28769              
28770             # Input parameters:
28771             # $ibeg, $iend - index range of this line in the _to_go arrays
28772             # $ralignment_type_to_go - alignment type of tokens, like '=', if any
28773             # $alignment_count - number of alignment tokens in the line
28774             # $ralignment_hash - this contains all of the alignments for this
28775             # line. It is not yet used but is available for future coding in
28776             # case there is a need to do a preliminary scan of alignment tokens.
28777              
28778             # The arrays which are created contain strings that can be tested by
28779             # the vertical aligner to see if consecutive lines can be aligned
28780             # vertically.
28781             #
28782             # The four arrays are indexed on the vertical
28783             # alignment fields and are:
28784             # @tokens - a list of any vertical alignment tokens for this line.
28785             # These are tokens, such as '=' '&&' '#' etc which
28786             # we want to might align vertically. These are
28787             # decorated with various information such as
28788             # nesting depth to prevent unwanted vertical
28789             # alignment matches.
28790             # @fields - the actual text of the line between the vertical alignment
28791             # tokens.
28792             # @patterns - a modified list of token types, one for each alignment
28793             # field. These should normally each match before alignment is
28794             # allowed, even when the alignment tokens match.
28795             # @field_lengths - the display width of each field
28796              
28797 6082         9020 if (DEVEL_MODE) {
28798             my $new_count = 0;
28799             if ( defined($ralignment_hash) ) {
28800             $new_count = keys %{$ralignment_hash};
28801             }
28802             my $old_count = $alignment_count;
28803             $old_count = 0 unless ($old_count);
28804             if ( $new_count != $old_count ) {
28805             my $K = $K_to_go[$ibeg];
28806             my $rLL = $self->[_rLL_];
28807             my $lnl = $rLL->[$K]->[_LINE_INDEX_];
28808             Fault(
28809             "alignment hash token count gives count=$new_count but old count is $old_count near line=$lnl\n"
28810             );
28811             }
28812             }
28813              
28814             # -------------------------------------
28815             # Shortcut for lines without alignments
28816             # -------------------------------------
28817 6082 100       12610 if ( !$alignment_count ) {
28818 3077         5731 my $rtokens = [];
28819 3077         8499 my $rfield_lengths =
28820             [ $summed_lengths_to_go[ $iend + 1 ] -
28821             $summed_lengths_to_go[$ibeg] ];
28822 3077         5292 my $rpatterns;
28823             my $rfields;
28824 3077 100       6186 if ( $ibeg == $iend ) {
28825 593         1728 $rfields = [ $tokens_to_go[$ibeg] ];
28826 593         1758 $rpatterns = [ $types_to_go[$ibeg] ];
28827             }
28828             else {
28829 2484         11017 $rfields =
28830             [ join( EMPTY_STRING, @tokens_to_go[ $ibeg .. $iend ] ) ];
28831 2484         8146 $rpatterns =
28832             [ join( EMPTY_STRING, @types_to_go[ $ibeg .. $iend ] ) ];
28833             }
28834 3077         9569 return [ $rtokens, $rfields, $rpatterns, $rfield_lengths ];
28835             }
28836              
28837 3005         5174 my $i_start = $ibeg;
28838 3005         4656 my $depth = 0;
28839 3005         4492 my $i_depth_prev = $i_start;
28840 3005         4400 my $depth_prev = $depth;
28841 3005         7436 my %container_name = ( 0 => EMPTY_STRING );
28842 3005         4787 my $saw_exclamation_mark = 0;
28843              
28844 3005         4951 my @tokens = ();
28845 3005         4534 my @fields = ();
28846 3005         4516 my @patterns = ();
28847 3005         4549 my @field_lengths = ();
28848              
28849             #-------------------------------------------------------------
28850             # Make a container name for any uncontained commas, issue c089
28851             #-------------------------------------------------------------
28852             # This is a generalization of the fix for rt136416 which was a
28853             # specialized patch just for 'use Module' statements.
28854             # We restrict this to semicolon-terminated statements; that way
28855             # we know that the top level commas are not in a list container.
28856 3005 100 100     10432 if ( $ibeg == 0 && $iend == $max_index_to_go ) {
28857 1563         2703 my $iterm = $max_index_to_go;
28858 1563 100       4286 if ( $types_to_go[$iterm] eq '#' ) {
28859 289         838 $iterm = iprev_to_go($iterm);
28860             }
28861              
28862             # Alignment lines ending like '=> sub {'; fixes issue c093
28863 1563         3048 my $term_type_ok = $types_to_go[$iterm] eq ';';
28864 1563   66     5917 $term_type_ok ||=
      100        
28865             $tokens_to_go[$iterm] eq '{' && $block_type_to_go[$iterm];
28866              
28867 1563 100 100     11003 if ( $iterm > $ibeg
      100        
      66        
28868             && $term_type_ok
28869             && !$is_my_local_our{ $tokens_to_go[$ibeg] }
28870             && $levels_to_go[$ibeg] eq $levels_to_go[$iterm] )
28871             {
28872 846         2453 $container_name{'0'} =
28873             make_uncontained_comma_name( $iterm, $ibeg, $iend );
28874             }
28875             }
28876              
28877             #--------------------------------
28878             # Begin main loop over all tokens
28879             #--------------------------------
28880 3005         5009 my $j = 0; # field index
28881              
28882 3005         6265 $patterns[0] = EMPTY_STRING;
28883 3005         4696 my %token_count;
28884 3005         6586 for my $i ( $ibeg .. $iend ) {
28885              
28886             #-------------------------------------------------------------
28887             # Part 1: keep track of containers balanced on this line only.
28888             #-------------------------------------------------------------
28889             # These are used below to prevent unwanted cross-line alignments.
28890             # Unbalanced containers already avoid aligning across
28891             # container boundaries.
28892 36865         52225 my $type = $types_to_go[$i];
28893 36865 100       61672 if ( $type_sequence_to_go[$i] ) {
28894 5276         8822 my $token = $tokens_to_go[$i];
28895 5276 100       13536 if ( $is_opening_token{$token} ) {
    100          
28896              
28897             # if container is balanced on this line...
28898 2703         4774 my $i_mate = $mate_index_to_go[$i];
28899 2703 100       6075 if ( !defined($i_mate) ) { $i_mate = -1 }
  302         665  
28900 2703 100 100     9804 if ( $i_mate > $i && $i_mate <= $iend ) {
28901 2180         3363 $i_depth_prev = $i;
28902 2180         3219 $depth_prev = $depth;
28903 2180         3254 $depth++;
28904              
28905             # Append the previous token name to make the container name
28906             # more unique. This name will also be given to any commas
28907             # within this container, and it helps avoid undesirable
28908             # alignments of different types of containers.
28909              
28910             # Containers beginning with { and [ are given those names
28911             # for uniqueness. That way commas in different containers
28912             # will not match. Here is an example of what this prevents:
28913             # a => [ 1, 2, 3 ],
28914             # b => { b1 => 4, b2 => 5 },
28915             # Here is another example of what we avoid by labeling the
28916             # commas properly:
28917              
28918             # is_d( [ $a, $a ], [ $b, $c ] );
28919             # is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } );
28920             # is_d( [ \$a, \$a ], [ \$b, \$c ] );
28921              
28922 2180 100       6263 my $name =
28923             $token eq '(' ? $self->make_paren_name($i) : $token;
28924              
28925             # name cannot be '.', so change to something else if so
28926 2180 100       5084 if ( $name eq '.' ) { $name = 'dot' }
  1         3  
28927              
28928 2180         5636 $container_name{$depth} = "+" . $name;
28929              
28930             # Make the container name even more unique if necessary.
28931             # If we are not vertically aligning this opening paren,
28932             # append a character count to avoid bad alignment since
28933             # it usually looks bad to align commas within containers
28934             # for which the opening parens do not align. Here
28935             # is an example very BAD alignment of commas (because
28936             # the atan2 functions are not all aligned):
28937             # $XY =
28938             # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
28939             # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
28940             # $X * atan2( $X, 1 ) -
28941             # $Y * atan2( $Y, 1 );
28942             #
28943             # On the other hand, it is usually okay to align commas
28944             # if opening parens align, such as:
28945             # glVertex3d( $cx + $s * $xs, $cy, $z );
28946             # glVertex3d( $cx, $cy + $s * $ys, $z );
28947             # glVertex3d( $cx - $s * $xs, $cy, $z );
28948             # glVertex3d( $cx, $cy - $s * $ys, $z );
28949             #
28950             # To distinguish between these situations, we append
28951             # the length of the line from the previous matching
28952             # token, or beginning of line, to the function name.
28953             # This will allow the vertical aligner to reject
28954             # undesirable matches.
28955              
28956             # if we are not aligning on this paren...
28957 2180 100       5303 if ( !$ralignment_type_to_go->[$i] ) {
28958              
28959             # Add the length to the name ...
28960 1658         3342 my $len = $summed_lengths_to_go[$i] -
28961             $summed_lengths_to_go[$i_start];
28962              
28963             # Do not include the length of any '!'. Otherwise,
28964             # commas in the following line will not match:
28965             # ok( 20, tapprox( ( pdl 2, 3 ), ( pdl 2, 3 ) ) );
28966             # ok( 21, !tapprox( ( pdl 2, 3 ), ( pdl 2, 4 ) ) );
28967 1658 100       3648 if ($saw_exclamation_mark) { $len -= 1 }
  36         93  
28968              
28969             # For first token, use distance from start of line
28970             # but subtract off the indentation due to level.
28971             # Otherwise, results could vary with indentation.
28972 1658 100       3674 if ( $i_start == $ibeg ) {
28973 728         2191 $len +=
28974             leading_spaces_to_go($ibeg) -
28975             $levels_to_go[$i_start] *
28976             $rOpts_indent_columns;
28977             }
28978 1658 50       3784 if ( $len < 0 ) { $len = 0 }
  0         0  
28979              
28980             # tack this length onto the container name to try
28981             # to make a unique token name
28982 1658         3852 $container_name{$depth} .= "-" . $len;
28983             } ## end if ( !$ralignment_type_to_go...)
28984             } ## end if ( $i_mate > $i && $i_mate...)
28985             } ## end if ( $is_opening_token...)
28986              
28987             elsif ( $is_closing_type{$token} ) {
28988 2305         4188 $i_depth_prev = $i;
28989 2305         3633 $depth_prev = $depth;
28990 2305 100       5348 $depth-- if $depth > 0;
28991             }
28992             } ## end if ( $type_sequence_to_go...)
28993              
28994             #------------------------------------------------------------
28995             # Part 2: if we find a new synchronization token, we are done
28996             # with a field
28997             #------------------------------------------------------------
28998 36865 100 100     100676 if ( $i > $i_start && $ralignment_type_to_go->[$i] ) {
28999              
29000 5277         10089 my $tok = my $raw_tok = $ralignment_type_to_go->[$i];
29001              
29002             # map similar items
29003 5277         9263 my $tok_map = $operator_map{$tok};
29004 5277 100       9996 $tok = $tok_map if ($tok_map);
29005              
29006             # make separators in different nesting depths unique
29007             # by appending the nesting depth digit.
29008 5277 100       10745 if ( $raw_tok ne '#' ) {
29009 4952         10270 $tok .= "$nesting_depth_to_go[$i]";
29010             }
29011              
29012             # also decorate commas with any container name to avoid
29013             # unwanted cross-line alignments.
29014 5277 100 100     16915 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
29015              
29016             # If we are at an opening token which increased depth, we have
29017             # to use the name from the previous depth.
29018 2739 100       5886 my $depth_last = $i == $i_depth_prev ? $depth_prev : $depth;
29019 2739 100       5279 my $depth_p =
29020             ( $depth_last < $depth ? $depth_last : $depth );
29021 2739 100       5759 if ( $container_name{$depth_p} ) {
29022 1404         2601 $tok .= $container_name{$depth_p};
29023             }
29024             }
29025              
29026             # Patch to avoid aligning leading and trailing if, unless.
29027             # Mark trailing if, unless statements with container names.
29028             # This makes them different from leading if, unless which
29029             # are not so marked at present. If we ever need to name
29030             # them too, we could use ci to distinguish them.
29031             # Example problem to avoid:
29032             # return ( 2, "DBERROR" )
29033             # if ( $retval == 2 );
29034             # if ( scalar @_ ) {
29035             # my ( $a, $b, $c, $d, $e, $f ) = @_;
29036             # }
29037 5277 100       10503 if ( $raw_tok eq '(' ) {
29038 205 100 100     974 if ( $ci_levels_to_go[$ibeg]
29039             && $container_name{$depth} =~ /^\+(if|unless)/ )
29040             {
29041 1         4 $tok .= $container_name{$depth};
29042             }
29043             }
29044              
29045             # Decorate block braces with block types to avoid
29046             # unwanted alignments such as the following:
29047             # foreach ( @{$routput_array} ) { $fh->print($_) }
29048             # eval { $fh->close() };
29049 5277 100 100     11147 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
29050 234         538 my $block_type = $block_type_to_go[$i];
29051              
29052             # map certain related block types to allow
29053             # else blocks to align
29054             $block_type = $block_type_map{$block_type}
29055 234 100       959 if ( defined( $block_type_map{$block_type} ) );
29056              
29057             # remove sub names to allow one-line sub braces to align
29058             # regardless of name
29059 234 100       1811 if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
  45         122  
29060              
29061             # allow all control-type blocks to align
29062 234 100       1077 if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
  12         29  
29063              
29064 234         535 $tok .= $block_type;
29065              
29066             # Avoid aligning opening braces across leading ci level
29067             # changes by marking block type with _ci (issue c224)
29068 234 100       725 if ( $ci_levels_to_go[$ibeg] ) { $tok .= '_1' }
  24         64  
29069             }
29070              
29071             # Mark multiple copies of certain tokens with the copy number
29072             # This will allow the aligner to decide if they are matched.
29073             # For now, only do this for equals. For example, the two
29074             # equals on the next line will be labeled '=0' and '=0.2'.
29075             # Later, the '=0.2' will be ignored in alignment because it
29076             # has no match.
29077              
29078             # $| = $debug = 1 if $opt_d;
29079             # $full_index = 1 if $opt_i;
29080              
29081 5277 100 100     16458 if ( $raw_tok eq '=' || $raw_tok eq '=>' ) {
29082 2007         5076 $token_count{$tok}++;
29083 2007 100       4929 if ( $token_count{$tok} > 1 ) {
29084 193         594 $tok .= '.' . $token_count{$tok};
29085             }
29086             }
29087              
29088             # concatenate the text of the consecutive tokens to form
29089             # the field
29090 5277         20503 push( @fields,
29091             join( EMPTY_STRING, @tokens_to_go[ $i_start .. $i - 1 ] ) );
29092              
29093 5277         11972 push @field_lengths,
29094             $summed_lengths_to_go[$i] - $summed_lengths_to_go[$i_start];
29095              
29096             # store the alignment token for this field
29097 5277         10010 push( @tokens, $tok );
29098              
29099             # get ready for the next batch
29100 5277         7814 $i_start = $i;
29101 5277         7344 $saw_exclamation_mark = 0;
29102 5277         7150 $j++;
29103 5277         9630 $patterns[$j] = EMPTY_STRING;
29104             } ## end if ( new synchronization token
29105              
29106             #-----------------------------------------------
29107             # Part 3: continue accumulating the next pattern
29108             #-----------------------------------------------
29109              
29110             # for keywords we have to use the actual text
29111 36865 100       88147 if ( $type eq 'k' ) {
    100          
    100          
    100          
29112              
29113 1833         3517 my $tok_fix = $tokens_to_go[$i];
29114              
29115             # but map certain keywords to a common string to allow
29116             # alignment.
29117             $tok_fix = $keyword_map{$tok_fix}
29118 1833 100       5532 if ( defined( $keyword_map{$tok_fix} ) );
29119 1833         4207 $patterns[$j] .= $tok_fix;
29120             }
29121              
29122             elsif ( $type eq 'b' ) {
29123 13124         21779 $patterns[$j] .= $type;
29124             }
29125              
29126             # Mark most things before arrows as a quote to
29127             # get them to line up. Testfile: mixed.pl.
29128              
29129             # handle $type =~ /^[wnC]$/
29130             elsif ( $is_w_n_C{$type} ) {
29131              
29132 2621         4677 my $type_fix = $type;
29133              
29134 2621 100       6420 if ( $i < $iend - 1 ) {
29135 2330         4345 my $next_type = $types_to_go[ $i + 1 ];
29136 2330 100       4958 my $i_next_nonblank =
29137             ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
29138              
29139 2330 100       5846 if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
29140 789         1370 $type_fix = 'Q';
29141              
29142             # Patch to ignore leading minus before words,
29143             # by changing pattern 'mQ' into just 'Q',
29144             # so that we can align things like this:
29145             # Button => "Print letter \"~$_\"",
29146             # -command => [ sub { print "$_[0]\n" }, $_ ],
29147 789 100       1974 if ( $patterns[$j] eq 'm' ) {
29148 212         425 $patterns[$j] = EMPTY_STRING;
29149             }
29150             }
29151             }
29152              
29153             # Convert a bareword within braces into a quote for
29154             # matching. This will allow alignment of expressions like
29155             # this:
29156             # local ( $SIG{'INT'} ) = IGNORE;
29157             # local ( $SIG{ALRM} ) = 'POSTMAN';
29158 2621 100 100     11969 if ( $type eq 'w'
      100        
      100        
      66        
29159             && $i > $ibeg
29160             && $i < $iend
29161             && $types_to_go[ $i - 1 ] eq 'L'
29162             && $types_to_go[ $i + 1 ] eq 'R' )
29163             {
29164 68         187 $type_fix = 'Q';
29165             }
29166              
29167             # patch to make numbers and quotes align
29168 2621 100       5653 if ( $type eq 'n' ) { $type_fix = 'Q' }
  1395         2420  
29169              
29170 2621         5260 $patterns[$j] .= $type_fix;
29171             } ## end elsif ( $is_w_n_C{$type} )
29172              
29173             # ignore any ! in patterns
29174             elsif ( $type eq '!' ) {
29175 43         176 $saw_exclamation_mark = 1;
29176             }
29177              
29178             # everything else
29179             else {
29180 19244         29318 $patterns[$j] .= $type;
29181              
29182             # remove any zero-level name at first fat comma
29183 19244 100 100     56255 if ( $depth == 0 && $type eq '=>' ) {
29184 613         1561 $container_name{$depth} = EMPTY_STRING;
29185             }
29186             }
29187              
29188             } ## end for my $i ( $ibeg .. $iend)
29189              
29190             #---------------------------------------------------------------
29191             # End of main loop .. join text of tokens to make the last field
29192             #---------------------------------------------------------------
29193 3005         13005 push( @fields,
29194             join( EMPTY_STRING, @tokens_to_go[ $i_start .. $iend ] ) );
29195 3005         7764 push @field_lengths,
29196             $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$i_start];
29197              
29198 3005         16504 return [ \@tokens, \@fields, \@patterns, \@field_lengths ];
29199             } ## end sub make_alignment_patterns
29200              
29201             sub make_uncontained_comma_name {
29202 846     846 0 1979 my ( $iterm, $ibeg, $iend ) = @_;
29203              
29204             # Make a container name by combining all leading barewords,
29205             # keywords and functions.
29206 846         1480 my $name = EMPTY_STRING;
29207 846         1288 my $count = 0;
29208 846         2282 my $count_max;
29209             my $iname_end;
29210 846         0 my $ilast_blank;
29211 846         2116 for ( $ibeg .. $iterm ) {
29212 1673         2762 my $type = $types_to_go[$_];
29213              
29214 1673 100       3684 if ( $type eq 'b' ) {
29215 383         744 $ilast_blank = $_;
29216 383         780 next;
29217             }
29218              
29219 1290         2235 my $token = $tokens_to_go[$_];
29220              
29221             # Give up if we find an opening paren, binary operator or
29222             # comma within or after the proposed container name.
29223 1290 100 100     7777 if ( $token eq '('
      100        
      100        
29224             || $is_binary_type{$type}
29225             || $type eq 'k' && $is_binary_keyword{$token} )
29226             {
29227 192         411 $name = EMPTY_STRING;
29228 192         429 last;
29229             }
29230              
29231             # The container name is only built of certain types:
29232 1098 100       3231 last if ( !$is_kwU{$type} );
29233              
29234             # Normally it is made of one word, but two words for 'use'
29235 486 100 66     1834 if ( $count == 0 ) {
    100          
29236 380 100 100     1946 if ( $type eq 'k'
29237             && $is_use_like{ $tokens_to_go[$_] } )
29238             {
29239 65         144 $count_max = 2;
29240             }
29241             else {
29242 315         636 $count_max = 1;
29243             }
29244             }
29245             elsif ( defined($count_max) && $count >= $count_max ) {
29246 42         132 last;
29247             }
29248              
29249 444 50       1316 if ( defined( $name_map{$token} ) ) {
29250 0         0 $token = $name_map{$token};
29251             }
29252              
29253 444         1125 $name .= SPACE . $token;
29254 444         804 $iname_end = $_;
29255 444         822 $count++;
29256             }
29257              
29258             # Require a space after the container name token(s)
29259 846 100 66     3524 if ( $name
      100        
29260             && defined($ilast_blank)
29261             && $ilast_blank > $iname_end )
29262             {
29263 206         569 $name = substr( $name, 1 );
29264             }
29265 846         2571 return $name;
29266             } ## end sub make_uncontained_comma_name
29267              
29268             } ## end closure make_alignment_patterns
29269              
29270             sub make_paren_name {
29271 957     957 0 2254 my ( $self, $i ) = @_;
29272              
29273             # The token at index $i is a '('.
29274             # Create an alignment name for it to avoid incorrect alignments.
29275              
29276             # Start with the name of the previous nonblank token...
29277 957         1727 my $name = EMPTY_STRING;
29278 957         1646 my $im = $i - 1;
29279 957 100       2360 return EMPTY_STRING if ( $im < 0 );
29280 938 100       2528 if ( $types_to_go[$im] eq 'b' ) { $im--; }
  494         962  
29281 938 50       2214 return EMPTY_STRING if ( $im < 0 );
29282 938         1769 $name = $tokens_to_go[$im];
29283              
29284             # Prepend any sub name to an isolated -> to avoid unwanted alignments
29285             # [test case is test8/penco.pl]
29286 938 100       2484 if ( $name eq '->' ) {
29287 5         17 $im--;
29288 5 50 33     36 if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
29289 5         16 $name = $tokens_to_go[$im] . $name;
29290             }
29291             }
29292              
29293             # Finally, remove any leading arrows
29294 938 50       2901 if ( substr( $name, 0, 2 ) eq '->' ) {
29295 0         0 $name = substr( $name, 2 );
29296             }
29297 938         2596 return $name;
29298             } ## end sub make_paren_name
29299              
29300             { ## begin closure get_final_indentation
29301              
29302             my ( $last_indentation_written, $last_unadjusted_indentation,
29303             $last_leading_token );
29304              
29305             sub initialize_get_final_indentation {
29306 555     555 0 1372 $last_indentation_written = 0;
29307 555         1217 $last_unadjusted_indentation = 0;
29308 555         1253 $last_leading_token = EMPTY_STRING;
29309 555         963 return;
29310             } ## end sub initialize_get_final_indentation
29311              
29312             sub get_final_indentation {
29313              
29314             my (
29315 7366     7366 0 18094 $self, #
29316              
29317             $ibeg,
29318             $iend,
29319             $rfields,
29320             $rpatterns,
29321             $ri_first,
29322             $ri_last,
29323             $rindentation_list,
29324             $level_jump,
29325             $starting_in_quote,
29326             $is_static_block_comment,
29327              
29328             ) = @_;
29329              
29330             #--------------------------------------------------------------
29331             # This routine makes any necessary adjustments to get the final
29332             # indentation of a line in the Formatter.
29333             #--------------------------------------------------------------
29334              
29335             # It starts with the basic indentation which has been defined for the
29336             # leading token, and then takes into account any options that the user
29337             # has set regarding special indenting and outdenting.
29338              
29339             # This routine has to resolve a number of complex interacting issues,
29340             # including:
29341             # 1. The various -cti=n type flags, which contain the desired change in
29342             # indentation for lines ending in commas and semicolons, should be
29343             # followed,
29344             # 2. qw quotes require special processing and do not fit perfectly
29345             # with normal containers,
29346             # 3. formatting with -wn can complicate things, especially with qw
29347             # quotes,
29348             # 4. formatting with the -lp option is complicated, and does not
29349             # work well with qw quotes and with -wn formatting.
29350             # 5. a number of special situations, such as 'cuddled' formatting.
29351             # 6. This routine is mainly concerned with outdenting closing tokens
29352             # but note that there is some overlap with the functions of sub
29353             # undo_ci, which was processed earlier, so care has to be taken to
29354             # keep them coordinated.
29355              
29356             # Find the last code token of this line
29357 7366         10968 my $i_terminal = $iend;
29358 7366         12686 my $terminal_type = $types_to_go[$iend];
29359 7366 100 100     20140 if ( $terminal_type eq '#' && $i_terminal > $ibeg ) {
29360 364         809 $i_terminal -= 1;
29361 364         740 $terminal_type = $types_to_go[$i_terminal];
29362 364 100 66     1718 if ( $terminal_type eq 'b' && $i_terminal > $ibeg ) {
29363 350         659 $i_terminal -= 1;
29364 350         638 $terminal_type = $types_to_go[$i_terminal];
29365             }
29366             }
29367              
29368 7366         10338 my $is_outdented_line;
29369              
29370 7366         11889 my $type_beg = $types_to_go[$ibeg];
29371 7366         11482 my $token_beg = $tokens_to_go[$ibeg];
29372 7366         11503 my $level_beg = $levels_to_go[$ibeg];
29373 7366         11442 my $block_type_beg = $block_type_to_go[$ibeg];
29374 7366         11101 my $leading_spaces_beg = $leading_spaces_to_go[$ibeg];
29375 7366         11865 my $seqno_beg = $type_sequence_to_go[$ibeg];
29376 7366         12396 my $is_closing_type_beg = $is_closing_type{$type_beg};
29377              
29378             # QW INDENTATION PATCH 3:
29379 7366         10260 my $seqno_qw_closing;
29380 7366 100 100     17541 if ( $type_beg eq 'q' && $ibeg == 0 ) {
29381 204         1304 my $KK = $K_to_go[$ibeg];
29382             $seqno_qw_closing =
29383 204         510 $self->[_rending_multiline_qw_seqno_by_K_]->{$KK};
29384             }
29385              
29386 7366   100     23802 my $is_semicolon_terminated = $terminal_type eq ';'
29387             && ( $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]
29388             || $seqno_qw_closing );
29389              
29390             # NOTE: A future improvement would be to make it semicolon terminated
29391             # even if it does not have a semicolon but is followed by a closing
29392             # block brace. This would undo ci even for something like the
29393             # following, in which the final paren does not have a semicolon because
29394             # it is a possible weld location:
29395              
29396             # if ($BOLD_MATH) {
29397             # (
29398             # $labels, $comment,
29399             # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
29400             # )
29401             # }
29402             #
29403              
29404             # MOJO patch: Set a flag if this lines begins with ')->'
29405 7366   100     23898 my $leading_paren_arrow = (
29406             $is_closing_type_beg
29407             && $token_beg eq ')'
29408             && (
29409             ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
29410             || ( $ibeg < $i_terminal - 1
29411             && $types_to_go[ $ibeg + 1 ] eq 'b'
29412             && $types_to_go[ $ibeg + 2 ] eq '->' )
29413             )
29414             );
29415              
29416             #---------------------------------------------------------
29417             # Section 1: set a flag and a default indentation
29418             #
29419             # Most lines are indented according to the initial token.
29420             # But it is common to outdent to the level just after the
29421             # terminal token in certain cases...
29422             # adjust_indentation flag:
29423             # 0 - do not adjust
29424             # 1 - outdent
29425             # 2 - vertically align with opening token
29426             # 3 - indent
29427             #---------------------------------------------------------
29428              
29429 7366         10962 my $adjust_indentation = 0;
29430 7366         10417 my $default_adjust_indentation = 0;
29431              
29432             # Parameters needed for option 2, aligning with opening token:
29433             my (
29434 7366         12335 $opening_indentation, $opening_offset,
29435             $is_leading, $opening_exists
29436             );
29437              
29438             #-------------------------------------
29439             # Section 1A:
29440             # if line starts with a sequenced item
29441             #-------------------------------------
29442 7366 100 100     35102 if ( $seqno_beg || $seqno_qw_closing ) {
    50 66        
      33        
29443              
29444             # This can be tedious so we let a sub do it
29445             (
29446 1976         7241 $adjust_indentation,
29447             $default_adjust_indentation,
29448             $opening_indentation,
29449             $opening_offset,
29450             $is_leading,
29451             $opening_exists,
29452              
29453             ) = $self->get_closing_token_indentation(
29454              
29455             $ibeg,
29456             $iend,
29457             $ri_first,
29458             $ri_last,
29459             $rindentation_list,
29460             $level_jump,
29461             $i_terminal,
29462             $is_semicolon_terminated,
29463             $seqno_qw_closing,
29464              
29465             );
29466             }
29467              
29468             #--------------------------------------------------------
29469             # Section 1B:
29470             # if at ');', '};', '>;', and '];' of a terminal qw quote
29471             #--------------------------------------------------------
29472             elsif (
29473             substr( $rpatterns->[0], 0, 2 ) eq 'qb'
29474             && substr( $rfields->[0], -1, 1 ) eq ';'
29475             ## $rpatterns->[0] =~ /^qb*;$/
29476             && $rfields->[0] =~ /^([\)\}\]\>]);$/
29477             )
29478             {
29479 0 0       0 if ( $closing_token_indentation{$1} == 0 ) {
29480 0         0 $adjust_indentation = 1;
29481             }
29482             else {
29483 0         0 $adjust_indentation = 3;
29484             }
29485             }
29486              
29487             #---------------------------------------------------------
29488             # Section 2: set indentation according to flag set above
29489             #
29490             # Select the indentation object to define leading
29491             # whitespace. If we are outdenting something like '} } );'
29492             # then we want to use one level below the last token
29493             # ($i_terminal) in order to get it to fully outdent through
29494             # all levels.
29495             #---------------------------------------------------------
29496 7366         13202 my $indentation;
29497             my $lev;
29498 7366         12033 my $level_end = $levels_to_go[$iend];
29499              
29500             #------------------------------------
29501             # Section 2A: adjust_indentation == 0
29502             # No change in indentation
29503             #------------------------------------
29504 7366 100       15505 if ( $adjust_indentation == 0 ) {
    100          
    100          
29505 6399         9271 $indentation = $leading_spaces_beg;
29506 6399         9515 $lev = $level_beg;
29507             }
29508              
29509             #-------------------------------------------------------------------
29510             # Section 2B: adjust_indentation == 1
29511             # Change the indentation to be that of a different token on the line
29512             #-------------------------------------------------------------------
29513             elsif ( $adjust_indentation == 1 ) {
29514              
29515             # Previously, the indentation of the terminal token was used:
29516             # OLD CODING:
29517             # $indentation = $reduced_spaces_to_go[$i_terminal];
29518             # $lev = $levels_to_go[$i_terminal];
29519              
29520             # Generalization for MOJO patch:
29521             # Use the lowest level indentation of the tokens on the line.
29522             # For example, here we can use the indentation of the ending ';':
29523             # } until ($selection > 0 and $selection < 10); # ok to use ';'
29524             # But this will not outdent if we use the terminal indentation:
29525             # )->then( sub { # use indentation of the ->, not the {
29526             # Warning: reduced_spaces_to_go[] may be a reference, do not
29527             # do numerical checks with it
29528              
29529 861         1491 my $i_ind = $ibeg;
29530 861         1716 $indentation = $reduced_spaces_to_go[$i_ind];
29531 861         1507 $lev = $levels_to_go[$i_ind];
29532 861         2393 while ( $i_ind < $i_terminal ) {
29533 1194         1717 $i_ind++;
29534 1194 100       3073 if ( $levels_to_go[$i_ind] < $lev ) {
29535 2         8 $indentation = $reduced_spaces_to_go[$i_ind];
29536 2         9 $lev = $levels_to_go[$i_ind];
29537             }
29538             }
29539             }
29540              
29541             #--------------------------------------------------------------
29542             # Section 2C: adjust_indentation == 2
29543             # Handle indented closing token which aligns with opening token
29544             #--------------------------------------------------------------
29545             elsif ( $adjust_indentation == 2 ) {
29546              
29547             # handle option to align closing token with opening token
29548 88         212 $lev = $level_beg;
29549              
29550             # calculate spaces needed to align with opening token
29551 88         300 my $space_count =
29552             get_spaces($opening_indentation) + $opening_offset;
29553              
29554             # Indent less than the previous line.
29555             #
29556             # Problem: For -lp we don't exactly know what it was if there
29557             # were recoverable spaces sent to the aligner. A good solution
29558             # would be to force a flush of the vertical alignment buffer, so
29559             # that we would know. For now, this rule is used for -lp:
29560             #
29561             # When the last line did not start with a closing token we will
29562             # be optimistic that the aligner will recover everything wanted.
29563             #
29564             # This rule will prevent us from breaking a hierarchy of closing
29565             # tokens, and in a worst case will leave a closing paren too far
29566             # indented, but this is better than frequently leaving it not
29567             # indented enough.
29568 88         245 my $last_spaces = get_spaces($last_indentation_written);
29569              
29570 88 100 100     548 if ( ref($last_indentation_written)
29571             && !$is_closing_token{$last_leading_token} )
29572             {
29573 38         179 $last_spaces +=
29574             get_recoverable_spaces($last_indentation_written);
29575             }
29576              
29577             # reset the indentation to the new space count if it works
29578             # only options are all or none: nothing in-between looks good
29579 88         186 $lev = $level_beg;
29580              
29581 88         204 my $diff = $last_spaces - $space_count;
29582 88 100       293 if ( $diff > 0 ) {
29583 49         128 $indentation = $space_count;
29584             }
29585             else {
29586              
29587             # We need to fix things ... but there is no good way to do it.
29588             # The best solution is for the user to use a longer maximum
29589             # line length. We could get a smooth variation if we just move
29590             # the paren in using
29591             # $space_count -= ( 1 - $diff );
29592             # But unfortunately this can give a rather unbalanced look.
29593              
29594             # For -xlp we currently allow a tolerance of one indentation
29595             # level and then revert to a simpler default. This will jump
29596             # suddenly but keeps a balanced look.
29597 39 50 66     325 if ( $rOpts_extended_line_up_parentheses
    100 33        
    50          
29598             && $diff >= -$rOpts_indent_columns
29599             && $space_count > $leading_spaces_beg )
29600             {
29601 0         0 $indentation = $space_count;
29602             }
29603              
29604             # Otherwise revert to defaults
29605             elsif ( $default_adjust_indentation == 0 ) {
29606 37         91 $indentation = $leading_spaces_beg;
29607             }
29608             elsif ( $default_adjust_indentation == 1 ) {
29609 2         9 $indentation = $reduced_spaces_to_go[$i_terminal];
29610 2         4 $lev = $levels_to_go[$i_terminal];
29611             }
29612             }
29613             }
29614              
29615             #-------------------------------------------------------------
29616             # Section 2D: adjust_indentation == 3
29617             # Full indentation of closing tokens (-icb and -icp or -cti=2)
29618             #-------------------------------------------------------------
29619             else {
29620              
29621             # handle -icb (indented closing code block braces)
29622             # Updated method for indented block braces: indent one full level if
29623             # there is no continuation indentation. This will occur for major
29624             # structures such as sub, if, else, but not for things like map
29625             # blocks.
29626             #
29627             # Note: only code blocks without continuation indentation are
29628             # handled here (if, else, unless, ..). In the following snippet,
29629             # the terminal brace of the sort block will have continuation
29630             # indentation as shown so it will not be handled by the coding
29631             # here. We would have to undo the continuation indentation to do
29632             # this, but it probably looks ok as is. This is a possible future
29633             # update for semicolon terminated lines.
29634             #
29635             # if ($sortby eq 'date' or $sortby eq 'size') {
29636             # @files = sort {
29637             # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
29638             # or $a cmp $b
29639             # } @files;
29640             # }
29641             #
29642 18 100 100     73 if ( $block_type_beg
29643             && $ci_levels_to_go[$i_terminal] == 0 )
29644             {
29645 6         17 my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
29646 6         11 $indentation = $spaces + $rOpts_indent_columns;
29647              
29648             # NOTE: for -lp we could create a new indentation object, but
29649             # there is probably no need to do it
29650             }
29651              
29652             # handle -icp and any -icb block braces which fall through above
29653             # test such as the 'sort' block mentioned above.
29654             else {
29655              
29656             # There are currently two ways to handle -icp...
29657             # One way is to use the indentation of the previous line:
29658             # $indentation = $last_indentation_written;
29659              
29660             # The other way is to use the indentation that the previous line
29661             # would have had if it hadn't been adjusted:
29662 12         37 $indentation = $last_unadjusted_indentation;
29663              
29664             # Current method: use the minimum of the two. This avoids
29665             # inconsistent indentation.
29666 12 100       55 if ( get_spaces($last_indentation_written) <
29667             get_spaces($indentation) )
29668             {
29669 1         3 $indentation = $last_indentation_written;
29670             }
29671             }
29672              
29673             # use previous indentation but use own level
29674             # to cause list to be flushed properly
29675 18         33 $lev = $level_beg;
29676             }
29677              
29678             #-------------------------------------------------------------
29679             # Remember indentation except for multi-line quotes, which get
29680             # no indentation
29681             #-------------------------------------------------------------
29682 7366 100 100     22621 if ( !( $ibeg == 0 && $starting_in_quote ) ) {
29683 7347         10575 $last_indentation_written = $indentation;
29684 7347         10498 $last_unadjusted_indentation = $leading_spaces_beg;
29685 7347         10967 $last_leading_token = $token_beg;
29686              
29687             # Patch to make a line which is the end of a qw quote work with the
29688             # -lp option. Make $token_beg look like a closing token as some
29689             # type even if it is not. This variable will become
29690             # $last_leading_token at the end of this loop. Then, if the -lp
29691             # style is selected, and the next line is also a
29692             # closing token, it will not get more indentation than this line.
29693             # We need to do this because qw quotes (at present) only get
29694             # continuation indentation, not one level of indentation, so we
29695             # need to turn off the -lp indentation.
29696              
29697             # ... a picture is worth a thousand words:
29698              
29699             # perltidy -wn -gnu (Without this patch):
29700             # ok(defined(
29701             # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
29702             # 2981014)])
29703             # ));
29704              
29705             # perltidy -wn -gnu (With this patch):
29706             # ok(defined(
29707             # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
29708             # 2981014)])
29709             # ));
29710 7347 100 100     14720 if ( $seqno_qw_closing
      100        
29711             && ( length($token_beg) > 1 || $token_beg eq '>' ) )
29712             {
29713 4         21 $last_leading_token = ')';
29714             }
29715             }
29716              
29717             #---------------------------------------------------------------------
29718             # Rule: lines with leading closing tokens should not be outdented more
29719             # than the line which contained the corresponding opening token.
29720             #---------------------------------------------------------------------
29721              
29722             # Updated per bug report in alex_bug.pl: we must not
29723             # mess with the indentation of closing logical braces, so
29724             # we must treat something like '} else {' as if it were
29725             # an isolated brace
29726             my $is_isolated_block_brace = $block_type_beg
29727             && ( $i_terminal == $ibeg
29728 7366   100     17801 || $is_if_elsif_else_unless_while_until_for_foreach{$block_type_beg}
29729             );
29730              
29731             # only do this for a ':; which is aligned with its leading '?'
29732 7366   100     16295 my $is_unaligned_colon = $type_beg eq ':' && !$is_leading;
29733              
29734 7366 100 100     23203 if (
      100        
      100        
29735             defined($opening_indentation)
29736             && !$leading_paren_arrow # MOJO patch
29737             && !$is_isolated_block_brace
29738             && !$is_unaligned_colon
29739             )
29740             {
29741 822 100       2564 if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
29742 48         176 $indentation = $opening_indentation;
29743             }
29744             }
29745              
29746             #----------------------------------------------------
29747             # remember the indentation of each line of this batch
29748             #----------------------------------------------------
29749 7366         10473 push @{$rindentation_list}, $indentation;
  7366         16539  
29750              
29751             #---------------------------------------------
29752             # outdent lines with certain leading tokens...
29753             #---------------------------------------------
29754 7366 100 100     44042 if (
      100        
29755              
29756             # must be first word of this batch
29757             $ibeg == 0
29758              
29759             # and ...
29760             && (
29761              
29762             # certain leading keywords if requested
29763             $rOpts_outdent_keywords
29764             && $type_beg eq 'k'
29765             && $outdent_keyword{$token_beg}
29766              
29767             # or labels if requested
29768             || $rOpts_outdent_labels && $type_beg eq 'J'
29769              
29770             # or static block comments if requested
29771             || $is_static_block_comment
29772             && $rOpts_outdent_static_block_comments
29773             )
29774             )
29775             {
29776 32         147 my $space_count = leading_spaces_to_go($ibeg);
29777 32 100       204 if ( $space_count > 0 ) {
29778 26         85 $space_count -= $rOpts_continuation_indentation;
29779 26         49 $is_outdented_line = 1;
29780 26 50       91 if ( $space_count < 0 ) { $space_count = 0 }
  0         0  
29781              
29782             # do not promote a spaced static block comment to non-spaced;
29783             # this is not normally necessary but could be for some
29784             # unusual user inputs (such as -ci = -i)
29785 26 50 66     128 if ( $type_beg eq '#' && $space_count == 0 ) {
29786 0         0 $space_count = 1;
29787             }
29788              
29789 26         56 $indentation = $space_count;
29790             }
29791             }
29792              
29793             return (
29794              
29795 7366         29422 $indentation,
29796             $lev,
29797             $level_end,
29798             $i_terminal,
29799             $is_outdented_line,
29800              
29801             );
29802             } ## end sub get_final_indentation
29803              
29804             sub get_closing_token_indentation {
29805              
29806             # Determine indentation adjustment for a line with a leading closing
29807             # token - i.e. one of these: ) ] } :
29808              
29809             my (
29810 1976     1976 0 5750 $self, #
29811              
29812             $ibeg,
29813             $iend,
29814             $ri_first,
29815             $ri_last,
29816             $rindentation_list,
29817             $level_jump,
29818             $i_terminal,
29819             $is_semicolon_terminated,
29820             $seqno_qw_closing,
29821              
29822             ) = @_;
29823              
29824 1976         3047 my $adjust_indentation = 0;
29825 1976         2980 my $default_adjust_indentation = $adjust_indentation;
29826 1976         3394 my $terminal_type = $types_to_go[$i_terminal];
29827              
29828 1976         3229 my $type_beg = $types_to_go[$ibeg];
29829 1976         3234 my $token_beg = $tokens_to_go[$ibeg];
29830 1976         3300 my $level_beg = $levels_to_go[$ibeg];
29831 1976         3241 my $block_type_beg = $block_type_to_go[$ibeg];
29832 1976         3100 my $leading_spaces_beg = $leading_spaces_to_go[$ibeg];
29833 1976         3181 my $seqno_beg = $type_sequence_to_go[$ibeg];
29834 1976         3147 my $is_closing_type_beg = $is_closing_type{$type_beg};
29835              
29836             my (
29837 1976         3324 $opening_indentation, $opening_offset,
29838             $is_leading, $opening_exists
29839             );
29840              
29841             # Honor any flag to reduce -ci set by the -bbxi=n option
29842 1976 100 100     8001 if ( $seqno_beg && $self->[_rwant_reduced_ci_]->{$seqno_beg} ) {
29843              
29844             # if this is an opening, it must be alone on the line ...
29845 4 50 66     17 if ( $is_closing_type{$type_beg} || $ibeg == $i_terminal ) {
    0          
29846 4         8 $adjust_indentation = 1;
29847             }
29848              
29849             # ... or a single welded unit (fix for b1173)
29850             elsif ($total_weld_count) {
29851 0         0 my $K_beg = $K_to_go[$ibeg];
29852 0         0 my $Kterm = $K_to_go[$i_terminal];
29853 0         0 my $Kterm_test = $self->[_rK_weld_left_]->{$Kterm};
29854 0 0 0     0 if ( defined($Kterm_test) && $Kterm_test >= $K_beg ) {
29855 0         0 $Kterm = $Kterm_test;
29856             }
29857 0 0       0 if ( $Kterm == $K_beg ) { $adjust_indentation = 1 }
  0         0  
29858             }
29859             }
29860              
29861 1976         3451 my $ris_bli_container = $self->[_ris_bli_container_];
29862 1976 100       4567 my $is_bli_beg = $seqno_beg ? $ris_bli_container->{$seqno_beg} : 0;
29863              
29864             # Update the $is_bli flag as we go. It is initially 1.
29865             # We note seeing a leading opening brace by setting it to 2.
29866             # If we get to the closing brace without seeing the opening then we
29867             # turn it off. This occurs if the opening brace did not get output
29868             # at the start of a line, so we will then indent the closing brace
29869             # in the default way.
29870 1976 100 100     5424 if ( $is_bli_beg && $is_bli_beg == 1 ) {
29871 21         43 my $K_opening_container = $self->[_K_opening_container_];
29872 21         55 my $K_opening = $K_opening_container->{$seqno_beg};
29873 21         47 my $K_beg = $K_to_go[$ibeg];
29874 21 50       68 if ( $K_beg eq $K_opening ) {
29875 21         54 $ris_bli_container->{$seqno_beg} = $is_bli_beg = 2;
29876             }
29877 0         0 else { $is_bli_beg = 0 }
29878             }
29879              
29880             # QW PATCH for the combination -lp -wn
29881             # For -lp formatting use $ibeg_weld_fix to get around the problem
29882             # that with -lp type formatting the opening and closing tokens to not
29883             # have sequence numbers.
29884 1976         3110 my $ibeg_weld_fix = $ibeg;
29885 1976 100 100     5032 if ( $seqno_qw_closing && $total_weld_count ) {
29886 8         27 my $i_plus = $inext_to_go[$ibeg];
29887 8 50       33 if ( $i_plus <= $max_index_to_go ) {
29888 8         20 my $K_plus = $K_to_go[$i_plus];
29889 8 100       36 if ( defined( $self->[_rK_weld_left_]->{$K_plus} ) ) {
29890 6         17 $ibeg_weld_fix = $i_plus;
29891             }
29892             }
29893             }
29894              
29895             # if we are at a closing token of some type..
29896 1976 100 100     7671 if ( $is_closing_type_beg || $seqno_qw_closing ) {
    100          
29897              
29898 1266         2673 my $K_beg = $K_to_go[$ibeg];
29899              
29900             # get the indentation of the line containing the corresponding
29901             # opening token
29902             (
29903 1266         3898 $opening_indentation, $opening_offset,
29904             $is_leading, $opening_exists
29905             )
29906             = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
29907             $ri_last, $rindentation_list, $seqno_qw_closing );
29908              
29909             # Patch for rt144979, part 1. Coordinated with part 2.
29910             # Do not undo ci for a cuddled closing brace control; it
29911             # needs to be treated exactly the same ci as an isolated
29912             # closing brace.
29913             my $is_cuddled_closing_brace = $seqno_beg
29914 1266   100     5056 && $self->[_ris_cuddled_closing_brace_]->{$seqno_beg};
29915              
29916             # First set the default behavior:
29917 1266 100 66     14551 if (
      100        
      100        
      100        
      66        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
29918              
29919             # default behavior is to outdent closing lines
29920             # of the form: "); }; ]; )->xxx;"
29921             $is_semicolon_terminated
29922              
29923             # and 'cuddled parens' of the form: ")->pack(". Bug fix for RT
29924             # #123749]: the TYPES here were incorrectly ')' and '('. The
29925             # corrected TYPES are '}' and '{'. But skip a cuddled block.
29926             || (
29927             $terminal_type eq '{'
29928             && $type_beg eq '}'
29929             && ( $nesting_depth_to_go[$iend] + 1 ==
29930             $nesting_depth_to_go[$ibeg] )
29931             && !$is_cuddled_closing_brace
29932             )
29933              
29934             # remove continuation indentation for any line like
29935             # } ... {
29936             # or without ending '{' and unbalanced, such as
29937             # such as '}->{$operator}'
29938             || (
29939             $type_beg eq '}'
29940              
29941             && ( $types_to_go[$iend] eq '{'
29942             || $levels_to_go[$iend] < $level_beg )
29943              
29944             # but not if a cuddled block
29945             && !$is_cuddled_closing_brace
29946             )
29947              
29948             # and when the next line is at a lower indentation level...
29949              
29950             # PATCH #1: and only if the style allows undoing continuation
29951             # for all closing token types. We should really wait until
29952             # the indentation of the next line is known and then make
29953             # a decision, but that would require another pass.
29954              
29955             # PATCH #2: and not if this token is under -xci control
29956             || ( $level_jump < 0
29957             && !$some_closing_token_indentation
29958             && !$self->[_rseqno_controlling_my_ci_]->{$K_beg} )
29959              
29960             # Patch for -wn=2, multiple welded closing tokens
29961             || ( $i_terminal > $ibeg
29962             && $is_closing_type{ $types_to_go[$iend] } )
29963              
29964             # Alternate Patch for git #51, isolated closing qw token not
29965             # outdented if no-delete-old-newlines is set. This works, but
29966             # a more general patch elsewhere fixes the real problem: ljump.
29967             # || ( $seqno_qw_closing && $ibeg == $i_terminal )
29968              
29969             )
29970             {
29971 861         1692 $adjust_indentation = 1;
29972             }
29973              
29974             # outdent something like '),'
29975 1266 100 100     4026 if (
29976             $terminal_type eq ','
29977              
29978             # Removed this constraint for -wn
29979             # OLD: allow just one character before the comma
29980             # && $i_terminal == $ibeg + 1
29981              
29982             # require LIST environment; otherwise, we may outdent too much -
29983             # this can happen in calls without parentheses (overload.t);
29984             && $self->is_in_list_by_i($i_terminal)
29985             )
29986             {
29987 87         189 $adjust_indentation = 1;
29988             }
29989              
29990             # undo continuation indentation of a terminal closing token if
29991             # it is the last token before a level decrease. This will allow
29992             # a closing token to line up with its opening counterpart, and
29993             # avoids an indentation jump larger than 1 level.
29994 1266         2222 my $rLL = $self->[_rLL_];
29995 1266         2219 my $Klimit = $self->[_Klimit_];
29996 1266 100 100     6581 if ( $i_terminal == $ibeg
      66        
      100        
29997             && $is_closing_type_beg
29998             && defined($K_beg)
29999             && $K_beg < $Klimit )
30000             {
30001 526         1116 my $K_plus = $K_beg + 1;
30002 526         1484 my $type_plus = $rLL->[$K_plus]->[_TYPE_];
30003              
30004 526 100 100     2132 if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
30005 473         1236 $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
30006             }
30007              
30008 526 100 100     1785 if ( $type_plus eq '#' && $K_plus < $Klimit ) {
30009 49         175 $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
30010 49 100 66     274 if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
30011 42         121 $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
30012             }
30013              
30014             # Note: we have skipped past just one comment (perhaps a
30015             # side comment). There could be more, and we could easily
30016             # skip past all the rest with the following code, or with a
30017             # while loop. It would be rare to have to do this, and
30018             # those block comments would still be indented, so it would
30019             # to leave them indented. So it seems best to just stop at
30020             # a maximum of one comment.
30021             ##if ($type_plus eq '#') {
30022             ## $K_plus = $self->K_next_code($K_plus);
30023             ##}
30024             }
30025              
30026 526 100 66     2246 if ( !$is_bli_beg && defined($K_plus) ) {
30027 512         894 my $lev = $level_beg;
30028 512         950 my $level_next = $rLL->[$K_plus]->[_LEVEL_];
30029              
30030             # and do not undo ci if it was set by the -xci option
30031             $adjust_indentation = 1
30032             if ( $level_next < $lev
30033 512 100 100     2125 && !$self->[_rseqno_controlling_my_ci_]->{$K_beg} );
30034             }
30035              
30036             # Patch for RT #96101, in which closing brace of anonymous subs
30037             # was not outdented. We should look ahead and see if there is
30038             # a level decrease at the next token (i.e., a closing token),
30039             # but right now we do not have that information. For now
30040             # we see if we are in a list, and this works well.
30041             # See test files 'sub*.t' for good test cases.
30042 526 100 100     3284 if ( !$rOpts_indent_closing_brace
      100        
      100        
30043             && $block_type_beg
30044             && $self->[_ris_asub_block_]->{$seqno_beg}
30045             && $self->is_in_list_by_i($i_terminal) )
30046             {
30047             (
30048 18         94 $opening_indentation, $opening_offset,
30049             $is_leading, $opening_exists
30050             )
30051             = $self->get_opening_indentation( $ibeg, $ri_first,
30052             $ri_last, $rindentation_list );
30053 18         70 my $indentation = $leading_spaces_beg;
30054 18 100 66     181 if ( defined($opening_indentation)
30055             && get_spaces($indentation) >
30056             get_spaces($opening_indentation) )
30057             {
30058 14         39 $adjust_indentation = 1;
30059             }
30060             }
30061             }
30062              
30063             # YVES patch 1 of 2:
30064             # Undo ci of line with leading closing eval brace,
30065             # but not beyond the indentation of the line with
30066             # the opening brace.
30067 1266 100 100     4794 if ( $block_type_beg
      66        
      100        
30068             && $block_type_beg eq 'eval'
30069             && !ref($leading_spaces_beg)
30070             && !$rOpts_indent_closing_brace )
30071             {
30072             (
30073 30         117 $opening_indentation, $opening_offset,
30074             $is_leading, $opening_exists
30075             )
30076             = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
30077             $rindentation_list );
30078 30         79 my $indentation = $leading_spaces_beg;
30079 30 100 66     198 if ( defined($opening_indentation)
30080             && get_spaces($indentation) >
30081             get_spaces($opening_indentation) )
30082             {
30083 24         81 $adjust_indentation = 1;
30084             }
30085             }
30086              
30087             # patch for issue git #40: -bli setting has priority
30088 1266 100       2878 $adjust_indentation = 0 if ($is_bli_beg);
30089              
30090 1266         2061 $default_adjust_indentation = $adjust_indentation;
30091              
30092             # Now modify default behavior according to user request:
30093             # handle option to indent non-blocks of the form ); }; ];
30094             # But don't do special indentation to something like ')->pack('
30095 1266 100       2890 if ( !$block_type_beg ) {
30096              
30097             # Note that logical padding has already been applied, so we may
30098             # need to remove some spaces to get a valid hash key.
30099 671         1338 my $tok = $token_beg;
30100 671         1686 my $cti = $closing_token_indentation{$tok};
30101              
30102             # Fix the value of 'cti' for an isolated non-welded closing qw
30103             # delimiter.
30104 671 100 100     2007 if ( $seqno_qw_closing && $ibeg_weld_fix == $ibeg ) {
30105              
30106             # A quote delimiter which is not a container will not have
30107             # a cti value defined. In this case use the style of a
30108             # paren. For example
30109             # my @fars = (
30110             # qw<
30111             # far
30112             # farfar
30113             # farfars-far
30114             # >,
30115             # );
30116 26 100 100     159 if ( !defined($cti) && length($tok) == 1 ) {
30117              
30118             # something other than ')', '}', ']' ; use flag for ')'
30119 3         8 $cti = $closing_token_indentation{')'};
30120              
30121             # But for now, do not outdent non-container qw
30122             # delimiters because it would would change existing
30123             # formatting.
30124 3 50       13 if ( $tok ne '>' ) { $cti = 3 }
  3         8  
30125             }
30126              
30127             # A non-welded closing qw cannot currently use -cti=1
30128             # because that option requires a sequence number to find
30129             # the opening indentation, and qw quote delimiters are not
30130             # sequenced items.
30131 26 50 66     179 if ( defined($cti) && $cti == 1 ) { $cti = 0 }
  0         0  
30132             }
30133              
30134 671 100       3527 if ( !defined($cti) ) {
    100          
    100          
    100          
30135              
30136             # $cti may not be defined for several reasons.
30137             # -padding may have been applied so the character
30138             # has a length > 1
30139             # - we may have welded to a closing quote token.
30140             # Here is an example (perltidy -wn):
30141             # __PACKAGE__->load_components( qw(
30142             # > Core
30143             # >
30144             # > ) );
30145 3         9 $adjust_indentation = 0;
30146              
30147             }
30148             elsif ( $cti == 1 ) {
30149 43 100 100     247 if ( $i_terminal <= $ibeg + 1
30150             || $is_semicolon_terminated )
30151             {
30152 42         108 $adjust_indentation = 2;
30153             }
30154             else {
30155 1         3 $adjust_indentation = 0;
30156             }
30157             }
30158             elsif ( $cti == 2 ) {
30159 3 50       9 if ($is_semicolon_terminated) {
30160 3         7 $adjust_indentation = 3;
30161             }
30162             else {
30163 0         0 $adjust_indentation = 0;
30164             }
30165             }
30166             elsif ( $cti == 3 ) {
30167 3         8 $adjust_indentation = 3;
30168             }
30169             }
30170              
30171             # handle option to indent blocks
30172             else {
30173 595 50 66     1673 if (
      66        
30174             $rOpts_indent_closing_brace
30175             && (
30176             $i_terminal == $ibeg # isolated terminal '}'
30177             || $is_semicolon_terminated
30178             )
30179             ) # } xxxx ;
30180             {
30181 12         23 $adjust_indentation = 3;
30182             }
30183             }
30184             } ## end if ( $is_closing_type_beg || $seqno_qw_closing )
30185              
30186             # if line begins with a ':', align it with any
30187             # previous line leading with corresponding ?
30188             elsif ( $type_beg eq ':' ) {
30189             (
30190 93         502 $opening_indentation, $opening_offset,
30191             $is_leading, $opening_exists
30192             )
30193             = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
30194             $rindentation_list );
30195 93 100       333 if ($is_leading) { $adjust_indentation = 2; }
  46         114  
30196             }
30197              
30198             return (
30199              
30200 1976         7791 $adjust_indentation,
30201             $default_adjust_indentation,
30202             $opening_indentation,
30203             $opening_offset,
30204             $is_leading,
30205             $opening_exists,
30206              
30207             );
30208             } ## end sub get_closing_token_indentation
30209             } ## end closure get_final_indentation
30210              
30211             sub get_opening_indentation {
30212              
30213             # get the indentation of the line which output the opening token
30214             # corresponding to a given closing token in the current output batch.
30215             #
30216             # given:
30217             # $i_closing - index in this line of a closing token ')' '}' or ']'
30218             #
30219             # $ri_first - reference to list of the first index $i for each output
30220             # line in this batch
30221             # $ri_last - reference to list of the last index $i for each output line
30222             # in this batch
30223             # $rindentation_list - reference to a list containing the indentation
30224             # used for each line.
30225             # $qw_seqno - optional sequence number to use if normal seqno not defined
30226             # (NOTE: would be more general to just look this up from index i)
30227             #
30228             # return:
30229             # -the indentation of the line which contained the opening token
30230             # which matches the token at index $i_opening
30231             # -and its offset (number of columns) from the start of the line
30232             #
30233 1407     1407 0 3447 my ( $self, $i_closing, $ri_first, $ri_last, $rindentation_list, $qw_seqno )
30234             = @_;
30235              
30236             # first, see if the opening token is in the current batch
30237 1407         2577 my $i_opening = $mate_index_to_go[$i_closing];
30238 1407         2509 my ( $indent, $offset, $is_leading, $exists );
30239 1407         2280 $exists = 1;
30240 1407 100 66     4645 if ( defined($i_opening) && $i_opening >= 0 ) {
30241              
30242             # it is..look up the indentation
30243 550         1913 ( $indent, $offset, $is_leading ) =
30244             lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
30245             $rindentation_list );
30246             }
30247              
30248             # if not, it should have been stored in the hash by a previous batch
30249             else {
30250 857         1650 my $seqno = $type_sequence_to_go[$i_closing];
30251 857 100       1941 $seqno = $qw_seqno unless ($seqno);
30252 857         2278 ( $indent, $offset, $is_leading, $exists ) =
30253             get_saved_opening_indentation($seqno);
30254             }
30255 1407         4675 return ( $indent, $offset, $is_leading, $exists );
30256             } ## end sub get_opening_indentation
30257              
30258             sub examine_vertical_tightness_flags {
30259 555     555 0 1820 my ($self) = @_;
30260              
30261             # For efficiency, we will set a flag to skip all calls to sub
30262             # 'set_vertical_tightness_flags' if vertical tightness is not possible with
30263             # the user input parameters. If vertical tightness is possible, we will
30264             # simply leave the flag undefined and return.
30265              
30266             # Vertical tightness is never possible with --freeze-whitespace
30267 555 100       1831 if ($rOpts_freeze_whitespace) {
30268 3         8 $self->[_no_vertical_tightness_flags_] = 1;
30269 3         7 return;
30270             }
30271              
30272             # This sub is coordinated with sub set_vertical_tightness_flags.
30273             # The Section numbers in the following comments are the sections
30274             # in sub set_vertical_tightness_flags:
30275              
30276             # Examine controls for Section 1a:
30277 552 100       1707 return if ($rOpts_line_up_parentheses);
30278              
30279 521         2518 foreach my $key ( keys %opening_vertical_tightness ) {
30280 3066 100       7411 return if ( $opening_vertical_tightness{$key} );
30281             }
30282              
30283             # Examine controls for Section 1b:
30284 509         2736 foreach my $key ( keys %closing_vertical_tightness ) {
30285 3009 100       6980 return if ( $closing_vertical_tightness{$key} );
30286             }
30287              
30288             # Examine controls for Section 1c:
30289 500         2539 foreach my $key ( keys %opening_token_right ) {
30290 1496 100       4161 return if ( $opening_token_right{$key} );
30291             }
30292              
30293             # Examine controls for Section 1d:
30294 498         2066 foreach my $key ( keys %stack_opening_token ) {
30295 1492 100       3844 return if ( $stack_opening_token{$key} );
30296             }
30297 497         1944 foreach my $key ( keys %stack_closing_token ) {
30298 1491 50       3778 return if ( $stack_closing_token{$key} );
30299             }
30300              
30301             # Examine controls for Section 2:
30302 497 100       1976 return if ($rOpts_block_brace_vertical_tightness);
30303              
30304             # Examine controls for Section 3:
30305 495 100       1543 return if ($rOpts_stack_closing_block_brace);
30306              
30307             # None of the controls used for vertical tightness are set, so
30308             # we can skip all calls to sub set_vertical_tightness_flags
30309 493         1203 $self->[_no_vertical_tightness_flags_] = 1;
30310 493         1011 return;
30311             } ## end sub examine_vertical_tightness_flags
30312              
30313             sub set_vertical_tightness_flags {
30314              
30315 1308     1308 0 3250 my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last,
30316             $ending_in_quote, $closing_side_comment )
30317             = @_;
30318              
30319             # Define vertical tightness controls for the nth line of a batch.
30320             # Note: do not call this sub for a block comment or if
30321             # $rOpts_freeze_whitespace is set.
30322              
30323             # These parameters are passed to the vertical aligner to indicated
30324             # if we should combine this line with the next line to achieve the
30325             # desired vertical tightness. This was previously an array but
30326             # has been converted to a hash:
30327              
30328             # old hash Meaning
30329             # index key
30330             #
30331             # 0 _vt_type: 1=opening non-block 2=closing non-block
30332             # 3=opening block brace 4=closing block brace
30333             #
30334             # 1a _vt_opening_flag: 1=no multiple steps, 2=multiple steps ok
30335             # 1b _vt_closing_flag: spaces of padding to use if closing
30336             # 2 _vt_seqno: sequence number of container
30337             # 3 _vt_valid flag: do not append if this flag is false. Will be
30338             # true if appropriate -vt flag is set. Otherwise, Will be
30339             # made true only for 2 line container in parens with -lp
30340             # 4 _vt_seqno_beg: sequence number of first token of line
30341             # 5 _vt_seqno_end: sequence number of last token of line
30342             # 6 _vt_min_lines: min number of lines for joining opening cache,
30343             # 0=no constraint
30344             # 7 _vt_max_lines: max number of lines for joining opening cache,
30345             # 0=no constraint
30346              
30347             # The vertical tightness mechanism can add whitespace, so whitespace can
30348             # continually increase if we allowed it when the -fws flag is set.
30349             # See case b499 for an example.
30350              
30351             # Define these values...
30352 1308         1995 my $vt_type = 0;
30353 1308         1960 my $vt_opening_flag = 0;
30354 1308         1892 my $vt_closing_flag = 0;
30355 1308         1994 my $vt_seqno = 0;
30356 1308         1816 my $vt_valid_flag = 0;
30357 1308         1868 my $vt_seqno_beg = 0;
30358 1308         1991 my $vt_seqno_end = 0;
30359 1308         1854 my $vt_min_lines = 0;
30360 1308         1864 my $vt_max_lines = 0;
30361              
30362             # Uses these global parameters:
30363             # $rOpts_block_brace_tightness
30364             # $rOpts_block_brace_vertical_tightness
30365             # $rOpts_stack_closing_block_brace
30366             # $rOpts_line_up_parentheses
30367             # %opening_vertical_tightness
30368             # %closing_vertical_tightness
30369             # %opening_token_right
30370             # %stack_closing_token
30371             # %stack_opening_token
30372              
30373             #--------------------------------------------------------------
30374             # Vertical Tightness Flags Section 1:
30375             # Handle Lines 1 .. n-1 but not the last line
30376             # For non-BLOCK tokens, we will need to examine the next line
30377             # too, so we won't consider the last line.
30378             #--------------------------------------------------------------
30379 1308 100 100     5722 if ( $n < $n_last_line ) {
    100 100        
    100 66        
      33        
      100        
      66        
      33        
      66        
      33        
30380              
30381             #--------------------------------------------------------------
30382             # Vertical Tightness Flags Section 1a:
30383             # Look for Type 1, last token of this line is a non-block opening token
30384             #--------------------------------------------------------------
30385 801         1462 my $ibeg_next = $ri_first->[ $n + 1 ];
30386 801         1414 my $token_end = $tokens_to_go[$iend];
30387 801         1333 my $iend_next = $ri_last->[ $n + 1 ];
30388              
30389 801 100 100     5487 if (
      100        
      100        
      100        
30390             $type_sequence_to_go[$iend]
30391             && !$block_type_to_go[$iend]
30392             && $is_opening_token{$token_end}
30393             && (
30394             $opening_vertical_tightness{$token_end} > 0
30395              
30396             # allow 2-line method call to be closed up
30397             || ( $rOpts_line_up_parentheses
30398             && $token_end eq '('
30399             && $self->[_rlp_object_by_seqno_]
30400             ->{ $type_sequence_to_go[$iend] }
30401             && $iend > $ibeg
30402             && $types_to_go[ $iend - 1 ] ne 'b' )
30403             )
30404             )
30405             {
30406             # avoid multiple jumps in nesting depth in one line if
30407             # requested
30408 74         185 my $ovt = $opening_vertical_tightness{$token_end};
30409              
30410             # Turn off the -vt flag if the next line ends in a weld.
30411             # This avoids an instability with one-line welds (fixes b1183).
30412 74         144 my $type_end_next = $types_to_go[$iend_next];
30413             $ovt = 0
30414             if ( $self->[_rK_weld_left_]->{ $K_to_go[$iend_next] }
30415 74 0 33     298 && $is_closing_type{$type_end_next} );
30416              
30417             # The flag '_rbreak_container_' avoids conflict of -bom and -pt=1
30418             # or -pt=2; fixes b1270. See similar patch above for $cvt.
30419 74         165 my $seqno = $type_sequence_to_go[$iend];
30420 74 50 66     337 if ( $ovt
      66        
30421             && $seqno
30422             && $self->[_rbreak_container_]->{$seqno} )
30423             {
30424 0         0 $ovt = 0;
30425             }
30426              
30427             # The flag '_rmax_vertical_tightness_' avoids welding conflicts.
30428 74 50       247 if ( defined( $self->[_rmax_vertical_tightness_]->{$seqno} ) ) {
30429             $ovt =
30430 0         0 min( $ovt, $self->[_rmax_vertical_tightness_]->{$seqno} );
30431             }
30432              
30433 74 100 100     449 unless (
30434             $ovt < 2
30435             && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
30436             $nesting_depth_to_go[$ibeg_next] )
30437             )
30438             {
30439              
30440             # If -vt flag has not been set, mark this as invalid
30441             # and aligner will validate it if it sees the closing paren
30442             # within 2 lines.
30443 60         123 my $valid_flag = $ovt;
30444              
30445 60         99 $vt_type = 1;
30446 60         115 $vt_opening_flag = $ovt;
30447 60         136 $vt_seqno = $type_sequence_to_go[$iend];
30448 60         1105 $vt_valid_flag = $valid_flag;
30449             }
30450             }
30451              
30452             #--------------------------------------------------------------
30453             # Vertical Tightness Flags Section 1b:
30454             # Look for Type 2, first token of next line is a non-block closing
30455             # token .. and be sure this line does not have a side comment
30456             #--------------------------------------------------------------
30457 801         1452 my $token_next = $tokens_to_go[$ibeg_next];
30458 801 100 100     3824 if ( $type_sequence_to_go[$ibeg_next]
      100        
      66        
30459             && !$block_type_to_go[$ibeg_next]
30460             && $is_closing_token{$token_next}
30461             && $types_to_go[$iend] ne '#' ) # for safety, shouldn't happen!
30462             {
30463 197         533 my $cvt = $closing_vertical_tightness{$token_next};
30464              
30465             # Avoid conflict of -bom and -pvt=1 or -pvt=2, fixes b977, b1303
30466             # See similar patch above for $ovt.
30467 197         417 my $seqno = $type_sequence_to_go[$ibeg_next];
30468 197 50 66     599 if ( $cvt && $self->[_rbreak_container_]->{$seqno} ) {
30469 0         0 $cvt = 0;
30470             }
30471              
30472             # Implement cvt=3: like cvt=0 for assigned structures, like cvt=1
30473             # otherwise. Added for rt136417.
30474 197 100       587 if ( $cvt == 3 ) {
30475 2 100       8 $cvt = $self->[_ris_assigned_structure_]->{$seqno} ? 0 : 1;
30476             }
30477              
30478             # The unusual combination -pvtc=2 -dws -naws can be unstable.
30479             # This fixes b1282, b1283. This can be moved to set_options.
30480 197 50 66     665 if ( $cvt == 2
      33        
30481             && $rOpts_delete_old_whitespace
30482             && !$rOpts_add_whitespace )
30483             {
30484 0         0 $cvt = 1;
30485             }
30486              
30487             # Fix for b1379, b1380, b1381, b1382, b1384 part 2,
30488             # instability with adding and deleting trailing commas:
30489             # Reducing -cvt=2 to =1 fixes stability for -wtc=b in b1379,1380.
30490             # Reducing -cvt>0 to =0 fixes stability for -wtc=b in b1381,1382.
30491             # Reducing -cvt>0 to =0 fixes stability for -wtc=m in b1384
30492 197 100 100     591 if ( $cvt
30493             && $self->[_ris_bare_trailing_comma_by_seqno_]->{$seqno} )
30494             {
30495 10         25 $cvt = 0;
30496             }
30497              
30498 197 100 100     1294 if (
      100        
30499              
30500             # Never append a trailing line like ')->pack(' because it
30501             # will throw off later alignment. So this line must start at a
30502             # deeper level than the next line (fix1 for welding, git #45).
30503             (
30504             $nesting_depth_to_go[$ibeg_next] >=
30505             $nesting_depth_to_go[ $iend_next + 1 ] + 1
30506             )
30507             && (
30508             $cvt == 2
30509             || (
30510             !$self->is_in_list_by_i($ibeg_next)
30511             && (
30512             $cvt == 1
30513              
30514             # allow closing up 2-line method calls
30515             || ( $rOpts_line_up_parentheses
30516             && $token_next eq ')'
30517             && $type_sequence_to_go[$ibeg_next]
30518             && $self->[_rlp_object_by_seqno_]
30519             ->{ $type_sequence_to_go[$ibeg_next] } )
30520             )
30521             )
30522             )
30523             )
30524             {
30525              
30526             # decide which trailing closing tokens to append..
30527 76         169 my $ok = 0;
30528 76 100 100     395 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
  25         45  
30529             else {
30530 51         280 my $str = join( EMPTY_STRING,
30531             @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
30532              
30533             # append closing token if followed by comment or ';'
30534             # or another closing token (fix2 for welding, git #45)
30535 51 100       406 if ( $str =~ /^b?[\)\]\}R#;]/ ) { $ok = 1 }
  50         126  
30536             }
30537              
30538 76 100       214 if ($ok) {
30539 75         170 my $valid_flag = $cvt;
30540 75         135 my $min_lines = 0;
30541 75         133 my $max_lines = 0;
30542              
30543             # Fix for b1187 and b1188: Blinking can occur if we allow
30544             # welded tokens to re-form into one-line blocks during
30545             # vertical alignment when -lp used. So for this case we
30546             # set the minimum number of lines to be 1 instead of 0.
30547             # The maximum should be 1 if -vtc is not used. If -vtc is
30548             # used, we turn the valid
30549             # flag off and set the maximum to 0. This is equivalent to
30550             # using a large number.
30551 75         151 my $seqno_ibeg_next = $type_sequence_to_go[$ibeg_next];
30552 75 50 100     426 if ( $rOpts_line_up_parentheses
      66        
      66        
      33        
30553             && $total_weld_count
30554             && $seqno_ibeg_next
30555             && $self->[_rlp_object_by_seqno_]->{$seqno_ibeg_next}
30556             && $self->is_welded_at_seqno($seqno_ibeg_next) )
30557             {
30558 0         0 $min_lines = 1;
30559 0 0       0 $max_lines = $cvt ? 0 : 1;
30560 0         0 $valid_flag = 0;
30561             }
30562              
30563 75         144 $vt_type = 2;
30564 75 100       289 $vt_closing_flag = $tightness{$token_next} == 2 ? 0 : 1;
30565 75         152 $vt_seqno = $type_sequence_to_go[$ibeg_next];
30566 75         133 $vt_valid_flag = $valid_flag;
30567 75         127 $vt_min_lines = $min_lines;
30568 75         170 $vt_max_lines = $max_lines;
30569             }
30570             }
30571             }
30572              
30573             #--------------------------------------------------------------
30574             # Vertical Tightness Flags Section 1c:
30575             # Implement the Opening Token Right flag (Type 2)..
30576             # If requested, move an isolated trailing opening token to the end of
30577             # the previous line which ended in a comma. We could do this
30578             # in sub recombine_breakpoints but that would cause problems
30579             # with -lp formatting. The problem is that indentation will
30580             # quickly move far to the right in nested expressions. By
30581             # doing it after indentation has been set, we avoid changes
30582             # to the indentation. Actual movement of the token takes place
30583             # in sub valign_output_step_B.
30584              
30585             # Note added 4 May 2021: the man page suggests that the -otr flags
30586             # are mainly for opening tokens following commas. But this seems
30587             # to have been generalized long ago to include other situations.
30588             # I checked the coding back to 2012 and it is essentially the same
30589             # as here, so it is best to leave this unchanged for now.
30590             #--------------------------------------------------------------
30591 801 50 66     2429 if (
      66        
      33        
      33        
      33        
      33        
      0        
      33        
      33        
      33        
30592             $opening_token_right{ $tokens_to_go[$ibeg_next] }
30593              
30594             # previous line is not opening
30595             # (use -sot to combine with it)
30596             && !$is_opening_token{$token_end}
30597              
30598             # previous line ended in one of these
30599             # (add other cases if necessary; '=>' and '.' are not necessary
30600             && !$block_type_to_go[$ibeg_next]
30601              
30602             # this is a line with just an opening token
30603             && ( $iend_next == $ibeg_next
30604             || $iend_next == $ibeg_next + 2
30605             && $types_to_go[$iend_next] eq '#' )
30606              
30607             # Fix for case b1060 when both -baoo and -otr are set:
30608             # to avoid blinking, honor the -baoo flag over the -otr flag.
30609             && $token_end ne '||' && $token_end ne '&&'
30610              
30611             # Keep break after '=' if -lp. Fixes b964 b1040 b1062 b1083 b1089.
30612             # Generalized from '=' to $is_assignment to fix b1375.
30613             && !(
30614             $is_assignment{ $types_to_go[$iend] }
30615             && $rOpts_line_up_parentheses
30616             && $type_sequence_to_go[$ibeg_next]
30617             && $self->[_rlp_object_by_seqno_]
30618             ->{ $type_sequence_to_go[$ibeg_next] }
30619             )
30620              
30621             # looks bad if we align vertically with the wrong container
30622             && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
30623              
30624             # give -kba priority over -otr (b1445)
30625             && !$self->[_rbreak_after_Klast_]->{ $K_to_go[$iend] }
30626             )
30627             {
30628 2 50       14 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
30629              
30630 2         5 $vt_type = 2;
30631 2         5 $vt_closing_flag = $spaces;
30632 2         6 $vt_seqno = $type_sequence_to_go[$ibeg_next];
30633 2         8 $vt_valid_flag = 1;
30634             }
30635              
30636             #--------------------------------------------------------------
30637             # Vertical Tightness Flags Section 1d:
30638             # Stacking of opening and closing tokens (Type 2)
30639             #--------------------------------------------------------------
30640 801         1207 my $stackable;
30641 801         1344 my $token_beg_next = $tokens_to_go[$ibeg_next];
30642              
30643             # patch to make something like 'qw(' behave like an opening paren
30644             # (aran.t)
30645 801 100       1829 if ( $types_to_go[$ibeg_next] eq 'q' ) {
30646 1 50       9 if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
30647 1         5 $token_beg_next = $1;
30648             }
30649             }
30650              
30651 801 100 100     3666 if ( $is_closing_token{$token_end}
    100 66        
30652             && $is_closing_token{$token_beg_next} )
30653             {
30654              
30655             # avoid instability of combo -bom and -sct; b1179
30656 70         166 my $seq_next = $type_sequence_to_go[$ibeg_next];
30657             $stackable = $stack_closing_token{$token_beg_next}
30658             unless ( $block_type_to_go[$ibeg_next]
30659 70 50 33     538 || $seq_next && $self->[_rbreak_container_]->{$seq_next} );
      33        
30660             }
30661             elsif ($is_opening_token{$token_end}
30662             && $is_opening_token{$token_beg_next} )
30663             {
30664 41 50       152 $stackable = $stack_opening_token{$token_beg_next}
30665             unless ( $block_type_to_go[$ibeg_next] )
30666             ; # shouldn't happen; just checking
30667             }
30668              
30669 801 100       1831 if ($stackable) {
30670              
30671 6         11 my $is_semicolon_terminated;
30672 6 100       25 if ( $n + 1 == $n_last_line ) {
30673 5         19 my ( $terminal_type, $i_terminal ) =
30674             terminal_type_i( $ibeg_next, $iend_next );
30675 5   66     35 $is_semicolon_terminated = $terminal_type eq ';'
30676             && $nesting_depth_to_go[$iend_next] <
30677             $nesting_depth_to_go[$ibeg_next];
30678             }
30679              
30680             # this must be a line with just an opening token
30681             # or end in a semicolon
30682 6 50 0     27 if (
      33        
      66        
30683             $is_semicolon_terminated
30684             || ( $iend_next == $ibeg_next
30685             || $iend_next == $ibeg_next + 2
30686             && $types_to_go[$iend_next] eq '#' )
30687             )
30688             {
30689 6 100       19 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
30690              
30691 6         10 $vt_type = 2;
30692 6         10 $vt_closing_flag = $spaces;
30693 6         11 $vt_seqno = $type_sequence_to_go[$ibeg_next];
30694 6         11 $vt_valid_flag = 1;
30695              
30696             }
30697             }
30698             }
30699              
30700             #--------------------------------------------------------------
30701             # Vertical Tightness Flags Section 2:
30702             # Handle type 3, opening block braces on last line of the batch
30703             # Check for a last line with isolated opening BLOCK curly
30704             #--------------------------------------------------------------
30705             elsif ($rOpts_block_brace_vertical_tightness
30706             && $ibeg eq $iend
30707             && $types_to_go[$iend] eq '{'
30708             && $block_type_to_go[$iend]
30709             && $block_type_to_go[$iend] =~
30710             /$block_brace_vertical_tightness_pattern/ )
30711             {
30712 11         45 $vt_type = 3;
30713 11         33 $vt_opening_flag = $rOpts_block_brace_vertical_tightness;
30714 11         22 $vt_seqno = 0;
30715 11         26 $vt_valid_flag = 1;
30716             }
30717              
30718             #--------------------------------------------------------------
30719             # Vertical Tightness Flags Section 3:
30720             # Handle type 4, a closing block brace on the last line of the batch Check
30721             # for a last line with isolated closing BLOCK curly
30722             # Patch: added a check for any new closing side comment which the
30723             # -csc option may generate. If it exists, there will be a side comment
30724             # so we cannot combine with a brace on the next line. This issue
30725             # occurs for the combination -scbb and -csc is used.
30726             #--------------------------------------------------------------
30727             elsif ($rOpts_stack_closing_block_brace
30728             && $ibeg eq $iend
30729             && $block_type_to_go[$iend]
30730             && $types_to_go[$iend] eq '}'
30731             && ( !$closing_side_comment || $n < $n_last_line ) )
30732             {
30733 5 50       21 my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
30734              
30735 5         13 $vt_type = 4;
30736 5         8 $vt_closing_flag = $spaces;
30737 5         15 $vt_seqno = $type_sequence_to_go[$iend];
30738 5         11 $vt_valid_flag = 1;
30739              
30740             }
30741              
30742             # get the sequence numbers of the ends of this line
30743 1308         2287 $vt_seqno_beg = $type_sequence_to_go[$ibeg];
30744 1308 100       2890 if ( !$vt_seqno_beg ) {
30745 886 100       1984 if ( $types_to_go[$ibeg] eq 'q' ) {
30746 11         46 $vt_seqno_beg = $self->get_seqno( $ibeg, $ending_in_quote );
30747             }
30748 875         1520 else { $vt_seqno_beg = EMPTY_STRING }
30749             }
30750              
30751 1308         2036 $vt_seqno_end = $type_sequence_to_go[$iend];
30752 1308 100       2698 if ( !$vt_seqno_end ) {
30753 853 100       1907 if ( $types_to_go[$iend] eq 'q' ) {
30754 7         24 $vt_seqno_end = $self->get_seqno( $iend, $ending_in_quote );
30755             }
30756 846         1417 else { $vt_seqno_end = EMPTY_STRING }
30757             }
30758              
30759 1308 100       2867 if ( !defined($vt_seqno) ) { $vt_seqno = EMPTY_STRING }
  1         3  
30760              
30761 1308         9601 my $rvertical_tightness_flags = {
30762             _vt_type => $vt_type,
30763             _vt_opening_flag => $vt_opening_flag,
30764             _vt_closing_flag => $vt_closing_flag,
30765             _vt_seqno => $vt_seqno,
30766             _vt_valid_flag => $vt_valid_flag,
30767             _vt_seqno_beg => $vt_seqno_beg,
30768             _vt_seqno_end => $vt_seqno_end,
30769             _vt_min_lines => $vt_min_lines,
30770             _vt_max_lines => $vt_max_lines,
30771             };
30772              
30773 1308         4006 return ($rvertical_tightness_flags);
30774             } ## end sub set_vertical_tightness_flags
30775              
30776             ##########################################################
30777             # CODE SECTION 14: Code for creating closing side comments
30778             ##########################################################
30779              
30780             { ## begin closure accumulate_csc_text
30781              
30782             # These routines are called once per batch when the --closing-side-comments flag
30783             # has been set.
30784              
30785             my %block_leading_text;
30786             my %block_opening_line_number;
30787             my $csc_new_statement_ok;
30788             my $csc_last_label;
30789             my %csc_block_label;
30790             my $accumulating_text_for_block;
30791             my $leading_block_text;
30792             my $rleading_block_if_elsif_text;
30793             my $leading_block_text_level;
30794             my $leading_block_text_length_exceeded;
30795             my $leading_block_text_line_length;
30796             my $leading_block_text_line_number;
30797              
30798             sub initialize_csc_vars {
30799 555     555 0 1741 %block_leading_text = ();
30800 555         1181 %block_opening_line_number = ();
30801 555         1151 $csc_new_statement_ok = 1;
30802 555         1248 $csc_last_label = EMPTY_STRING;
30803 555         1243 %csc_block_label = ();
30804 555         1449 $rleading_block_if_elsif_text = [];
30805 555         1157 $accumulating_text_for_block = EMPTY_STRING;
30806 555         2448 reset_block_text_accumulator();
30807 555         1076 return;
30808             } ## end sub initialize_csc_vars
30809              
30810             sub reset_block_text_accumulator {
30811              
30812             # save text after 'if' and 'elsif' to append after 'else'
30813 564 100   564 0 1993 if ($accumulating_text_for_block) {
30814              
30815             ## ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
30816 9 100       34 if ( $is_if_elsif{$accumulating_text_for_block} ) {
30817 5         13 push @{$rleading_block_if_elsif_text}, $leading_block_text;
  5         18  
30818             }
30819             }
30820 564         1312 $accumulating_text_for_block = EMPTY_STRING;
30821 564         1196 $leading_block_text = EMPTY_STRING;
30822 564         1264 $leading_block_text_level = 0;
30823 564         1277 $leading_block_text_length_exceeded = 0;
30824 564         1218 $leading_block_text_line_number = 0;
30825 564         1192 $leading_block_text_line_length = 0;
30826 564         1174 return;
30827             } ## end sub reset_block_text_accumulator
30828              
30829             sub set_block_text_accumulator {
30830 9     9 0 25 my ( $self, $i ) = @_;
30831 9         24 $accumulating_text_for_block = $tokens_to_go[$i];
30832 9 100       38 if ( $accumulating_text_for_block !~ /^els/ ) {
30833 7         21 $rleading_block_if_elsif_text = [];
30834             }
30835 9         22 $leading_block_text = EMPTY_STRING;
30836 9         22 $leading_block_text_level = $levels_to_go[$i];
30837 9         26 $leading_block_text_line_number = $self->get_output_line_number();
30838 9         26 $leading_block_text_length_exceeded = 0;
30839              
30840             # this will contain the column number of the last character
30841             # of the closing side comment
30842             $leading_block_text_line_length =
30843             length($csc_last_label) +
30844             length($accumulating_text_for_block) +
30845 9         37 length( $rOpts->{'closing-side-comment-prefix'} ) +
30846             $leading_block_text_level * $rOpts_indent_columns + 3;
30847 9         28 return;
30848             } ## end sub set_block_text_accumulator
30849              
30850             sub accumulate_block_text {
30851 708     708 0 1166 my ( $self, $i ) = @_;
30852              
30853             # accumulate leading text for -csc, ignoring any side comments
30854 708 50 66     1579 if ( $accumulating_text_for_block
      66        
30855             && !$leading_block_text_length_exceeded
30856             && $types_to_go[$i] ne '#' )
30857             {
30858              
30859 92         136 my $added_length = $token_lengths_to_go[$i];
30860 92 50       168 $added_length += 1 if $i == 0;
30861 92         131 my $new_line_length =
30862             $leading_block_text_line_length + $added_length;
30863              
30864             # we can add this text if we don't exceed some limits..
30865 92 100 33     385 if (
    50 66        
      33        
      66        
      100        
30866              
30867             # we must not have already exceeded the text length limit
30868             length($leading_block_text) <
30869             $rOpts_closing_side_comment_maximum_text
30870              
30871             # and either:
30872             # the new total line length must be below the line length limit
30873             # or the new length must be below the text length limit
30874             # (ie, we may allow one token to exceed the text length limit)
30875             && (
30876             $new_line_length <
30877             $maximum_line_length_at_level[$leading_block_text_level]
30878              
30879             || length($leading_block_text) + $added_length <
30880             $rOpts_closing_side_comment_maximum_text
30881             )
30882              
30883             # UNLESS: we are adding a closing paren before the brace we seek.
30884             # This is an attempt to avoid situations where the ... to be
30885             # added are longer than the omitted right paren, as in:
30886              
30887             # foreach my $item (@a_rather_long_variable_name_here) {
30888             # &whatever;
30889             # } ## end foreach my $item (@a_rather_long_variable_name_here...
30890              
30891             || (
30892             $tokens_to_go[$i] eq ')'
30893             && (
30894             (
30895             $i + 1 <= $max_index_to_go
30896             && $block_type_to_go[ $i + 1 ]
30897             && $block_type_to_go[ $i + 1 ] eq
30898             $accumulating_text_for_block
30899             )
30900             || ( $i + 2 <= $max_index_to_go
30901             && $block_type_to_go[ $i + 2 ]
30902             && $block_type_to_go[ $i + 2 ] eq
30903             $accumulating_text_for_block )
30904             )
30905             )
30906             )
30907             {
30908              
30909             # add an extra space at each newline
30910 89 50 33     185 if ( $i == 0 && $types_to_go[$i] ne 'b' ) {
30911 0         0 $leading_block_text .= SPACE;
30912             }
30913              
30914             # add the token text
30915 89         167 $leading_block_text .= $tokens_to_go[$i];
30916 89         134 $leading_block_text_line_length = $new_line_length;
30917             }
30918              
30919             # show that text was truncated if necessary
30920             elsif ( $types_to_go[$i] ne 'b' ) {
30921 0         0 $leading_block_text_length_exceeded = 1;
30922 0         0 $leading_block_text .= '...';
30923             }
30924             }
30925 708         1234 return;
30926             } ## end sub accumulate_block_text
30927              
30928             sub accumulate_csc_text {
30929              
30930 61     61 0 110 my ($self) = @_;
30931              
30932             # called once per output buffer when -csc is used. Accumulates
30933             # the text placed after certain closing block braces.
30934             # Defines and returns the following for this buffer:
30935              
30936 61         111 my $block_leading_text =
30937             EMPTY_STRING; # the leading text of the last '}'
30938 61         101 my $rblock_leading_if_elsif_text;
30939 61         101 my $i_block_leading_text =
30940             -1; # index of token owning block_leading_text
30941 61         94 my $block_line_count = 100; # how many lines the block spans
30942 61         98 my $terminal_type = 'b'; # type of last nonblank token
30943 61         99 my $i_terminal = 0; # index of last nonblank token
30944 61         98 my $terminal_block_type = EMPTY_STRING;
30945              
30946             # update most recent statement label
30947 61 50       177 $csc_last_label = EMPTY_STRING unless ($csc_last_label);
30948 61 50       186 if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
  0         0  
30949 61         105 my $block_label = $csc_last_label;
30950              
30951             # Loop over all tokens of this batch
30952 61         146 for my $i ( 0 .. $max_index_to_go ) {
30953 717         1088 my $type = $types_to_go[$i];
30954 717         967 my $block_type = $block_type_to_go[$i];
30955 717         1012 my $token = $tokens_to_go[$i];
30956 717 100       1299 $block_type = EMPTY_STRING unless ($block_type);
30957              
30958             # remember last nonblank token type
30959 717 100 100     2179 if ( $type ne '#' && $type ne 'b' ) {
30960 463         655 $terminal_type = $type;
30961 463         654 $terminal_block_type = $block_type;
30962 463         604 $i_terminal = $i;
30963             }
30964              
30965 717         986 my $type_sequence = $type_sequence_to_go[$i];
30966 717 100 66     1416 if ( $block_type && $type_sequence ) {
30967              
30968 34 100       121 if ( $token eq '}' ) {
    50          
30969              
30970             # restore any leading text saved when we entered this block
30971 17 100       59 if ( defined( $block_leading_text{$type_sequence} ) ) {
30972             ( $block_leading_text, $rblock_leading_if_elsif_text )
30973 9         19 = @{ $block_leading_text{$type_sequence} };
  9         34  
30974 9         20 $i_block_leading_text = $i;
30975 9         29 delete $block_leading_text{$type_sequence};
30976 9         20 $rleading_block_if_elsif_text =
30977             $rblock_leading_if_elsif_text;
30978             }
30979              
30980 17 50       51 if ( defined( $csc_block_label{$type_sequence} ) ) {
30981 17         49 $block_label = $csc_block_label{$type_sequence};
30982 17         44 delete $csc_block_label{$type_sequence};
30983             }
30984              
30985             # if we run into a '}' then we probably started accumulating
30986             # at something like a trailing 'if' clause..no harm done.
30987 17 50 33     55 if ( $accumulating_text_for_block
30988             && $levels_to_go[$i] <= $leading_block_text_level )
30989             {
30990 0         0 my $lev = $levels_to_go[$i];
30991 0         0 reset_block_text_accumulator();
30992             }
30993              
30994 17 50       50 if ( defined( $block_opening_line_number{$type_sequence} ) )
30995             {
30996 17         62 my $output_line_number =
30997             $self->get_output_line_number();
30998             $block_line_count =
30999             $output_line_number -
31000 17         49 $block_opening_line_number{$type_sequence} + 1;
31001 17         37 delete $block_opening_line_number{$type_sequence};
31002             }
31003             else {
31004              
31005             # Error: block opening line undefined for this line..
31006             # This shouldn't be possible, but it is not a
31007             # significant problem.
31008             }
31009             }
31010              
31011             elsif ( $token eq '{' ) {
31012              
31013 17         67 my $line_number = $self->get_output_line_number();
31014 17         45 $block_opening_line_number{$type_sequence} = $line_number;
31015              
31016             # set a label for this block, except for
31017             # a bare block which already has the label
31018             # A label can only be used on the next {
31019 17 50       72 if ( $block_type =~ /:$/ ) {
31020 0         0 $csc_last_label = EMPTY_STRING;
31021             }
31022 17         43 $csc_block_label{$type_sequence} = $csc_last_label;
31023 17         31 $csc_last_label = EMPTY_STRING;
31024              
31025 17 100 66     78 if ( $accumulating_text_for_block
31026             && $levels_to_go[$i] == $leading_block_text_level )
31027             {
31028              
31029 9 50       29 if ( $accumulating_text_for_block eq $block_type ) {
31030              
31031             # save any leading text before we enter this block
31032 9         29 $block_leading_text{$type_sequence} = [
31033             $leading_block_text,
31034             $rleading_block_if_elsif_text
31035             ];
31036 9         29 $block_opening_line_number{$type_sequence} =
31037             $leading_block_text_line_number;
31038 9         150 reset_block_text_accumulator();
31039             }
31040             else {
31041              
31042             # shouldn't happen, but not a serious error.
31043             # We were accumulating -csc text for block type
31044             # $accumulating_text_for_block and unexpectedly
31045             # encountered a '{' for block type $block_type.
31046             }
31047             }
31048             }
31049             }
31050              
31051 717 100 100     2304 if ( $type eq 'k'
      100        
      100        
31052             && $csc_new_statement_ok
31053             && $is_if_elsif_else_unless_while_until_for_foreach{$token}
31054             && $token =~ /$closing_side_comment_list_pattern/ )
31055             {
31056 9         68 $self->set_block_text_accumulator($i);
31057             }
31058             else {
31059              
31060             # note: ignoring type 'q' because of tricks being played
31061             # with 'q' for hanging side comments
31062 708 100 100     2310 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
      66        
31063 454   100     1592 $csc_new_statement_ok =
31064             ( $block_type || $type eq 'J' || $type eq ';' );
31065             }
31066 708 50 66     1667 if ( $type eq ';'
      33        
31067             && $accumulating_text_for_block
31068             && $levels_to_go[$i] == $leading_block_text_level )
31069             {
31070 0         0 reset_block_text_accumulator();
31071             }
31072             else {
31073 708         1226 $self->accumulate_block_text($i);
31074             }
31075             }
31076             }
31077              
31078             # Treat an 'else' block specially by adding preceding 'if' and
31079             # 'elsif' text. Otherwise, the 'end else' is not helpful,
31080             # especially for cuddled-else formatting.
31081 61 100 100     244 if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
31082 2         17 $block_leading_text =
31083             $self->make_else_csc_text( $i_terminal, $terminal_block_type,
31084             $block_leading_text, $rblock_leading_if_elsif_text );
31085             }
31086              
31087             # if this line ends in a label then remember it for the next pass
31088 61         105 $csc_last_label = EMPTY_STRING;
31089 61 50       145 if ( $terminal_type eq 'J' ) {
31090 0         0 $csc_last_label = $tokens_to_go[$i_terminal];
31091             }
31092              
31093 61         283 return ( $terminal_type, $i_terminal, $i_block_leading_text,
31094             $block_leading_text, $block_line_count, $block_label );
31095             } ## end sub accumulate_csc_text
31096              
31097             sub make_else_csc_text {
31098              
31099             # create additional -csc text for an 'else' and optionally 'elsif',
31100             # depending on the value of switch
31101             #
31102             # = 0 add 'if' text to trailing else
31103             # = 1 same as 0 plus:
31104             # add 'if' to 'elsif's if can fit in line length
31105             # add last 'elsif' to trailing else if can fit in one line
31106             # = 2 same as 1 but do not check if exceed line length
31107             #
31108             # $rif_elsif_text = a reference to a list of all previous closing
31109             # side comments created for this if block
31110             #
31111 2     2 0 17 my ( $self, $i_terminal, $block_type, $block_leading_text,
31112             $rif_elsif_text )
31113             = @_;
31114 2         9 my $csc_text = $block_leading_text;
31115              
31116 2 50 33     14 if ( $block_type eq 'elsif'
31117             && $rOpts_closing_side_comment_else_flag == 0 )
31118             {
31119 0         0 return $csc_text;
31120             }
31121              
31122 2         5 my $count = @{$rif_elsif_text};
  2         5  
31123 2 50       10 return $csc_text unless ($count);
31124              
31125 2         10 my $if_text = '[ if' . $rif_elsif_text->[0];
31126              
31127             # always show the leading 'if' text on 'else'
31128 2 50       11 if ( $block_type eq 'else' ) {
31129 2         7 $csc_text .= $if_text;
31130             }
31131              
31132             # see if that's all
31133 2 50       7 if ( $rOpts_closing_side_comment_else_flag == 0 ) {
31134 2         8 return $csc_text;
31135             }
31136              
31137 0         0 my $last_elsif_text = EMPTY_STRING;
31138 0 0       0 if ( $count > 1 ) {
31139 0         0 $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
31140 0 0       0 if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
  0         0  
31141             }
31142              
31143             # tentatively append one more item
31144 0         0 my $saved_text = $csc_text;
31145 0 0       0 if ( $block_type eq 'else' ) {
31146 0         0 $csc_text .= $last_elsif_text;
31147             }
31148             else {
31149 0         0 $csc_text .= SPACE . $if_text;
31150             }
31151              
31152             # all done if no length checks requested
31153 0 0       0 if ( $rOpts_closing_side_comment_else_flag == 2 ) {
31154 0         0 return $csc_text;
31155             }
31156              
31157             # undo it if line length exceeded
31158             my $length =
31159             length($csc_text) +
31160             length($block_type) +
31161 0         0 length( $rOpts->{'closing-side-comment-prefix'} ) +
31162             $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
31163 0 0       0 if (
31164             $length > $maximum_line_length_at_level[$leading_block_text_level] )
31165             {
31166 0         0 $csc_text = $saved_text;
31167             }
31168 0         0 return $csc_text;
31169             } ## end sub make_else_csc_text
31170             } ## end closure accumulate_csc_text
31171              
31172             { ## begin closure balance_csc_text
31173              
31174             # Some additional routines for handling the --closing-side-comments option
31175              
31176             my %matching_char;
31177              
31178             BEGIN {
31179 38     38   89283 %matching_char = (
31180             '{' => '}',
31181             '(' => ')',
31182             '[' => ']',
31183             '}' => '{',
31184             ')' => '(',
31185             ']' => '[',
31186             );
31187             } ## end BEGIN
31188              
31189             sub balance_csc_text {
31190              
31191             # Append characters to balance a closing side comment so that editors
31192             # such as vim can correctly jump through code.
31193             # Simple Example:
31194             # input = ## end foreach my $foo ( sort { $b ...
31195             # output = ## end foreach my $foo ( sort { $b ...})
31196              
31197             # NOTE: This routine does not currently filter out structures within
31198             # quoted text because the bounce algorithms in text editors do not
31199             # necessarily do this either (a version of vim was checked and
31200             # did not do this).
31201              
31202             # Some complex examples which will cause trouble for some editors:
31203             # while ( $mask_string =~ /\{[^{]*?\}/g ) {
31204             # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
31205             # if ( $1 eq '{' ) {
31206             # test file test1/braces.pl has many such examples.
31207              
31208 6     6 0 20 my ($csc) = @_;
31209              
31210             # loop to examine characters one-by-one, RIGHT to LEFT and
31211             # build a balancing ending, LEFT to RIGHT.
31212 6         28 foreach my $pos ( reverse( 0 .. length($csc) - 1 ) ) {
31213              
31214 171         257 my $char = substr( $csc, $pos, 1 );
31215              
31216             # ignore everything except structural characters
31217 171 100       324 next unless ( $matching_char{$char} );
31218              
31219             # pop most recently appended character
31220 7         17 my $top = chop($csc);
31221              
31222             # push it back plus the mate to the newest character
31223             # unless they balance each other.
31224 7 100       27 $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
31225             }
31226              
31227             # return the balanced string
31228 6         30 return $csc;
31229             } ## end sub balance_csc_text
31230             } ## end closure balance_csc_text
31231              
31232             sub add_closing_side_comment {
31233              
31234 61     61 0 126 my ( $self, $ri_first, $ri_last ) = @_;
31235 61         109 my $rLL = $self->[_rLL_];
31236              
31237             # add closing side comments after closing block braces if -csc used
31238 61         115 my ( $closing_side_comment, $cscw_block_comment );
31239              
31240             #---------------------------------------------------------------
31241             # Step 1: loop through all tokens of this line to accumulate
31242             # the text needed to create the closing side comments. Also see
31243             # how the line ends.
31244             #---------------------------------------------------------------
31245              
31246 61         163 my ( $terminal_type, $i_terminal, $i_block_leading_text,
31247             $block_leading_text, $block_line_count, $block_label )
31248             = $self->accumulate_csc_text();
31249              
31250             #---------------------------------------------------------------
31251             # Step 2: make the closing side comment if this ends a block
31252             #---------------------------------------------------------------
31253 61         176 my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
31254              
31255             # if this line might end in a block closure..
31256 61 50 66     783 if (
      66        
      33        
      66        
      66        
      66        
      33        
      33        
31257             $terminal_type eq '}'
31258              
31259             # Fix 1 for c091, this is only for blocks
31260             && $block_type_to_go[$i_terminal]
31261              
31262             # ..and either
31263             && (
31264              
31265             # the block is long enough
31266             ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
31267              
31268             # or there is an existing comment to check
31269             || ( $have_side_comment
31270             && $rOpts->{'closing-side-comment-warnings'} )
31271             )
31272              
31273             # .. and if this is one of the types of interest
31274             && $block_type_to_go[$i_terminal] =~
31275             /$closing_side_comment_list_pattern/
31276              
31277             # .. but not an anonymous sub
31278             # These are not normally of interest, and their closing braces are
31279             # often followed by commas or semicolons anyway. This also avoids
31280             # possible erratic output due to line numbering inconsistencies
31281             # in the cases where their closing braces terminate a line.
31282             && $block_type_to_go[$i_terminal] ne 'sub'
31283              
31284             # ..and the corresponding opening brace must is not in this batch
31285             # (because we do not need to tag one-line blocks, although this
31286             # should also be caught with a positive -csci value)
31287             && !defined( $mate_index_to_go[$i_terminal] )
31288              
31289             # ..and either
31290             && (
31291              
31292             # this is the last token (line doesn't have a side comment)
31293             !$have_side_comment
31294              
31295             # or the old side comment is a closing side comment
31296             || $tokens_to_go[$max_index_to_go] =~
31297             /$closing_side_comment_prefix_pattern/
31298             )
31299             )
31300             {
31301              
31302             # then make the closing side comment text
31303 9 50       40 if ($block_label) { $block_label .= SPACE }
  0         0  
31304 9         45 my $token =
31305             "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
31306              
31307             # append any extra descriptive text collected above
31308 9 100       32 if ( $i_block_leading_text == $i_terminal ) {
31309 5         13 $token .= $block_leading_text;
31310             }
31311              
31312             $token = balance_csc_text($token)
31313 9 100       46 if $rOpts->{'closing-side-comments-balanced'};
31314              
31315 9         85 $token =~ s/\s*$//; # trim any trailing whitespace
31316              
31317             # handle case of existing closing side comment
31318 9 50       33 if ($have_side_comment) {
31319              
31320             # warn if requested and tokens differ significantly
31321 0 0       0 if ( $rOpts->{'closing-side-comment-warnings'} ) {
31322 0         0 my $old_csc = $tokens_to_go[$max_index_to_go];
31323 0         0 my $new_csc = $token;
31324 0         0 $new_csc =~ s/\s+//g; # trim all whitespace
31325 0         0 $old_csc =~ s/\s+//g; # trim all whitespace
31326 0         0 $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
31327 0         0 $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
31328 0         0 $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
31329 0         0 my $new_trailing_dots = $1;
31330 0         0 $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
31331              
31332             # Patch to handle multiple closing side comments at
31333             # else and elsif's. These have become too complicated
31334             # to check, so if we see an indication of
31335             # '[ if' or '[ # elsif', then assume they were made
31336             # by perltidy.
31337 0 0       0 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
    0          
31338 0 0       0 if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
  0         0  
31339             }
31340             elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
31341 0 0       0 if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
  0         0  
31342             }
31343              
31344             # if old comment is contained in new comment,
31345             # only compare the common part.
31346 0 0       0 if ( length($new_csc) > length($old_csc) ) {
31347 0         0 $new_csc = substr( $new_csc, 0, length($old_csc) );
31348             }
31349              
31350             # if the new comment is shorter and has been limited,
31351             # only compare the common part.
31352 0 0 0     0 if ( length($new_csc) < length($old_csc)
31353             && $new_trailing_dots )
31354             {
31355 0         0 $old_csc = substr( $old_csc, 0, length($new_csc) );
31356             }
31357              
31358             # any remaining difference?
31359 0 0       0 if ( $new_csc ne $old_csc ) {
    0          
31360              
31361             # just leave the old comment if we are below the threshold
31362             # for creating side comments
31363 0 0       0 if ( $block_line_count <
31364             $rOpts->{'closing-side-comment-interval'} )
31365             {
31366 0         0 $token = undef;
31367             }
31368              
31369             # otherwise we'll make a note of it
31370             else {
31371              
31372 0         0 my $msg_line_number;
31373 0         0 my $K = $K_to_go[$i_terminal];
31374 0 0       0 if ( defined($K) ) {
31375 0         0 $msg_line_number = $rLL->[$K]->[_LINE_INDEX_] + 1;
31376             }
31377             warning(
31378 0         0 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n",
31379             $msg_line_number
31380             );
31381              
31382             # save the old side comment in a new trailing block
31383             # comment
31384 0         0 my $timestamp = EMPTY_STRING;
31385 0 0       0 if ( $rOpts->{'timestamp'} ) {
31386 0         0 my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
31387 0         0 $year += 1900;
31388 0         0 $month += 1;
31389 0         0 $timestamp = "$year-$month-$day";
31390             }
31391             $cscw_block_comment =
31392 0         0 "## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]";
31393             }
31394             }
31395              
31396             # No differences.. we can safely delete old comment if we
31397             # are below the threshold
31398             elsif ( $block_line_count <
31399             $rOpts->{'closing-side-comment-interval'} )
31400             {
31401             # Since the line breaks have already been set, we have
31402             # to remove the token from the _to_go array and also
31403             # from the line range (this fixes issue c081).
31404             # Note that we can only get here if -cscw has been set
31405             # because otherwise the old comment is already deleted.
31406 0         0 $token = undef;
31407 0         0 my $ibeg = $ri_first->[-1];
31408 0         0 my $iend = $ri_last->[-1];
31409 0 0 0     0 if ( $iend > $ibeg
      0        
31410             && $iend == $max_index_to_go
31411             && $types_to_go[$max_index_to_go] eq '#' )
31412             {
31413 0         0 $iend--;
31414 0         0 $max_index_to_go--;
31415 0 0 0     0 if ( $iend > $ibeg
31416             && $types_to_go[$max_index_to_go] eq 'b' )
31417             {
31418 0         0 $iend--;
31419 0         0 $max_index_to_go--;
31420             }
31421 0         0 $ri_last->[-1] = $iend;
31422             }
31423             }
31424             }
31425              
31426             # switch to the new csc (unless we deleted it!)
31427 0 0       0 if ($token) {
31428              
31429 0         0 my $len_tok = length($token); # NOTE: length no longer important
31430 0         0 my $added_len =
31431             $len_tok - $token_lengths_to_go[$max_index_to_go];
31432              
31433 0         0 $tokens_to_go[$max_index_to_go] = $token;
31434 0         0 $token_lengths_to_go[$max_index_to_go] = $len_tok;
31435 0         0 my $K = $K_to_go[$max_index_to_go];
31436 0         0 $rLL->[$K]->[_TOKEN_] = $token;
31437 0         0 $rLL->[$K]->[_TOKEN_LENGTH_] = $len_tok;
31438 0         0 $summed_lengths_to_go[ $max_index_to_go + 1 ] += $added_len;
31439             }
31440             }
31441              
31442             # handle case of NO existing closing side comment
31443             else {
31444              
31445             # To avoid inserting a new token in the token arrays, we
31446             # will just return the new side comment so that it can be
31447             # inserted just before it is needed in the call to the
31448             # vertical aligner.
31449 9         25 $closing_side_comment = $token;
31450             }
31451             }
31452 61         175 return ( $closing_side_comment, $cscw_block_comment );
31453             } ## end sub add_closing_side_comment
31454              
31455             ############################
31456             # CODE SECTION 15: Summarize
31457             ############################
31458              
31459             sub wrapup {
31460              
31461             # This is the last routine called when a file is formatted.
31462             # Flush buffer and write any informative messages
31463 555     555 0 2118 my ( $self, $severe_error ) = @_;
31464              
31465 555         2439 $self->flush();
31466 555         2100 my $file_writer_object = $self->[_file_writer_object_];
31467 555         3608 $file_writer_object->decrement_output_line_number()
31468             ; # fix up line number since it was incremented
31469 555         2498 we_are_at_the_last_line();
31470              
31471 555         2218 my $max_depth = $self->[_maximum_BLOCK_level_];
31472 555         1397 my $at_line = $self->[_maximum_BLOCK_level_at_line_];
31473 555         3574 write_logfile_entry(
31474             "Maximum leading structural depth is $max_depth in input at line $at_line\n"
31475             );
31476              
31477 555         1836 my $added_semicolon_count = $self->[_added_semicolon_count_];
31478 555         1695 my $first_added_semicolon_at = $self->[_first_added_semicolon_at_];
31479 555         1704 my $last_added_semicolon_at = $self->[_last_added_semicolon_at_];
31480              
31481 555 100       2287 if ( $added_semicolon_count > 0 ) {
31482 15 100       79 my $first = ( $added_semicolon_count > 1 ) ? "First" : EMPTY_STRING;
31483 15 100       61 my $what =
31484             ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
31485 15         85 write_logfile_entry("$added_semicolon_count $what added:\n");
31486 15         132 write_logfile_entry(
31487             " $first at input line $first_added_semicolon_at\n");
31488              
31489 15 100       125 if ( $added_semicolon_count > 1 ) {
31490 3         23 write_logfile_entry(
31491             " Last at input line $last_added_semicolon_at\n");
31492             }
31493 15         74 write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
31494 15         77 write_logfile_entry("\n");
31495             }
31496              
31497 555         1533 my $deleted_semicolon_count = $self->[_deleted_semicolon_count_];
31498 555         1290 my $first_deleted_semicolon_at = $self->[_first_deleted_semicolon_at_];
31499 555         1403 my $last_deleted_semicolon_at = $self->[_last_deleted_semicolon_at_];
31500 555 100       1951 if ( $deleted_semicolon_count > 0 ) {
31501 2 50       10 my $first = ( $deleted_semicolon_count > 1 ) ? "First" : EMPTY_STRING;
31502 2 50       11 my $what =
31503             ( $deleted_semicolon_count > 1 )
31504             ? "semicolons were"
31505             : "semicolon was";
31506 2         13 write_logfile_entry(
31507             "$deleted_semicolon_count unnecessary $what deleted:\n");
31508 2         13 write_logfile_entry(
31509             " $first at input line $first_deleted_semicolon_at\n");
31510              
31511 2 50       7 if ( $deleted_semicolon_count > 1 ) {
31512 2         11 write_logfile_entry(
31513             " Last at input line $last_deleted_semicolon_at\n");
31514             }
31515 2         172 write_logfile_entry(" (Use -ndsm to prevent semicolon deletion)\n");
31516 2         10 write_logfile_entry("\n");
31517             }
31518              
31519 555         1428 my $embedded_tab_count = $self->[_embedded_tab_count_];
31520 555         1216 my $first_embedded_tab_at = $self->[_first_embedded_tab_at_];
31521 555         1283 my $last_embedded_tab_at = $self->[_last_embedded_tab_at_];
31522 555 50       1869 if ( $embedded_tab_count > 0 ) {
31523 0 0       0 my $first = ( $embedded_tab_count > 1 ) ? "First" : EMPTY_STRING;
31524 0 0       0 my $what =
31525             ( $embedded_tab_count > 1 )
31526             ? "quotes or patterns"
31527             : "quote or pattern";
31528 0         0 write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
31529 0         0 write_logfile_entry(
31530             "This means the display of this script could vary with device or software\n"
31531             );
31532 0         0 write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
31533              
31534 0 0       0 if ( $embedded_tab_count > 1 ) {
31535 0         0 write_logfile_entry(
31536             " Last at input line $last_embedded_tab_at\n");
31537             }
31538 0         0 write_logfile_entry("\n");
31539             }
31540              
31541 555         1346 my $first_tabbing_disagreement = $self->[_first_tabbing_disagreement_];
31542 555         1226 my $last_tabbing_disagreement = $self->[_last_tabbing_disagreement_];
31543 555         1223 my $tabbing_disagreement_count = $self->[_tabbing_disagreement_count_];
31544 555         1233 my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
31545              
31546 555 50       1807 if ($first_tabbing_disagreement) {
31547 0         0 write_logfile_entry(
31548             "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
31549             );
31550             }
31551              
31552 555         1297 my $first_btd = $self->[_first_brace_tabbing_disagreement_];
31553 555 50       1769 if ($first_btd) {
31554 0         0 my $msg =
31555             "First closing brace indentation disagreement started at input line $first_btd\n";
31556 0         0 write_logfile_entry($msg);
31557              
31558             # leave a hint in the .ERR file if there was a brace error
31559 0 0       0 if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
  0         0  
31560             }
31561              
31562 555         1372 my $in_btd = $self->[_in_brace_tabbing_disagreement_];
31563 555 50       1771 if ($in_btd) {
31564 0         0 my $msg =
31565             "Ending with brace indentation disagreement which started at input line $in_btd\n";
31566 0         0 write_logfile_entry($msg);
31567              
31568             # leave a hint in the .ERR file if there was a brace error
31569 0 0       0 if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
  0         0  
31570             }
31571              
31572 555 50       1712 if ($in_tabbing_disagreement) {
31573 0         0 my $msg =
31574             "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n";
31575 0         0 write_logfile_entry($msg);
31576             }
31577             else {
31578              
31579 555 50       1441 if ($last_tabbing_disagreement) {
31580              
31581 0         0 write_logfile_entry(
31582             "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
31583             );
31584             }
31585             else {
31586 555         1388 write_logfile_entry("No indentation disagreement seen\n");
31587             }
31588             }
31589              
31590 555 50       3436 if ($first_tabbing_disagreement) {
31591 0         0 write_logfile_entry(
31592             "Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
31593             );
31594             }
31595 555         2201 write_logfile_entry("\n");
31596              
31597 555         2436 my $vao = $self->[_vertical_aligner_object_];
31598 555         5208 $vao->report_anything_unusual();
31599              
31600 555         3427 $file_writer_object->report_line_length_errors();
31601              
31602             # Define the formatter self-check for convergence.
31603             $self->[_converged_] =
31604             $severe_error
31605             || $file_writer_object->get_convergence_check()
31606 555   100     5092 || $rOpts->{'indent-only'};
31607              
31608 555         1579 return;
31609             } ## end sub wrapup
31610              
31611             } ## end package Perl::Tidy::Formatter
31612             1;