File Coverage

blib/lib/Perl/Tidy/VerticalAligner.pm
Criterion Covered Total %
statement 1912 2063 92.6
branch 668 826 80.8
condition 421 544 77.3
subroutine 98 110 89.0
pod 0 71 0.0
total 3099 3614 85.7


line stmt bran cond sub pod time code
1             package Perl::Tidy::VerticalAligner;
2 38     38   309 use strict;
  38         80  
  38         1488  
3 38     38   249 use warnings;
  38         93  
  38         1371  
4 38     38   208 use Carp;
  38         70  
  38         2911  
5 38     38   268 use English qw( -no_match_vars );
  38         84  
  38         390  
6             our $VERSION = '20230701';
7 38     38   30252 use Perl::Tidy::VerticalAligner::Alignment;
  38         116  
  38         1365  
8 38     38   15246 use Perl::Tidy::VerticalAligner::Line;
  38         143  
  38         1362  
9              
10 38     38   262 use constant DEVEL_MODE => 0;
  38         97  
  38         2395  
11 38     38   272 use constant EMPTY_STRING => q{};
  38         95  
  38         1784  
12 38     38   220 use constant SPACE => q{ };
  38         80  
  38         15693  
13              
14             # The Perl::Tidy::VerticalAligner package collects output lines and
15             # attempts to line up certain common tokens, such as => and #, which are
16             # identified by the calling routine.
17             #
18             # Usage:
19             # - Initiate an object with a call to new().
20             # - Write lines one-by-one with calls to valign_input().
21             # - Make a final call to flush() to empty the pipeline.
22             #
23             # The sub valign_input collects lines into groups. When a group reaches
24             # the maximum possible size it is processed for alignment and output.
25             # The maximum group size is reached whenever there is a change in indentation
26             # level, a blank line, a block comment, or an external flush call. The calling
27             # routine may also force a break in alignment at any time.
28             #
29             # If the calling routine needs to interrupt the output and send other text to
30             # the output, it must first call flush() to empty the output pipeline. This
31             # might occur for example if a block of pod text needs to be sent to the output
32             # between blocks of code.
33              
34             # It is essential that a final call to flush() be made. Otherwise some
35             # final lines of text will be lost.
36              
37             # Index...
38             # CODE SECTION 1: Preliminary code, global definitions and sub new
39             # sub new
40             # CODE SECTION 2: Some Basic Utilities
41             # CODE SECTION 3: Code to accept input and form groups
42             # sub valign_input
43             # CODE SECTION 4: Code to process comment lines
44             # sub _flush_comment_lines
45             # CODE SECTION 5: Code to process groups of code lines
46             # sub _flush_group_lines
47             # CODE SECTION 6: Output Step A
48             # sub valign_output_step_A
49             # CODE SECTION 7: Output Step B
50             # sub valign_output_step_B
51             # CODE SECTION 8: Output Step C
52             # sub valign_output_step_C
53             # CODE SECTION 9: Output Step D
54             # sub valign_output_step_D
55             # CODE SECTION 10: Summary
56             # sub report_anything_unusual
57              
58             ##################################################################
59             # CODE SECTION 1: Preliminary code, global definitions and sub new
60             ##################################################################
61              
62             sub AUTOLOAD {
63              
64             # Catch any undefined sub calls so that we are sure to get
65             # some diagnostic information. This sub should never be called
66             # except for a programming error.
67 0     0   0 our $AUTOLOAD;
68 0 0       0 return if ( $AUTOLOAD =~ /\bDESTROY$/ );
69 0         0 my ( $pkg, $fname, $lno ) = caller();
70 0         0 my $my_package = __PACKAGE__;
71 0         0 print STDERR <<EOM;
72             ======================================================================
73             Error detected in package '$my_package', version $VERSION
74             Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
75             Called from package: '$pkg'
76             Called from File '$fname' at line '$lno'
77             This error is probably due to a recent programming change
78             ======================================================================
79             EOM
80 0         0 exit 1;
81             } ## end sub AUTOLOAD
82              
83       0     sub DESTROY {
84              
85             # required to avoid call to AUTOLOAD in some versions of perl
86             }
87              
88             sub Die {
89 0     0 0 0 my ($msg) = @_;
90 0         0 Perl::Tidy::Die($msg);
91 0         0 croak "unexpected return from Perl::Tidy::Die";
92             }
93              
94             sub Fault {
95 0     0 0 0 my ($msg) = @_;
96              
97             # This routine is called for errors that really should not occur
98             # except if there has been a bug introduced by a recent program change.
99             # Please add comments at calls to Fault to explain why the call
100             # should not occur, and where to look to fix it.
101 0         0 my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
102 0         0 my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
103 0         0 my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
104 0         0 my $pkg = __PACKAGE__;
105              
106 0         0 my $input_stream_name = get_input_stream_name();
107              
108 0         0 Die(<<EOM);
109             ==============================================================================
110             While operating on input stream with name: '$input_stream_name'
111             A fault was detected at line $line0 of sub '$subroutine1'
112             in file '$filename1'
113             which was called from line $line1 of sub '$subroutine2'
114             Message: '$msg'
115             This is probably an error introduced by a recent programming change.
116             $pkg reports VERSION='$VERSION'.
117             ==============================================================================
118             EOM
119              
120             # We shouldn't get here, but this return is to keep Perl-Critic from
121             # complaining.
122 0         0 return;
123             } ## end sub Fault
124              
125             my %valid_LINE_keys;
126              
127             BEGIN {
128              
129             # define valid keys in a line object
130 38     38   298 my @q = qw(
131             jmax
132             rtokens
133             rfields
134             rfield_lengths
135             rpatterns
136             indentation
137             leading_space_count
138             outdent_long_lines
139             list_type
140             list_seqno
141             is_hanging_side_comment
142             maximum_line_length
143             rvertical_tightness_flags
144             is_terminal_ternary
145             j_terminal_match
146             end_group
147             Kend
148             ci_level
149             level
150             level_end
151             imax_pair
152              
153             ralignments
154             );
155              
156 38         5103 @valid_LINE_keys{@q} = (1) x scalar(@q);
157             } ## end BEGIN
158              
159             BEGIN {
160              
161             # Define the fixed indexes for variables in $self, which is an array
162             # reference. Note the convention of leading and trailing underscores to
163             # keep them unique.
164             # Do not combine with other BEGIN blocks (c101).
165 38     38   166 my $i = 0;
166             use constant {
167 38         11336 _file_writer_object_ => $i++,
168             _logger_object_ => $i++,
169             _diagnostics_object_ => $i++,
170             _length_function_ => $i++,
171              
172             _rOpts_ => $i++,
173             _rOpts_indent_columns_ => $i++,
174             _rOpts_tabs_ => $i++,
175             _rOpts_entab_leading_whitespace_ => $i++,
176             _rOpts_fixed_position_side_comment_ => $i++,
177             _rOpts_minimum_space_to_comment_ => $i++,
178             _rOpts_valign_code_ => $i++,
179             _rOpts_valign_block_comments_ => $i++,
180             _rOpts_valign_side_comments_ => $i++,
181              
182             _last_level_written_ => $i++,
183             _last_side_comment_column_ => $i++,
184             _last_side_comment_line_number_ => $i++,
185             _last_side_comment_length_ => $i++,
186             _last_side_comment_level_ => $i++,
187             _outdented_line_count_ => $i++,
188             _first_outdented_line_at_ => $i++,
189             _last_outdented_line_at_ => $i++,
190             _consecutive_block_comments_ => $i++,
191              
192             _rgroup_lines_ => $i++,
193             _group_level_ => $i++,
194             _group_type_ => $i++,
195             _group_maximum_line_length_ => $i++,
196             _zero_count_ => $i++,
197             _last_leading_space_count_ => $i++,
198             _comment_leading_space_count_ => $i++,
199 38     38   306 };
  38         104  
200              
201             # Debug flag. This is a relic from the original program development
202             # looking for problems with tab characters. Caution: this debug flag can
203             # produce a lot of output It should be 0 except when debugging small
204             # scripts.
205              
206 38     38   302 use constant DEBUG_TABS => 0;
  38         82  
  38         4273  
207              
208             my $debug_warning = sub {
209 0         0 print STDOUT "VALIGN_DEBUGGING with key $_[0]\n";
210 0         0 return;
211 38         164 };
212              
213 38         51606 DEBUG_TABS && $debug_warning->('TABS');
214             } ## end BEGIN
215              
216             # GLOBAL variables
217             my (
218              
219             %valign_control_hash,
220             $valign_control_default,
221              
222             );
223              
224             sub check_options {
225              
226             # This routine is called to check the user-supplied run parameters
227             # and to configure the control hashes to them.
228 554     554 0 1928 my ($rOpts) = @_;
229              
230             # All alignments are done by default
231 554         1653 %valign_control_hash = ();
232 554         1326 $valign_control_default = 1;
233              
234             # If -vil=s is entered without -vxl, assume -vxl='*'
235 554 50 66     4457 if ( !$rOpts->{'valign-exclusion-list'}
236             && $rOpts->{'valign-inclusion-list'} )
237             {
238 0         0 $rOpts->{'valign-exclusion-list'} = '*';
239             }
240              
241             # See if the user wants to exclude any alignment types ...
242 554 100       2458 if ( $rOpts->{'valign-exclusion-list'} ) {
243              
244             # The inclusion list is only relevant if there is an exclusion list
245 3 100       15 if ( $rOpts->{'valign-inclusion-list'} ) {
246 1         7 my @vil = split /\s+/, $rOpts->{'valign-inclusion-list'};
247 1         5 @valign_control_hash{@vil} = (1) x scalar(@vil);
248             }
249              
250             # Note that the -vxl list is done after -vil, so -vxl has priority
251             # in the event of duplicate entries.
252 3         18 my @vxl = split /\s+/, $rOpts->{'valign-exclusion-list'};
253 3         16 @valign_control_hash{@vxl} = (0) x scalar(@vxl);
254              
255             # Optimization: revert to defaults if no exclusions.
256             # This could happen with -vxl=' ' and any -vil list
257 3 50       15 if ( !@vxl ) {
258 0         0 %valign_control_hash = ();
259             }
260              
261             # '$valign_control_default' applies to types not in the hash:
262             # - If a '*' was entered then set it to be that default type
263             # - Otherwise, leave it set it to 1
264 3 100       18 if ( defined( $valign_control_hash{'*'} ) ) {
265 1         3 $valign_control_default = $valign_control_hash{'*'};
266             }
267              
268             # Side comments are controlled separately and must be removed
269             # if given in a list.
270 3 50       12 if (%valign_control_hash) {
271 3         10 $valign_control_hash{'#'} = 1;
272             }
273             }
274              
275 554         1438 return;
276             } ## end sub check_options
277              
278             sub check_keys {
279 0     0 0 0 my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
280              
281             # Check the keys of a hash:
282             # $rtest = ref to hash to test
283             # $rvalid = ref to hash with valid keys
284              
285             # $msg = a message to write in case of error
286             # $exact_match defines the type of check:
287             # = false: test hash must not have unknown key
288             # = true: test hash must have exactly same keys as known hash
289             my @unknown_keys =
290 0         0 grep { !exists $rvalid->{$_} } keys %{$rtest};
  0         0  
  0         0  
291             my @missing_keys =
292 0         0 grep { !exists $rtest->{$_} } keys %{$rvalid};
  0         0  
  0         0  
293 0         0 my $error = @unknown_keys;
294 0 0 0     0 if ($exact_match) { $error ||= @missing_keys }
  0         0  
295 0 0       0 if ($error) {
296 0         0 local $LIST_SEPARATOR = ')(';
297 0         0 my @expected_keys = sort keys %{$rvalid};
  0         0  
298 0         0 @unknown_keys = sort @unknown_keys;
299 0         0 Fault(<<EOM);
300             ------------------------------------------------------------------------
301             Program error detected checking hash keys
302             Message is: '$msg'
303             Expected keys: (@expected_keys)
304             Unknown key(s): (@unknown_keys)
305             Missing key(s): (@missing_keys)
306             ------------------------------------------------------------------------
307             EOM
308             }
309 0         0 return;
310             } ## end sub check_keys
311              
312             sub new {
313              
314 555     555 0 3068 my ( $class, @args ) = @_;
315              
316             my %defaults = (
317             rOpts => undef,
318             file_writer_object => undef,
319             logger_object => undef,
320             diagnostics_object => undef,
321 0     0   0 length_function => sub { return length( $_[0] ) },
322 555         5887 );
323 555         3813 my %args = ( %defaults, @args );
324              
325             # Initialize other caches and buffers
326 555         3800 initialize_step_B_cache();
327 555         2991 initialize_valign_buffer();
328 555         2944 initialize_decode();
329 555         3024 set_logger_object( $args{logger_object} );
330              
331             # Initialize all variables in $self.
332             # To add an item to $self, first define a new constant index in the BEGIN
333             # section.
334 555         1571 my $self = [];
335              
336             # objects
337 555         1876 $self->[_file_writer_object_] = $args{file_writer_object};
338 555         1568 $self->[_logger_object_] = $args{logger_object};
339 555         1556 $self->[_diagnostics_object_] = $args{diagnostics_object};
340 555         1859 $self->[_length_function_] = $args{length_function};
341              
342             # shortcuts to user options
343 555         1418 my $rOpts = $args{rOpts};
344              
345 555         1438 $self->[_rOpts_] = $rOpts;
346 555         1633 $self->[_rOpts_indent_columns_] = $rOpts->{'indent-columns'};
347 555         1537 $self->[_rOpts_tabs_] = $rOpts->{'tabs'};
348             $self->[_rOpts_entab_leading_whitespace_] =
349 555         1440 $rOpts->{'entab-leading-whitespace'};
350             $self->[_rOpts_fixed_position_side_comment_] =
351 555         1448 $rOpts->{'fixed-position-side-comment'};
352             $self->[_rOpts_minimum_space_to_comment_] =
353 555         1473 $rOpts->{'minimum-space-to-comment'};
354 555         1494 $self->[_rOpts_valign_code_] = $rOpts->{'valign-code'};
355 555         1370 $self->[_rOpts_valign_block_comments_] = $rOpts->{'valign-block-comments'};
356 555         1485 $self->[_rOpts_valign_side_comments_] = $rOpts->{'valign-side-comments'};
357              
358             # Batch of lines being collected
359 555         1686 $self->[_rgroup_lines_] = [];
360 555         1405 $self->[_group_level_] = 0;
361 555         1448 $self->[_group_type_] = EMPTY_STRING;
362 555         1677 $self->[_group_maximum_line_length_] = undef;
363 555         1507 $self->[_zero_count_] = 0;
364 555         1439 $self->[_comment_leading_space_count_] = 0;
365 555         1399 $self->[_last_leading_space_count_] = 0;
366              
367             # Memory of what has been processed
368 555         1275 $self->[_last_level_written_] = -1;
369 555         1262 $self->[_last_side_comment_column_] = 0;
370 555         1288 $self->[_last_side_comment_line_number_] = 0;
371 555         1213 $self->[_last_side_comment_length_] = 0;
372 555         1288 $self->[_last_side_comment_level_] = -1;
373 555         1224 $self->[_outdented_line_count_] = 0;
374 555         1249 $self->[_first_outdented_line_at_] = 0;
375 555         1243 $self->[_last_outdented_line_at_] = 0;
376 555         1217 $self->[_consecutive_block_comments_] = 0;
377              
378 555         1346 bless $self, $class;
379 555         4746 return $self;
380             } ## end sub new
381              
382             #################################
383             # CODE SECTION 2: Basic Utilities
384             #################################
385              
386             sub flush {
387              
388             # flush() is the external call to completely empty the pipeline.
389 1806     1806 0 3958 my ($self) = @_;
390              
391             # push things out the pipeline...
392              
393             # push out any current group lines
394 1806         5914 $self->_flush_group_lines();
395              
396             # then anything left in the cache of step_B
397 1806         7230 $self->_flush_step_B_cache();
398              
399             # then anything left in the buffer of step_C
400 1806         5568 $self->dump_valign_buffer();
401              
402 1806         3591 return;
403             } ## end sub flush
404              
405             sub initialize_for_new_group {
406 2234     2234 0 4889 my ($self) = @_;
407              
408 2234         5042 $self->[_rgroup_lines_] = [];
409 2234         4488 $self->[_group_type_] = EMPTY_STRING;
410 2234         3967 $self->[_zero_count_] = 0;
411 2234         3855 $self->[_comment_leading_space_count_] = 0;
412 2234         3672 $self->[_last_leading_space_count_] = 0;
413 2234         3852 $self->[_group_maximum_line_length_] = undef;
414              
415             # Note that the value for _group_level_ is
416             # handled separately in sub valign_input
417 2234         3735 return;
418             } ## end sub initialize_for_new_group
419              
420             sub group_line_count {
421 73     73 0 113 return +@{ $_[0]->[_rgroup_lines_] };
  73         390  
422             }
423              
424             # interface to Perl::Tidy::Diagnostics routines
425             # For debugging; not currently used
426             sub write_diagnostics {
427 0     0 0 0 my ( $self, $msg ) = @_;
428 0         0 my $diagnostics_object = $self->[_diagnostics_object_];
429 0 0       0 if ($diagnostics_object) {
430 0         0 $diagnostics_object->write_diagnostics($msg);
431             }
432 0         0 return;
433             } ## end sub write_diagnostics
434              
435             { ## begin closure for logger routines
436             my $logger_object;
437              
438             # Called once per file to initialize the logger object
439             sub set_logger_object {
440 555     555 0 21832 $logger_object = shift;
441 555         1358 return;
442             }
443              
444             sub get_logger_object {
445 0     0 0 0 return $logger_object;
446             }
447              
448             sub get_input_stream_name {
449 0     0 0 0 my $input_stream_name = EMPTY_STRING;
450 0 0       0 if ($logger_object) {
451 0         0 $input_stream_name = $logger_object->get_input_stream_name();
452             }
453 0         0 return $input_stream_name;
454             } ## end sub get_input_stream_name
455              
456             sub warning {
457 0     0 0 0 my ($msg) = @_;
458 0 0       0 if ($logger_object) {
459 0         0 $logger_object->warning($msg);
460             }
461 0         0 return;
462             } ## end sub warning
463              
464             sub write_logfile_entry {
465 91     91 0 207 my ($msg) = @_;
466 91 50       219 if ($logger_object) {
467 91         224 $logger_object->write_logfile_entry($msg);
468             }
469 91         163 return;
470             } ## end sub write_logfile_entry
471             }
472              
473             sub get_cached_line_count {
474 1     1 0 3 my $self = shift;
475 1 50       4 return $self->group_line_count() + ( get_cached_line_type() ? 1 : 0 );
476             }
477              
478             sub get_recoverable_spaces {
479              
480             # return the number of spaces (+ means shift right, - means shift left)
481             # that we would like to shift a group of lines with the same indentation
482             # to get them to line up with their opening parens
483 4107     4107 0 7117 my $indentation = shift;
484 4107 100       14916 return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
485             } ## end sub get_recoverable_spaces
486              
487             ######################################################
488             # CODE SECTION 3: Code to accept input and form groups
489             ######################################################
490              
491 38     38   377 use constant DEBUG_VALIGN => 0;
  38         125  
  38         2604  
492 38     38   298 use constant SC_LONG_LINE_DIFF => 12;
  38         106  
  38         3461  
493              
494             my %is_closing_token;
495              
496             BEGIN {
497 38     38   244 my @q = qw< } ) ] >;
498 38         62479 @is_closing_token{@q} = (1) x scalar(@q);
499             }
500              
501             #--------------------------------------------
502             # VTFLAGS: Vertical tightness types and flags
503             #--------------------------------------------
504             # Vertical tightness is controlled by a 'type' and associated 'flags' for each
505             # line. These values are set by sub Formatter::set_vertical_tightness_flags.
506             # These are defined as follows:
507              
508             # Vertical Tightness Line Type Codes:
509             # Type 0, no vertical tightness condition
510             # Type 1, last token of this line is a non-block opening token
511             # Type 2, first token of next line is a non-block closing
512             # Type 3, isolated opening block brace
513             # type 4, isolated closing block brace
514              
515             # Opening token flag values are the vertical tightness flags
516             # 0 do not join with next line
517             # 1 just one join per line
518             # 2 any number of joins
519              
520             # Closing token flag values indicate spacing:
521             # 0 = no space added before closing token
522             # 1 = single space added before closing token
523              
524             sub valign_input {
525              
526             #---------------------------------------------------------------------
527             # This is the front door of the vertical aligner. On each call
528             # we receive one line of specially marked text for vertical alignment.
529             # We compare the line with the current group, and either:
530             # - the line joins the current group if alignments match, or
531             # - the current group is flushed and a new group is started otherwise
532             #---------------------------------------------------------------------
533             #
534             # The key input parameters describing each line are:
535             # $level = indentation level of this line
536             # $rfields = ref to array of fields
537             # $rpatterns = ref to array of patterns, one per field
538             # $rtokens = ref to array of tokens starting fields 1,2,..
539             # $rfield_lengths = ref to array of field display widths
540             #
541             # Here is an example of what this package does. In this example,
542             # we are trying to line up both the '=>' and the '#'.
543             #
544             # '18' => 'grave', # \`
545             # '19' => 'acute', # `'
546             # '20' => 'caron', # \v
547             # <-tabs-><f1-><--field 2 ---><-f3->
548             # | | | |
549             # | | | |
550             # col1 col2 col3 col4
551             #
552             # The calling routine has already broken the entire line into 3 fields as
553             # indicated. (So the work of identifying promising common tokens has
554             # already been done).
555             #
556             # In this example, there will be 2 tokens being matched: '=>' and '#'.
557             # They are the leading parts of fields 2 and 3, but we do need to know
558             # what they are so that we can dump a group of lines when these tokens
559             # change.
560             #
561             # The fields contain the actual characters of each field. The patterns
562             # are like the fields, but they contain mainly token types instead
563             # of tokens, so they have fewer characters. They are used to be
564             # sure we are matching fields of similar type.
565             #
566             # In this example, there will be 4 column indexes being adjusted. The
567             # first one is always at zero. The interior columns are at the start of
568             # the matching tokens, and the last one tracks the maximum line length.
569             #
570             # Each time a new line comes in, it joins the current vertical
571             # group if possible. Otherwise it causes the current group to be flushed
572             # and a new group is started.
573             #
574             # For each new group member, the column locations are increased, as
575             # necessary, to make room for the new fields. When the group is finally
576             # output, these column numbers are used to compute the amount of spaces of
577             # padding needed for each field.
578             #
579             # Programming note: the fields are assumed not to have any tab characters.
580             # Tabs have been previously removed except for tabs in quoted strings and
581             # side comments. Tabs in these fields can mess up the column counting.
582             # The log file warns the user if there are any such tabs.
583              
584 7366     7366 0 15652 my ( $self, $rcall_hash ) = @_;
585              
586             # Unpack the call args. This form is significantly faster than getting them
587             # one-by-one.
588             my (
589              
590             $Kend,
591             $break_alignment_after,
592             $break_alignment_before,
593             $ci_level,
594             $forget_side_comment,
595             $indentation,
596             $is_terminal_ternary,
597             $level,
598             $level_end,
599             $list_seqno,
600             $maximum_line_length,
601             $outdent_long_lines,
602             $rline_alignment,
603             $rvertical_tightness_flags,
604              
605             ) =
606              
607 7366         28864 @{$rcall_hash}{
608 7366         13696 qw(
609             Kend
610             break_alignment_after
611             break_alignment_before
612             ci_level
613             forget_side_comment
614             indentation
615             is_terminal_ternary
616             level
617             level_end
618             list_seqno
619             maximum_line_length
620             outdent_long_lines
621             rline_alignment
622             rvertical_tightness_flags
623             )
624             };
625              
626             my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
627 7366         12061 @{$rline_alignment};
  7366         14354  
628              
629             # The index '$Kend' is a value which passed along with the line text to sub
630             # 'write_code_line' for a convergence check.
631              
632             # number of fields is $jmax
633             # number of tokens between fields is $jmax-1
634 7366         11015 my $jmax = @{$rfields} - 1;
  7366         12509  
635              
636 7366 100       16238 my $leading_space_count =
637             ref($indentation) ? $indentation->get_spaces() : $indentation;
638              
639             # set outdented flag to be sure we either align within statements or
640             # across statement boundaries, but not both.
641 7366         14013 my $is_outdented =
642             $self->[_last_leading_space_count_] > $leading_space_count;
643 7366         11988 $self->[_last_leading_space_count_] = $leading_space_count;
644              
645             # Identify a hanging side comment. Hanging side comments have an empty
646             # initial field.
647 7366   100     24333 my $is_hanging_side_comment =
648             ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
649              
650             # Undo outdented flag for a hanging side comment
651 7366 100       14980 $is_outdented = 0 if $is_hanging_side_comment;
652              
653             # Identify a block comment.
654 7366   100     23777 my $is_block_comment = $jmax == 0 && substr( $rfields->[0], 0, 1 ) eq '#';
655              
656             # Block comment .. update count
657 7366 100       14399 if ($is_block_comment) {
658 632         1297 $self->[_consecutive_block_comments_]++;
659             }
660              
661             # Not a block comment ..
662             # Forget side comment column if we saw 2 or more block comments,
663             # and reset the count
664             else {
665              
666 6734 100       14696 if ( $self->[_consecutive_block_comments_] > 1 ) {
667 67         439 $self->forget_side_comment();
668             }
669 6734         10941 $self->[_consecutive_block_comments_] = 0;
670             }
671              
672             # Reset side comment location if we are entering a new block from level 0.
673             # This is intended to keep them from drifting too far to the right.
674 7366 100       14455 if ($forget_side_comment) {
675 44         240 $self->forget_side_comment();
676             }
677              
678 7366         12025 my $is_balanced_line = $level_end == $level;
679              
680 7366         11873 my $group_level = $self->[_group_level_];
681 7366         11561 my $group_maximum_line_length = $self->[_group_maximum_line_length_];
682              
683 7366         9757 DEBUG_VALIGN && do {
684             my $nlines = $self->group_line_count();
685             print STDOUT
686             "Entering valign_input: lines=$nlines new #fields= $jmax, leading_count=$leading_space_count, level=$level, group_level=$group_level, level_end=$level_end\n";
687             };
688              
689             # Validate cached line if necessary: If we can produce a container
690             # with just 2 lines total by combining an existing cached opening
691             # token with the closing token to follow, then we will mark both
692             # cached flags as valid.
693 7366         17843 my $cached_line_type = get_cached_line_type();
694 7366 100       15915 if ($cached_line_type) {
695 224         721 my $cached_line_opening_flag = get_cached_line_opening_flag();
696 224 50       618 if ($rvertical_tightness_flags) {
697 224         558 my $cached_seqno = get_cached_seqno();
698 224 100 100     1272 if ( $cached_seqno
      100        
699             && $rvertical_tightness_flags->{_vt_seqno}
700             && $rvertical_tightness_flags->{_vt_seqno} == $cached_seqno )
701             {
702              
703             # Fix for b1187 and b1188: Normally this step is only done
704             # if the number of existing lines is 0 or 1. But to prevent
705             # blinking, this range can be controlled by the caller.
706             # If zero values are given we fall back on the range 0 to 1.
707 4         25 my $line_count = $self->group_line_count();
708 4         13 my $min_lines = $rvertical_tightness_flags->{_vt_min_lines};
709 4         10 my $max_lines = $rvertical_tightness_flags->{_vt_max_lines};
710 4 50       18 $min_lines = 0 unless ($min_lines);
711 4 50       16 $max_lines = 1 unless ($max_lines);
712 4 100 66     28 if ( ( $line_count >= $min_lines )
713             && ( $line_count <= $max_lines ) )
714             {
715 3   50     31 $rvertical_tightness_flags->{_vt_valid_flag} ||= 1;
716 3         19 set_cached_line_valid(1);
717             }
718             }
719             }
720              
721             # do not join an opening block brace (type 3, see VTFLAGS)
722             # with an unbalanced line unless requested with a flag value of 2
723 224 50 100     693 if ( $cached_line_type == 3
      66        
      66        
724             && !$self->group_line_count()
725             && $cached_line_opening_flag < 2
726             && !$is_balanced_line )
727             {
728 0         0 set_cached_line_valid(0);
729             }
730             }
731              
732             # shouldn't happen:
733 7366 50       15745 if ( $level < 0 ) { $level = 0 }
  0         0  
734              
735             # do not align code across indentation level changes
736             # or changes in the maximum line length
737             # or if vertical alignment is turned off
738 7366 100 66     56883 if (
      66        
      66        
      100        
      100        
      100        
      100        
739             $level != $group_level
740             || ( $group_maximum_line_length
741             && $maximum_line_length != $group_maximum_line_length )
742             || $is_outdented
743             || ( $is_block_comment && !$self->[_rOpts_valign_block_comments_] )
744             || ( !$is_block_comment
745             && !$self->[_rOpts_valign_side_comments_]
746             && !$self->[_rOpts_valign_code_] )
747             )
748             {
749              
750 2833         11157 $self->_flush_group_lines( $level - $group_level );
751              
752 2833         5111 $group_level = $level;
753 2833         5135 $self->[_group_level_] = $group_level;
754 2833         4637 $self->[_group_maximum_line_length_] = $maximum_line_length;
755              
756             # Update leading spaces after the above flush because the leading space
757             # count may have been changed if the -icp flag is in effect
758 2833 100       6490 $leading_space_count =
759             ref($indentation) ? $indentation->get_spaces() : $indentation;
760             }
761              
762             # --------------------------------------------------------------------
763             # Collect outdentable block COMMENTS
764             # --------------------------------------------------------------------
765 7366 100       17277 if ( $self->[_group_type_] eq 'COMMENT' ) {
766 558 100 66     2993 if ( $is_block_comment
      66        
767             && $outdent_long_lines
768             && $leading_space_count == $self->[_comment_leading_space_count_] )
769             {
770              
771             # Note that for a comment group we are not storing a line
772             # but rather just the text and its length.
773 77         168 push @{ $self->[_rgroup_lines_] },
  77         358  
774             [ $rfields->[0], $rfield_lengths->[0], $Kend ];
775 77         286 return;
776             }
777             else {
778 481         2072 $self->_flush_group_lines();
779             }
780             }
781              
782 7289         11923 my $rgroup_lines = $self->[_rgroup_lines_];
783 7289 100 100     17225 if ( $break_alignment_before && @{$rgroup_lines} ) {
  111         496  
784 27         121 $rgroup_lines->[-1]->{'end_group'} = 1;
785             }
786              
787             # --------------------------------------------------------------------
788             # add dummy fields for terminal ternary
789             # --------------------------------------------------------------------
790 7289         10382 my $j_terminal_match;
791              
792 7289 100 100     15467 if ( $is_terminal_ternary && @{$rgroup_lines} ) {
  16         114  
793 13         89 $j_terminal_match =
794             fix_terminal_ternary( $rgroup_lines->[-1], $rfields, $rtokens,
795             $rpatterns, $rfield_lengths, $group_level, );
796 13         37 $jmax = @{$rfields} - 1;
  13         48  
797             }
798              
799             # --------------------------------------------------------------------
800             # add dummy fields for else statement
801             # --------------------------------------------------------------------
802              
803             # Note the trailing space after 'else' here. If there were no space between
804             # the else and the next '{' then we would not be able to do vertical
805             # alignment of the '{'.
806 7289 100 100     17978 if ( $rfields->[0] eq 'else '
      66        
807 12         103 && @{$rgroup_lines}
808             && $is_balanced_line )
809             {
810              
811 9         73 $j_terminal_match =
812             fix_terminal_else( $rgroup_lines->[-1], $rfields, $rtokens,
813             $rpatterns, $rfield_lengths );
814 9         23 $jmax = @{$rfields} - 1;
  9         31  
815             }
816              
817             # --------------------------------------------------------------------
818             # Handle simple line of code with no fields to match.
819             # --------------------------------------------------------------------
820 7289 100       14478 if ( $jmax <= 0 ) {
821 4272         7606 $self->[_zero_count_]++;
822              
823 4272 100 100     6185 if ( @{$rgroup_lines}
  4272         12809  
824             && !get_recoverable_spaces( $rgroup_lines->[0]->{'indentation'} ) )
825             {
826              
827             # flush the current group if it has some aligned columns..
828             # or we haven't seen a comment lately
829 337 100 100     1593 if ( $rgroup_lines->[0]->{'jmax'} > 1
830             || $self->[_zero_count_] > 3 )
831             {
832 308         1192 $self->_flush_group_lines();
833              
834             # Update '$rgroup_lines' - it will become a ref to empty array.
835             # This allows avoiding a call to get_group_line_count below.
836 308         894 $rgroup_lines = $self->[_rgroup_lines_];
837             }
838             }
839              
840             # start new COMMENT group if this comment may be outdented
841 4272 100 100     12427 if ( $is_block_comment
      66        
842             && $outdent_long_lines
843 531         1954 && !@{$rgroup_lines} )
844             {
845 531         1284 $self->[_group_type_] = 'COMMENT';
846 531         1091 $self->[_comment_leading_space_count_] = $leading_space_count;
847 531         1031 $self->[_group_maximum_line_length_] = $maximum_line_length;
848 531         969 push @{$rgroup_lines},
  531         1984  
849             [ $rfields->[0], $rfield_lengths->[0], $Kend ];
850 531         1995 return;
851             }
852              
853             # just write this line directly if no current group, no side comment,
854             # and no space recovery is needed.
855 3741 100 100     5801 if ( !@{$rgroup_lines}
  3741         12670  
856             && !get_recoverable_spaces($indentation) )
857             {
858              
859 3697         33567 $self->valign_output_step_B(
860             {
861             leading_space_count => $leading_space_count,
862             line => $rfields->[0],
863             line_length => $rfield_lengths->[0],
864             side_comment_length => 0,
865             outdent_long_lines => $outdent_long_lines,
866             rvertical_tightness_flags => $rvertical_tightness_flags,
867             level => $level,
868             level_end => $level_end,
869             Kend => $Kend,
870             maximum_line_length => $maximum_line_length,
871             }
872             );
873 3697         16776 return;
874             }
875             }
876             else {
877 3017         5917 $self->[_zero_count_] = 0;
878             }
879              
880             # --------------------------------------------------------------------
881             # It simplifies things to create a zero length side comment
882             # if none exists.
883             # --------------------------------------------------------------------
884 3061 100 100     13492 if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) {
885 2736         4755 $jmax += 1;
886 2736         5809 $rtokens->[ $jmax - 1 ] = '#';
887 2736         5465 $rfields->[$jmax] = EMPTY_STRING;
888 2736         4816 $rfield_lengths->[$jmax] = 0;
889 2736         5644 $rpatterns->[$jmax] = '#';
890             }
891              
892             # --------------------------------------------------------------------
893             # create an object to hold this line
894             # --------------------------------------------------------------------
895              
896             # The hash keys below must match the list of keys in %valid_LINE_keys.
897             # Values in this hash are accessed directly, except for 'ralignments',
898             # rather than with get/set calls for efficiency.
899 3061         52591 my $new_line = Perl::Tidy::VerticalAligner::Line->new(
900             {
901             jmax => $jmax,
902             rtokens => $rtokens,
903             rfields => $rfields,
904             rpatterns => $rpatterns,
905             rfield_lengths => $rfield_lengths,
906             indentation => $indentation,
907             leading_space_count => $leading_space_count,
908             outdent_long_lines => $outdent_long_lines,
909             list_seqno => $list_seqno,
910             list_type => EMPTY_STRING,
911             is_hanging_side_comment => $is_hanging_side_comment,
912             rvertical_tightness_flags => $rvertical_tightness_flags,
913             is_terminal_ternary => $is_terminal_ternary,
914             j_terminal_match => $j_terminal_match,
915             end_group => $break_alignment_after,
916             Kend => $Kend,
917             ci_level => $ci_level,
918             level => $level,
919             level_end => $level_end,
920             imax_pair => -1,
921             maximum_line_length => $maximum_line_length,
922              
923             ralignments => [],
924             }
925             );
926              
927 3061         5119 DEVEL_MODE
928             && check_keys( $new_line, \%valid_LINE_keys,
929             "Checking line keys at line definition", 1 );
930              
931             # --------------------------------------------------------------------
932             # Decide if this is a simple list of items.
933             # We use this to be less restrictive in deciding what to align.
934             # --------------------------------------------------------------------
935 3061 100       8820 decide_if_list($new_line) if ($list_seqno);
936              
937             # --------------------------------------------------------------------
938             # Append this line to the current group (or start new group)
939             # --------------------------------------------------------------------
940              
941 3061         4701 push @{ $self->[_rgroup_lines_] }, $new_line;
  3061         7532  
942 3061         5878 $self->[_group_maximum_line_length_] = $maximum_line_length;
943              
944             # output this group if it ends in a terminal else or ternary line
945 3061 100 100     17458 if ( defined($j_terminal_match) ) {
    100          
946 20         126 $self->_flush_group_lines();
947             }
948              
949             # Force break after jump to lower level
950             elsif ($level_end < $level
951             || $is_closing_token{ substr( $rfields->[0], 0, 1 ) } )
952             {
953 119         564 $self->_flush_group_lines(-1);
954             }
955              
956             # --------------------------------------------------------------------
957             # Some old debugging stuff
958             # --------------------------------------------------------------------
959 3061         4833 DEBUG_VALIGN && do {
960             print STDOUT "exiting valign_input fields:";
961             dump_array( @{$rfields} );
962             print STDOUT "exiting valign_input tokens:";
963             dump_array( @{$rtokens} );
964             print STDOUT "exiting valign_input patterns:";
965             dump_array( @{$rpatterns} );
966             };
967              
968 3061         9402 return;
969             } ## end sub valign_input
970              
971             sub join_hanging_comment {
972              
973             # Add dummy fields to a hanging side comment to make it look
974             # like the first line in its potential group. This simplifies
975             # the coding.
976 38     38 0 102 my ( $new_line, $old_line ) = @_;
977              
978 38         86 my $jmax = $new_line->{'jmax'};
979              
980             # must be 2 fields
981 38 50       113 return 0 unless $jmax == 1;
982 38         80 my $rtokens = $new_line->{'rtokens'};
983              
984             # the second field must be a comment
985 38 50       132 return 0 unless $rtokens->[0] eq '#';
986 38         71 my $rfields = $new_line->{'rfields'};
987              
988             # the first field must be empty
989 38 50       222 return 0 unless $rfields->[0] =~ /^\s*$/;
990              
991             # the current line must have fewer fields
992 38         94 my $maximum_field_index = $old_line->{'jmax'};
993 38 100       139 return 0
994             unless $maximum_field_index > $jmax;
995              
996             # looks ok..
997 3         9 my $rpatterns = $new_line->{'rpatterns'};
998 3         5 my $rfield_lengths = $new_line->{'rfield_lengths'};
999              
1000 3         9 $new_line->{'is_hanging_side_comment'} = 1;
1001              
1002 3         6 $jmax = $maximum_field_index;
1003 3         9 $new_line->{'jmax'} = $jmax;
1004 3         6 $rfields->[$jmax] = $rfields->[1];
1005 3         6 $rfield_lengths->[$jmax] = $rfield_lengths->[1];
1006 3         12 $rtokens->[ $jmax - 1 ] = $rtokens->[0];
1007 3         11 $rpatterns->[ $jmax - 1 ] = $rpatterns->[0];
1008              
1009 3         12 foreach my $j ( 1 .. $jmax - 1 ) {
1010 3         9 $rfields->[$j] = EMPTY_STRING;
1011 3         7 $rfield_lengths->[$j] = 0;
1012 3         10 $rtokens->[ $j - 1 ] = EMPTY_STRING;
1013 3         10 $rpatterns->[ $j - 1 ] = EMPTY_STRING;
1014             }
1015 3         7 return 1;
1016             } ## end sub join_hanging_comment
1017              
1018             { ## closure for sub decide_if_list
1019              
1020             my %is_comma_token;
1021              
1022             BEGIN {
1023              
1024 38     38   249 my @q = qw( => );
1025 38         182 push @q, ',';
1026 38         9570 @is_comma_token{@q} = (1) x scalar(@q);
1027             } ## end BEGIN
1028              
1029             sub decide_if_list {
1030              
1031 1032     1032 0 1889 my $line = shift;
1032              
1033             # A list will be taken to be a line with a forced break in which all
1034             # of the field separators are commas or comma-arrows (except for the
1035             # trailing #)
1036              
1037 1032         2002 my $rtokens = $line->{'rtokens'};
1038 1032         2140 my $test_token = $rtokens->[0];
1039 1032         2753 my ( $raw_tok, $lev, $tag, $tok_count ) =
1040             decode_alignment_token($test_token);
1041 1032 100       3238 if ( $is_comma_token{$raw_tok} ) {
1042 930         1634 my $list_type = $test_token;
1043 930         1669 my $jmax = $line->{'jmax'};
1044              
1045 930         2752 foreach ( 1 .. $jmax - 2 ) {
1046 871         1768 ( $raw_tok, $lev, $tag, $tok_count ) =
1047             decode_alignment_token( $rtokens->[$_] );
1048 871 100       2516 if ( !$is_comma_token{$raw_tok} ) {
1049 26         65 $list_type = EMPTY_STRING;
1050 26         58 last;
1051             }
1052             }
1053 930         2081 $line->{'list_type'} = $list_type;
1054             }
1055 1032         2021 return;
1056             } ## end sub decide_if_list
1057             }
1058              
1059             sub fix_terminal_ternary {
1060              
1061             # Add empty fields as necessary to align a ternary term
1062             # like this:
1063             #
1064             # my $leapyear =
1065             # $year % 4 ? 0
1066             # : $year % 100 ? 1
1067             # : $year % 400 ? 0
1068             # : 1;
1069             #
1070             # returns the index of the terminal question token, if any
1071              
1072 13     13 0 61 my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths,
1073             $group_level )
1074             = @_;
1075              
1076 13 50       52 return unless ($old_line);
1077 38     38   346 use constant EXPLAIN_TERNARY => 0;
  38         113  
  38         53384  
1078              
1079 13 50       55 if (%valign_control_hash) {
1080 0         0 my $align_ok = $valign_control_hash{'?'};
1081 0 0       0 $align_ok = $valign_control_default unless defined($align_ok);
1082 0 0       0 return unless ($align_ok);
1083             }
1084              
1085 13         40 my $jmax = @{$rfields} - 1;
  13         58  
1086 13         51 my $rfields_old = $old_line->{'rfields'};
1087              
1088 13         38 my $rpatterns_old = $old_line->{'rpatterns'};
1089 13         34 my $rtokens_old = $old_line->{'rtokens'};
1090 13         27 my $maximum_field_index = $old_line->{'jmax'};
1091              
1092             # look for the question mark after the :
1093 13         51 my ($jquestion);
1094             my $depth_question;
1095 13         47 my $pad = EMPTY_STRING;
1096 13         37 my $pad_length = 0;
1097 13         51 foreach my $j ( 0 .. $maximum_field_index - 1 ) {
1098 14         44 my $tok = $rtokens_old->[$j];
1099 14         76 my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
1100 14 100       73 if ( $raw_tok eq '?' ) {
1101 13         36 $depth_question = $lev;
1102              
1103             # depth must be correct
1104 13 50       56 next unless ( $depth_question eq $group_level );
1105              
1106 13         33 $jquestion = $j;
1107 13 50       99 if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
1108 13         74 $pad_length = length($1);
1109 13         64 $pad = SPACE x $pad_length;
1110             }
1111             else {
1112 0         0 return; # shouldn't happen
1113             }
1114 13         62 last;
1115             }
1116             }
1117 13 50       70 return unless ( defined($jquestion) ); # shouldn't happen
1118              
1119             # Now splice the tokens and patterns of the previous line
1120             # into the else line to insure a match. Add empty fields
1121             # as necessary.
1122 13         35 my $jadd = $jquestion;
1123              
1124             # Work on copies of the actual arrays in case we have
1125             # to return due to an error
1126 13         28 my @fields = @{$rfields};
  13         46  
1127 13         44 my @patterns = @{$rpatterns};
  13         47  
1128 13         32 my @tokens = @{$rtokens};
  13         35  
1129 13         34 my @field_lengths = @{$rfield_lengths};
  13         32  
1130              
1131 13         30 EXPLAIN_TERNARY && do {
1132             local $LIST_SEPARATOR = '><';
1133             print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
1134             print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
1135             print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
1136             print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n";
1137             print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n";
1138             print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
1139             };
1140              
1141             # handle cases of leading colon on this line
1142 13 50       96 if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
1143              
1144 13         60 my ( $colon, $therest ) = ( $1, $2 );
1145              
1146             # Handle sub-case of first field with leading colon plus additional code
1147             # This is the usual situation as at the '1' below:
1148             # ...
1149             # : $year % 400 ? 0
1150             # : 1;
1151 13 50       54 if ($therest) {
1152              
1153             # Split the first field after the leading colon and insert padding.
1154             # Note that this padding will remain even if the terminal value goes
1155             # out on a separate line. This does not seem to look to bad, so no
1156             # mechanism has been included to undo it.
1157 13         38 my $field1 = shift @fields;
1158 13         36 my $field_length1 = shift @field_lengths;
1159 13         37 my $len_colon = length($colon);
1160 13         57 unshift @fields, ( $colon, $pad . $therest );
1161 13         50 unshift @field_lengths,
1162             ( $len_colon, $pad_length + $field_length1 - $len_colon );
1163              
1164             # change the leading pattern from : to ?
1165 13 50       100 return unless ( $patterns[0] =~ s/^\:/?/ );
1166              
1167             # install leading tokens and patterns of existing line
1168 13         53 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
  13         52  
1169 13         40 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
  13         63  
1170              
1171             # insert appropriate number of empty fields
1172 13 100       55 splice( @fields, 1, 0, (EMPTY_STRING) x $jadd ) if $jadd;
1173 13 100       66 splice( @field_lengths, 1, 0, (0) x $jadd ) if $jadd;
1174             }
1175              
1176             # handle sub-case of first field just equal to leading colon.
1177             # This can happen for example in the example below where
1178             # the leading '(' would create a new alignment token
1179             # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
1180             # : ( $mname = $name . '->' );
1181             else {
1182              
1183 0 0 0     0 return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
1184              
1185             # prepend a leading ? onto the second pattern
1186 0         0 $patterns[1] = "?b" . $patterns[1];
1187              
1188             # pad the second field
1189 0         0 $fields[1] = $pad . $fields[1];
1190 0         0 $field_lengths[1] = $pad_length + $field_lengths[1];
1191              
1192             # install leading tokens and patterns of existing line, replacing
1193             # leading token and inserting appropriate number of empty fields
1194 0         0 splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
  0         0  
1195 0         0 splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
  0         0  
1196 0 0       0 splice( @fields, 1, 0, (EMPTY_STRING) x $jadd ) if $jadd;
1197 0 0       0 splice( @field_lengths, 1, 0, (0) x $jadd ) if $jadd;
1198             }
1199             }
1200              
1201             # Handle case of no leading colon on this line. This will
1202             # be the case when -wba=':' is used. For example,
1203             # $year % 400 ? 0 :
1204             # 1;
1205             else {
1206              
1207             # install leading tokens and patterns of existing line
1208 0         0 $patterns[0] = '?' . 'b' . $patterns[0];
1209 0         0 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
  0         0  
1210 0         0 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
  0         0  
1211              
1212             # insert appropriate number of empty fields
1213 0         0 $jadd = $jquestion + 1;
1214 0         0 $fields[0] = $pad . $fields[0];
1215 0         0 $field_lengths[0] = $pad_length + $field_lengths[0];
1216 0 0       0 splice( @fields, 0, 0, (EMPTY_STRING) x $jadd ) if $jadd;
1217 0 0       0 splice( @field_lengths, 0, 0, (0) x $jadd ) if $jadd;
1218             }
1219              
1220 13         24 EXPLAIN_TERNARY && do {
1221             local $LIST_SEPARATOR = '><';
1222             print STDOUT "MODIFIED TOKENS=<@tokens>\n";
1223             print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
1224             print STDOUT "MODIFIED FIELDS=<@fields>\n";
1225             };
1226              
1227             # all ok .. update the arrays
1228 13         43 @{$rfields} = @fields;
  13         52  
1229 13         35 @{$rtokens} = @tokens;
  13         38  
1230 13         45 @{$rpatterns} = @patterns;
  13         43  
1231 13         33 @{$rfield_lengths} = @field_lengths;
  13         66  
1232              
1233             # force a flush after this line
1234 13         62 return $jquestion;
1235             } ## end sub fix_terminal_ternary
1236              
1237             sub fix_terminal_else {
1238              
1239             # Add empty fields as necessary to align a balanced terminal
1240             # else block to a previous if/elsif/unless block,
1241             # like this:
1242             #
1243             # if ( 1 || $x ) { print "ok 13\n"; }
1244             # else { print "not ok 13\n"; }
1245             #
1246             # returns a positive value if the else block should be indented
1247             #
1248 9     9 0 43 my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths ) = @_;
1249              
1250 9 50       41 return unless ($old_line);
1251 9         25 my $jmax = @{$rfields} - 1;
  9         26  
1252 9 50       40 return unless ( $jmax > 0 );
1253              
1254 9 50       33 if (%valign_control_hash) {
1255 0         0 my $align_ok = $valign_control_hash{'{'};
1256 0 0       0 $align_ok = $valign_control_default unless defined($align_ok);
1257 0 0       0 return unless ($align_ok);
1258             }
1259              
1260             # check for balanced else block following if/elsif/unless
1261 9         28 my $rfields_old = $old_line->{'rfields'};
1262              
1263             # TBD: add handling for 'case'
1264 9 100       83 return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
1265              
1266             # look for the opening brace after the else, and extract the depth
1267 7         26 my $tok_brace = $rtokens->[0];
1268 7         13 my $depth_brace;
1269 7 50       59 if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
  7         30  
1270              
1271             # probably: "else # side_comment"
1272 0         0 else { return }
1273              
1274 7         23 my $rpatterns_old = $old_line->{'rpatterns'};
1275 7         26 my $rtokens_old = $old_line->{'rtokens'};
1276 7         17 my $maximum_field_index = $old_line->{'jmax'};
1277              
1278             # be sure the previous if/elsif is followed by an opening paren
1279 7         15 my $jparen = 0;
1280 7         22 my $tok_paren = '(' . $depth_brace;
1281 7         22 my $tok_test = $rtokens_old->[$jparen];
1282 7 50       37 return unless ( $tok_test eq $tok_paren ); # shouldn't happen
1283              
1284             # Now find the opening block brace
1285 7         15 my ($jbrace);
1286 7         33 foreach my $j ( 1 .. $maximum_field_index - 1 ) {
1287 8         19 my $tok = $rtokens_old->[$j];
1288 8 100       31 if ( $tok eq $tok_brace ) {
1289 7         16 $jbrace = $j;
1290 7         17 last;
1291             }
1292             }
1293 7 50       31 return unless ( defined($jbrace) ); # shouldn't happen
1294              
1295             # Now splice the tokens and patterns of the previous line
1296             # into the else line to insure a match. Add empty fields
1297             # as necessary.
1298 7         21 my $jadd = $jbrace - $jparen;
1299 7         19 splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
  7         33  
  7         28  
1300 7         15 splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
  7         22  
  7         23  
1301 7         15 splice( @{$rfields}, 1, 0, (EMPTY_STRING) x $jadd );
  7         28  
1302 7         27 splice( @{$rfield_lengths}, 1, 0, (0) x $jadd );
  7         25  
1303              
1304             # force a flush after this line if it does not follow a case
1305 7 50       29 if ( $rfields_old->[0] =~ /^case\s*$/ ) { return }
  0         0  
1306 7         33 else { return $jbrace }
1307             } ## end sub fix_terminal_else
1308              
1309             my %is_closing_block_type;
1310              
1311             BEGIN {
1312 38     38   257 my @q = qw< } ] >;
1313 38         1177 @is_closing_block_type{@q} = (1) x scalar(@q);
1314             }
1315              
1316             # This is a flag for testing alignment by sub sweep_left_to_right only.
1317             # This test can help find problems with the alignment logic.
1318             # This flag should normally be zero.
1319 38     38   326 use constant TEST_SWEEP_ONLY => 0;
  38         93  
  38         2732  
1320              
1321 38     38   280 use constant EXPLAIN_CHECK_MATCH => 0;
  38         82  
  38         5191  
1322              
1323             sub check_match {
1324              
1325             # See if the current line matches the current vertical alignment group.
1326              
1327 1137     1137 0 2686 my ( $self, $new_line, $base_line, $prev_line, $group_line_count ) = @_;
1328              
1329             # Given:
1330             # $new_line = the line being considered for group inclusion
1331             # $base_line = the first line of the current group
1332             # $prev_line = the line just before $new_line
1333             # $group_line_count = number of lines in the current group
1334              
1335             # returns a flag and a value as follows:
1336             # return (0, $imax_align) if the line does not match
1337             # return (1, $imax_align) if the line matches but does not fit
1338             # return (2, $imax_align) if the line matches and fits
1339              
1340 38     38   322 use constant NO_MATCH => 0;
  38         96  
  38         2434  
1341 38     38   298 use constant MATCH_NO_FIT => 1;
  38         89  
  38         2129  
1342 38     38   246 use constant MATCH_AND_FIT => 2;
  38         102  
  38         66261  
1343              
1344 1137         1732 my $return_value;
1345              
1346             # Returns '$imax_align' which is the index of the maximum matching token.
1347             # It will be used in the subsequent left-to-right sweep to align as many
1348             # tokens as possible for lines which partially match.
1349 1137         1910 my $imax_align = -1;
1350              
1351             # variable $GoToMsg explains reason for no match, for debugging
1352 1137         2033 my $GoToMsg = EMPTY_STRING;
1353              
1354 1137         2140 my $jmax = $new_line->{'jmax'};
1355 1137         2067 my $maximum_field_index = $base_line->{'jmax'};
1356              
1357 1137         1989 my $jlimit = $jmax - 2;
1358 1137 100       2846 if ( $jmax > $maximum_field_index ) {
1359 82         230 $jlimit = $maximum_field_index - 2;
1360             }
1361              
1362 1137 100       2477 if ( $new_line->{'is_hanging_side_comment'} ) {
1363              
1364             # HSC's can join the group if they fit
1365             }
1366              
1367             # Everything else
1368             else {
1369              
1370             # A group with hanging side comments ends with the first non hanging
1371             # side comment.
1372 1099 50       2430 if ( $base_line->{'is_hanging_side_comment'} ) {
1373 0         0 $GoToMsg = "end of hanging side comments";
1374 0         0 $return_value = NO_MATCH;
1375             }
1376             else {
1377              
1378             # The number of tokens that this line shares with the previous
1379             # line has been stored with the previous line. This value was
1380             # calculated and stored by sub 'match_line_pair'.
1381 1099         1972 $imax_align = $prev_line->{'imax_pair'};
1382              
1383             # Only the following ci sequences are accepted (issue c225):
1384             # 0 0 0 ... OK
1385             # 0 1 1 ... OK but marginal*
1386             # 1 1 1 ... OK
1387             # This check is rarely activated, but for example we want
1388             # to avoid something like this 'tail wag dog' situation:
1389             # $tag =~ s/\b([a-z]+)/\L\u$1/gio;
1390             # $tag =~ s/\b([b-df-hj-np-tv-z]+)\b/\U$1/gio
1391             # if $tag =~ /-/;
1392             # *Note: we could set a flag for the 0 1 marginal case and
1393             # use it to prevent alignment of selected token types.
1394 1099         1929 my $ci_prev = $prev_line->{'ci_level'};
1395 1099         1739 my $ci_new = $new_line->{'ci_level'};
1396 1099 50 100     4719 if ( $ci_prev != $ci_new
    100 33        
      66        
1397             && $imax_align >= 0
1398             && ( $ci_new == 0 || $group_line_count > 1 ) )
1399             {
1400 0         0 $imax_align = -1;
1401 0         0 $GoToMsg =
1402             "Rejected ci: ci_prev=$ci_prev ci_new=$ci_new num=$group_line_count\n";
1403 0         0 $return_value = NO_MATCH;
1404             }
1405             elsif ( $imax_align != $jlimit ) {
1406 27         173 $GoToMsg = "Not all tokens match: $imax_align != $jlimit\n";
1407 27         74 $return_value = NO_MATCH;
1408             }
1409             }
1410             }
1411              
1412 1137 100       2684 if ( !defined($return_value) ) {
1413              
1414             # The tokens match, but the lines must have identical number of
1415             # tokens to join the group.
1416 1110 100 100     3927 if ( $maximum_field_index != $jmax ) {
    100          
1417 118         405 $GoToMsg = "token count differs";
1418 118         244 $return_value = NO_MATCH;
1419             }
1420              
1421             # The tokens match. Now See if there is space for this line in the
1422             # current group.
1423             elsif ( $self->check_fit( $new_line, $base_line ) && !TEST_SWEEP_ONLY )
1424             {
1425              
1426 979         3110 $GoToMsg = "match and fit, imax_align=$imax_align, jmax=$jmax\n";
1427 979         1776 $return_value = MATCH_AND_FIT;
1428 979         1873 $imax_align = $jlimit;
1429             }
1430             else {
1431 13         46 $GoToMsg = "match but no fit, imax_align=$imax_align, jmax=$jmax\n";
1432 13         22 $return_value = MATCH_NO_FIT;
1433 13         24 $imax_align = $jlimit;
1434             }
1435             }
1436              
1437             EXPLAIN_CHECK_MATCH
1438 1137         1696 && print
1439             "returning $return_value because $GoToMsg, max match index =i $imax_align, jmax=$jmax\n";
1440              
1441 1137         3132 return ( $return_value, $imax_align );
1442             } ## end sub check_match
1443              
1444             sub check_fit {
1445              
1446 992     992 0 2242 my ( $self, $new_line, $old_line ) = @_;
1447              
1448             # The new line has alignments identical to the current group. Now we have
1449             # to fit the new line into the group without causing a field to exceed the
1450             # line length limit.
1451             # return true if successful
1452             # return false if not successful
1453              
1454 992         1821 my $jmax = $new_line->{'jmax'};
1455 992         1793 my $leading_space_count = $new_line->{'leading_space_count'};
1456 992         1772 my $rfield_lengths = $new_line->{'rfield_lengths'};
1457 992         3654 my $padding_available = $old_line->get_available_space_on_right();
1458 992         1915 my $jmax_old = $old_line->{'jmax'};
1459              
1460             # Safety check ... only lines with equal array sizes should arrive here
1461             # from sub check_match. So if this error occurs, look at recent changes in
1462             # sub check_match. It is only supposed to check the fit of lines with
1463             # identical numbers of alignment tokens.
1464 992 50       2760 if ( $jmax_old ne $jmax ) {
1465              
1466 0         0 warning(<<EOM);
1467             Program bug detected in Perl::Tidy::VerticalAligner sub check_fit
1468             unexpected difference in array lengths: $jmax != $jmax_old
1469             EOM
1470 0         0 return;
1471             }
1472              
1473             # Save current columns in case this line does not fit.
1474 992         1604 my @alignments = @{ $old_line->{'ralignments'} };
  992         2492  
1475 992         3282 foreach my $alignment (@alignments) {
1476 3441         7494 $alignment->save_column();
1477             }
1478              
1479             # Loop over all alignments ...
1480 992         3076 for my $j ( 0 .. $jmax ) {
1481              
1482 3425         8534 my $pad = $rfield_lengths->[$j] - $old_line->current_field_width($j);
1483              
1484 3425 100       6903 if ( $j == 0 ) {
1485 992         1640 $pad += $leading_space_count;
1486             }
1487              
1488             # Keep going if this field does not need any space.
1489 3425 100       6772 next if ( $pad < 0 );
1490              
1491             # Revert to the starting state if does not fit
1492 2366 100       5108 if ( $pad > $padding_available ) {
1493              
1494             #----------------------------------------------
1495             # Line does not fit -- revert to starting state
1496             #----------------------------------------------
1497 13         36 foreach my $alignment (@alignments) {
1498 39         87 $alignment->restore_column();
1499             }
1500 13         63 return;
1501             }
1502              
1503             # make room for this field
1504 2353         6501 $old_line->increase_field_width( $j, $pad );
1505 2353         4242 $padding_available -= $pad;
1506             }
1507              
1508             #-------------------------------------
1509             # The line fits, the match is accepted
1510             #-------------------------------------
1511 979         5094 return 1;
1512              
1513             } ## end sub check_fit
1514              
1515             sub install_new_alignments {
1516              
1517 2082     2082 0 4007 my ($new_line) = @_;
1518              
1519 2082         4159 my $jmax = $new_line->{'jmax'};
1520 2082         3587 my $rfield_lengths = $new_line->{'rfield_lengths'};
1521 2082         3635 my $col = $new_line->{'leading_space_count'};
1522              
1523 2082         3296 my @alignments;
1524 2082         5076 for my $j ( 0 .. $jmax ) {
1525 6953         10653 $col += $rfield_lengths->[$j];
1526              
1527             # create initial alignments for the new group
1528 6953         22357 my $alignment =
1529             Perl::Tidy::VerticalAligner::Alignment->new( { column => $col } );
1530 6953         14678 push @alignments, $alignment;
1531             }
1532 2082         6013 $new_line->{'ralignments'} = \@alignments;
1533 2082         4868 return;
1534             } ## end sub install_new_alignments
1535              
1536             sub copy_old_alignments {
1537 979     979 0 2152 my ( $new_line, $old_line ) = @_;
1538 979         1640 my @new_alignments = @{ $old_line->{'ralignments'} };
  979         2814  
1539 979         2387 $new_line->{'ralignments'} = \@new_alignments;
1540 979         2345 return;
1541             } ## end sub copy_old_alignments
1542              
1543             sub dump_array {
1544              
1545             # debug routine to dump array contents
1546 0     0 0 0 local $LIST_SEPARATOR = ')(';
1547 0         0 print STDOUT "(@_)\n";
1548 0         0 return;
1549             } ## end sub dump_array
1550              
1551             sub level_change {
1552              
1553             # compute decrease in level when we remove $diff spaces from the
1554             # leading spaces
1555 10     10 0 27 my ( $self, $leading_space_count, $diff, $level ) = @_;
1556              
1557 10         18 my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
1558 10 50       28 if ($rOpts_indent_columns) {
1559 10         26 my $olev =
1560             int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
1561 10         17 my $nlev = int( $leading_space_count / $rOpts_indent_columns );
1562 10         20 $level -= ( $olev - $nlev );
1563 10 50       25 if ( $level < 0 ) { $level = 0 }
  0         0  
1564             }
1565 10         20 return $level;
1566             } ## end sub level_change
1567              
1568             ###############################################
1569             # CODE SECTION 4: Code to process comment lines
1570             ###############################################
1571              
1572             sub _flush_comment_lines {
1573              
1574             # Output a group consisting of COMMENT lines
1575              
1576 531     531   1325 my ($self) = @_;
1577 531         1173 my $rgroup_lines = $self->[_rgroup_lines_];
1578 531 50       909 return unless ( @{$rgroup_lines} );
  531         1549  
1579 531         1035 my $group_level = $self->[_group_level_];
1580 531         1108 my $group_maximum_line_length = $self->[_group_maximum_line_length_];
1581 531         1104 my $leading_space_count = $self->[_comment_leading_space_count_];
1582              
1583             # look for excessively long lines
1584 531         999 my $max_excess = 0;
1585 531         1005 foreach my $item ( @{$rgroup_lines} ) {
  531         1361  
1586 608         1024 my ( $str, $str_len ) = @{$item};
  608         1596  
1587 608         1371 my $excess =
1588             $str_len + $leading_space_count - $group_maximum_line_length;
1589 608 100       2040 if ( $excess > $max_excess ) {
1590 38         101 $max_excess = $excess;
1591             }
1592             }
1593              
1594             # zero leading space count if any lines are too long
1595 531 100       1619 if ( $max_excess > 0 ) {
1596 36         91 $leading_space_count -= $max_excess;
1597 36 50       144 if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
  36         68  
1598 36         74 my $file_writer_object = $self->[_file_writer_object_];
1599 36         173 my $last_outdented_line_at =
1600             $file_writer_object->get_output_line_number();
1601 36         78 my $nlines = @{$rgroup_lines};
  36         88  
1602 36         100 $self->[_last_outdented_line_at_] =
1603             $last_outdented_line_at + $nlines - 1;
1604 36         78 my $outdented_line_count = $self->[_outdented_line_count_];
1605 36 100       130 unless ($outdented_line_count) {
1606 18         54 $self->[_first_outdented_line_at_] = $last_outdented_line_at;
1607             }
1608 36         85 $outdented_line_count += $nlines;
1609 36         77 $self->[_outdented_line_count_] = $outdented_line_count;
1610             }
1611              
1612             # write the lines
1613 531         1055 my $outdent_long_lines = 0;
1614              
1615 531         972 foreach my $item ( @{$rgroup_lines} ) {
  531         1263  
1616 608         1051 my ( $str, $str_len, $Kend ) = @{$item};
  608         1500  
1617 608         7062 $self->valign_output_step_B(
1618             {
1619             leading_space_count => $leading_space_count,
1620             line => $str,
1621             line_length => $str_len,
1622             side_comment_length => 0,
1623             outdent_long_lines => $outdent_long_lines,
1624             rvertical_tightness_flags => undef,
1625             level => $group_level,
1626             level_end => $group_level,
1627             Kend => $Kend,
1628             maximum_line_length => $group_maximum_line_length,
1629             }
1630             );
1631             }
1632              
1633 531         2761 $self->initialize_for_new_group();
1634 531         1038 return;
1635             } ## end sub _flush_comment_lines
1636              
1637             ######################################################
1638             # CODE SECTION 5: Code to process groups of code lines
1639             ######################################################
1640              
1641             sub _flush_group_lines {
1642              
1643             # This is the vertical aligner internal flush, which leaves the cache
1644             # intact
1645 5567     5567   11193 my ( $self, $level_jump ) = @_;
1646              
1647             # $level_jump = $next_level-$group_level, if known
1648             # = undef if not known
1649             # Note: only the sign of the jump is needed
1650              
1651 5567         9998 my $rgroup_lines = $self->[_rgroup_lines_];
1652 5567 100       7996 return unless ( @{$rgroup_lines} );
  5567         15118  
1653 2234         4921 my $group_type = $self->[_group_type_];
1654 2234         4174 my $group_level = $self->[_group_level_];
1655              
1656             # Debug
1657 2234         3498 0 && do {
1658             my ( $a, $b, $c ) = caller();
1659             my $nlines = @{$rgroup_lines};
1660             print STDOUT
1661             "APPEND0: _flush_group_lines called from $a $b $c lines=$nlines, type=$group_type \n";
1662             };
1663              
1664             #-------------------------------------------
1665             # Section 1: Handle a group of COMMENT lines
1666             #-------------------------------------------
1667 2234 100       6187 if ( $group_type eq 'COMMENT' ) {
1668 531         2349 $self->_flush_comment_lines();
1669 531         1699 return;
1670             }
1671              
1672             #------------------------------------------------------------------------
1673             # Section 2: Handle line(s) of CODE. Most of the actual work of vertical
1674             # aligning happens here in the following steps:
1675             #------------------------------------------------------------------------
1676              
1677             # STEP 1: Remove most unmatched tokens. They block good alignments.
1678 1703         6053 my ( $max_lev_diff, $saw_side_comment ) =
1679             delete_unmatched_tokens( $rgroup_lines, $group_level );
1680              
1681             # STEP 2: Sweep top to bottom, forming subgroups of lines with exactly
1682             # matching common alignments. The indexes of these subgroups are in the
1683             # return variable.
1684 1703         6871 my $rgroups = $self->sweep_top_down( $rgroup_lines, $group_level );
1685              
1686             # STEP 3: Sweep left to right through the lines, looking for leading
1687             # alignment tokens shared by groups.
1688             sweep_left_to_right( $rgroup_lines, $rgroups, $group_level )
1689 1703 100       2794 if ( @{$rgroups} > 1 );
  1703         5895  
1690              
1691             # STEP 4: Move side comments to a common column if possible.
1692 1703 100       4323 if ($saw_side_comment) {
1693 199         980 $self->align_side_comments( $rgroup_lines, $rgroups );
1694             }
1695              
1696             # STEP 5: For the -lp option, increase the indentation of lists
1697             # to the desired amount, but do not exceed the line length limit.
1698              
1699             # We are allowed to shift a group of lines to the right if:
1700             # (1) its level is greater than the level of the previous group, and
1701             # (2) its level is greater than the level of the next line to be written.
1702              
1703 1703         2823 my $extra_indent_ok;
1704 1703 100       4934 if ( $group_level > $self->[_last_level_written_] ) {
1705              
1706             # Use the level jump to next line to come, if given
1707 851 100       2492 if ( defined($level_jump) ) {
1708 571         1431 $extra_indent_ok = $level_jump < 0;
1709             }
1710              
1711             # Otherwise, assume the next line has the level of the end of last line.
1712             # This fixes case c008.
1713             else {
1714 280         762 my $level_end = $rgroup_lines->[-1]->{'level_end'};
1715 280         753 $extra_indent_ok = $group_level > $level_end;
1716             }
1717             }
1718              
1719 1703 100       5844 my $extra_leading_spaces =
1720             $extra_indent_ok
1721             ? get_extra_leading_spaces( $rgroup_lines, $rgroups )
1722             : 0;
1723              
1724             # STEP 6: Output the lines.
1725             # All lines in this group have the same leading spacing and maximum line
1726             # length
1727 1703         3353 my $group_leader_length = $rgroup_lines->[0]->{'leading_space_count'};
1728 1703         3195 my $group_maximum_line_length = $rgroup_lines->[0]->{'maximum_line_length'};
1729              
1730 1703         2761 foreach my $line ( @{$rgroup_lines} ) {
  1703         3658  
1731 3061         18581 $self->valign_output_step_A(
1732             {
1733             line => $line,
1734             min_ci_gap => 0,
1735             do_not_align => 0,
1736             group_leader_length => $group_leader_length,
1737             extra_leading_spaces => $extra_leading_spaces,
1738             level => $group_level,
1739             maximum_line_length => $group_maximum_line_length,
1740             }
1741             );
1742             }
1743              
1744             # Let the formatter know that this object has been processed and any
1745             # recoverable spaces have been handled. This is needed for setting the
1746             # closing paren location in -lp mode.
1747 1703         4336 my $object = $rgroup_lines->[0]->{'indentation'};
1748 1703 100       4729 if ( ref($object) ) { $object->set_recoverable_spaces(0) }
  92         387  
1749              
1750 1703         6252 $self->initialize_for_new_group();
1751 1703         3707 return;
1752             } ## end sub _flush_group_lines
1753              
1754             { ## closure for sub sweep_top_down
1755              
1756             my $rall_lines; # all of the lines
1757             my $grp_level; # level of all lines
1758             my $rgroups; # describes the partition of lines we will make here
1759             my $group_line_count; # number of lines in current partition
1760              
1761 38     38   75443 BEGIN { $rgroups = [] }
1762              
1763             sub initialize_for_new_rgroup {
1764 3785     3785 0 5906 $group_line_count = 0;
1765 3785         5717 return;
1766             }
1767              
1768             sub add_to_rgroup {
1769              
1770 3061     3061 0 5928 my ($jend) = @_;
1771 3061         5426 my $rline = $rall_lines->[$jend];
1772              
1773 3061         4535 my $jbeg = $jend;
1774 3061 100       6869 if ( $group_line_count == 0 ) {
1775 2082         5702 install_new_alignments($rline);
1776             }
1777             else {
1778 979         1596 my $rvals = pop @{$rgroups};
  979         1966  
1779 979         1932 $jbeg = $rvals->[0];
1780 979         2711 copy_old_alignments( $rline, $rall_lines->[$jbeg] );
1781             }
1782 3061         4937 push @{$rgroups}, [ $jbeg, $jend, undef ];
  3061         7468  
1783 3061         5359 $group_line_count++;
1784 3061         5154 return;
1785             } ## end sub add_to_rgroup
1786              
1787             sub get_rgroup_jrange {
1788              
1789 1286 50   1286 0 2067 return unless @{$rgroups};
  1286         3156  
1790 1286 50       3296 return unless ( $group_line_count > 0 );
1791 1286         2038 my ( $jbeg, $jend ) = @{ $rgroups->[-1] };
  1286         2970  
1792 1286         3783 return ( $jbeg, $jend );
1793             } ## end sub get_rgroup_jrange
1794              
1795             sub end_rgroup {
1796              
1797 2101     2101 0 4219 my ($imax_align) = @_;
1798 2101 50       3112 return unless @{$rgroups};
  2101         5065  
1799 2101 100       4992 return unless ( $group_line_count > 0 );
1800              
1801 2082         3158 my ( $jbeg, $jend ) = @{ pop @{$rgroups} };
  2082         3039  
  2082         4820  
1802 2082         3869 push @{$rgroups}, [ $jbeg, $jend, $imax_align ];
  2082         5178  
1803              
1804             # Undo some alignments of poor two-line combinations.
1805             # We had to wait until now to know the line count.
1806 2082 100       6009 if ( $jend - $jbeg == 1 ) {
1807 254         796 my $line_0 = $rall_lines->[$jbeg];
1808 254         647 my $line_1 = $rall_lines->[$jend];
1809              
1810 254         715 my $imax_pair = $line_1->{'imax_pair'};
1811 254 50       875 if ( $imax_pair > $imax_align ) { $imax_align = $imax_pair }
  0         0  
1812              
1813             ## flag for possible future use:
1814             ## my $is_isolated_pair = $imax_pair < 0
1815             ## && ( $jbeg == 0
1816             ## || $rall_lines->[ $jbeg - 1 ]->{'imax_pair'} < 0 );
1817              
1818             my $imax_prev =
1819 254 100       870 $jbeg > 0 ? $rall_lines->[ $jbeg - 1 ]->{'imax_pair'} : -1;
1820              
1821 254         1246 my ( $is_marginal, $imax_align_fix ) =
1822             is_marginal_match( $line_0, $line_1, $grp_level, $imax_align,
1823             $imax_prev );
1824 254 100       938 if ($is_marginal) {
1825 14         67 combine_fields( $line_0, $line_1, $imax_align_fix );
1826             }
1827             }
1828              
1829 2082         5380 initialize_for_new_rgroup();
1830 2082         3439 return;
1831             } ## end sub end_rgroup
1832              
1833             sub block_penultimate_match {
1834              
1835             # emergency reset to prevent sweep_left_to_right from trying to match a
1836             # failed terminal else match
1837 1 50   1 0 3 return unless @{$rgroups} > 1;
  1         4  
1838 1         3 $rgroups->[-2]->[2] = -1;
1839 1         3 return;
1840             } ## end sub block_penultimate_match
1841              
1842             sub sweep_top_down {
1843 1703     1703 0 3905 my ( $self, $rlines, $group_level ) = @_;
1844              
1845             # Partition the set of lines into final alignment subgroups
1846             # and store the alignments with the lines.
1847              
1848             # The alignment subgroups we are making here are groups of consecutive
1849             # lines which have (1) identical alignment tokens and (2) do not
1850             # exceed the allowable maximum line length. A later sweep from
1851             # left-to-right ('sweep_lr') will handle additional alignments.
1852              
1853             # transfer args to closure variables
1854 1703         19824 $rall_lines = $rlines;
1855 1703         4243 $grp_level = $group_level;
1856 1703         5310 $rgroups = [];
1857 1703         5331 initialize_for_new_rgroup();
1858 1703 50       2636 return unless @{$rlines}; # shouldn't happen
  1703         5051  
1859              
1860             # Unset the _end_group flag for the last line if it it set because it
1861             # is not needed and can causes problems for -lp formatting
1862 1703         4100 $rall_lines->[-1]->{'end_group'} = 0;
1863              
1864             # Loop over all lines ...
1865 1703         3086 my $jline = -1;
1866 1703         2917 foreach my $new_line ( @{$rall_lines} ) {
  1703         4165  
1867 3061         4653 $jline++;
1868              
1869             # Start a new subgroup if necessary
1870 3061 100       6876 if ( !$group_line_count ) {
1871 1775         5251 add_to_rgroup($jline);
1872 1775 100       5097 if ( $new_line->{'end_group'} ) {
1873 22         95 end_rgroup(-1);
1874             }
1875 1775         3759 next;
1876             }
1877              
1878 1286         2736 my $j_terminal_match = $new_line->{'j_terminal_match'};
1879 1286         3634 my ( $jbeg, $jend ) = get_rgroup_jrange();
1880 1286 50       3348 if ( !defined($jbeg) ) {
1881              
1882             # safety check, shouldn't happen
1883 0         0 warning(<<EOM);
1884             Program bug detected in Perl::Tidy::VerticalAligner sub sweep_top_down
1885             undefined index for group line count $group_line_count
1886             EOM
1887 0         0 $jbeg = $jline;
1888             }
1889 1286         2402 my $base_line = $rall_lines->[$jbeg];
1890              
1891             # Initialize a global flag saying if the last line of the group
1892             # should match end of group and also terminate the group. There
1893             # should be no returns between here and where the flag is handled
1894             # at the bottom.
1895 1286         2038 my $col_matching_terminal = 0;
1896 1286 100       3048 if ( defined($j_terminal_match) ) {
1897              
1898             # remember the column of the terminal ? or { to match with
1899 19         97 $col_matching_terminal =
1900             $base_line->get_column($j_terminal_match);
1901              
1902             # Ignore an undefined value as a defensive step; shouldn't
1903             # normally happen.
1904 19 50       92 $col_matching_terminal = 0
1905             unless defined($col_matching_terminal);
1906             }
1907              
1908             # -------------------------------------------------------------
1909             # Allow hanging side comment to join current group, if any. The
1910             # only advantage is to keep the other tokens in the same group. For
1911             # example, this would make the '=' align here:
1912             # $ax = 1; # side comment
1913             # # hanging side comment
1914             # $boondoggle = 5; # side comment
1915             # $beetle = 5; # side comment
1916              
1917             # here is another example..
1918              
1919             # _rtoc_name_count => {}, # hash to track ..
1920             # _rpackage_stack => [], # stack to check ..
1921             # # name changes
1922             # _rlast_level => \$last_level, # brace indentation
1923             #
1924             #
1925             # If this were not desired, the next step could be skipped.
1926             # -------------------------------------------------------------
1927 1286 100       4378 if ( $new_line->{'is_hanging_side_comment'} ) {
    100          
1928 38         147 join_hanging_comment( $new_line, $base_line );
1929             }
1930              
1931             # If this line has no matching tokens, then flush out the lines
1932             # BEFORE this line unless both it and the previous line have side
1933             # comments. This prevents this line from pushing side comments out
1934             # to the right.
1935             elsif ( $new_line->{'jmax'} == 1 ) {
1936              
1937             # There are no matching tokens, so now check side comments.
1938             # Programming note: accessing arrays with index -1 is
1939             # risky in Perl, but we have verified there is at least one
1940             # line in the group and that there is at least one field.
1941             my $prev_comment =
1942 194         674 $rall_lines->[ $jline - 1 ]->{'rfields'}->[-1];
1943 194         472 my $side_comment = $new_line->{'rfields'}->[-1];
1944 194 100 100     1022 end_rgroup(-1) unless ( $side_comment && $prev_comment );
1945             }
1946              
1947             # See if the new line matches and fits the current group,
1948             # if it still exists. Flush the current group if not.
1949 1286         2195 my $match_code;
1950 1286 100       3044 if ($group_line_count) {
1951 1137         4874 ( $match_code, my $imax_align ) =
1952             $self->check_match( $new_line, $base_line,
1953             $rall_lines->[ $jline - 1 ],
1954             $group_line_count );
1955 1137 100       3161 if ( $match_code != 2 ) { end_rgroup($imax_align) }
  158         471  
1956             }
1957              
1958             # Store the new line
1959 1286         3500 add_to_rgroup($jline);
1960              
1961 1286 100       5815 if ( defined($j_terminal_match) ) {
    100          
1962              
1963             # Decide if we should fix a terminal match. We can either:
1964             # 1. fix it and prevent the sweep_lr from changing it, or
1965             # 2. leave it alone and let sweep_lr try to fix it.
1966              
1967             # The current logic is to fix it if:
1968             # -it has not joined to previous lines,
1969             # -and either the previous subgroup has just 1 line, or
1970             # -this line matched but did not fit (so sweep won't work)
1971 19         69 my $fixit;
1972 19 100       88 if ( $group_line_count == 1 ) {
1973 3   66     26 $fixit ||= $match_code;
1974 3 100       13 if ( !$fixit ) {
1975 2 50       7 if ( @{$rgroups} > 1 ) {
  2         8  
1976 2         4 my ( $jbegx, $jendx ) = @{ $rgroups->[-2] };
  2         8  
1977 2         8 my $nlines = $jendx - $jbegx + 1;
1978 2   66     13 $fixit ||= $nlines <= 1;
1979             }
1980             }
1981             }
1982              
1983 19 100       84 if ($fixit) {
1984 2         6 $base_line = $new_line;
1985 2         8 my $col_now = $base_line->get_column($j_terminal_match);
1986              
1987             # Ignore an undefined value as a defensive step; shouldn't
1988             # normally happen.
1989 2 50       12 $col_now = 0 unless defined($col_now);
1990              
1991 2         8 my $pad = $col_matching_terminal - $col_now;
1992 2         11 my $padding_available =
1993             $base_line->get_available_space_on_right();
1994 2 100 33     25 if ( $col_now && $pad > 0 && $pad <= $padding_available ) {
      66        
1995 1         6 $base_line->increase_field_width( $j_terminal_match,
1996             $pad );
1997             }
1998              
1999             # do not let sweep_left_to_right change an isolated 'else'
2000 2 100       11 if ( !$new_line->{'is_terminal_ternary'} ) {
2001 1         3 block_penultimate_match();
2002             }
2003             }
2004 19         92 end_rgroup(-1);
2005             }
2006              
2007             # end the group if we know we cannot match next line.
2008             elsif ( $new_line->{'end_group'} ) {
2009 50         271 end_rgroup(-1);
2010             }
2011             } ## end loop over lines
2012              
2013 1703         6045 end_rgroup(-1);
2014 1703         3787 return ($rgroups);
2015             } ## end sub sweep_top_down
2016             }
2017              
2018             sub two_line_pad {
2019              
2020 18     18 0 66 my ( $line_m, $line, $imax_min ) = @_;
2021              
2022             # Given:
2023             # two isolated (list) lines
2024             # imax_min = number of common alignment tokens
2025             # Return:
2026             # $pad_max = maximum suggested pad distance
2027             # = 0 if alignment not recommended
2028             # Note that this is only for two lines which do not have alignment tokens
2029             # in common with any other lines. It is intended for lists, but it might
2030             # also be used for two non-list lines with a common leading '='.
2031              
2032             # Allow alignment if the difference in the two unpadded line lengths
2033             # is not more than either line length. The idea is to avoid
2034             # aligning lines with very different field lengths, like these two:
2035              
2036             # [
2037             # 'VARCHAR', DBI::SQL_VARCHAR, undef, "'", "'", undef, 0, 1,
2038             # 1, 0, 0, 0, undef, 0, 0
2039             # ];
2040 18         55 my $rfield_lengths = $line->{'rfield_lengths'};
2041 18         45 my $rfield_lengths_m = $line_m->{'rfield_lengths'};
2042              
2043             # Safety check - shouldn't happen
2044             return 0
2045 18 50 33     36 unless $imax_min < @{$rfield_lengths} && $imax_min < @{$rfield_lengths_m};
  18         94  
  18         78  
2046              
2047 18         49 my $lensum_m = 0;
2048 18         43 my $lensum = 0;
2049 18         61 foreach my $i ( 0 .. $imax_min ) {
2050 49         87 $lensum_m += $rfield_lengths_m->[$i];
2051 49         95 $lensum += $rfield_lengths->[$i];
2052             }
2053              
2054 18 100       121 my ( $lenmin, $lenmax ) =
2055             $lensum >= $lensum_m ? ( $lensum_m, $lensum ) : ( $lensum, $lensum_m );
2056              
2057 18         34 my $patterns_match;
2058 18 50 66     132 if ( $line_m->{'list_type'} && $line->{'list_type'} ) {
2059 16         41 $patterns_match = 1;
2060 16         41 my $rpatterns_m = $line_m->{'rpatterns'};
2061 16         45 my $rpatterns = $line->{'rpatterns'};
2062 16         51 foreach my $i ( 0 .. $imax_min ) {
2063 46         83 my $pat = $rpatterns->[$i];
2064 46         84 my $pat_m = $rpatterns_m->[$i];
2065 46 100       158 if ( $pat ne $pat_m ) { $patterns_match = 0; last }
  2         5  
  2         7  
2066             }
2067             }
2068              
2069 18         100 my $pad_max = $lenmax;
2070 18 50 66     114 if ( !$patterns_match && $lenmax > 2 * $lenmin ) { $pad_max = 0 }
  0         0  
2071              
2072 18         55 return $pad_max;
2073             } ## end sub two_line_pad
2074              
2075             sub sweep_left_to_right {
2076              
2077 255     255 0 903 my ( $rlines, $rgroups, $group_level ) = @_;
2078              
2079             # So far we have divided the lines into groups having an equal number of
2080             # identical alignments. Here we are going to look for common leading
2081             # alignments between the different groups and align them when possible.
2082             # For example, the three lines below are in three groups because each line
2083             # has a different number of commas. In this routine we will sweep from
2084             # left to right, aligning the leading commas as we go, but stopping if we
2085             # hit the line length limit.
2086              
2087             # my ( $num, $numi, $numj, $xyza, $ka, $xyzb, $kb, $aff, $error );
2088             # my ( $i, $j, $error, $aff, $asum, $avec );
2089             # my ( $km, $area, $varea );
2090              
2091             # nothing to do if just one group
2092 255         1451 my $ng_max = @{$rgroups} - 1;
  255         683  
2093 255 50       809 return unless ( $ng_max > 0 );
2094              
2095             #---------------------------------------------------------------------
2096             # Step 1: Loop over groups to find all common leading alignment tokens
2097             #---------------------------------------------------------------------
2098              
2099 255         3750 my $line;
2100             my $rtokens;
2101 255         0 my $imax; # index of maximum non-side-comment alignment token
2102 255         0 my $istop; # an optional stopping index
2103 255         0 my $jbeg; # starting line index
2104 255         0 my $jend; # ending line index
2105              
2106 255         0 my $line_m;
2107 255         0 my $rtokens_m;
2108 255         0 my $imax_m;
2109 255         0 my $istop_m;
2110 255         0 my $jbeg_m;
2111 255         0 my $jend_m;
2112              
2113 255         0 my $istop_mm;
2114              
2115             # Look at neighboring pairs of groups and form a simple list
2116             # of all common leading alignment tokens. Foreach such match we
2117             # store [$i, $ng], where
2118             # $i = index of the token in the line (0,1,...)
2119             # $ng is the second of the two groups with this common token
2120 255         0 my @icommon;
2121              
2122             # Hash to hold the maximum alignment change for any group
2123 255         0 my %max_move;
2124              
2125             # a small number of columns
2126 255         466 my $short_pad = 4;
2127              
2128 255         526 my $ng = -1;
2129 255         481 foreach my $item ( @{$rgroups} ) {
  255         701  
2130 634         898 $ng++;
2131              
2132 634         1080 $istop_mm = $istop_m;
2133              
2134             # save _m values of previous group
2135 634         957 $line_m = $line;
2136 634         909 $rtokens_m = $rtokens;
2137 634         959 $imax_m = $imax;
2138 634         998 $istop_m = $istop;
2139 634         973 $jbeg_m = $jbeg;
2140 634         1008 $jend_m = $jend;
2141              
2142             # Get values for this group. Note that we just have to use values for
2143             # one of the lines of the group since all members have the same
2144             # alignments.
2145 634         971 ( $jbeg, $jend, $istop ) = @{$item};
  634         1302  
2146              
2147 634         1163 $line = $rlines->[$jbeg];
2148 634         1169 $rtokens = $line->{'rtokens'};
2149 634         1108 $imax = $line->{'jmax'} - 2;
2150 634 50       1429 $istop = -1 unless ( defined($istop) );
2151 634 50       1388 $istop = $imax if ( $istop > $imax );
2152              
2153             # Initialize on first group
2154 634 100       1487 next if ( $ng == 0 );
2155              
2156             # Use the minimum index limit of the two groups
2157 379 100       1426 my $imax_min = $imax > $imax_m ? $imax_m : $imax;
2158              
2159             # Also impose a limit if given.
2160 379 100       1086 if ( $istop_m < $imax_min ) {
2161 51         157 $imax_min = $istop_m;
2162             }
2163              
2164             # Special treatment of two one-line groups isolated from other lines,
2165             # unless they form a simple list or a terminal match. Otherwise the
2166             # alignment can look strange in some cases.
2167 379         910 my $list_type = $rlines->[$jbeg]->{'list_type'};
2168 379 100 100     4542 if (
      100        
      100        
      100        
      100        
      100        
      100        
      100        
2169             $jend == $jbeg
2170             && $jend_m == $jbeg_m
2171             && ( $ng == 1 || $istop_mm < 0 )
2172             && ( $ng == $ng_max || $istop < 0 )
2173             && !$line->{'j_terminal_match'}
2174              
2175             # Only do this for imperfect matches. This is normally true except
2176             # when two perfect matches cannot form a group because the line
2177             # length limit would be exceeded. In that case we can still try
2178             # to match as many alignments as possible.
2179             && ( $imax != $imax_m || $istop_m != $imax_m )
2180             )
2181             {
2182              
2183             # We will just align assignments and simple lists
2184 73 100       329 next unless ( $imax_min >= 0 );
2185             next
2186 21 100 100     354 unless ( $rtokens->[0] =~ /^=\d/
2187             || $list_type );
2188              
2189             # In this case we will limit padding to a short distance. This
2190             # is a compromise to keep some vertical alignment but prevent large
2191             # gaps, which do not look good for just two lines.
2192 18         97 my $pad_max =
2193             two_line_pad( $rlines->[$jbeg], $rlines->[$jbeg_m], $imax_min );
2194 18 50       96 next unless ($pad_max);
2195 18         59 my $ng_m = $ng - 1;
2196 18         64 $max_move{"$ng_m"} = $pad_max;
2197 18         68 $max_move{"$ng"} = $pad_max;
2198             }
2199              
2200             # Loop to find all common leading tokens.
2201 324 100       1237 if ( $imax_min >= 0 ) {
2202 78         240 foreach my $i ( 0 .. $imax_min ) {
2203 144         278 my $tok = $rtokens->[$i];
2204 144         285 my $tok_m = $rtokens_m->[$i];
2205 144 50       395 last if ( $tok ne $tok_m );
2206 144         496 push @icommon, [ $i, $ng, $tok ];
2207             }
2208             }
2209             }
2210 255 100       1215 return unless @icommon;
2211              
2212             #----------------------------------------------------------
2213             # Step 2: Reorder and consolidate the list into a task list
2214             #----------------------------------------------------------
2215              
2216             # We have to work first from lowest token index to highest, then by group,
2217             # sort our list first on token index then group number
2218 64 50       416 @icommon = sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @icommon;
  160         464  
2219              
2220             # Make a task list of the form
2221             # [$i, ng_beg, $ng_end, $tok], ..
2222             # where
2223             # $i is the index of the token to be aligned
2224             # $ng_beg..$ng_end is the group range for this action
2225 64         146 my @todo;
2226 64         200 my ( $i, $ng_end, $tok );
2227 64         181 foreach my $item (@icommon) {
2228 144         261 my $ng_last = $ng_end;
2229 144         234 my $i_last = $i;
2230 144         239 ( $i, $ng_end, $tok ) = @{$item};
  144         329  
2231 144         272 my $ng_beg = $ng_end - 1;
2232 144 100 100     687 if ( defined($ng_last) && $ng_beg == $ng_last && $i == $i_last ) {
      66        
2233 29         55 my $var = pop(@todo);
2234 29         60 $ng_beg = $var->[1];
2235             }
2236 144         347 my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
2237 144         612 push @todo, [ $i, $ng_beg, $ng_end, $raw_tok, $lev ];
2238             }
2239              
2240             #------------------------------
2241             # Step 3: Execute the task list
2242             #------------------------------
2243 64         478 do_left_to_right_sweep( $rlines, $rgroups, \@todo, \%max_move, $short_pad,
2244             $group_level );
2245 64         290 return;
2246             } ## end sub sweep_left_to_right
2247              
2248             { ## closure for sub do_left_to_right_sweep
2249              
2250             my %is_good_alignment_token;
2251              
2252             BEGIN {
2253              
2254             # One of the most difficult aspects of vertical alignment is knowing
2255             # when not to align. Alignment can go from looking very nice to very
2256             # bad when overdone. In the sweep algorithm there are two special
2257             # cases where we may need to limit padding to a '$short_pad' distance
2258             # to avoid some very ugly formatting:
2259              
2260             # 1. Two isolated lines with partial alignment
2261             # 2. A 'tail-wag-dog' situation, in which a single terminal
2262             # line with partial alignment could cause a significant pad
2263             # increase in many previous lines if allowed to join the alignment.
2264              
2265             # For most alignment tokens, we will allow only a small pad to be
2266             # introduced (the hardwired $short_pad variable) . But for some 'good'
2267             # alignments we can be less restrictive.
2268              
2269             # These are 'good' alignments, which are allowed more padding:
2270 38     38   268 my @q = qw(
2271             => = ? if unless or || {
2272             );
2273 38         139 push @q, ',';
2274 38         266 @is_good_alignment_token{@q} = (0) x scalar(@q);
2275              
2276             # Promote a few of these to 'best', with essentially no pad limit:
2277 38         115 $is_good_alignment_token{'='} = 1;
2278 38         108 $is_good_alignment_token{'if'} = 1;
2279 38         98 $is_good_alignment_token{'unless'} = 1;
2280 38         32846 $is_good_alignment_token{'=>'} = 1;
2281              
2282             # Note the hash values are set so that:
2283             # if ($is_good_alignment_token{$raw_tok}) => best
2284             # if defined ($is_good_alignment_token{$raw_tok}) => good or best
2285              
2286             } ## end BEGIN
2287              
2288             sub move_to_common_column {
2289              
2290             # This is a sub called by sub do_left_to_right_sweep to
2291             # move the alignment column of token $itok to $col_want for a
2292             # sequence of groups.
2293 118     118 0 393 my ( $rlines, $rgroups, $rmax_move, $ngb, $nge, $itok, $col_want,
2294             $raw_tok )
2295             = @_;
2296 118 100 66     535 return unless ( defined($ngb) && $nge > $ngb );
2297 108         307 foreach my $ng ( $ngb .. $nge ) {
2298              
2299 242         362 my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
  242         552  
2300 242         414 my $line = $rlines->[$jbeg];
2301 242         589 my $col = $line->get_column($itok);
2302 242         492 my $move = $col_want - $col;
2303 242 100       846 if ( $move > 0 ) {
    50          
2304              
2305             # limit padding increase in isolated two lines
2306             next
2307             if ( defined( $rmax_move->{$ng} )
2308             && $move > $rmax_move->{$ng}
2309 77 50 66     428 && !$is_good_alignment_token{$raw_tok} );
      33        
2310              
2311 77         256 $line->increase_field_width( $itok, $move );
2312             }
2313             elsif ( $move < 0 ) {
2314              
2315             # spot to take special action on failure to move
2316             }
2317             }
2318 108         379 return;
2319             } ## end sub move_to_common_column
2320              
2321             sub do_left_to_right_sweep {
2322 64     64 0 260 my ( $rlines, $rgroups, $rtodo, $rmax_move, $short_pad, $group_level )
2323             = @_;
2324              
2325             # $blocking_level[$nj is the level at a match failure between groups
2326             # $ng-1 and $ng
2327 64         120 my @blocking_level;
2328 64         193 my $group_list_type = $rlines->[0]->{'list_type'};
2329              
2330 64         134 foreach my $task ( @{$rtodo} ) {
  64         204  
2331 115         195 my ( $itok, $ng_beg, $ng_end, $raw_tok, $lev ) = @{$task};
  115         325  
2332              
2333             # Nothing to do for a single group
2334 115 50       365 next unless ( $ng_end > $ng_beg );
2335              
2336 115         333 my $ng_first; # index of the first group of a continuous sequence
2337             my $col_want; # the common alignment column of a sequence of groups
2338 115         0 my $col_limit; # maximum column before bumping into max line length
2339 115         216 my $line_count_ng_m = 0;
2340 115         224 my $jmax_m;
2341             my $it_stop_m;
2342              
2343             # Loop over the groups
2344             # 'ix_' = index in the array of lines
2345             # 'ng_' = index in the array of groups
2346             # 'it_' = index in the array of tokens
2347 115         216 my $ix_min = $rgroups->[$ng_beg]->[0];
2348 115         217 my $ix_max = $rgroups->[$ng_end]->[1];
2349 115         234 my $lines_total = $ix_max - $ix_min + 1;
2350 115         273 foreach my $ng ( $ng_beg .. $ng_end ) {
2351 259         409 my ( $ix_beg, $ix_end, $it_stop ) = @{ $rgroups->[$ng] };
  259         593  
2352 259         466 my $line_count_ng = $ix_end - $ix_beg + 1;
2353              
2354             # Important: note that since all lines in a group have a common
2355             # alignments object, we just have to work on one of the lines
2356             # (the first line). All of the rest will be changed
2357             # automatically.
2358 259         429 my $line = $rlines->[$ix_beg];
2359 259         418 my $jmax = $line->{'jmax'};
2360              
2361             # the maximum space without exceeding the line length:
2362 259         692 my $avail = $line->get_available_space_on_right();
2363 259         646 my $col = $line->get_column($itok);
2364 259         528 my $col_max = $col + $avail;
2365              
2366             # Initialize on first group
2367 259 100       707 if ( !defined($col_want) ) {
2368 115         196 $ng_first = $ng;
2369 115         198 $col_want = $col;
2370 115         186 $col_limit = $col_max;
2371 115         210 $line_count_ng_m = $line_count_ng;
2372 115         201 $jmax_m = $jmax;
2373 115         188 $it_stop_m = $it_stop;
2374 115         261 next;
2375             }
2376              
2377             # RULE: Throw a blocking flag upon encountering a token level
2378             # different from the level of the first blocking token. For
2379             # example, in the following example, if the = matches get
2380             # blocked between two groups as shown, then we want to start
2381             # blocking matches at the commas, which are at deeper level, so
2382             # that we do not get the big gaps shown here:
2383              
2384             # my $unknown3 = pack( "v", -2 );
2385             # my $unknown4 = pack( "v", 0x09 );
2386             # my $unknown5 = pack( "VVV", 0x06, 0x00, 0x00 );
2387             # my $num_bbd_blocks = pack( "V", $num_lists );
2388             # my $root_startblock = pack( "V", $root_start );
2389             # my $unknown6 = pack( "VV", 0x00, 0x1000 );
2390              
2391             # On the other hand, it is okay to keep matching at the same
2392             # level such as in a simple list of commas and/or fat commas.
2393              
2394 144   66     575 my $is_blocked = defined( $blocking_level[$ng] )
2395             && $lev > $blocking_level[$ng];
2396              
2397             # TAIL-WAG-DOG RULE: prevent a 'tail-wag-dog' syndrom, meaning:
2398             # Do not let one or two lines with a **different number of
2399             # alignments** open up a big gap in a large block. For
2400             # example, we will prevent something like this, where the first
2401             # line pries open the rest:
2402              
2403             # $worksheet->write( "B7", "http://www.perl.com", undef, $format );
2404             # $worksheet->write( "C7", "", $format );
2405             # $worksheet->write( "D7", "", $format );
2406             # $worksheet->write( "D8", "", $format );
2407             # $worksheet->write( "D8", "", $format );
2408              
2409             # We should exclude from consideration two groups which are
2410             # effectively the same but separated because one does not
2411             # fit in the maximum allowed line length.
2412 144   100     511 my $is_same_group =
2413             $jmax == $jmax_m && $it_stop_m == $jmax_m - 2;
2414              
2415 144         281 my $lines_above = $ix_beg - $ix_min;
2416 144         247 my $lines_below = $lines_total - $lines_above;
2417              
2418             # Increase the tolerable gap for certain favorable factors
2419 144         256 my $factor = 1;
2420 144         270 my $top_level = $lev == $group_level;
2421              
2422             # Align best top level alignment tokens like '=', 'if', ...
2423             # A factor of 10 allows a gap of up to 40 spaces
2424 144 100 100     1197 if ( $top_level && $is_good_alignment_token{$raw_tok} ) {
    100 66        
      66        
2425 31         72 $factor = 10;
2426             }
2427              
2428             # Otherwise allow some minimal padding of good alignments
2429             elsif (
2430              
2431             defined( $is_good_alignment_token{$raw_tok} )
2432              
2433             # We have to be careful if there are just 2 lines. This
2434             # two-line factor allows large gaps only for 2 lines which
2435             # are simple lists with fewer items on the second line. It
2436             # gives results similar to previous versions of perltidy.
2437             && ( $lines_total > 2
2438             || $group_list_type && $jmax < $jmax_m && $top_level )
2439             )
2440             {
2441 102         205 $factor += 1;
2442 102 100       257 if ($top_level) {
2443 66         115 $factor += 1;
2444             }
2445             }
2446              
2447 144         268 my $is_big_gap;
2448 144 100       373 if ( !$is_same_group ) {
2449 118   66     957 $is_big_gap ||=
      33        
2450             ( $lines_above == 1
2451             || $lines_above == 2 && $lines_below >= 4 )
2452             && $col_want > $col + $short_pad * $factor;
2453 118   66     918 $is_big_gap ||=
      33        
2454             ( $lines_below == 1
2455             || $lines_below == 2 && $lines_above >= 4 )
2456             && $col > $col_want + $short_pad * $factor;
2457             }
2458              
2459             # if match is limited by gap size, stop aligning at this level
2460 144 50       363 if ($is_big_gap) {
2461 0         0 $blocking_level[$ng] = $lev - 1;
2462             }
2463              
2464             # quit and restart if it cannot join this batch
2465 144 50 100     1065 if ( $col_want > $col_max
      66        
      66        
2466             || $col > $col_limit
2467             || $is_big_gap
2468             || $is_blocked )
2469             {
2470              
2471             # remember the level of the first blocking token
2472 10 100       33 if ( !defined( $blocking_level[$ng] ) ) {
2473 9         26 $blocking_level[$ng] = $lev;
2474             }
2475              
2476             move_to_common_column(
2477 10         40 $rlines, $rgroups, $rmax_move, $ng_first,
2478             $ng - 1, $itok, $col_want, $raw_tok
2479             );
2480 10         17 $ng_first = $ng;
2481 10         22 $col_want = $col;
2482 10         17 $col_limit = $col_max;
2483 10         17 $line_count_ng_m = $line_count_ng;
2484 10         22 $jmax_m = $jmax;
2485 10         28 $it_stop_m = $it_stop;
2486 10         25 next;
2487             }
2488              
2489 134         259 $line_count_ng_m += $line_count_ng;
2490              
2491             # update the common column and limit
2492 134 100       330 if ( $col > $col_want ) { $col_want = $col }
  42         88  
2493 134 100       418 if ( $col_max < $col_limit ) { $col_limit = $col_max }
  35         100  
2494              
2495             } ## end loop over groups
2496              
2497 115 100       366 if ( $ng_end > $ng_first ) {
2498 108         380 move_to_common_column(
2499             $rlines, $rgroups, $rmax_move, $ng_first,
2500             $ng_end, $itok, $col_want, $raw_tok
2501             );
2502             } ## end loop over groups for one task
2503             } ## end loop over tasks
2504              
2505 64         208 return;
2506             } ## end sub do_left_to_right_sweep
2507             }
2508              
2509             sub delete_selected_tokens {
2510              
2511 469     469 0 1134 my ( $line_obj, $ridel ) = @_;
2512              
2513             # $line_obj is the line to be modified
2514             # $ridel is a ref to list of indexes to be deleted
2515              
2516             # remove an unused alignment token(s) to improve alignment chances
2517              
2518 469 50 33     2206 return unless ( defined($line_obj) && defined($ridel) && @{$ridel} );
  469   33     1504  
2519              
2520 469         1139 my $jmax_old = $line_obj->{'jmax'};
2521 469         997 my $rfields_old = $line_obj->{'rfields'};
2522 469         930 my $rfield_lengths_old = $line_obj->{'rfield_lengths'};
2523 469         930 my $rpatterns_old = $line_obj->{'rpatterns'};
2524 469         866 my $rtokens_old = $line_obj->{'rtokens'};
2525 469         877 my $j_terminal_match = $line_obj->{'j_terminal_match'};
2526              
2527 38     38   377 use constant EXPLAIN_DELETE_SELECTED => 0;
  38         89  
  38         33580  
2528              
2529 469         1158 local $LIST_SEPARATOR = '> <';
2530 469         730 EXPLAIN_DELETE_SELECTED && print <<EOM;
2531             delete indexes: <@{$ridel}>
2532             old jmax: $jmax_old
2533             old tokens: <@{$rtokens_old}>
2534             old patterns: <@{$rpatterns_old}>
2535             old fields: <@{$rfields_old}>
2536             old field_lengths: <@{$rfield_lengths_old}>
2537             EOM
2538              
2539 469         964 my $rfields_new = [];
2540 469         1007 my $rpatterns_new = [];
2541 469         959 my $rtokens_new = [];
2542 469         1008 my $rfield_lengths_new = [];
2543              
2544             # Convert deletion list to a hash to allow any order, multiple entries,
2545             # and avoid problems with index values out of range
2546 469         816 my %delete_me;
2547 469         792 @delete_me{ @{$ridel} } = (1) x scalar( @{$ridel} );
  469         1666  
  469         1102  
2548              
2549 469         1192 my $pattern_0 = $rpatterns_old->[0];
2550 469         1059 my $field_0 = $rfields_old->[0];
2551 469         897 my $field_length_0 = $rfield_lengths_old->[0];
2552 469         1698 push @{$rfields_new}, $field_0;
  469         1146  
2553 469         778 push @{$rfield_lengths_new}, $field_length_0;
  469         985  
2554 469         770 push @{$rpatterns_new}, $pattern_0;
  469         1100  
2555              
2556             # Loop to either copy items or concatenate fields and patterns
2557 469         814 my $jmin_del;
2558 469         1342 foreach my $j ( 0 .. $jmax_old - 1 ) {
2559 1515         2566 my $token = $rtokens_old->[$j];
2560 1515         2780 my $field = $rfields_old->[ $j + 1 ];
2561 1515         2299 my $field_length = $rfield_lengths_old->[ $j + 1 ];
2562 1515         2535 my $pattern = $rpatterns_old->[ $j + 1 ];
2563 1515 100       3447 if ( !$delete_me{$j} ) {
2564 743         1373 push @{$rtokens_new}, $token;
  743         1723  
2565 743         1252 push @{$rfields_new}, $field;
  743         1444  
2566 743         1212 push @{$rpatterns_new}, $pattern;
  743         1407  
2567 743         1142 push @{$rfield_lengths_new}, $field_length;
  743         1893  
2568             }
2569             else {
2570 772 100       2033 if ( !defined($jmin_del) ) { $jmin_del = $j }
  469         860  
2571 772         1799 $rfields_new->[-1] .= $field;
2572 772         1303 $rfield_lengths_new->[-1] += $field_length;
2573 772         1708 $rpatterns_new->[-1] .= $pattern;
2574             }
2575             }
2576              
2577             # ----- x ------ x ------ x ------
2578             #t 0 1 2 <- token indexing
2579             #f 0 1 2 3 <- field and pattern
2580              
2581 469         930 my $jmax_new = @{$rfields_new} - 1;
  469         1252  
2582 469         1011 $line_obj->{'rtokens'} = $rtokens_new;
2583 469         962 $line_obj->{'rpatterns'} = $rpatterns_new;
2584 469         880 $line_obj->{'rfields'} = $rfields_new;
2585 469         871 $line_obj->{'rfield_lengths'} = $rfield_lengths_new;
2586 469         851 $line_obj->{'jmax'} = $jmax_new;
2587              
2588             # The value of j_terminal_match will be incorrect if we delete tokens prior
2589             # to it. We will have to give up on aligning the terminal tokens if this
2590             # happens.
2591 469 100 100     1470 if ( defined($j_terminal_match) && $jmin_del <= $j_terminal_match ) {
2592 1         4 $line_obj->{'j_terminal_match'} = undef;
2593             }
2594              
2595             # update list type -
2596 469 100       2030 if ( $line_obj->{'list_seqno'} ) {
2597              
2598             ## This works, but for efficiency see if we need to make a change:
2599             ## decide_if_list($line_obj);
2600              
2601             # An existing list will still be a list but with possibly different
2602             # leading token
2603 76         193 my $old_list_type = $line_obj->{'list_type'};
2604 76         193 my $new_list_type = EMPTY_STRING;
2605 76 100       525 if ( $rtokens_new->[0] =~ /^(=>|,)/ ) {
2606 49         129 $new_list_type = $rtokens_new->[0];
2607             }
2608 76 100 100     409 if ( !$old_list_type || $old_list_type ne $new_list_type ) {
2609 44         160 decide_if_list($line_obj);
2610             }
2611             }
2612              
2613 469         760 EXPLAIN_DELETE_SELECTED && print <<EOM;
2614              
2615             new jmax: $jmax_new
2616             new tokens: <@{$rtokens_new}>
2617             new patterns: <@{$rpatterns_new}>
2618             new fields: <@{$rfields_new}>
2619             EOM
2620 469         2758 return;
2621             } ## end sub delete_selected_tokens
2622              
2623             { ## closure for sub decode_alignment_token
2624              
2625             # This routine is called repeatedly for each token, so it needs to be
2626             # efficient. We can speed things up by remembering the inputs and outputs
2627             # in a hash.
2628             my %decoded_token;
2629              
2630             sub initialize_decode {
2631              
2632             # We will re-initialize the hash for each file. Otherwise, there is
2633             # a danger that the hash can become arbitrarily large if a very large
2634             # number of files is processed at once.
2635 555     555 0 4154 %decoded_token = ();
2636 555         1215 return;
2637             } ## end sub initialize_decode
2638              
2639             sub decode_alignment_token {
2640              
2641             # Unpack the values packed in an alignment token
2642             #
2643             # Usage:
2644             # my ( $raw_tok, $lev, $tag, $tok_count ) =
2645             # decode_alignment_token($token);
2646              
2647             # Alignment tokens have a trailing decimal level and optional tag (for
2648             # commas):
2649             # For example, the first comma in the following line
2650             # sub banner { crlf; report( shift, '/', shift ); crlf }
2651             # is decorated as follows:
2652             # ,2+report-6 => (tok,lev,tag) =qw( , 2 +report-6)
2653              
2654             # An optional token count may be appended with a leading dot.
2655             # Currently this is only done for '=' tokens but this could change.
2656             # For example, consider the following line:
2657             # $nport = $port = shift || $name;
2658             # The first '=' may either be '=0' or '=0.1' [level 0, first equals]
2659             # The second '=' will be '=0.2' [level 0, second equals]
2660 9326     9326 0 16539 my ($tok) = @_;
2661              
2662 9326 100       19763 if ( defined( $decoded_token{$tok} ) ) {
2663 7893         11304 return @{ $decoded_token{$tok} };
  7893         31406  
2664             }
2665              
2666 1433         3667 my ( $raw_tok, $lev, $tag, $tok_count ) = ( $tok, 0, EMPTY_STRING, 1 );
2667 1433 100       9205 if ( $tok =~ /^(\D+)(\d+)([^\.]*)(\.(\d+))?$/ ) {
2668 1129         3295 $raw_tok = $1;
2669 1129         2176 $lev = $2;
2670 1129 100       3337 $tag = $3 if ($3);
2671 1129 100       3215 $tok_count = $5 if ($5);
2672             }
2673 1433         5051 my @vals = ( $raw_tok, $lev, $tag, $tok_count );
2674 1433         4252 $decoded_token{$tok} = \@vals;
2675 1433         6338 return @vals;
2676             } ## end sub decode_alignment_token
2677             }
2678              
2679             { ## closure for sub delete_unmatched_tokens
2680              
2681             my %is_assignment;
2682             my %keep_after_deleted_assignment;
2683              
2684             BEGIN {
2685 38     38   262 my @q;
2686              
2687 38         239 @q = qw(
2688             = **= += *= &= <<= &&=
2689             -= /= |= >>= ||= //=
2690             .= %= ^=
2691             x=
2692             );
2693 38         643 @is_assignment{@q} = (1) x scalar(@q);
2694              
2695             # These tokens may be kept following an = deletion
2696 38         206 @q = qw(
2697             if unless or ||
2698             );
2699 38         84392 @keep_after_deleted_assignment{@q} = (1) x scalar(@q);
2700              
2701             } ## end BEGIN
2702              
2703             sub delete_unmatched_tokens {
2704 1703     1703 0 4131 my ( $rlines, $group_level ) = @_;
2705              
2706             # This is a important first step in vertical alignment in which
2707             # we remove as many obviously un-needed alignment tokens as possible.
2708             # This will prevent them from interfering with the final alignment.
2709              
2710             # Returns:
2711 1703         2932 my $max_lev_diff = 0; # used to avoid a call to prune_tree
2712 1703         2719 my $saw_side_comment = 0; # used to avoid a call for side comments
2713              
2714             # Handle no lines -- shouldn't happen
2715 1703 50       2642 return unless @{$rlines};
  1703         4265  
2716              
2717             # Handle a single line
2718 1703 100       2840 if ( @{$rlines} == 1 ) {
  1703         4566  
2719 1121         2481 my $line = $rlines->[0];
2720 1121         2365 my $jmax = $line->{'jmax'};
2721 1121         2397 my $length = $line->{'rfield_lengths'}->[$jmax];
2722 1121         2401 $saw_side_comment = $length > 0;
2723 1121         4755 return ( $max_lev_diff, $saw_side_comment );
2724             }
2725              
2726             # ignore hanging side comments in these operations
2727 582         1532 my @filtered = grep { !$_->{'is_hanging_side_comment'} } @{$rlines};
  1940         6065  
  582         1673  
2728 582         1662 my $rnew_lines = \@filtered;
2729              
2730 582         1137 $saw_side_comment = @filtered != @{$rlines};
  582         1403  
2731 582         2106 $max_lev_diff = 0;
2732              
2733             # nothing to do if all lines were hanging side comments
2734 582         1032 my $jmax = @{$rnew_lines} - 1;
  582         1314  
2735 582 100       1837 return ( $max_lev_diff, $saw_side_comment ) unless ( $jmax >= 0 );
2736              
2737             #----------------------------------------------------
2738             # Create a hash of alignment token info for each line
2739             #----------------------------------------------------
2740 581         2053 ( my $rline_hashes, my $requals_info, $saw_side_comment, $max_lev_diff )
2741             = make_alignment_info( $group_level, $rnew_lines, $saw_side_comment );
2742              
2743             #------------------------------------------------------------
2744             # Find independent subgroups of lines. Neighboring subgroups
2745             # do not have a common alignment token.
2746             #------------------------------------------------------------
2747 581         1287 my @subgroups;
2748 581         1545 push @subgroups, [ 0, $jmax ];
2749 581         1805 foreach my $jl ( 0 .. $jmax - 1 ) {
2750 1313 100       3690 if ( $rnew_lines->[$jl]->{'end_group'} ) {
2751 72         183 $subgroups[-1]->[1] = $jl;
2752 72         249 push @subgroups, [ $jl + 1, $jmax ];
2753             }
2754             }
2755              
2756             #-----------------------------------------------------------
2757             # PASS 1 over subgroups to remove unmatched alignment tokens
2758             #-----------------------------------------------------------
2759             delete_unmatched_tokens_main_loop(
2760 581         2932 $group_level, $rnew_lines, \@subgroups,
2761             $rline_hashes, $requals_info
2762             );
2763              
2764             #----------------------------------------------------------------
2765             # PASS 2: Construct a tree of matched lines and delete some small
2766             # deeper levels of tokens. They also block good alignments.
2767             #----------------------------------------------------------------
2768 581 100       2565 prune_alignment_tree($rnew_lines) if ($max_lev_diff);
2769              
2770             #--------------------------------------------
2771             # PASS 3: compare all lines for common tokens
2772             #--------------------------------------------
2773 581         2763 match_line_pairs( $rlines, $rnew_lines, \@subgroups, $group_level );
2774              
2775 581         5982 return ( $max_lev_diff, $saw_side_comment );
2776             } ## end sub delete_unmatched_tokens
2777              
2778             sub make_alignment_info {
2779              
2780 581     581 0 1696 my ( $group_level, $rnew_lines, $saw_side_comment ) = @_;
2781              
2782             #------------------------------------------------------------
2783             # Loop to create a hash of alignment token info for each line
2784             #------------------------------------------------------------
2785 581         1290 my $rline_hashes = [];
2786 581         1159 my @equals_info;
2787             my @line_info; # no longer used
2788 581         1017 my $jmax = @{$rnew_lines} - 1;
  581         1385  
2789 581         1142 my $max_lev_diff = 0;
2790 581         1216 foreach my $line ( @{$rnew_lines} ) {
  581         1691  
2791 1894         3517 my $rhash = {};
2792 1894         3695 my $rtokens = $line->{'rtokens'};
2793 1894         3263 my $rpatterns = $line->{'rpatterns'};
2794 1894         2860 my $i = 0;
2795 1894         4584 my ( $i_eq, $tok_eq, $pat_eq );
2796 1894         0 my ( $lev_min, $lev_max );
2797 1894         2682 foreach my $tok ( @{$rtokens} ) {
  1894         3650  
2798 5158         9527 my ( $raw_tok, $lev, $tag, $tok_count ) =
2799             decode_alignment_token($tok);
2800              
2801 5158 100       11492 if ( $tok ne '#' ) {
2802 3264 100       6209 if ( !defined($lev_min) ) {
2803 1775         2917 $lev_min = $lev;
2804 1775         2884 $lev_max = $lev;
2805             }
2806             else {
2807 1489 100       3922 if ( $lev < $lev_min ) { $lev_min = $lev }
  75         204  
2808 1489 100       3095 if ( $lev > $lev_max ) { $lev_max = $lev }
  256         512  
2809             }
2810             }
2811             else {
2812 1894 100       4800 if ( !$saw_side_comment ) {
2813 1705         3838 my $length = $line->{'rfield_lengths'}->[ $i + 1 ];
2814 1705   66     5413 $saw_side_comment ||= $length;
2815             }
2816             }
2817              
2818             # Possible future upgrade: for multiple matches,
2819             # record [$i1, $i2, ..] instead of $i
2820 5158         16800 $rhash->{$tok} =
2821             [ $i, undef, undef, $raw_tok, $lev, $tag, $tok_count ];
2822              
2823             # remember the first equals at line level
2824 5158 100 100     16945 if ( !defined($i_eq) && $raw_tok eq '=' ) {
2825              
2826 516 100       1453 if ( $lev eq $group_level ) {
2827 405         689 $i_eq = $i;
2828 405         734 $tok_eq = $tok;
2829 405         898 $pat_eq = $rpatterns->[$i];
2830             }
2831             }
2832 5158         9293 $i++;
2833             }
2834 1894         3156 push @{$rline_hashes}, $rhash;
  1894         3751  
2835 1894         5902 push @equals_info, [ $i_eq, $tok_eq, $pat_eq ];
2836 1894         5943 push @line_info, [ $lev_min, $lev_max ];
2837 1894 100       4344 if ( defined($lev_min) ) {
2838 1775         3164 my $lev_diff = $lev_max - $lev_min;
2839 1775 100       4843 if ( $lev_diff > $max_lev_diff ) { $max_lev_diff = $lev_diff }
  160         430  
2840             }
2841             }
2842              
2843             #----------------------------------------------------
2844             # Loop to compare each line pair and remember matches
2845             #----------------------------------------------------
2846 581         1822 my $rtok_hash = {};
2847 581         1310 my $nr = 0;
2848 581         1988 foreach my $jl ( 0 .. $jmax - 1 ) {
2849 1313         2184 my $nl = $nr;
2850 1313         2035 $nr = 0;
2851 1313         2188 my $jr = $jl + 1;
2852 1313         2295 my $rhash_l = $rline_hashes->[$jl];
2853 1313         2133 my $rhash_r = $rline_hashes->[$jr];
2854 1313         2824 foreach my $tok ( keys %{$rhash_l} ) {
  1313         4630  
2855 3146 100       6770 if ( defined( $rhash_r->{$tok} ) ) {
2856 2662         4128 my $il = $rhash_l->{$tok}->[0];
2857 2662         4055 my $ir = $rhash_r->{$tok}->[0];
2858 2662         4083 $rhash_l->{$tok}->[2] = $ir;
2859 2662         3841 $rhash_r->{$tok}->[1] = $il;
2860 2662 100       6027 if ( $tok ne '#' ) {
2861 1349         2040 push @{ $rtok_hash->{$tok} }, ( $jl, $jr );
  1349         3684  
2862 1349         2610 $nr++;
2863             }
2864             }
2865             }
2866              
2867             # Set a line break if no matching tokens between these lines
2868             # (this is not strictly necessary now but does not hurt)
2869 1313 100 100     5045 if ( $nr == 0 && $nl > 0 ) {
2870 36         128 $rnew_lines->[$jl]->{'end_group'} = 1;
2871             }
2872              
2873             # Also set a line break if both lines have simple equals but with
2874             # different leading characters in patterns. This check is similar
2875             # to one in sub check_match, and will prevent sub
2876             # prune_alignment_tree from removing alignments which otherwise
2877             # should be kept. This fix is rarely needed, but it can
2878             # occasionally improve formatting.
2879             # For example:
2880             # my $name = $this->{Name};
2881             # $type = $this->ctype($genlooptype) if defined $genlooptype;
2882             # my $declini = ( $asgnonly ? "" : "\t$type *" );
2883             # my $cast = ( $type ? "($type *)" : "" );
2884             # The last two lines start with 'my' and will not match the
2885             # previous line starting with $type, so we do not want
2886             # prune_alignment tree to delete their ? : alignments at a deeper
2887             # level.
2888 1313         2501 my ( $i_eq_l, $tok_eq_l, $pat_eq_l ) = @{ $equals_info[$jl] };
  1313         3242  
2889 1313         2069 my ( $i_eq_r, $tok_eq_r, $pat_eq_r ) = @{ $equals_info[$jr] };
  1313         2716  
2890 1313 100 100     5147 if ( defined($i_eq_l) && defined($i_eq_r) ) {
2891              
2892             # Also, do not align equals across a change in ci level
2893             my $ci_jump = $rnew_lines->[$jl]->{'ci_level'} !=
2894 199         596 $rnew_lines->[$jr]->{'ci_level'};
2895              
2896 199 100 66     2180 if (
      66        
      100        
      100        
2897             $tok_eq_l eq $tok_eq_r
2898             && $i_eq_l == 0
2899             && $i_eq_r == 0
2900             && ( substr( $pat_eq_l, 0, 1 ) ne substr( $pat_eq_r, 0, 1 )
2901             || $ci_jump )
2902             )
2903             {
2904 12         50 $rnew_lines->[$jl]->{'end_group'} = 1;
2905             }
2906             }
2907             }
2908 581         4272 return ( $rline_hashes, \@equals_info, $saw_side_comment,
2909             $max_lev_diff );
2910             } ## end sub make_alignment_info
2911              
2912             sub delete_unmatched_tokens_main_loop {
2913              
2914             my (
2915 581     581 0 1809 $group_level, $rnew_lines, $rsubgroups,
2916             $rline_hashes, $requals_info
2917             ) = @_;
2918              
2919             #--------------------------------------------------------------
2920             # Main loop over subgroups to remove unmatched alignment tokens
2921             #--------------------------------------------------------------
2922              
2923             # flag to allow skipping pass 2 - not currently used
2924 581         1017 my $saw_large_group;
2925              
2926 581         1458 my $has_terminal_match = $rnew_lines->[-1]->{'j_terminal_match'};
2927              
2928 581         1129 foreach my $item ( @{$rsubgroups} ) {
  581         1545  
2929 653         1189 my ( $jbeg, $jend ) = @{$item};
  653         1649  
2930              
2931 653         1580 my $nlines = $jend - $jbeg + 1;
2932              
2933             #---------------------------------------------------
2934             # Look for complete if/elsif/else and ternary blocks
2935             #---------------------------------------------------
2936              
2937             # We are looking for a common '$dividing_token' like these:
2938              
2939             # if ( $b and $s ) { $p->{'type'} = 'a'; }
2940             # elsif ($b) { $p->{'type'} = 'b'; }
2941             # elsif ($s) { $p->{'type'} = 's'; }
2942             # else { $p->{'type'} = ''; }
2943             # ^----------- dividing_token
2944              
2945             # my $severity =
2946             # !$routine ? '[PFX]'
2947             # : $routine =~ /warn.*_d\z/ ? '[DS]'
2948             # : $routine =~ /ck_warn/ ? 'W'
2949             # : $routine =~ /ckWARN\d*reg_d/ ? 'S'
2950             # : $routine =~ /ckWARN\d*reg/ ? 'W'
2951             # : $routine =~ /vWARN\d/ ? '[WDS]'
2952             # : '[PFX]';
2953             # ^----------- dividing_token
2954              
2955             # Only look for groups which are more than 2 lines long. Two lines
2956             # can get messed up doing this, probably due to the various
2957             # two-line rules.
2958              
2959 653         1433 my $dividing_token;
2960             my %token_line_count;
2961 653 100       1986 if ( $nlines > 2 ) {
2962              
2963 301         951 foreach my $jj ( $jbeg .. $jend ) {
2964 1281         2521 my %seen;
2965 1281         2105 my $line = $rnew_lines->[$jj];
2966 1281         2033 my $rtokens = $line->{'rtokens'};
2967 1281         1782 foreach my $tok ( @{$rtokens} ) {
  1281         2299  
2968 3581 100       6791 if ( !$seen{$tok} ) {
2969 3065         4671 $seen{$tok}++;
2970 3065         6084 $token_line_count{$tok}++;
2971             }
2972             }
2973             }
2974              
2975 301         1623 foreach my $tok ( keys %token_line_count ) {
2976 927 100       3364 if ( $token_line_count{$tok} == $nlines ) {
2977 562 100 100     3209 if ( substr( $tok, 0, 1 ) eq '?'
      100        
2978             || substr( $tok, 0, 1 ) eq '{'
2979             && $tok =~ /^\{\d+if/ )
2980             {
2981 21         67 $dividing_token = $tok;
2982 21         67 last;
2983             }
2984             }
2985             }
2986             }
2987              
2988             #-------------------------------------------------------------
2989             # Loop over subgroup lines to remove unwanted alignment tokens
2990             #-------------------------------------------------------------
2991 653         2046 foreach my $jj ( $jbeg .. $jend ) {
2992 1894         3313 my $line = $rnew_lines->[$jj];
2993 1894         3078 my $rtokens = $line->{'rtokens'};
2994 1894         2956 my $rhash = $rline_hashes->[$jj];
2995 1894         3055 my $i_eq = $requals_info->[$jj]->[0];
2996 1894         2735 my @idel;
2997 1894         2765 my $imax = @{$rtokens} - 2;
  1894         3464  
2998 1894         2993 my $delete_above_level;
2999             my $deleted_assignment_token;
3000              
3001 1894         2936 my $saw_dividing_token = EMPTY_STRING;
3002 1894   100     8694 $saw_large_group ||= $nlines > 2 && $imax > 1;
      100        
3003              
3004             # Loop over all alignment tokens
3005 1894         3736 foreach my $i ( 0 .. $imax ) {
3006 3264         5309 my $tok = $rtokens->[$i];
3007 3264 50       6396 next if ( $tok eq '#' ); # shouldn't happen
3008             my ( $iii, $il, $ir, $raw_tok, $lev, $tag, $tok_count ) =
3009 3264         4467 @{ $rhash->{$tok} };
  3264         8270  
3010              
3011             #------------------------------------------------------
3012             # Here is the basic RULE: remove an unmatched alignment
3013             # which does not occur in the surrounding lines.
3014             #------------------------------------------------------
3015 3264   100     8742 my $delete_me = !defined($il) && !defined($ir);
3016              
3017             # Apply any user controls. Note that not all lines pass
3018             # this way so they have to be applied elsewhere too.
3019 3264         4576 my $align_ok = 1;
3020 3264 100       6246 if (%valign_control_hash) {
3021 31         48 $align_ok = $valign_control_hash{$raw_tok};
3022 31 100       73 $align_ok = $valign_control_default
3023             unless defined($align_ok);
3024 31   100     90 $delete_me ||= !$align_ok;
3025             }
3026              
3027             # But now we modify this with exceptions...
3028              
3029             # EXCEPTION 1: If we are in a complete ternary or
3030             # if/elsif/else group, and this token is not on every line
3031             # of the group, should we delete it to preserve overall
3032             # alignment?
3033 3264 100       6051 if ($dividing_token) {
3034 147 100       300 if ( $token_line_count{$tok} >= $nlines ) {
3035 120   100     419 $saw_dividing_token ||= $tok eq $dividing_token;
3036             }
3037             else {
3038              
3039             # For shorter runs, delete toks to save alignment.
3040             # For longer runs, keep toks after the '{' or '?'
3041             # to allow sub-alignments within braces. The
3042             # number 5 lines is arbitrary but seems to work ok.
3043 27   66     96 $delete_me ||=
      100        
3044             ( $nlines < 5 || !$saw_dividing_token );
3045             }
3046             }
3047              
3048             # EXCEPTION 2: Remove all tokens above a certain level
3049             # following a previous deletion. For example, we have to
3050             # remove tagged higher level alignment tokens following a
3051             # '=>' deletion because the tags of higher level tokens
3052             # will now be incorrect. For example, this will prevent
3053             # aligning commas as follows after deleting the second '=>'
3054             # $w->insert(
3055             # ListBox => origin => [ 270, 160 ],
3056             # size => [ 200, 55 ],
3057             # );
3058 3264 100       6093 if ( defined($delete_above_level) ) {
3059 280 100       1016 if ( $lev > $delete_above_level ) {
3060 132   100     411 $delete_me ||= 1;
3061             }
3062 148         380 else { $delete_above_level = undef }
3063             }
3064              
3065             # EXCEPTION 3: Remove all but certain tokens after an
3066             # assignment deletion.
3067 3264 100 100     6173 if (
      100        
3068             $deleted_assignment_token
3069             && ( $lev > $group_level
3070             || !$keep_after_deleted_assignment{$raw_tok} )
3071             )
3072             {
3073 41   100     127 $delete_me ||= 1;
3074             }
3075              
3076             # EXCEPTION 4: Do not touch the first line of a 2 line
3077             # terminal match, such as below, because j_terminal has
3078             # already been set.
3079             # if ($tag) { $tago = "<$tag>"; $tagc = "</$tag>"; }
3080             # else { $tago = $tagc = ''; }
3081             # But see snippets 'else1.t' and 'else2.t'
3082 3264 100 100     8242 $delete_me = 0
      100        
3083             if ( $jj == $jbeg
3084             && $has_terminal_match
3085             && $nlines == 2 );
3086              
3087             # EXCEPTION 5: misc additional rules for commas and equals
3088 3264 100 100     7671 if ( $delete_me && $tok_count == 1 ) {
3089              
3090             # okay to delete second and higher copies of a token
3091              
3092             # for a comma...
3093 721 100       1879 if ( $raw_tok eq ',' ) {
3094              
3095             # Do not delete commas before an equals
3096 262 100 100     980 $delete_me = 0
3097             if ( defined($i_eq) && $i < $i_eq );
3098              
3099             # Do not delete line-level commas
3100 262 100       696 $delete_me = 0 if ( $lev <= $group_level );
3101             }
3102              
3103             # For an assignment at group level..
3104 721 100 100     2661 if ( $is_assignment{$raw_tok}
3105             && $lev == $group_level )
3106             {
3107              
3108             # Do not delete if it is the last alignment of
3109             # multiple tokens; this will prevent some
3110             # undesirable alignments
3111 106 100 100     653 if ( $imax > 0 && $i == $imax ) {
3112 12         36 $delete_me = 0;
3113             }
3114              
3115             # Otherwise, set a flag to delete most
3116             # remaining tokens
3117 94         237 else { $deleted_assignment_token = $raw_tok }
3118             }
3119             }
3120              
3121             # Do not let a user exclusion be reactivated by above rules
3122 3264   66     10020 $delete_me ||= !$align_ok;
3123              
3124             #------------------------------------
3125             # Add this token to the deletion list
3126             #------------------------------------
3127 3264 100       7088 if ($delete_me) {
3128 661         1226 push @idel, $i;
3129              
3130             # update deletion propagation flags
3131 661 100 66     2171 if ( !defined($delete_above_level)
3132             || $lev < $delete_above_level )
3133             {
3134              
3135             # delete all following higher level alignments
3136 529         953 $delete_above_level = $lev;
3137              
3138             # but keep deleting after => to next lower level
3139             # to avoid some bizarre alignments
3140 529 100       1622 if ( $raw_tok eq '=>' ) {
3141 53         145 $delete_above_level = $lev - 1;
3142             }
3143             }
3144             }
3145             } # End loop over alignment tokens
3146              
3147             # Process all deletion requests for this line
3148 1894 100       5858 if (@idel) {
3149 413         1759 delete_selected_tokens( $line, \@idel );
3150             }
3151             } # End loop over lines
3152             } ## end main loop over subgroups
3153              
3154 581         1560 return;
3155             } ## end sub delete_unmatched_tokens_main_loop
3156             }
3157              
3158             sub match_line_pairs {
3159 581     581 0 1845 my ( $rlines, $rnew_lines, $rsubgroups, $group_level ) = @_;
3160              
3161             # Compare each pair of lines and save information about common matches
3162             # $rlines = list of lines including hanging side comments
3163             # $rnew_lines = list of lines without any hanging side comments
3164             # $rsubgroups = list of subgroups of the new lines
3165              
3166             # TODO:
3167             # Maybe change: imax_pair => pair_match_info = ref to array
3168             # = [$imax_align, $rMsg, ... ]
3169             # This may eventually have multi-level match info
3170              
3171             # Previous line vars
3172 581         2294 my ( $line_m, $rtokens_m, $rpatterns_m, $rfield_lengths_m, $imax_m,
3173             $list_type_m, $ci_level_m );
3174              
3175             # Current line vars
3176 581         0 my ( $line, $rtokens, $rpatterns, $rfield_lengths, $imax, $list_type,
3177             $ci_level );
3178              
3179             # loop over subgroups
3180 581         1008 foreach my $item ( @{$rsubgroups} ) {
  581         1432  
3181 653         1103 my ( $jbeg, $jend ) = @{$item};
  653         1502  
3182 653         1744 my $nlines = $jend - $jbeg + 1;
3183 653 100       2001 next unless ( $nlines > 1 );
3184              
3185             # loop over lines in a subgroup
3186 562         1591 foreach my $jj ( $jbeg .. $jend ) {
3187              
3188 1803         2762 $line_m = $line;
3189 1803         2595 $rtokens_m = $rtokens;
3190 1803         2469 $rpatterns_m = $rpatterns;
3191 1803         2622 $rfield_lengths_m = $rfield_lengths;
3192 1803         2619 $imax_m = $imax;
3193 1803         2728 $list_type_m = $list_type;
3194 1803         2560 $ci_level_m = $ci_level;
3195              
3196 1803         2969 $line = $rnew_lines->[$jj];
3197 1803         2952 $rtokens = $line->{'rtokens'};
3198 1803         2841 $rpatterns = $line->{'rpatterns'};
3199 1803         2836 $rfield_lengths = $line->{'rfield_lengths'};
3200 1803         2475 $imax = @{$rtokens} - 2;
  1803         2817  
3201 1803         3075 $list_type = $line->{'list_type'};
3202 1803         2852 $ci_level = $line->{'ci_level'};
3203              
3204             # nothing to do for first line
3205 1803 100       4159 next if ( $jj == $jbeg );
3206              
3207 1241         2979 my $ci_jump = $ci_level - $ci_level_m;
3208              
3209 1241 100       3221 my $imax_min = $imax_m < $imax ? $imax_m : $imax;
3210              
3211 1241         2092 my $imax_align = -1;
3212              
3213             # find number of leading common tokens
3214              
3215             #---------------------------------
3216             # No match to hanging side comment
3217             #---------------------------------
3218 1241 50 100     4876 if ( $line->{'is_hanging_side_comment'} ) {
    100          
3219              
3220             # Should not get here; HSC's have been filtered out
3221 0         0 $imax_align = -1;
3222             }
3223              
3224             #-----------------------------
3225             # Handle comma-separated lists
3226             #-----------------------------
3227             elsif ( $list_type && $list_type eq $list_type_m ) {
3228              
3229             # do not align lists across a ci jump with new list method
3230 488 50       1178 if ($ci_jump) { $imax_min = -1 }
  0         0  
3231              
3232 488         912 my $i_nomatch = $imax_min + 1;
3233 488         1051 foreach my $i ( 0 .. $imax_min ) {
3234 883         1487 my $tok = $rtokens->[$i];
3235 883         1364 my $tok_m = $rtokens_m->[$i];
3236 883 50       2099 if ( $tok ne $tok_m ) {
3237 0         0 $i_nomatch = $i;
3238 0         0 last;
3239             }
3240             }
3241              
3242 488         904 $imax_align = $i_nomatch - 1;
3243             }
3244              
3245             #-----------------
3246             # Handle non-lists
3247             #-----------------
3248             else {
3249 753         1531 my $i_nomatch = $imax_min + 1;
3250 753         1551 foreach my $i ( 0 .. $imax_min ) {
3251 739         1492 my $tok = $rtokens->[$i];
3252 739         1235 my $tok_m = $rtokens_m->[$i];
3253 739 100       1700 if ( $tok ne $tok_m ) {
3254 19         41 $i_nomatch = $i;
3255 19         57 last;
3256             }
3257              
3258 720         1374 my $pat = $rpatterns->[$i];
3259 720         1165 my $pat_m = $rpatterns_m->[$i];
3260              
3261             # If patterns don't match, we have to be careful...
3262 720 100       1803 if ( $pat_m ne $pat ) {
3263 166         395 my $pad =
3264             $rfield_lengths->[$i] - $rfield_lengths_m->[$i];
3265 166         538 my ( $match_code, $rmsg ) =
3266             compare_patterns( $group_level,
3267             $tok, $tok_m, $pat, $pat_m, $pad );
3268 166 100       553 if ($match_code) {
3269 8 100       35 if ( $match_code == 1 ) { $i_nomatch = $i }
  7 50       18  
3270 1         2 elsif ( $match_code == 2 ) { $i_nomatch = 0 }
3271 8         26 last;
3272             }
3273             }
3274             }
3275 753         1274 $imax_align = $i_nomatch - 1;
3276             }
3277              
3278 1241         2689 $line_m->{'imax_pair'} = $imax_align;
3279              
3280             } ## end loop over lines
3281              
3282             # Put fence at end of subgroup
3283 562         1986 $line->{'imax_pair'} = -1;
3284              
3285             } ## end loop over subgroups
3286              
3287             # if there are hanging side comments, propagate the pair info down to them
3288             # so that lines can just look back one line for their pair info.
3289 581 100       1157 if ( @{$rlines} > @{$rnew_lines} ) {
  581         1191  
  581         1818  
3290 24         65 my $last_pair_info = -1;
3291 24         45 foreach my $line ( @{$rlines} ) {
  24         61  
3292 95 100       219 if ( $line->{'is_hanging_side_comment'} ) {
3293 39         88 $line->{'imax_pair'} = $last_pair_info;
3294             }
3295             else {
3296 56         110 $last_pair_info = $line->{'imax_pair'};
3297             }
3298             }
3299             }
3300 581         1395 return;
3301             } ## end sub match_line_pairs
3302              
3303             sub compare_patterns {
3304              
3305 166     166 0 540 my ( $group_level, $tok, $tok_m, $pat, $pat_m, $pad ) = @_;
3306              
3307             # helper routine for sub match_line_pairs to decide if patterns in two
3308             # lines match well enough..Given
3309             # $tok_m, $pat_m = token and pattern of first line
3310             # $tok, $pat = token and pattern of second line
3311             # $pad = 0 if no padding is needed, !=0 otherwise
3312             # return code:
3313             # 0 = patterns match, continue
3314             # 1 = no match
3315             # 2 = no match, and lines do not match at all
3316              
3317 166         331 my $GoToMsg = EMPTY_STRING;
3318 166         285 my $return_code = 0;
3319              
3320 38     38   440 use constant EXPLAIN_COMPARE_PATTERNS => 0;
  38         133  
  38         46736  
3321              
3322 166         359 my ( $alignment_token, $lev, $tag, $tok_count ) =
3323             decode_alignment_token($tok);
3324              
3325             # We have to be very careful about aligning commas
3326             # when the pattern's don't match, because it can be
3327             # worse to create an alignment where none is needed
3328             # than to omit one. Here's an example where the ','s
3329             # are not in named containers. The first line below
3330             # should not match the next two:
3331             # ( $a, $b ) = ( $b, $r );
3332             # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
3333             # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
3334 166 100       857 if ( $alignment_token eq ',' ) {
    100          
    100          
3335              
3336             # do not align commas unless they are in named
3337             # containers
3338 26 100       129 if ( $tok !~ /[A-Za-z]/ ) {
3339 3         8 $return_code = 1;
3340 3         7 $GoToMsg = "do not align commas in unnamed containers";
3341             }
3342             else {
3343 23         51 $return_code = 0;
3344             }
3345             }
3346              
3347             # do not align parens unless patterns match;
3348             # large ugly spaces can occur in math expressions.
3349             elsif ( $alignment_token eq '(' ) {
3350              
3351             # But we can allow a match if the parens don't
3352             # require any padding.
3353 4 50       17 if ( $pad != 0 ) {
3354 4         8 $return_code = 1;
3355 4         11 $GoToMsg = "do not align '(' unless patterns match or pad=0";
3356             }
3357             else {
3358 0         0 $return_code = 0;
3359             }
3360             }
3361              
3362             # Handle an '=' alignment with different patterns to
3363             # the left.
3364             elsif ( $alignment_token eq '=' ) {
3365              
3366             # It is best to be a little restrictive when
3367             # aligning '=' tokens. Here is an example of
3368             # two lines that we will not align:
3369             # my $variable=6;
3370             # $bb=4;
3371             # The problem is that one is a 'my' declaration,
3372             # and the other isn't, so they're not very similar.
3373             # We will filter these out by comparing the first
3374             # letter of the pattern. This is crude, but works
3375             # well enough.
3376 16 50       142 if ( substr( $pat_m, 0, 1 ) ne substr( $pat, 0, 1 ) ) {
    100          
3377 0         0 $GoToMsg = "first character before equals differ";
3378 0         0 $return_code = 1;
3379             }
3380              
3381             # The introduction of sub 'prune_alignment_tree'
3382             # enabled alignment of lists left of the equals with
3383             # other scalar variables. For example:
3384             # my ( $D, $s, $e ) = @_;
3385             # my $d = length $D;
3386             # my $c = $e - $s - $d;
3387              
3388             # But this would change formatting of a lot of scripts,
3389             # so for now we prevent alignment of comma lists on the
3390             # left with scalars on the left. We will also prevent
3391             # any partial alignments.
3392              
3393             # set return code 2 if the = is at line level, but
3394             # set return code 1 if the = is below line level, i.e.
3395             # sub new { my ( $p, $v ) = @_; bless \$v, $p }
3396             # sub iter { my ($x) = @_; return undef if $$x < 0; return $$x--; }
3397              
3398             elsif ( ( index( $pat_m, ',' ) >= 0 ) ne ( index( $pat, ',' ) >= 0 ) ) {
3399 1         2 $GoToMsg = "mixed commas/no-commas before equals";
3400 1         4 $return_code = 1;
3401 1 50       5 if ( $lev eq $group_level ) {
3402 1         3 $return_code = 2;
3403             }
3404             }
3405             else {
3406 15         42 $return_code = 0;
3407             }
3408             }
3409             else {
3410 120         230 $return_code = 0;
3411             }
3412              
3413 166         284 EXPLAIN_COMPARE_PATTERNS
3414             && $return_code
3415             && print STDERR "no match because $GoToMsg\n";
3416              
3417 166         467 return ( $return_code, \$GoToMsg );
3418              
3419             } ## end sub compare_patterns
3420              
3421             sub fat_comma_to_comma {
3422 761     761 0 1814 my ($str) = @_;
3423              
3424             # We are changing '=>' to ',' and removing any trailing decimal count
3425             # because currently fat commas have a count and commas do not.
3426             # For example, we will change '=>2+{-3.2' into ',2+{-3'
3427 761 100       2315 if ( $str =~ /^=>([^\.]*)/ ) { $str = ',' . $1 }
  181         504  
3428 761         1906 return $str;
3429             } ## end sub fat_comma_to_comma
3430              
3431             sub get_line_token_info {
3432              
3433             # scan lines of tokens and return summary information about the range of
3434             # levels and patterns.
3435 152     152 0 411 my ($rlines) = @_;
3436              
3437             # First scan to check monotonicity. Here is an example of several
3438             # lines which are monotonic. The = is the lowest level, and
3439             # the commas are all one level deeper. So this is not nonmonotonic.
3440             # $$d{"weeks"} = [ "w", "wk", "wks", "week", "weeks" ];
3441             # $$d{"days"} = [ "d", "day", "days" ];
3442             # $$d{"hours"} = [ "h", "hr", "hrs", "hour", "hours" ];
3443 152         280 my @all_token_info;
3444 152         300 my $all_monotonic = 1;
3445 152         338 foreach my $jj ( 0 .. @{$rlines} - 1 ) {
  152         549  
3446 623         1206 my ($line) = $rlines->[$jj];
3447 623         1098 my $rtokens = $line->{'rtokens'};
3448 623         948 my $last_lev;
3449 623         934 my $is_monotonic = 1;
3450 623         921 my $i = -1;
3451 623         949 foreach my $tok ( @{$rtokens} ) {
  623         1170  
3452 1633         2163 $i++;
3453 1633         2788 my ( $raw_tok, $lev, $tag, $tok_count ) =
3454             decode_alignment_token($tok);
3455 1633         2729 push @{ $all_token_info[$jj] },
  1633         5038  
3456             [ $raw_tok, $lev, $tag, $tok_count ];
3457 1633 100       3974 last if ( $tok eq '#' );
3458 1010 100 100     3083 if ( $i > 0 && $lev < $last_lev ) { $is_monotonic = 0 }
  81         195  
3459 1010         1779 $last_lev = $lev;
3460             }
3461 623 100       2155 if ( !$is_monotonic ) { $all_monotonic = 0 }
  78         179  
3462             }
3463              
3464 152         603 my $rline_values = [];
3465 152         466 foreach my $jj ( 0 .. @{$rlines} - 1 ) {
  152         553  
3466 623         1206 my ($line) = $rlines->[$jj];
3467              
3468 623         1117 my $rtokens = $line->{'rtokens'};
3469 623         984 my $i = -1;
3470 623         1016 my ( $lev_min, $lev_max );
3471 623         996 my $token_pattern_max = EMPTY_STRING;
3472 623         881 my %saw_level;
3473 623         895 my $is_monotonic = 1;
3474              
3475             # find the index of the last token before the side comment
3476 623         865 my $imax = @{$rtokens} - 2;
  623         1132  
3477 623         997 my $imax_true = $imax;
3478              
3479             # If the entire group is monotonic, and the line ends in a comma list,
3480             # walk it back to the first such comma. this will have the effect of
3481             # making all trailing ragged comma lists match in the prune tree
3482             # routine. these trailing comma lists can better be handled by later
3483             # alignment rules.
3484              
3485             # Treat fat commas the same as commas here by converting them to
3486             # commas. This will improve the chance of aligning the leading parts
3487             # of ragged lists.
3488              
3489 623         1580 my $tok_end = fat_comma_to_comma( $rtokens->[$imax] );
3490 623 100 100     2578 if ( $all_monotonic && $tok_end =~ /^,/ ) {
3491 142         294 my $ii = $imax - 1;
3492 142   100     485 while ( $ii >= 0
3493             && fat_comma_to_comma( $rtokens->[$ii] ) eq $tok_end )
3494             {
3495 93         163 $imax = $ii;
3496 93         227 $ii--;
3497             }
3498             }
3499              
3500             # make a first pass to find level range
3501 623         982 my $last_lev;
3502 623         947 foreach my $tok ( @{$rtokens} ) {
  623         1226  
3503 1540         2043 $i++;
3504 1540 100       2956 last if ( $i > $imax );
3505 917 50       1807 last if ( $tok eq '#' );
3506             my ( $raw_tok, $lev, $tag, $tok_count ) =
3507 917         1249 @{ $all_token_info[$jj]->[$i] };
  917         2103  
3508              
3509 917 50       1903 last if ( $tok eq '#' );
3510 917         1472 $token_pattern_max .= $tok;
3511 917         1644 $saw_level{$lev}++;
3512 917 100       1766 if ( !defined($lev_min) ) {
3513 523         857 $lev_min = $lev;
3514 523         794 $lev_max = $lev;
3515             }
3516             else {
3517 394 100       1031 if ( $lev < $lev_min ) { $lev_min = $lev; }
  51         127  
3518 394 100       876 if ( $lev > $lev_max ) { $lev_max = $lev; }
  118         213  
3519 394 100       809 if ( $lev < $last_lev ) { $is_monotonic = 0 }
  81         146  
3520             }
3521 917         1619 $last_lev = $lev;
3522             }
3523              
3524             # handle no levels
3525 623         1284 my $rtoken_patterns = {};
3526 623         1137 my $rtoken_indexes = {};
3527 623         2527 my @levs = sort keys %saw_level;
3528 623 100       1945 if ( !defined($lev_min) ) {
    100          
3529 100         194 $lev_min = -1;
3530 100         186 $lev_max = -1;
3531 100         255 $levs[0] = -1;
3532 100         360 $rtoken_patterns->{$lev_min} = EMPTY_STRING;
3533 100         252 $rtoken_indexes->{$lev_min} = [];
3534             }
3535              
3536             # handle one level
3537             elsif ( $lev_max == $lev_min ) {
3538 359         945 $rtoken_patterns->{$lev_max} = $token_pattern_max;
3539 359         1261 $rtoken_indexes->{$lev_max} = [ ( 0 .. $imax ) ];
3540             }
3541              
3542             # handle multiple levels
3543             else {
3544 164         431 $rtoken_patterns->{$lev_max} = $token_pattern_max;
3545 164         612 $rtoken_indexes->{$lev_max} = [ ( 0 .. $imax ) ];
3546              
3547 164         399 my $lev_top = pop @levs; # already did max level
3548 164         324 my $itok = -1;
3549 164         289 foreach my $tok ( @{$rtokens} ) {
  164         378  
3550 688         940 $itok++;
3551 688 100       1346 last if ( $itok > $imax );
3552             my ( $raw_tok, $lev, $tag, $tok_count ) =
3553 524         722 @{ $all_token_info[$jj]->[$itok] };
  524         1244  
3554 524 50       1127 last if ( $raw_tok eq '#' );
3555 524         904 foreach my $lev_test (@levs) {
3556 552 100       1220 next if ( $lev > $lev_test );
3557 272         622 $rtoken_patterns->{$lev_test} .= $tok;
3558 272         399 push @{ $rtoken_indexes->{$lev_test} }, $itok;
  272         779  
3559             }
3560             }
3561 164         469 push @levs, $lev_top;
3562             }
3563              
3564 623         1058 push @{$rline_values},
  623         2664  
3565             [
3566             $lev_min, $lev_max, $rtoken_patterns, \@levs,
3567             $rtoken_indexes, $is_monotonic, $imax_true, $imax,
3568             ];
3569              
3570             # debug
3571 623         1881 0 && do {
3572             local $LIST_SEPARATOR = ')(';
3573             print "lev_min=$lev_min, lev_max=$lev_max, levels=(@levs)\n";
3574             foreach my $key ( sort keys %{$rtoken_patterns} ) {
3575             print "$key => $rtoken_patterns->{$key}\n";
3576             print "$key => @{$rtoken_indexes->{$key}}\n";
3577             }
3578             };
3579             } ## end loop over lines
3580 152         1421 return ( $rline_values, $all_monotonic );
3581             } ## end sub get_line_token_info
3582              
3583             sub prune_alignment_tree {
3584 152     152 0 455 my ($rlines) = @_;
3585 152         307 my $jmax = @{$rlines} - 1;
  152         397  
3586 152 50       537 return unless $jmax > 0;
3587              
3588             # Vertical alignment in perltidy is done as an iterative process. The
3589             # starting point is to mark all possible alignment tokens ('=', ',', '=>',
3590             # etc) for vertical alignment. Then we have to delete all alignments
3591             # which, if actually made, would detract from overall alignment. This
3592             # is done in several phases of which this is one.
3593              
3594             # In this routine we look at the alignments of a group of lines as a
3595             # hierarchical tree. We will 'prune' the tree to limited depths if that
3596             # will improve overall alignment at the lower depths.
3597             # For each line we will be looking at its alignment patterns down to
3598             # different fixed depths. For each depth, we include all lower depths and
3599             # ignore all higher depths. We want to see if we can get alignment of a
3600             # larger group of lines if we ignore alignments at some lower depth.
3601             # Here is an # example:
3602              
3603             # for (
3604             # [ '$var', sub { join $_, "bar" }, 0, "bar" ],
3605             # [ 'CONSTANT', sub { join "foo", "bar" }, 0, "bar" ],
3606             # [ 'CONSTANT', sub { join "foo", "bar", 3 }, 1, "barfoo3" ],
3607             # [ '$myvar', sub { my $var; join $var, "bar" }, 0, "bar" ],
3608             # );
3609              
3610             # In the above example, all lines have three commas at the lowest depth
3611             # (zero), so if there were no other alignments, these lines would all
3612             # align considering only the zero depth alignment token. But some lines
3613             # have additional comma alignments at the next depth, so we need to decide
3614             # if we should drop those to keep the top level alignments, or keep those
3615             # for some additional low level alignments at the expense losing some top
3616             # level alignments. In this case we will drop the deeper level commas to
3617             # keep the entire collection aligned. But in some cases the decision could
3618             # go the other way.
3619              
3620             # The tree for this example at the zero depth has one node containing
3621             # all four lines, since they are identical at zero level (three commas).
3622             # At depth one, there are three 'children' nodes, namely:
3623             # - lines 1 and 2, which have a single comma in the 'sub' at depth 1
3624             # - line 3, which has 2 commas at depth 1
3625             # - line4, which has a ';' and a ',' at depth 1
3626             # There are no deeper alignments in this example.
3627             # so the tree structure for this example is:
3628             #
3629             # depth 0 depth 1 depth 2
3630             # [lines 1-4] -- [line 1-2] - (empty)
3631             # | [line 3] - (empty)
3632             # | [line 4] - (empty)
3633              
3634             # We can carry this to any depth, but it is not really useful to go below
3635             # depth 2. To cleanly stop there, we will consider depth 2 to contain all
3636             # alignments at depth >=2.
3637              
3638 38     38   387 use constant EXPLAIN_PRUNE => 0;
  38         101  
  38         52393  
3639              
3640             #-------------------------------------------------------------------
3641             # Prune Tree Step 1. Start by scanning the lines and collecting info
3642             #-------------------------------------------------------------------
3643              
3644             # Note that the caller had this info but we have to redo this now because
3645             # alignment tokens may have been deleted.
3646 152         629 my ( $rline_values, $all_monotonic ) = get_line_token_info($rlines);
3647              
3648             # If all the lines have levels which increase monotonically from left to
3649             # right, then the sweep-left-to-right pass can do a better job of alignment
3650             # than pruning, and without deleting alignments.
3651 152 100       991 return if ($all_monotonic);
3652              
3653             # Contents of $rline_values
3654             # [
3655             # $lev_min, $lev_max, $rtoken_patterns, \@levs,
3656             # $rtoken_indexes, $is_monotonic, $imax_true, $imax,
3657             # ];
3658              
3659             # We can work to any depth, but there is little advantage to working
3660             # to a a depth greater than 2
3661 31         96 my $MAX_DEPTH = 2;
3662              
3663             # This arrays will hold the tree of alignment tokens at different depths
3664             # for these lines.
3665 31         66 my @match_tree;
3666              
3667             # Tree nodes contain these values:
3668             # $match_tree[$depth] = [$jbeg, $jend, $n_parent, $level, $pattern,
3669             # $nc_beg_p, $nc_end_p, $rindexes];
3670             # where
3671             # $depth = 0,1,2 = index of depth of the match
3672              
3673             # $jbeg beginning index j of the range of lines in this match
3674             # $jend ending index j of the range of lines in this match
3675             # $n_parent = index of the containing group at $depth-1, if it exists
3676             # $level = actual level of code being matched in this group
3677             # $pattern = alignment pattern being matched
3678             # $nc_beg_p = first child
3679             # $nc_end_p = last child
3680             # $rindexes = ref to token indexes
3681              
3682             # the patterns and levels of the current group being formed at each depth
3683 31         193 my ( @token_patterns_current, @levels_current, @token_indexes_current );
3684              
3685             # the patterns and levels of the next line being tested at each depth
3686 31         0 my ( @token_patterns_next, @levels_next, @token_indexes_next );
3687              
3688             #-----------------------------------------------------------
3689             # define a recursive worker subroutine for tree construction
3690             #-----------------------------------------------------------
3691              
3692             # This is a recursive routine which is called if a match condition changes
3693             # at any depth when a new line is encountered. It ends the match node
3694             # which changed plus all deeper nodes attached to it.
3695 31         0 my $end_node;
3696             $end_node = sub {
3697 321     321   591 my ( $depth, $jl, $n_parent ) = @_;
3698              
3699             # $depth is the tree depth
3700             # $jl is the index of the line
3701             # $n_parent is index of the parent node of this node
3702              
3703 321 100       671 return if ( $depth > $MAX_DEPTH );
3704              
3705             # end any current group at this depth
3706 234 100 100     744 if ( $jl >= 0
      66        
      100        
3707             && defined( $match_tree[$depth] )
3708 75         390 && @{ $match_tree[$depth] }
3709             && defined( $levels_current[$depth] ) )
3710             {
3711 69         127 $match_tree[$depth]->[-1]->[1] = $jl;
3712             }
3713              
3714             # Define the index of the node we will create below
3715 234         345 my $ng_self = 0;
3716 234 100       484 if ( defined( $match_tree[$depth] ) ) {
3717 75         125 $ng_self = @{ $match_tree[$depth] };
  75         162  
3718             }
3719              
3720             # end any next deeper child node(s)
3721 234         776 $end_node->( $depth + 1, $jl, $ng_self );
3722              
3723             # update the levels being matched
3724 234         471 $token_patterns_current[$depth] = $token_patterns_next[$depth];
3725 234         401 $token_indexes_current[$depth] = $token_indexes_next[$depth];
3726 234         362 $levels_current[$depth] = $levels_next[$depth];
3727              
3728             # Do not start a new group at this level if it is not being used
3729 234 100 66     933 if ( !defined( $levels_next[$depth] )
      66        
3730             || $depth > 0
3731             && $levels_next[$depth] <= $levels_next[ $depth - 1 ] )
3732             {
3733 120         197 return;
3734             }
3735              
3736             # Create a node for the next group at this depth. We initially assume
3737             # that it will continue to $jmax, and correct that later if the node
3738             # ends earlier.
3739 114         195 push @{ $match_tree[$depth] },
  114         548  
3740             [
3741             $jl + 1, $jmax, $n_parent, $levels_current[$depth],
3742             $token_patterns_current[$depth],
3743             undef, undef, $token_indexes_current[$depth],
3744             ];
3745              
3746 114         288 return;
3747 31         296 }; ## end sub end_node
3748              
3749             #-----------------------------------------------------
3750             # Prune Tree Step 2. Loop to form the tree of matches.
3751             #-----------------------------------------------------
3752 31         128 foreach my $jp ( 0 .. $jmax ) {
3753              
3754             # working with two adjacent line indexes, 'm'=minus, 'p'=plus
3755 236         400 my $jm = $jp - 1;
3756              
3757             # Pull out needed values for the next line
3758             my ( $lev_min, $lev_max, $rtoken_patterns, $rlevs, $rtoken_indexes,
3759             $is_monotonic, $imax_true, $imax )
3760 236         336 = @{ $rline_values->[$jp] };
  236         600  
3761              
3762             # Transfer levels and patterns for this line to the working arrays.
3763             # If the number of levels differs from our chosen MAX_DEPTH ...
3764             # if fewer than MAX_DEPTH: leave levels at missing depths undefined
3765             # if more than MAX_DEPTH: set the MAX_DEPTH level to be the maximum
3766 236         469 @levels_next = @{$rlevs}[ 0 .. $MAX_DEPTH ];
  236         557  
3767 236 100       371 if ( @{$rlevs} > $MAX_DEPTH ) {
  236         508  
3768 5         9 $levels_next[$MAX_DEPTH] = $rlevs->[-1];
3769             }
3770 236         358 my $depth = 0;
3771 236         385 foreach my $item (@levels_next) {
3772             $token_patterns_next[$depth] =
3773 708 100       1313 defined($item) ? $rtoken_patterns->{$item} : undef;
3774             $token_indexes_next[$depth] =
3775 708 100       1577 defined($item) ? $rtoken_indexes->{$item} : undef;
3776 708         1008 $depth++;
3777             }
3778              
3779             # Look for a change in match groups...
3780              
3781             # Initialize on the first line
3782 236 100       920 if ( $jp == 0 ) {
    100          
    50          
3783 31         70 my $n_parent;
3784 31         109 $end_node->( 0, $jm, $n_parent );
3785             }
3786              
3787             # End groups if a hard flag has been set
3788             elsif ( $rlines->[$jm]->{'end_group'} ) {
3789 10         40 my $n_parent;
3790 10         44 $end_node->( 0, $jm, $n_parent );
3791             }
3792              
3793             # Continue at hanging side comment
3794             elsif ( $rlines->[$jp]->{'is_hanging_side_comment'} ) {
3795 0         0 next;
3796             }
3797              
3798             # Otherwise see if anything changed and update the tree if so
3799             else {
3800 195         430 foreach my $depth ( 0 .. $MAX_DEPTH ) {
3801              
3802 401         685 my $def_current = defined( $token_patterns_current[$depth] );
3803 401         588 my $def_next = defined( $token_patterns_next[$depth] );
3804 401 100 100     1043 last unless ( $def_current || $def_next );
3805 253 100 100     1151 if ( !$def_current
      100        
3806             || !$def_next
3807             || $token_patterns_current[$depth] ne
3808             $token_patterns_next[$depth] )
3809             {
3810 46         117 my $n_parent;
3811 46 100 66     192 if ( $depth > 0 && defined( $match_tree[ $depth - 1 ] ) ) {
3812 23         41 $n_parent = @{ $match_tree[ $depth - 1 ] } - 1;
  23         49  
3813             }
3814 46         144 $end_node->( $depth, $jm, $n_parent );
3815 46         134 last;
3816             }
3817             }
3818             }
3819             } ## end loop to form tree of matches
3820              
3821             #---------------------------------------------------------
3822             # Prune Tree Step 3. Make links from parent to child nodes
3823             #---------------------------------------------------------
3824              
3825             # It seemed cleaner to do this as a separate step rather than during tree
3826             # construction. The children nodes have links up to the parent node which
3827             # created them. Now make links in the opposite direction, so the parents
3828             # can find the children. We store the range of children nodes ($nc_beg,
3829             # $nc_end) of each parent with two additional indexes in the original array.
3830             # These will be undef if no children.
3831 31         223 foreach my $depth ( reverse( 1 .. $MAX_DEPTH ) ) {
3832 62 100       188 next unless defined( $match_tree[$depth] );
3833 32         61 my $nc_max = @{ $match_tree[$depth] } - 1;
  32         117  
3834 32         68 my $np_now;
3835 32         99 foreach my $nc ( 0 .. $nc_max ) {
3836 50         115 my $np = $match_tree[$depth]->[$nc]->[2];
3837 50 50       141 if ( !defined($np) ) {
3838              
3839             # shouldn't happen
3840             #print STDERR "lost child $np at depth $depth\n";
3841 0         0 next;
3842             }
3843 50 100 100     254 if ( !defined($np_now) || $np != $np_now ) {
3844 35         63 $np_now = $np;
3845 35         106 $match_tree[ $depth - 1 ]->[$np]->[5] = $nc;
3846             }
3847 50         160 $match_tree[ $depth - 1 ]->[$np]->[6] = $nc;
3848             }
3849             } ## end loop to make links down to the child nodes
3850              
3851 31         59 EXPLAIN_PRUNE > 0 && do {
3852             print "Tree complete. Found these groups:\n";
3853             foreach my $depth ( 0 .. $MAX_DEPTH ) {
3854             Dump_tree_groups( \@{ $match_tree[$depth] }, "depth=$depth" );
3855             }
3856             };
3857              
3858             #------------------------------------------------------
3859             # Prune Tree Step 4. Make a list of nodes to be deleted
3860             #------------------------------------------------------
3861              
3862             # list of lines with tokens to be deleted:
3863             # [$jbeg, $jend, $level_keep]
3864             # $jbeg..$jend is the range of line indexes,
3865             # $level_keep is the minimum level to keep
3866 31         93 my @delete_list;
3867              
3868             # Not currently used:
3869             # Groups with ending comma lists and their range of sizes:
3870             # $ragged_comma_group{$id} = [ imax_group_min, imax_group_max ]
3871             ## my %ragged_comma_group;
3872              
3873             # We work with a list of nodes to visit at the next deeper depth.
3874             my @todo_list;
3875 31 50       126 if ( defined( $match_tree[0] ) ) {
3876 31         80 @todo_list = ( 0 .. @{ $match_tree[0] } - 1 );
  31         138  
3877             }
3878              
3879 31         100 foreach my $depth ( 0 .. $MAX_DEPTH ) {
3880 86 100       256 last unless (@todo_list);
3881 55         111 my @todo_next;
3882 55         117 foreach my $np (@todo_list) {
3883             my ( $jbeg_p, $jend_p, $np_p, $lev_p, $pat_p, $nc_beg_p, $nc_end_p,
3884             $rindexes_p )
3885 92         146 = @{ $match_tree[$depth]->[$np] };
  92         315  
3886 92         184 my $nlines_p = $jend_p - $jbeg_p + 1;
3887              
3888             # nothing to do if no children
3889 92 100       273 next unless defined($nc_beg_p);
3890              
3891             # Define the number of lines to either keep or delete a child node.
3892             # This is the key decision we have to make. We want to delete
3893             # short runs of matched lines, and keep long runs. It seems easier
3894             # for the eye to follow breaks in monotonic level changes than
3895             # non-monotonic level changes. For example, the following looks
3896             # best if we delete the lower level alignments:
3897              
3898             # [1] ~~ [];
3899             # [ ["foo"], ["bar"] ] ~~ [ qr/o/, qr/a/ ];
3900             # [ qr/o/, qr/a/ ] ~~ [ ["foo"], ["bar"] ];
3901             # [ "foo", "bar" ] ~~ [ qr/o/, qr/a/ ];
3902             # [ qr/o/, qr/a/ ] ~~ [ "foo", "bar" ];
3903             # $deep1 ~~ $deep1;
3904              
3905             # So we will use two thresholds.
3906 35         104 my $nmin_mono = $depth + 2;
3907 35         86 my $nmin_non_mono = $depth + 6;
3908 35 100       1130 if ( $nmin_mono > $nlines_p - 1 ) {
3909 21         47 $nmin_mono = $nlines_p - 1;
3910             }
3911 35 100       118 if ( $nmin_non_mono > $nlines_p - 1 ) {
3912 31         65 $nmin_non_mono = $nlines_p - 1;
3913             }
3914              
3915             # loop to keep or delete each child node
3916 35         107 foreach my $nc ( $nc_beg_p .. $nc_end_p ) {
3917             my ( $jbeg_c, $jend_c, $np_c, $lev_c, $pat_c, $nc_beg_c,
3918             $nc_end_c )
3919 50         89 = @{ $match_tree[ $depth + 1 ]->[$nc] };
  50         196  
3920 50         109 my $nlines_c = $jend_c - $jbeg_c + 1;
3921 50         106 my $is_monotonic = $rline_values->[$jbeg_c]->[5];
3922 50 100       185 my $nmin = $is_monotonic ? $nmin_mono : $nmin_non_mono;
3923 50 100       150 if ( $nlines_c < $nmin ) {
3924             ##print "deleting child, nlines=$nlines_c, nmin=$nmin\n";
3925 22         78 push @delete_list, [ $jbeg_c, $jend_c, $lev_p ];
3926             }
3927             else {
3928             ##print "keeping child, nlines=$nlines_c, nmin=$nmin\n";
3929 28         100 push @todo_next, $nc;
3930             }
3931             }
3932             }
3933 55         162 @todo_list = @todo_next;
3934             } ## end loop to mark nodes to delete
3935              
3936             #------------------------------------------------------------
3937             # Prune Tree Step 5. Loop to delete selected alignment tokens
3938             #------------------------------------------------------------
3939 31         142 foreach my $item (@delete_list) {
3940 22         39 my ( $jbeg, $jend, $level_keep ) = @{$item};
  22         54  
3941 22         51 foreach my $jj ( $jbeg .. $jend ) {
3942 28         56 my $line = $rlines->[$jj];
3943 28         50 my @idel;
3944 28         45 my $rtokens = $line->{'rtokens'};
3945 28         43 my $imax = @{$rtokens} - 2;
  28         72  
3946 28         66 foreach my $i ( 0 .. $imax ) {
3947 152         264 my $tok = $rtokens->[$i];
3948 152         256 my ( $raw_tok, $lev, $tag, $tok_count ) =
3949             decode_alignment_token($tok);
3950 152 100       376 if ( $lev > $level_keep ) {
3951 83         167 push @idel, $i;
3952             }
3953             }
3954 28 50       73 if (@idel) {
3955 28         81 delete_selected_tokens( $line, \@idel );
3956             }
3957             }
3958             } ## end loop to delete selected alignment tokens
3959              
3960 31         352 return;
3961             } ## end sub prune_alignment_tree
3962              
3963             sub Dump_tree_groups {
3964 0     0 0 0 my ( $rgroup, $msg ) = @_;
3965              
3966             # Debug routine
3967 0         0 print "$msg\n";
3968 0         0 local $LIST_SEPARATOR = ')(';
3969 0         0 foreach my $item ( @{$rgroup} ) {
  0         0  
3970 0         0 my @fix = @{$item};
  0         0  
3971 0 0       0 foreach my $val (@fix) { $val = "undef" unless defined $val; }
  0         0  
3972 0         0 $fix[4] = "...";
3973 0         0 print "(@fix)\n";
3974             }
3975 0         0 return;
3976             } ## end sub Dump_tree_groups
3977              
3978             { ## closure for sub is_marginal_match
3979              
3980             my %is_if_or;
3981             my %is_assignment;
3982             my %is_good_alignment;
3983              
3984             # This test did not give sufficiently better results to use as an update,
3985             # but the flag is worth keeping as a starting point for future testing.
3986 38     38   354 use constant TEST_MARGINAL_EQ_ALIGNMENT => 0;
  38         114  
  38         6109  
3987              
3988             BEGIN {
3989              
3990 38     38   231 my @q = qw(
3991             if unless or ||
3992             );
3993 38         197 @is_if_or{@q} = (1) x scalar(@q);
3994              
3995 38         178 @q = qw(
3996             = **= += *= &= <<= &&=
3997             -= /= |= >>= ||= //=
3998             .= %= ^=
3999             x=
4000             );
4001 38         294 @is_assignment{@q} = (1) x scalar(@q);
4002              
4003             # Vertically aligning on certain "good" tokens is usually okay
4004             # so we can be less restrictive in marginal cases.
4005 38         125 @q = qw( { ? => = );
4006 38         92 push @q, (',');
4007 38         213603 @is_good_alignment{@q} = (1) x scalar(@q);
4008             } ## end BEGIN
4009              
4010             sub is_marginal_match {
4011              
4012 254     254 0 892 my ( $line_0, $line_1, $group_level, $imax_align, $imax_prev ) = @_;
4013              
4014             # Decide if we should undo some or all of the common alignments of a
4015             # group of just two lines.
4016              
4017             # Given:
4018             # $line_0 and $line_1 - the two lines
4019             # $group_level = the indentation level of the group being processed
4020             # $imax_align = the maximum index of the common alignment tokens
4021             # of the two lines
4022             # $imax_prev = the maximum index of the common alignment tokens
4023             # with the line before $line_0 (=-1 of does not exist)
4024              
4025             # Return:
4026             # $is_marginal = true if the two lines should NOT be fully aligned
4027             # = false if the two lines can remain fully aligned
4028             # $imax_align = the index of the highest alignment token shared by
4029             # these two lines to keep if the match is marginal.
4030              
4031             # When we have an alignment group of just two lines like this, we are
4032             # working in the twilight zone of what looks good and what looks bad.
4033             # This routine is a collection of rules which work have been found to
4034             # work fairly well, but it will need to be updated from time to time.
4035              
4036 254         485 my $is_marginal = 0;
4037              
4038             #---------------------------------------
4039             # Always align certain special cases ...
4040             #---------------------------------------
4041 254 100 100     2182 if (
      100        
4042              
4043             # always keep alignments of a terminal else or ternary
4044             defined( $line_1->{'j_terminal_match'} )
4045              
4046             # always align lists
4047             || $line_0->{'list_type'}
4048              
4049             # always align hanging side comments
4050             || $line_1->{'is_hanging_side_comment'}
4051              
4052             )
4053             {
4054 127         414 return ( $is_marginal, $imax_align );
4055             }
4056              
4057 127         364 my $jmax_0 = $line_0->{'jmax'};
4058 127         322 my $jmax_1 = $line_1->{'jmax'};
4059 127         353 my $rtokens_1 = $line_1->{'rtokens'};
4060 127         281 my $rtokens_0 = $line_0->{'rtokens'};
4061 127         265 my $rfield_lengths_0 = $line_0->{'rfield_lengths'};
4062 127         278 my $rfield_lengths_1 = $line_1->{'rfield_lengths'};
4063 127         256 my $rpatterns_0 = $line_0->{'rpatterns'};
4064 127         271 my $rpatterns_1 = $line_1->{'rpatterns'};
4065 127         283 my $imax_next = $line_1->{'imax_pair'};
4066              
4067             # We will scan the alignment tokens and set a flag '$is_marginal' if
4068             # it seems that the an alignment would look bad.
4069 127         278 my $max_pad = 0;
4070 127         274 my $saw_good_alignment = 0;
4071 127         232 my $saw_if_or; # if we saw an 'if' or 'or' at group level
4072 127         298 my $raw_tokb = EMPTY_STRING; # first token seen at group level
4073 127         407 my $jfirst_bad;
4074             my $line_ending_fat_comma; # is last token just a '=>' ?
4075 127         0 my $j0_eq_pad;
4076 127         299 my $j0_max_pad = 0;
4077              
4078 127         499 foreach my $j ( 0 .. $jmax_1 - 2 ) {
4079 156         507 my ( $raw_tok, $lev, $tag, $tok_count ) =
4080             decode_alignment_token( $rtokens_1->[$j] );
4081 156 100 66     1055 if ( $raw_tok && $lev == $group_level ) {
4082 136 100       464 if ( !$raw_tokb ) { $raw_tokb = $raw_tok }
  117         258  
4083 136   100     669 $saw_if_or ||= $is_if_or{$raw_tok};
4084             }
4085              
4086             # When the first of the two lines ends in a bare '=>' this will
4087             # probably be marginal match. (For a bare =>, the next field length
4088             # will be 2 or 3, depending on side comment)
4089             $line_ending_fat_comma =
4090 156   100     995 $j == $jmax_1 - 2
4091             && $raw_tok eq '=>'
4092             && $rfield_lengths_0->[ $j + 1 ] <= 3;
4093              
4094 156         446 my $pad = $rfield_lengths_1->[$j] - $rfield_lengths_0->[$j];
4095 156 100       519 if ( $j == 0 ) {
4096             $pad += $line_1->{'leading_space_count'} -
4097 122         373 $line_0->{'leading_space_count'};
4098              
4099             # Remember the pad at a leading equals
4100 122 100 66     733 if ( $raw_tok eq '=' && $lev == $group_level ) {
4101 73         202 $j0_eq_pad = $pad;
4102 73         259 $j0_max_pad =
4103             0.5 * ( $rfield_lengths_1->[0] + $rfield_lengths_0->[0] );
4104 73 100       294 $j0_max_pad = 4 if ( $j0_max_pad < 4 );
4105             }
4106             }
4107              
4108 156 100       537 if ( $pad < 0 ) { $pad = -$pad }
  36         147  
4109 156 100       494 if ( $pad > $max_pad ) { $max_pad = $pad }
  87         194  
4110 156 100 100     866 if ( $is_good_alignment{$raw_tok} && !$line_ending_fat_comma ) {
4111 124         281 $saw_good_alignment = 1;
4112             }
4113             else {
4114 32 100       122 $jfirst_bad = $j unless defined($jfirst_bad);
4115             }
4116 156 100       638 if ( $rpatterns_0->[$j] ne $rpatterns_1->[$j] ) {
4117              
4118             # Flag this as a marginal match since patterns differ.
4119             # Normally, we will not allow just two lines to match if
4120             # marginal. But we can allow matching in some specific cases.
4121              
4122 33 100       152 $jfirst_bad = $j if ( !defined($jfirst_bad) );
4123 33 50       136 $is_marginal = 1 if ( $is_marginal == 0 );
4124 33 100       159 if ( $raw_tok eq '=' ) {
4125              
4126             # Here is an example of a marginal match:
4127             # $done{$$op} = 1;
4128             # $op = compile_bblock($op);
4129             # The left tokens are both identifiers, but
4130             # one accesses a hash and the other doesn't.
4131             # We'll let this be a tentative match and undo
4132             # it later if we don't find more than 2 lines
4133             # in the group.
4134 12         46 $is_marginal = 2;
4135             }
4136             }
4137             }
4138              
4139 127 50 66     740 $is_marginal = 1 if ( $is_marginal == 0 && $line_ending_fat_comma );
4140              
4141             # Turn off the "marginal match" flag in some cases...
4142             # A "marginal match" occurs when the alignment tokens agree
4143             # but there are differences in the other tokens (patterns).
4144             # If we leave the marginal match flag set, then the rule is that we
4145             # will align only if there are more than two lines in the group.
4146             # We will turn of the flag if we almost have a match
4147             # and either we have seen a good alignment token or we
4148             # just need a small pad (2 spaces) to fit. These rules are
4149             # the result of experimentation. Tokens which misaligned by just
4150             # one or two characters are annoying. On the other hand,
4151             # large gaps to less important alignment tokens are also annoying.
4152 127 100 100     542 if ( $is_marginal == 1
      100        
4153             && ( $saw_good_alignment || $max_pad < 3 ) )
4154             {
4155 17         47 $is_marginal = 0;
4156             }
4157              
4158             # We will use the line endings to help decide on alignments...
4159             # See if the lines end with semicolons...
4160 127         329 my $sc_term0;
4161             my $sc_term1;
4162 127 50 33     716 if ( $jmax_0 < 1 || $jmax_1 < 1 ) {
4163              
4164             # shouldn't happen
4165             }
4166             else {
4167 127         369 my $pat0 = $rpatterns_0->[ $jmax_0 - 1 ];
4168 127         338 my $pat1 = $rpatterns_1->[ $jmax_1 - 1 ];
4169 127         828 $sc_term0 = $pat0 =~ /;b?$/;
4170 127         634 $sc_term1 = $pat1 =~ /;b?$/;
4171             }
4172              
4173 127 100 100     772 if ( !$is_marginal && !$sc_term0 ) {
4174              
4175             # First line of assignment should be semicolon terminated.
4176             # For example, do not align here:
4177             # $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
4178             # $$href{-NUM_DIRS} = 0;
4179 28 100       126 if ( $is_assignment{$raw_tokb} ) {
4180 1         3 $is_marginal = 1;
4181             }
4182             }
4183              
4184             # Try to avoid some undesirable alignments of opening tokens
4185             # for example, the space between grep and { here:
4186             # return map { ( $_ => $_ ) }
4187             # grep { /$handles/ } $self->_get_delegate_method_list;
4188             $is_marginal ||=
4189 127   100     1630 ( $raw_tokb eq '(' || $raw_tokb eq '{' )
      100        
4190             && $jmax_1 == 2
4191             && $sc_term0 ne $sc_term1;
4192              
4193             #---------------------------------------
4194             # return if this is not a marginal match
4195             #---------------------------------------
4196 127 100       409 if ( !$is_marginal ) {
4197 109         502 return ( $is_marginal, $imax_align );
4198             }
4199              
4200             # Undo the marginal match flag in certain cases,
4201              
4202             # Two lines with a leading equals-like operator are allowed to
4203             # align if the patterns to the left of the equals are the same.
4204             # For example the following two lines are a marginal match but have
4205             # the same left side patterns, so we will align the equals.
4206             # my $orig = my $format = "^<<<<< ~~\n";
4207             # my $abc = "abc";
4208             # But these have a different left pattern so they will not be
4209             # aligned
4210             # $xmldoc .= $`;
4211             # $self->{'leftovers'} .= "<bx-seq:seq" . $';
4212              
4213             # First line semicolon terminated but second not, usually ok:
4214             # my $want = "'ab', 'a', 'b'";
4215             # my $got = join( ", ",
4216             # map { defined($_) ? "'$_'" : "undef" }
4217             # @got );
4218             # First line not semicolon terminated, Not OK to match:
4219             # $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
4220             # $$href{-NUM_DIRS} = 0;
4221 18         58 my $pat0 = $rpatterns_0->[0];
4222 18         60 my $pat1 = $rpatterns_1->[0];
4223              
4224             #---------------------------------------------------------
4225             # Turn off the marginal flag for some types of assignments
4226             #---------------------------------------------------------
4227 18 100       90 if ( $is_assignment{$raw_tokb} ) {
    50          
    50          
4228              
4229             # undo marginal flag if first line is semicolon terminated
4230             # and leading patters match
4231 13 100       45 if ($sc_term0) { # && $sc_term1) {
4232 12         34 $is_marginal = $pat0 ne $pat1;
4233             }
4234             }
4235             elsif ( $raw_tokb eq '=>' ) {
4236              
4237             # undo marginal flag if patterns match
4238 0   0     0 $is_marginal = $pat0 ne $pat1 || $line_ending_fat_comma;
4239             }
4240             elsif ( $raw_tokb eq '=~' ) {
4241              
4242             # undo marginal flag if both lines are semicolon terminated
4243             # and leading patters match
4244 0 0 0     0 if ( $sc_term1 && $sc_term0 ) {
4245 0         0 $is_marginal = $pat0 ne $pat1;
4246             }
4247             }
4248              
4249             #-----------------------------------------------------
4250             # Turn off the marginal flag if we saw an 'if' or 'or'
4251             #-----------------------------------------------------
4252              
4253             # A trailing 'if' and 'or' often gives a good alignment
4254             # For example, we can align these:
4255             # return -1 if $_[0] =~ m/^CHAPT|APPENDIX/;
4256             # return $1 + 0 if $_[0] =~ m/^SECT(\d*)$/;
4257              
4258             # or
4259             # $d_in_m[2] = 29 if ( &Date_LeapYear($y) );
4260             # $d = $d_in_m[$m] if ( $d > $d_in_m[$m] );
4261              
4262 18 100       71 if ($saw_if_or) {
4263              
4264             # undo marginal flag if both lines are semicolon terminated
4265 4 50 33     22 if ( $sc_term0 && $sc_term1 ) {
4266 4         12 $is_marginal = 0;
4267             }
4268             }
4269              
4270             # For a marginal match, only keep matches before the first 'bad' match
4271 18 50 100     174 if ( $is_marginal
      66        
4272             && defined($jfirst_bad)
4273             && $imax_align > $jfirst_bad - 1 )
4274             {
4275 0         0 $imax_align = $jfirst_bad - 1;
4276             }
4277              
4278             #----------------------------------------------------------
4279             # Allow sweep to match lines with leading '=' in some cases
4280             #----------------------------------------------------------
4281 18 100 66     166 if ( $imax_align < 0 && defined($j0_eq_pad) ) {
4282              
4283 13 0 50     140 if (
      33        
      33        
4284              
4285             # If there is a following line with leading equals, or
4286             # preceding line with leading equals, then let the sweep align
4287             # them without restriction. For example, the first two lines
4288             # here are a marginal match, but they are followed by a line
4289             # with leading equals, so the sweep-lr logic can align all of
4290             # the lines:
4291              
4292             # $date[1] = $month_to_num{ $date[1] }; # <--line_0
4293             # @xdate = split( /[:\/\s]/, $log->field('t') ); # <--line_1
4294             # $day = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
4295             # $time = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
4296              
4297             # Likewise, if we reverse the two pairs we want the same result
4298              
4299             # $day = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
4300             # $time = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
4301             # $date[1] = $month_to_num{ $date[1] }; # <--line_0
4302             # @xdate = split( /[:\/\s]/, $log->field('t') ); # <--line_1
4303              
4304             (
4305             $imax_next >= 0
4306             || $imax_prev >= 0
4307             || TEST_MARGINAL_EQ_ALIGNMENT
4308             )
4309             && $j0_eq_pad >= -$j0_max_pad
4310             && $j0_eq_pad <= $j0_max_pad
4311             )
4312             {
4313              
4314             # But do not do this if there is a comma before the '='.
4315             # For example, the first two lines below have commas and
4316             # therefore are not allowed to align with lines 3 & 4:
4317              
4318             # my ( $x, $y ) = $self->Size(); #<--line_0
4319             # my ( $left, $top, $right, $bottom ) = $self->Window(); #<--l_1
4320             # my $vx = $right - $left;
4321             # my $vy = $bottom - $top;
4322              
4323 0 0 0     0 if ( $rpatterns_0->[0] !~ /,/ && $rpatterns_1->[0] !~ /,/ ) {
4324 0         0 $imax_align = 0;
4325             }
4326             }
4327             }
4328              
4329 18         102 return ( $is_marginal, $imax_align );
4330             } ## end sub is_marginal_match
4331             } ## end closure for sub is_marginal_match
4332              
4333             sub get_extra_leading_spaces {
4334              
4335 376     376 0 1148 my ( $rlines, $rgroups ) = @_;
4336              
4337             #----------------------------------------------------------
4338             # Define any extra indentation space (for the -lp option).
4339             # Here is why:
4340             # If a list has side comments, sub scan_list must dump the
4341             # list before it sees everything. When this happens, it sets
4342             # the indentation to the standard scheme, but notes how
4343             # many spaces it would have liked to use. We may be able
4344             # to recover that space here in the event that all of the
4345             # lines of a list are back together again.
4346             #----------------------------------------------------------
4347              
4348 376 50 33     737 return 0 unless ( @{$rlines} && @{$rgroups} );
  376         1401  
  376         1332  
4349              
4350 376         1048 my $object = $rlines->[0]->{'indentation'};
4351 376 100       1225 return 0 unless ( ref($object) );
4352 58         140 my $extra_leading_spaces = 0;
4353 58         229 my $extra_indentation_spaces_wanted = get_recoverable_spaces($object);
4354 58 100       232 return ($extra_leading_spaces) unless ($extra_indentation_spaces_wanted);
4355              
4356 13         37 my $min_spaces = $extra_indentation_spaces_wanted;
4357 13 50       54 if ( $min_spaces > 0 ) { $min_spaces = 0 }
  13         30  
4358              
4359             # loop over all groups
4360 13         35 my $ng = -1;
4361 13         27 my $ngroups = @{$rgroups};
  13         33  
4362 13         31 foreach my $item ( @{$rgroups} ) {
  13         38  
4363 33         56 $ng++;
4364 33         54 my ( $jbeg, $jend ) = @{$item};
  33         71  
4365 33         76 foreach my $j ( $jbeg .. $jend ) {
4366 44 100       116 next if ( $j == 0 );
4367              
4368             # all indentation objects must be the same
4369 31 100       140 if ( $object != $rlines->[$j]->{'indentation'} ) {
4370 1         3 return 0;
4371             }
4372             }
4373              
4374             # find the maximum space without exceeding the line length for this group
4375 32         114 my $avail = $rlines->[$jbeg]->get_available_space_on_right();
4376 32 100       128 my $spaces =
4377             ( $avail > $extra_indentation_spaces_wanted )
4378             ? $extra_indentation_spaces_wanted
4379             : $avail;
4380              
4381             #--------------------------------------------------------
4382             # Note: min spaces can be negative; for example with -gnu
4383             # f(
4384             # do { 1; !!(my $x = bless []); }
4385             # );
4386             #--------------------------------------------------------
4387             # The following rule is needed to match older formatting:
4388             # For multiple groups, we will keep spaces non-negative.
4389             # For a single group, we will allow a negative space.
4390 32 50 66     138 if ( $ngroups > 1 && $spaces < 0 ) { $spaces = 0 }
  0         0  
4391              
4392             # update the minimum spacing
4393 32 100 66     177 if ( $ng == 0 || $spaces < $extra_leading_spaces ) {
4394 13         37 $extra_leading_spaces = $spaces;
4395             }
4396             }
4397              
4398             # update the indentation object because with -icp the terminal
4399             # ');' will use the same adjustment.
4400 12         126 $object->permanently_decrease_available_spaces( -$extra_leading_spaces );
4401 12         28 return $extra_leading_spaces;
4402             } ## end sub get_extra_leading_spaces
4403              
4404             sub forget_side_comment {
4405 111     111 0 367 my ($self) = @_;
4406 111         261 $self->[_last_side_comment_column_] = 0;
4407 111         254 return;
4408             }
4409              
4410             sub is_good_side_comment_column {
4411 199     199 0 581 my ( $self, $line, $line_number, $level, $num5 ) = @_;
4412              
4413             # Upon encountering the first side comment of a group, decide if
4414             # a previous side comment should be forgotten. This involves
4415             # checking several rules.
4416              
4417             # Return true to KEEP old comment location
4418             # Return false to FORGET old comment location
4419 199         359 my $KEEP = 1;
4420 199         353 my $FORGET = 0;
4421              
4422 199         418 my $rfields = $line->{'rfields'};
4423 199         415 my $is_hanging_side_comment = $line->{'is_hanging_side_comment'};
4424              
4425             # RULE1: Never forget comment before a hanging side comment
4426 199 100       620 return $KEEP if ($is_hanging_side_comment);
4427              
4428             # RULE2: Forget a side comment after a short line difference,
4429             # where 'short line difference' is computed from a formula.
4430             # Using a smooth formula helps minimize sudden large changes.
4431 189         422 my $line_diff = $line_number - $self->[_last_side_comment_line_number_];
4432 189         460 my $alev_diff = abs( $level - $self->[_last_side_comment_level_] );
4433              
4434             # '$num5' is the number of comments in the first 5 lines after the first
4435             # comment. It is needed to keep a compact group of side comments from
4436             # being influenced by a more distant side comment.
4437 189 50       581 $num5 = 1 unless ($num5);
4438              
4439             # Some values:
4440              
4441             # $adiff $num5 $short_diff
4442             # 0 * 12
4443             # 1 1 6
4444             # 1 2 4
4445             # 1 3 3
4446             # 1 4 2
4447             # 2 1 4
4448             # 2 2 2
4449             # 2 3 1
4450             # 3 1 3
4451             # 3 2 1
4452              
4453 189         535 my $short_diff = SC_LONG_LINE_DIFF / ( 1 + $alev_diff * $num5 );
4454              
4455 189 100 100     1024 return $FORGET
4456             if ( $line_diff > $short_diff
4457             || !$self->[_rOpts_valign_side_comments_] );
4458              
4459             # RULE3: Forget a side comment if this line is at lower level and
4460             # ends a block
4461 122         321 my $last_sc_level = $self->[_last_side_comment_level_];
4462             return $FORGET
4463             if ( $level < $last_sc_level
4464 122 100 100     581 && $is_closing_block_type{ substr( $rfields->[0], 0, 1 ) } );
4465              
4466             # RULE 4: Forget the last side comment if this comment might join a cached
4467             # line ...
4468 104 100       297 if ( my $cached_line_type = get_cached_line_type() ) {
4469              
4470             # ... otherwise side comment alignment will get messed up.
4471             # For example, in the following test script
4472             # with using 'perltidy -sct -act=2', the last comment would try to
4473             # align with the previous and then be in the wrong column when
4474             # the lines are combined:
4475              
4476             # foreach $line (
4477             # [0, 1, 2], [3, 4, 5], [6, 7, 8], # rows
4478             # [0, 3, 6], [1, 4, 7], [2, 5, 8], # columns
4479             # [0, 4, 8], [2, 4, 6]
4480             # ) # diagonals
4481 4 50 33     36 return $FORGET
4482             if ( $cached_line_type == 2 || $cached_line_type == 4 );
4483             }
4484              
4485             # Otherwise, keep it alive
4486 104         267 return $KEEP;
4487             } ## end sub is_good_side_comment_column
4488              
4489             sub align_side_comments {
4490              
4491 199     199 0 590 my ( $self, $rlines, $rgroups ) = @_;
4492              
4493             # Align any side comments in this batch of lines
4494              
4495             # Given:
4496             # $rlines - the lines
4497             # $rgroups - the partition of the lines into groups
4498             #
4499             # We will be working group-by-group because all side comments
4500             # (real or fake) in each group are already aligned. So we just have
4501             # to make alignments between groups wherever possible.
4502              
4503             # An unusual aspect is that within each group we have aligned both real
4504             # and fake side comments. This has the consequence that the lengths of
4505             # long lines without real side comments can cause 'push' all side comments
4506             # to the right. This seems unusual, but testing with and without this
4507             # feature shows that it is usually better this way. Otherwise, side
4508             # comments can be hidden between long lines without side comments and
4509             # thus be harder to read.
4510              
4511 199         502 my $group_level = $self->[_group_level_];
4512 199   100     818 my $continuing_sc_flow = $self->[_last_side_comment_length_] > 0
4513             && $group_level == $self->[_last_level_written_];
4514              
4515             # Find groups with side comments, and remember the first nonblank comment
4516 199         411 my $j_sc_beg;
4517             my @todo;
4518 199         413 my $ng = -1;
4519 199         359 foreach my $item ( @{$rgroups} ) {
  199         482  
4520 312         525 $ng++;
4521 312         504 my ( $jbeg, $jend ) = @{$item};
  312         677  
4522 312         723 foreach my $j ( $jbeg .. $jend ) {
4523 346         634 my $line = $rlines->[$j];
4524 346         613 my $jmax = $line->{'jmax'};
4525 346 100       1050 if ( $line->{'rfield_lengths'}->[$jmax] ) {
4526              
4527             # this group has a line with a side comment
4528 228         511 push @todo, $ng;
4529 228 100       719 if ( !defined($j_sc_beg) ) {
4530 199         370 $j_sc_beg = $j;
4531             }
4532 228         589 last;
4533             }
4534             }
4535             }
4536              
4537             # done if no groups with side comments
4538 199 50       663 return unless @todo;
4539              
4540             # Count $num5 = number of comments in the 5 lines after the first comment
4541             # This is an important factor in a decision formula
4542 199         418 my $num5 = 1;
4543 199         455 foreach my $jj ( $j_sc_beg + 1 .. @{$rlines} - 1 ) {
  199         614  
4544 194         385 my $ldiff = $jj - $j_sc_beg;
4545 194 100       443 last if ( $ldiff > 5 );
4546 190         338 my $line = $rlines->[$jj];
4547 190         308 my $jmax = $line->{'jmax'};
4548 190         352 my $sc_len = $line->{'rfield_lengths'}->[$jmax];
4549 190 100       490 next unless ($sc_len);
4550 121         282 $num5++;
4551             }
4552              
4553             # Forget the old side comment location if necessary
4554 199         592 my $line_0 = $rlines->[$j_sc_beg];
4555 199         1115 my $lnum =
4556             $j_sc_beg + $self->[_file_writer_object_]->get_output_line_number();
4557 199         877 my $keep_it =
4558             $self->is_good_side_comment_column( $line_0, $lnum, $group_level, $num5 );
4559 199 100       655 my $last_side_comment_column =
4560             $keep_it ? $self->[_last_side_comment_column_] : 0;
4561              
4562             # If there are multiple groups we will do two passes
4563             # so that we can find a common alignment for all groups.
4564 199 100       635 my $MAX_PASS = @todo > 1 ? 2 : 1;
4565              
4566             # Loop over passes
4567 199         384 my $max_comment_column = $last_side_comment_column;
4568 199         525 foreach my $PASS ( 1 .. $MAX_PASS ) {
4569              
4570             # If there are two passes, then on the last pass make the old column
4571             # equal to the largest of the group. This will result in the comments
4572             # being aligned if possible.
4573 223 100       649 if ( $PASS == $MAX_PASS ) {
4574 199         394 $last_side_comment_column = $max_comment_column;
4575             }
4576              
4577             # Loop over the groups with side comments
4578 223         360 my $column_limit;
4579 223         577 foreach my $ng (@todo) {
4580 281         498 my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
  281         700  
4581              
4582             # Note that since all lines in a group have common alignments, we
4583             # just have to work on one of the lines (the first line).
4584 281         507 my $line = $rlines->[$jbeg];
4585 281         548 my $jmax = $line->{'jmax'};
4586 281         554 my $is_hanging_side_comment = $line->{'is_hanging_side_comment'};
4587             last
4588 281 100 100     1010 if ( $PASS < $MAX_PASS && $is_hanging_side_comment );
4589              
4590             # the maximum space without exceeding the line length:
4591 277         1005 my $avail = $line->get_available_space_on_right();
4592              
4593             # try to use the previous comment column
4594 277         939 my $side_comment_column = $line->get_column( $jmax - 1 );
4595 277         740 my $move = $last_side_comment_column - $side_comment_column;
4596              
4597             # Remember the maximum possible column of the first line with
4598             # side comment
4599 277 100       810 if ( !defined($column_limit) ) {
4600 223         425 $column_limit = $side_comment_column + $avail;
4601             }
4602              
4603 277 50       688 next if ( $jmax <= 0 );
4604              
4605             # but if this doesn't work, give up and use the minimum space
4606 277         560 my $min_move = $self->[_rOpts_minimum_space_to_comment_] - 1;
4607 277 100       729 if ( $move > $avail ) {
4608 13         37 $move = $min_move;
4609             }
4610              
4611             # but we want some minimum space to the comment
4612 277 100 100     1288 if ( $move >= 0
      100        
4613             && $j_sc_beg == 0
4614             && $continuing_sc_flow )
4615             {
4616 3         8 $min_move = 0;
4617             }
4618              
4619             # remove constraints on hanging side comments
4620 277 100       639 if ($is_hanging_side_comment) { $min_move = 0 }
  14         27  
4621              
4622 277 100       716 if ( $move < $min_move ) {
4623 194         343 $move = $min_move;
4624             }
4625              
4626             # don't exceed the available space
4627 277 100       757 if ( $move > $avail ) { $move = $avail }
  11         24  
4628              
4629             # We can only increase space, never decrease.
4630 277 100       726 if ( $move < 0 ) { $move = 0 }
  8         19  
4631              
4632             # Discover the largest column on the preliminary pass
4633 277 100       1697 if ( $PASS < $MAX_PASS ) {
4634 49         158 my $col = $line->get_column( $jmax - 1 ) + $move;
4635              
4636             # but ignore columns too large for the starting line
4637 49 100 66     291 if ( $col > $max_comment_column && $col < $column_limit ) {
4638 23         66 $max_comment_column = $col;
4639             }
4640             }
4641              
4642             # Make the changes on the final pass
4643             else {
4644 228         933 $line->increase_field_width( $jmax - 1, $move );
4645              
4646             # remember this column for the next group
4647 228         982 $last_side_comment_column = $line->get_column( $jmax - 1 );
4648             }
4649             } ## end loop over groups
4650             } ## end loop over passes
4651              
4652             # Find the last side comment
4653 199         418 my $j_sc_last;
4654 199         520 my $ng_last = $todo[-1];
4655 199         392 my ( $jbeg, $jend ) = @{ $rgroups->[$ng_last] };
  199         544  
4656 199         628 foreach my $jj ( reverse( $jbeg .. $jend ) ) {
4657 201         442 my $line = $rlines->[$jj];
4658 201         392 my $jmax = $line->{'jmax'};
4659 201 100       594 if ( $line->{'rfield_lengths'}->[$jmax] ) {
4660 199         412 $j_sc_last = $jj;
4661 199         425 last;
4662             }
4663             }
4664              
4665             # Save final side comment info for possible use by the next batch
4666 199 50       606 if ( defined($j_sc_last) ) {
4667 199         700 my $line_number =
4668             $self->[_file_writer_object_]->get_output_line_number() + $j_sc_last;
4669 199         427 $self->[_last_side_comment_column_] = $last_side_comment_column;
4670 199         420 $self->[_last_side_comment_line_number_] = $line_number;
4671 199         402 $self->[_last_side_comment_level_] = $group_level;
4672             }
4673 199         522 return;
4674             } ## end sub align_side_comments
4675              
4676             ###############################
4677             # CODE SECTION 6: Output Step A
4678             ###############################
4679              
4680             sub valign_output_step_A {
4681              
4682             #------------------------------------------------------------
4683             # This is Step A in writing vertically aligned lines.
4684             # The line is prepared according to the alignments which have
4685             # been found. Then it is shipped to the next step.
4686             #------------------------------------------------------------
4687              
4688 3061     3061 0 6624 my ( $self, $rinput_hash ) = @_;
4689              
4690 3061         5828 my $line = $rinput_hash->{line};
4691 3061         5148 my $min_ci_gap = $rinput_hash->{min_ci_gap};
4692 3061         4931 my $do_not_align = $rinput_hash->{do_not_align};
4693 3061         4911 my $group_leader_length = $rinput_hash->{group_leader_length};
4694 3061         4833 my $extra_leading_spaces = $rinput_hash->{extra_leading_spaces};
4695 3061         4925 my $level = $rinput_hash->{level};
4696 3061         5131 my $maximum_line_length = $rinput_hash->{maximum_line_length};
4697              
4698 3061         5372 my $rfields = $line->{'rfields'};
4699 3061         4971 my $rfield_lengths = $line->{'rfield_lengths'};
4700 3061         4799 my $leading_space_count = $line->{'leading_space_count'};
4701 3061         5022 my $outdent_long_lines = $line->{'outdent_long_lines'};
4702 3061         5036 my $maximum_field_index = $line->{'jmax'};
4703 3061         5011 my $rvertical_tightness_flags = $line->{'rvertical_tightness_flags'};
4704 3061         5213 my $Kend = $line->{'Kend'};
4705 3061         5385 my $level_end = $line->{'level_end'};
4706              
4707             # Check for valid hash keys at end of lifetime of $line during development
4708 3061         4305 DEVEL_MODE
4709             && check_keys( $line, \%valid_LINE_keys,
4710             "Checking line keys at valign_output_step_A", 1 );
4711              
4712             # add any extra spaces
4713 3061 100       6792 if ( $leading_space_count > $group_leader_length ) {
4714 47         101 $leading_space_count += $min_ci_gap;
4715             }
4716              
4717 3061         6037 my $str = $rfields->[0];
4718 3061         5087 my $str_len = $rfield_lengths->[0];
4719              
4720 3061         4761 my @alignments = @{ $line->{'ralignments'} };
  3061         7469  
4721 3061 50       8213 if ( @alignments != $maximum_field_index + 1 ) {
4722              
4723             # Shouldn't happen: sub install_new_alignments makes jmax alignments
4724 0         0 my $jmax_alignments = @alignments - 1;
4725 0         0 if (DEVEL_MODE) {
4726             Fault(
4727             "alignment jmax=$jmax_alignments should equal $maximum_field_index\n"
4728             );
4729             }
4730 0         0 $do_not_align = 1;
4731             }
4732              
4733             # loop to concatenate all fields of this line and needed padding
4734 3061         5138 my $total_pad_count = 0;
4735 3061         6981 for my $j ( 1 .. $maximum_field_index ) {
4736              
4737             # skip zero-length side comments
4738             last
4739             if (
4740 7266 100 66     25201 ( $j == $maximum_field_index )
      100        
4741             && ( !defined( $rfields->[$j] )
4742             || ( $rfield_lengths->[$j] == 0 ) )
4743             );
4744              
4745             # compute spaces of padding before this field
4746 4530         9059 my $col = $alignments[ $j - 1 ]->{'column'};
4747 4530         7356 my $pad = $col - ( $str_len + $leading_space_count );
4748              
4749 4530 50       8600 if ($do_not_align) {
4750 0 0       0 $pad =
4751             ( $j < $maximum_field_index )
4752             ? 0
4753             : $self->[_rOpts_minimum_space_to_comment_] - 1;
4754             }
4755              
4756             # if the -fpsc flag is set, move the side comment to the selected
4757             # column if and only if it is possible, ignoring constraints on
4758             # line length and minimum space to comment
4759 4530 100 100     10552 if ( $self->[_rOpts_fixed_position_side_comment_]
4760             && $j == $maximum_field_index )
4761             {
4762 9         23 my $newpad =
4763             $pad + $self->[_rOpts_fixed_position_side_comment_] - $col - 1;
4764 9 50       21 if ( $newpad >= 0 ) { $pad = $newpad; }
  9         12  
4765             }
4766              
4767             # accumulate the padding
4768 4530 100       9158 if ( $pad > 0 ) { $total_pad_count += $pad; }
  1319         2335  
4769              
4770             # only add padding when we have a finite field;
4771             # this avoids extra terminal spaces if we have empty fields
4772 4530 100       9038 if ( $rfield_lengths->[$j] > 0 ) {
4773 4519         9257 $str .= SPACE x $total_pad_count;
4774 4519         6513 $str_len += $total_pad_count;
4775 4519         6505 $total_pad_count = 0;
4776 4519         7651 $str .= $rfields->[$j];
4777 4519         8003 $str_len += $rfield_lengths->[$j];
4778             }
4779             else {
4780 11         31 $total_pad_count = 0;
4781             }
4782             }
4783              
4784 3061         5880 my $side_comment_length = $rfield_lengths->[$maximum_field_index];
4785              
4786             # ship this line off
4787 3061         26088 $self->valign_output_step_B(
4788             {
4789             leading_space_count => $leading_space_count + $extra_leading_spaces,
4790             line => $str,
4791             line_length => $str_len,
4792             side_comment_length => $side_comment_length,
4793             outdent_long_lines => $outdent_long_lines,
4794             rvertical_tightness_flags => $rvertical_tightness_flags,
4795             level => $level,
4796             level_end => $level_end,
4797             Kend => $Kend,
4798             maximum_line_length => $maximum_line_length,
4799             }
4800             );
4801 3061         15759 return;
4802             } ## end sub valign_output_step_A
4803              
4804             sub combine_fields {
4805              
4806             # We have a group of two lines for which we do not want to align tokens
4807             # between index $imax_align and the side comment. So we will delete fields
4808             # between $imax_align and the side comment. Alignments have already
4809             # been set so we have to adjust them.
4810              
4811 14     14 0 60 my ( $line_0, $line_1, $imax_align ) = @_;
4812              
4813 14 50       60 if ( !defined($imax_align) ) { $imax_align = -1 }
  0         0  
4814              
4815             # First delete the unwanted tokens
4816 14         46 my $jmax_old = $line_0->{'jmax'};
4817 14         58 my @idel = ( $imax_align + 1 .. $jmax_old - 2 );
4818 14 50       59 return unless (@idel);
4819              
4820             # Get old alignments before any changes are made
4821 14         42 my @old_alignments = @{ $line_0->{'ralignments'} };
  14         46  
4822              
4823 14         46 foreach my $line ( $line_0, $line_1 ) {
4824 28         100 delete_selected_tokens( $line, \@idel );
4825             }
4826              
4827             # Now adjust the alignments. Note that the side comment alignment
4828             # is always at jmax-1, and there is an ending alignment at jmax.
4829 14         51 my @new_alignments;
4830 14 50       82 if ( $imax_align >= 0 ) {
4831 0         0 @new_alignments[ 0 .. $imax_align ] =
4832             @old_alignments[ 0 .. $imax_align ];
4833             }
4834              
4835 14         42 my $jmax_new = $line_0->{'jmax'};
4836              
4837 14         54 $new_alignments[ $jmax_new - 1 ] = $old_alignments[ $jmax_old - 1 ];
4838 14         38 $new_alignments[$jmax_new] = $old_alignments[$jmax_old];
4839 14         43 $line_0->{'ralignments'} = \@new_alignments;
4840 14         49 $line_1->{'ralignments'} = \@new_alignments;
4841 14         60 return;
4842             } ## end sub combine_fields
4843              
4844             sub get_output_line_number {
4845              
4846             # The output line number reported to a caller =
4847             # the number of items still in the buffer +
4848             # the number of items written.
4849 49     49 0 160 return $_[0]->group_line_count() +
4850             $_[0]->[_file_writer_object_]->get_output_line_number();
4851             } ## end sub get_output_line_number
4852              
4853             ###############################
4854             # CODE SECTION 7: Output Step B
4855             ###############################
4856              
4857             { ## closure for sub valign_output_step_B
4858              
4859             # These are values for a cache used by valign_output_step_B.
4860             my $cached_line_text;
4861             my $cached_line_text_length;
4862             my $cached_line_type;
4863             my $cached_line_opening_flag;
4864             my $cached_line_closing_flag;
4865             my $cached_seqno;
4866             my $cached_line_valid;
4867             my $cached_line_leading_space_count;
4868             my $cached_seqno_string;
4869             my $cached_line_Kend;
4870             my $cached_line_maximum_length;
4871              
4872             # These are passed to step_C:
4873             my $seqno_string;
4874             my $last_nonblank_seqno_string;
4875              
4876             sub set_last_nonblank_seqno_string {
4877 394     394 0 828 my ($val) = @_;
4878 394         670 $last_nonblank_seqno_string = $val;
4879 394         626 return;
4880             }
4881              
4882             sub get_cached_line_opening_flag {
4883 224     224 0 444 return $cached_line_opening_flag;
4884             }
4885              
4886             sub get_cached_line_type {
4887 7471     7471 0 13891 return $cached_line_type;
4888             }
4889              
4890             sub set_cached_line_valid {
4891 3     3 0 18 my ($val) = @_;
4892 3         16 $cached_line_valid = $val;
4893 3         8 return;
4894             }
4895              
4896             sub get_cached_seqno {
4897 224     224 0 505 return $cached_seqno;
4898             }
4899              
4900             sub initialize_step_B_cache {
4901              
4902             # valign_output_step_B cache:
4903 555     555 0 1897 $cached_line_text = EMPTY_STRING;
4904 555         1255 $cached_line_text_length = 0;
4905 555         1219 $cached_line_type = 0;
4906 555         1201 $cached_line_opening_flag = 0;
4907 555         1141 $cached_line_closing_flag = 0;
4908 555         1290 $cached_seqno = 0;
4909 555         1161 $cached_line_valid = 0;
4910 555         1097 $cached_line_leading_space_count = 0;
4911 555         1213 $cached_seqno_string = EMPTY_STRING;
4912 555         1142 $cached_line_Kend = undef;
4913 555         1090 $cached_line_maximum_length = undef;
4914              
4915             # These vars hold a string of sequence numbers joined together used by
4916             # the cache
4917 555         1287 $seqno_string = EMPTY_STRING;
4918 555         1194 $last_nonblank_seqno_string = EMPTY_STRING;
4919 555         1179 return;
4920             } ## end sub initialize_step_B_cache
4921              
4922             sub _flush_step_B_cache {
4923 1806     1806   4056 my ($self) = @_;
4924              
4925             # Send any text in the step_B cache on to step_C
4926 1806 100       4505 if ($cached_line_type) {
4927 1         3 $seqno_string = $cached_seqno_string;
4928 1         7 $self->valign_output_step_C(
4929             $seqno_string,
4930             $last_nonblank_seqno_string,
4931              
4932             $cached_line_text,
4933             $cached_line_leading_space_count,
4934             $self->[_last_level_written_],
4935             $cached_line_Kend,
4936             );
4937 1         2 $cached_line_type = 0;
4938 1         3 $cached_line_text = EMPTY_STRING;
4939 1         15 $cached_line_text_length = 0;
4940 1         3 $cached_seqno_string = EMPTY_STRING;
4941 1         9 $cached_line_Kend = undef;
4942 1         7 $cached_line_maximum_length = undef;
4943             }
4944 1806         3011 return;
4945             } ## end sub _flush_step_B_cache
4946              
4947             sub handle_cached_line {
4948              
4949 158     158 0 468 my ( $self, $rinput, $leading_string, $leading_string_length ) = @_;
4950              
4951             # The cached line will either be:
4952             # - passed along to step_C, or
4953             # - or combined with the current line
4954              
4955 158         353 my $last_level_written = $self->[_last_level_written_];
4956              
4957 158         340 my $leading_space_count = $rinput->{leading_space_count};
4958 158         319 my $str = $rinput->{line};
4959 158         298 my $str_length = $rinput->{line_length};
4960 158         277 my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
4961 158         301 my $level = $rinput->{level};
4962 158         290 my $level_end = $rinput->{level_end};
4963 158         285 my $maximum_line_length = $rinput->{maximum_line_length};
4964              
4965 158         321 my ( $open_or_close, $opening_flag, $closing_flag, $seqno, $valid,
4966             $seqno_beg, $seqno_end );
4967 158 50       400 if ($rvertical_tightness_flags) {
4968              
4969 158         290 $open_or_close = $rvertical_tightness_flags->{_vt_type};
4970 158         297 $seqno_beg = $rvertical_tightness_flags->{_vt_seqno_beg};
4971             }
4972              
4973             # Dump an invalid cached line
4974 158 100 100     645 if ( !$cached_line_valid ) {
    100          
4975 91         255 $self->valign_output_step_C(
4976             $seqno_string,
4977             $last_nonblank_seqno_string,
4978              
4979             $cached_line_text,
4980             $cached_line_leading_space_count,
4981             $last_level_written,
4982             $cached_line_Kend,
4983             );
4984             }
4985              
4986             # Handle cached line ending in OPENING tokens
4987             elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
4988              
4989 30         79 my $gap = $leading_space_count - $cached_line_text_length;
4990              
4991             # handle option of just one tight opening per line:
4992 30 100       96 if ( $cached_line_opening_flag == 1 ) {
4993 14 50 33     73 if ( defined($open_or_close) && $open_or_close == 1 ) {
4994 0         0 $gap = -1;
4995             }
4996             }
4997              
4998             # Do not join the lines if this might produce a one-line
4999             # container which exceeds the maximum line length. This is
5000             # necessary prevent blinking, particularly with the combination
5001             # -xci -pvt=2. In that case a one-line block alternately forms
5002             # and breaks, causing -xci to alternately turn on and off (case
5003             # b765).
5004             # Patched to fix cases b656 b862 b971 b972: always do the check
5005             # if the maximum line length changes (due to -vmll).
5006 30 50 33     258 if (
      66        
5007             $gap >= 0
5008             && ( $maximum_line_length != $cached_line_maximum_length
5009             || ( defined($level_end) && $level > $level_end ) )
5010             )
5011             {
5012 0         0 my $test_line_length =
5013             $cached_line_text_length + $gap + $str_length;
5014              
5015             # Add a small tolerance in the length test (fixes case b862)
5016 0 0       0 if ( $test_line_length > $cached_line_maximum_length - 2 ) {
5017 0         0 $gap = -1;
5018             }
5019             }
5020              
5021 30 100 66     145 if ( $gap >= 0 && defined($seqno_beg) ) {
5022 18         51 $maximum_line_length = $cached_line_maximum_length;
5023 18         59 $leading_string = $cached_line_text . SPACE x $gap;
5024 18         38 $leading_string_length = $cached_line_text_length + $gap;
5025 18         35 $leading_space_count = $cached_line_leading_space_count;
5026 18         61 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
5027 18         42 $level = $last_level_written;
5028             }
5029             else {
5030 12         48 $self->valign_output_step_C(
5031             $seqno_string,
5032             $last_nonblank_seqno_string,
5033              
5034             $cached_line_text,
5035             $cached_line_leading_space_count,
5036             $last_level_written,
5037             $cached_line_Kend,
5038             );
5039             }
5040             }
5041              
5042             # Handle cached line ending in CLOSING tokens
5043             else {
5044 37         202 my $test_line =
5045             $cached_line_text . SPACE x $cached_line_closing_flag . $str;
5046 37         104 my $test_line_length =
5047             $cached_line_text_length +
5048             $cached_line_closing_flag +
5049             $str_length;
5050 37 100 66     505 if (
      66        
      100        
5051              
5052             # The new line must start with container
5053             $seqno_beg
5054              
5055             # The container combination must be okay..
5056             && (
5057              
5058             # okay to combine like types
5059             ( $open_or_close == $cached_line_type )
5060              
5061             # closing block brace may append to non-block
5062             || ( $cached_line_type == 2 && $open_or_close == 4 )
5063              
5064             # something like ');'
5065             || ( !$open_or_close && $cached_line_type == 2 )
5066              
5067             )
5068              
5069             # The combined line must fit
5070             && ( $test_line_length <= $cached_line_maximum_length )
5071             )
5072             {
5073              
5074 33         97 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
5075              
5076             # Patch to outdent closing tokens ending # in ');' If we
5077             # are joining a line like ');' to a previous stacked set of
5078             # closing tokens, then decide if we may outdent the
5079             # combined stack to the indentation of the ');'. Since we
5080             # should not normally outdent any of the other tokens more
5081             # than the indentation of the lines that contained them, we
5082             # will only do this if all of the corresponding opening
5083             # tokens were on the same line. This can happen with -sot
5084             # and -sct.
5085              
5086             # For example, it is ok here:
5087             # __PACKAGE__->load_components( qw(
5088             # PK::Auto
5089             # Core
5090             # ));
5091             #
5092             # But, for example, we do not outdent in this example
5093             # because that would put the closing sub brace out farther
5094             # than the opening sub brace:
5095             #
5096             # perltidy -sot -sct
5097             # $c->Tk::bind(
5098             # '<Control-f>' => sub {
5099             # my ($c) = @_;
5100             # my $e = $c->XEvent;
5101             # itemsUnderArea $c;
5102             # } );
5103             #
5104 33 100 100     286 if ( $str =~ /^\);/
5105             && $cached_line_text =~ /^[\)\}\]\s]*$/ )
5106             {
5107              
5108             # The way to tell this is if the stacked sequence
5109             # numbers of this output line are the reverse of the
5110             # stacked sequence numbers of the previous non-blank
5111             # line of sequence numbers. So we can join if the
5112             # previous nonblank string of tokens is the mirror
5113             # image. For example if stack )}] is 13:8:6 then we
5114             # are looking for a leading stack like [{( which
5115             # is 6:8:13. We only need to check the two ends,
5116             # because the intermediate tokens must fall in order.
5117             # Note on speed: having to split on colons and
5118             # eliminate multiple colons might appear to be slow,
5119             # but it's not an issue because we almost never come
5120             # through here. In a typical file we don't.
5121              
5122 4         18 $seqno_string =~ s/^:+//;
5123 4         11 $last_nonblank_seqno_string =~ s/^:+//;
5124 4         23 $seqno_string =~ s/:+/:/g;
5125 4         17 $last_nonblank_seqno_string =~ s/:+/:/g;
5126              
5127             # how many spaces can we outdent?
5128 4         10 my $diff =
5129             $cached_line_leading_space_count - $leading_space_count;
5130 4 100 33     41 if ( $diff > 0
      66        
5131             && length($seqno_string)
5132             && length($last_nonblank_seqno_string) ==
5133             length($seqno_string) )
5134             {
5135 3         27 my @seqno_last =
5136             ( split /:/, $last_nonblank_seqno_string );
5137 3         10 my @seqno_now = ( split /:/, $seqno_string );
5138 3 50 33     44 if ( @seqno_now
      33        
      33        
5139             && @seqno_last
5140             && $seqno_now[-1] == $seqno_last[0]
5141             && $seqno_now[0] == $seqno_last[-1] )
5142             {
5143              
5144             # OK to outdent ..
5145             # for absolute safety, be sure we only remove
5146             # whitespace
5147 3         9 my $ws = substr( $test_line, 0, $diff );
5148 3 50 33     27 if ( ( length($ws) == $diff )
5149             && $ws =~ /^\s+$/ )
5150             {
5151              
5152 3         10 $test_line = substr( $test_line, $diff );
5153 3         6 $cached_line_leading_space_count -= $diff;
5154 3         17 $last_level_written =
5155             $self->level_change(
5156             $cached_line_leading_space_count,
5157             $diff, $last_level_written );
5158 3         11 $self->reduce_valign_buffer_indentation($diff);
5159             }
5160              
5161             # shouldn't happen, but not critical:
5162             ##else {
5163             ## ERROR transferring indentation here
5164             ##}
5165             }
5166             }
5167             }
5168              
5169             # Change the args to look like we received the combined line
5170 33         71 $str = $test_line;
5171 33         62 $str_length = $test_line_length;
5172 33         61 $leading_string = EMPTY_STRING;
5173 33         70 $leading_string_length = 0;
5174 33         78 $leading_space_count = $cached_line_leading_space_count;
5175 33         72 $level = $last_level_written;
5176 33         75 $maximum_line_length = $cached_line_maximum_length;
5177             }
5178             else {
5179 4         23 $self->valign_output_step_C(
5180             $seqno_string,
5181             $last_nonblank_seqno_string,
5182              
5183             $cached_line_text,
5184             $cached_line_leading_space_count,
5185             $last_level_written,
5186             $cached_line_Kend,
5187             );
5188             }
5189             }
5190 158         782 return ( $str, $str_length, $leading_string, $leading_string_length,
5191             $leading_space_count, $level, $maximum_line_length, );
5192              
5193             } ## end sub handle_cached_line
5194              
5195             sub valign_output_step_B {
5196              
5197             #---------------------------------------------------------
5198             # This is Step B in writing vertically aligned lines.
5199             # Vertical tightness is applied according to preset flags.
5200             # In particular this routine handles stacking of opening
5201             # and closing tokens.
5202             #---------------------------------------------------------
5203              
5204 7366     7366 0 15169 my ( $self, $rinput ) = @_;
5205              
5206 7366         14000 my $leading_space_count = $rinput->{leading_space_count};
5207 7366         12933 my $str = $rinput->{line};
5208 7366         11361 my $str_length = $rinput->{line_length};
5209 7366         11202 my $side_comment_length = $rinput->{side_comment_length};
5210 7366         11091 my $outdent_long_lines = $rinput->{outdent_long_lines};
5211 7366         11232 my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
5212 7366         11900 my $level = $rinput->{level};
5213 7366         11373 my $level_end = $rinput->{level_end};
5214 7366         11304 my $Kend = $rinput->{Kend};
5215 7366         10961 my $maximum_line_length = $rinput->{maximum_line_length};
5216              
5217             # Useful -gcs test cases for wide characters are
5218             # perl527/(method.t.2, reg_mesg.t, mime-header.t)
5219              
5220             # handle outdenting of long lines:
5221 7366         11149 my $is_outdented_line;
5222 7366 100       15460 if ($outdent_long_lines) {
5223 276         790 my $excess =
5224             $str_length -
5225             $side_comment_length +
5226             $leading_space_count -
5227             $maximum_line_length;
5228 276 100       866 if ( $excess > 0 ) {
5229 10         38 $leading_space_count = 0;
5230 10         19 my $file_writer_object = $self->[_file_writer_object_];
5231 10         42 my $last_outdented_line_at =
5232             $file_writer_object->get_output_line_number();
5233 10         30 $self->[_last_outdented_line_at_] = $last_outdented_line_at;
5234              
5235 10         22 my $outdented_line_count = $self->[_outdented_line_count_];
5236 10 100       32 unless ($outdented_line_count) {
5237 3         12 $self->[_first_outdented_line_at_] =
5238             $last_outdented_line_at;
5239             }
5240 10         16 $outdented_line_count++;
5241 10         17 $self->[_outdented_line_count_] = $outdented_line_count;
5242 10         19 $is_outdented_line = 1;
5243             }
5244             }
5245              
5246             # Make preliminary leading whitespace. It could get changed
5247             # later by entabbing, so we have to keep track of any changes
5248             # to the leading_space_count from here on.
5249 7366 100       19063 my $leading_string =
5250             $leading_space_count > 0
5251             ? ( SPACE x $leading_space_count )
5252             : EMPTY_STRING;
5253 7366         11723 my $leading_string_length = length($leading_string);
5254              
5255             # Unpack any recombination data; it was packed by
5256             # sub 'Formatter::set_vertical_tightness_flags'
5257              
5258             # old hash Meaning
5259             # index key
5260             #
5261             # 0 _vt_type: 1=opening non-block 2=closing non-block
5262             # 3=opening block brace 4=closing block brace
5263             #
5264             # 1a _vt_opening_flag: 1=no multiple steps, 2=multiple steps ok
5265             # 1b _vt_closing_flag: spaces of padding to use if closing
5266             # 2 _vt_seqno: sequence number of container
5267             # 3 _vt_valid flag: do not append if this flag is false. Will be
5268             # true if appropriate -vt flag is set. Otherwise, Will be
5269             # made true only for 2 line container in parens with -lp
5270             # 4 _vt_seqno_beg: sequence number of first token of line
5271             # 5 _vt_seqno_end: sequence number of last token of line
5272             # 6 _vt_min_lines: min number of lines for joining opening cache,
5273             # 0=no constraint
5274             # 7 _vt_max_lines: max number of lines for joining opening cache,
5275             # 0=no constraint
5276              
5277 7366         12500 my ( $open_or_close, $opening_flag, $closing_flag, $seqno, $valid,
5278             $seqno_beg, $seqno_end );
5279 7366 100       14805 if ($rvertical_tightness_flags) {
5280              
5281 1308         2315 $open_or_close = $rvertical_tightness_flags->{_vt_type};
5282 1308         2066 $opening_flag = $rvertical_tightness_flags->{_vt_opening_flag};
5283 1308         2101 $closing_flag = $rvertical_tightness_flags->{_vt_closing_flag};
5284 1308         2174 $seqno = $rvertical_tightness_flags->{_vt_seqno};
5285 1308         2121 $valid = $rvertical_tightness_flags->{_vt_valid_flag};
5286 1308         2203 $seqno_beg = $rvertical_tightness_flags->{_vt_seqno_beg};
5287 1308         2140 $seqno_end = $rvertical_tightness_flags->{_vt_seqno_end};
5288             }
5289              
5290 7366         11360 $seqno_string = $seqno_end;
5291              
5292             # handle any cached line ..
5293             # either append this line to it or write it out
5294             # Note: the function length() is used in this next test out of caution.
5295             # All testing has shown that the variable $cached_line_text_length is
5296             # correct, but its calculation is complex and a loss of cached text
5297             # would be a disaster.
5298 7366 100       15653 if ( length($cached_line_text) ) {
5299              
5300             (
5301 158         770 $str,
5302             $str_length,
5303             $leading_string,
5304             $leading_string_length,
5305             $leading_space_count,
5306             $level,
5307             $maximum_line_length
5308              
5309             ) = $self->handle_cached_line( $rinput, $leading_string,
5310             $leading_string_length );
5311              
5312 158         447 $cached_line_type = 0;
5313 158         307 $cached_line_text = EMPTY_STRING;
5314 158         279 $cached_line_text_length = 0;
5315 158         251 $cached_line_Kend = undef;
5316 158         259 $cached_line_maximum_length = undef;
5317              
5318             }
5319              
5320             # make the line to be written
5321 7366         15580 my $line = $leading_string . $str;
5322 7366         11714 my $line_length = $leading_string_length + $str_length;
5323              
5324             # Safety check: be sure that a line to be cached as a stacked block
5325             # brace line ends in the appropriate opening or closing block brace.
5326             # This should always be the case if the caller set flags correctly.
5327             # Code '3' is for -sobb, code '4' is for -scbb.
5328 7366 100       14560 if ($open_or_close) {
5329 159 50 66     1259 if ( $open_or_close == 3 && $line !~ /\{\s*$/
      66        
      33        
5330             || $open_or_close == 4 && $line !~ /\}\s*$/ )
5331             {
5332 0         0 $open_or_close = 0;
5333             }
5334             }
5335              
5336             # write or cache this line ...
5337             # fix for case b999: do not cache an outdented line
5338             # fix for b1378: do not cache an empty line
5339 7366 100 66     21011 if ( !$open_or_close
      66        
      33        
5340             || $side_comment_length > 0
5341             || $is_outdented_line
5342             || !$line_length )
5343             {
5344 7207         18446 $self->valign_output_step_C(
5345             $seqno_string,
5346             $last_nonblank_seqno_string,
5347              
5348             $line,
5349             $leading_space_count,
5350             $level,
5351             $Kend,
5352             );
5353             }
5354             else {
5355 159         322 $cached_line_text = $line;
5356 159         1330 $cached_line_text_length = $line_length;
5357 159         267 $cached_line_type = $open_or_close;
5358 159         318 $cached_line_opening_flag = $opening_flag;
5359 159         284 $cached_line_closing_flag = $closing_flag;
5360 159         328 $cached_seqno = $seqno;
5361 159         257 $cached_line_valid = $valid;
5362 159         259 $cached_line_leading_space_count = $leading_space_count;
5363 159         287 $cached_seqno_string = $seqno_string;
5364 159         289 $cached_line_Kend = $Kend;
5365 159         268 $cached_line_maximum_length = $maximum_line_length;
5366             }
5367              
5368 7366         12787 $self->[_last_level_written_] = $level;
5369 7366         11403 $self->[_last_side_comment_length_] = $side_comment_length;
5370 7366         17109 return;
5371             } ## end sub valign_output_step_B
5372             }
5373              
5374             ###############################
5375             # CODE SECTION 8: Output Step C
5376             ###############################
5377              
5378             { ## closure for sub valign_output_step_C
5379              
5380             # Vertical alignment buffer used by valign_output_step_C
5381             my $valign_buffer_filling;
5382             my @valign_buffer;
5383              
5384             sub initialize_valign_buffer {
5385 555     555 0 1502 @valign_buffer = ();
5386 555         1213 $valign_buffer_filling = EMPTY_STRING;
5387 555         1013 return;
5388             }
5389              
5390             sub dump_valign_buffer {
5391 1808     1808 0 3711 my ($self) = @_;
5392              
5393             # Send all lines in the current buffer on to step_D
5394 1808 100       4617 if (@valign_buffer) {
5395 2         7 foreach (@valign_buffer) {
5396 7         11 $self->valign_output_step_D( @{$_} );
  7         29  
5397             }
5398 2         15 @valign_buffer = ();
5399             }
5400 1808         3562 $valign_buffer_filling = EMPTY_STRING;
5401 1808         2887 return;
5402             } ## end sub dump_valign_buffer
5403              
5404             sub reduce_valign_buffer_indentation {
5405              
5406 3     3 0 8 my ( $self, $diff ) = @_;
5407              
5408             # Reduce the leading indentation of lines in the current
5409             # buffer by $diff spaces
5410 3 100 66     29 if ( $valign_buffer_filling && $diff ) {
5411 2         7 my $max_valign_buffer = @valign_buffer;
5412 2         9 foreach my $i ( 0 .. $max_valign_buffer - 1 ) {
5413             my ( $line, $leading_space_count, $level, $Kend ) =
5414 7         9 @{ $valign_buffer[$i] };
  7         21  
5415 7         16 my $ws = substr( $line, 0, $diff );
5416 7 50 33     42 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
5417 7         19 $line = substr( $line, $diff );
5418             }
5419 7 50       15 if ( $leading_space_count >= $diff ) {
5420 7         11 $leading_space_count -= $diff;
5421 7         21 $level =
5422             $self->level_change( $leading_space_count, $diff,
5423             $level );
5424             }
5425 7         32 $valign_buffer[$i] =
5426             [ $line, $leading_space_count, $level, $Kend ];
5427             }
5428             }
5429 3         20 return;
5430             } ## end sub reduce_valign_buffer_indentation
5431              
5432             sub valign_output_step_C {
5433              
5434             #-----------------------------------------------------------------------
5435             # This is Step C in writing vertically aligned lines.
5436             # Lines are either stored in a buffer or passed along to the next step.
5437             # The reason for storing lines is that we may later want to reduce their
5438             # indentation when -sot and -sct are both used.
5439             #-----------------------------------------------------------------------
5440             my (
5441 7315     7315 0 20876 $self,
5442             $seqno_string,
5443             $last_nonblank_seqno_string,
5444              
5445             @args_to_D,
5446             ) = @_;
5447              
5448             # Dump any saved lines if we see a line with an unbalanced opening or
5449             # closing token.
5450 7315 100 100     17585 $self->dump_valign_buffer()
5451             if ( $seqno_string && $valign_buffer_filling );
5452              
5453             # Either store or write this line
5454 7315 100       13756 if ($valign_buffer_filling) {
5455 7         26 push @valign_buffer, [@args_to_D];
5456             }
5457             else {
5458 7308         17266 $self->valign_output_step_D(@args_to_D);
5459             }
5460              
5461             # For lines starting or ending with opening or closing tokens..
5462 7315 100       15192 if ($seqno_string) {
5463 394         1164 $last_nonblank_seqno_string = $seqno_string;
5464 394         1469 set_last_nonblank_seqno_string($seqno_string);
5465              
5466             # Start storing lines when we see a line with multiple stacked
5467             # opening tokens.
5468             # patch for RT #94354, requested by Colin Williams
5469 394 100 100     1917 if ( index( $seqno_string, ':' ) >= 0
      100        
5470             && $seqno_string =~ /^\d+(\:+\d+)+$/
5471             && $args_to_D[0] !~ /^[\}\)\]\:\?]/ )
5472             {
5473              
5474             # This test is efficient but a little subtle: The first test
5475             # says that we have multiple sequence numbers and hence
5476             # multiple opening or closing tokens in this line. The second
5477             # part of the test rejects stacked closing and ternary tokens.
5478             # So if we get here then we should have stacked unbalanced
5479             # opening tokens.
5480              
5481             # Here is a complex example:
5482              
5483             # Foo($Bar[0], { # (side comment)
5484             # baz => 1,
5485             # });
5486              
5487             # The first line has sequence 6::4. It does not begin with
5488             # a closing token or ternary, so it passes the test and must be
5489             # stacked opening tokens.
5490              
5491             # The last line has sequence 4:6 but is a stack of closing
5492             # tokens, so it gets rejected.
5493              
5494             # Note that the sequence number of an opening token for a qw
5495             # quote is a negative number and will be rejected. For
5496             # example, for the following line: skip_symbols([qw(
5497             # $seqno_string='10:5:-1'. It would be okay to accept it but I
5498             # decided not to do this after testing.
5499              
5500 8         22 $valign_buffer_filling = $seqno_string;
5501              
5502             }
5503             }
5504 7315         14461 return;
5505             } ## end sub valign_output_step_C
5506             }
5507              
5508             ###############################
5509             # CODE SECTION 9: Output Step D
5510             ###############################
5511              
5512             sub valign_output_step_D {
5513              
5514             #----------------------------------------------------------------
5515             # This is Step D in writing vertically aligned lines.
5516             # It is the end of the vertical alignment pipeline.
5517             # Write one vertically aligned line of code to the output object.
5518             #----------------------------------------------------------------
5519              
5520 7315     7315 0 16352 my ( $self, $line, $leading_space_count, $level, $Kend ) = @_;
5521              
5522             # The line is currently correct if there is no tabbing (recommended!)
5523             # We may have to lop off some leading spaces and replace with tabs.
5524 7315 100       15649 if ( $leading_space_count > 0 ) {
5525              
5526 4315         8116 my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
5527 4315         7353 my $rOpts_tabs = $self->[_rOpts_tabs_];
5528 4315         7030 my $rOpts_entab_leading_whitespace =
5529             $self->[_rOpts_entab_leading_whitespace_];
5530              
5531             # Nothing to do if no tabs
5532 4315 100 66     18986 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
    50 66        
5533             || $rOpts_indent_columns <= 0 )
5534             {
5535              
5536             # nothing to do
5537             }
5538              
5539             # Handle entab option
5540             elsif ($rOpts_entab_leading_whitespace) {
5541              
5542             # Patch 12-nov-2018 based on report from Glenn. Extra padding was
5543             # not correctly entabbed, nor were side comments: Increase leading
5544             # space count for a padded line to get correct tabbing
5545 45 50       280 if ( $line =~ /^(\s+)(.*)$/ ) {
5546 45         135 my $spaces = length($1);
5547 45 50       104 if ( $spaces > $leading_space_count ) {
5548 0         0 $leading_space_count = $spaces;
5549             }
5550             }
5551              
5552 45         85 my $space_count =
5553             $leading_space_count % $rOpts_entab_leading_whitespace;
5554 45         109 my $tab_count =
5555             int( $leading_space_count / $rOpts_entab_leading_whitespace );
5556 45         115 my $leading_string = "\t" x $tab_count . SPACE x $space_count;
5557 45 50       526 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
5558 45         155 substr( $line, 0, $leading_space_count ) = $leading_string;
5559             }
5560             else {
5561              
5562             # shouldn't happen - program error counting whitespace
5563             # - skip entabbing
5564 0         0 DEBUG_TABS
5565             && warning(
5566             "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
5567             );
5568             }
5569             }
5570              
5571             # Handle option of one tab per level
5572             else {
5573 0         0 my $leading_string = ( "\t" x $level );
5574 0         0 my $space_count =
5575             $leading_space_count - $level * $rOpts_indent_columns;
5576              
5577             # shouldn't happen:
5578 0 0       0 if ( $space_count < 0 ) {
5579              
5580             # But it could be an outdented comment
5581 0 0       0 if ( $line !~ /^\s*#/ ) {
5582 0         0 DEBUG_TABS
5583             && warning(
5584             "Error entabbing in valign_output_step_D: for level=$level count=$leading_space_count\n"
5585             );
5586             }
5587 0         0 $leading_string = ( SPACE x $leading_space_count );
5588             }
5589             else {
5590 0         0 $leading_string .= ( SPACE x $space_count );
5591             }
5592 0 0       0 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
5593 0         0 substr( $line, 0, $leading_space_count ) = $leading_string;
5594             }
5595             else {
5596              
5597             # shouldn't happen - program error counting whitespace
5598             # we'll skip entabbing
5599 0         0 DEBUG_TABS
5600             && warning(
5601             "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
5602             );
5603             }
5604             }
5605             }
5606 7315         13351 my $file_writer_object = $self->[_file_writer_object_];
5607 7315         33993 $file_writer_object->write_code_line( $line . "\n", $Kend );
5608              
5609 7315         14645 return;
5610             } ## end sub valign_output_step_D
5611              
5612             ##########################
5613             # CODE SECTION 10: Summary
5614             ##########################
5615              
5616             sub report_anything_unusual {
5617 555     555 0 1269 my $self = shift;
5618              
5619 555         1761 my $outdented_line_count = $self->[_outdented_line_count_];
5620 555 100       2193 if ( $outdented_line_count > 0 ) {
5621 21         313 write_logfile_entry(
5622             "$outdented_line_count long lines were outdented:\n");
5623 21         76 my $first_outdented_line_at = $self->[_first_outdented_line_at_];
5624 21         135 write_logfile_entry(
5625             " First at output line $first_outdented_line_at\n");
5626              
5627 21 100       185 if ( $outdented_line_count > 1 ) {
5628 7         23 my $last_outdented_line_at = $self->[_last_outdented_line_at_];
5629 7         31 write_logfile_entry(
5630             " Last at output line $last_outdented_line_at\n");
5631             }
5632             write_logfile_entry(
5633 21         119 " use -noll to prevent outdenting, -l=n to increase line length\n"
5634             );
5635 21         134 write_logfile_entry("\n");
5636             }
5637 555         1351 return;
5638             } ## end sub report_anything_unusual
5639             1;