File Coverage

blib/lib/Perl/Tidy/VerticalAligner.pm
Criterion Covered Total %
statement 1912 2065 92.5
branch 668 826 80.8
condition 423 544 77.7
subroutine 98 109 89.9
pod 0 71 0.0
total 3101 3615 85.7


line stmt bran cond sub pod time code
1             package Perl::Tidy::VerticalAligner;
2 39     39   309 use strict;
  39         89  
  39         1565  
3 39     39   241 use warnings;
  39         99  
  39         1289  
4 39     39   212 use Carp;
  39         80  
  39         2919  
5 39     39   242 use English qw( -no_match_vars );
  39         87  
  39         374  
6             our $VERSION = '20230912';
7 39     39   31604 use Perl::Tidy::VerticalAligner::Alignment;
  39         109  
  39         1334  
8 39     39   16244 use Perl::Tidy::VerticalAligner::Line;
  39         125  
  39         1452  
9              
10 39     39   303 use constant DEVEL_MODE => 0;
  39         91  
  39         2392  
11 39     39   249 use constant EMPTY_STRING => q{};
  39         97  
  39         1817  
12 39     39   254 use constant SPACE => q{ };
  39         115  
  39         17990  
13              
14             # The Perl::Tidy::VerticalAligner package collects output lines and
15             # attempts to line up certain common tokens, such as => and #, which are
16             # identified by the calling routine.
17             #
18             # Usage:
19             # - Initiate an object with a call to new().
20             # - Write lines one-by-one with calls to valign_input().
21             # - Make a final call to flush() to empty the pipeline.
22             #
23             # The sub valign_input collects lines into groups. When a group reaches
24             # the maximum possible size it is processed for alignment and output.
25             # The maximum group size is reached whenever there is a change in indentation
26             # level, a blank line, a block comment, or an external flush call. The calling
27             # routine may also force a break in alignment at any time.
28             #
29             # If the calling routine needs to interrupt the output and send other text to
30             # the output, it must first call flush() to empty the output pipeline. This
31             # might occur for example if a block of pod text needs to be sent to the output
32             # between blocks of code.
33              
34             # It is essential that a final call to flush() be made. Otherwise some
35             # final lines of text will be lost.
36              
37             # Index...
38             # CODE SECTION 1: Preliminary code, global definitions and sub new
39             # sub new
40             # CODE SECTION 2: Some Basic Utilities
41             # CODE SECTION 3: Code to accept input and form groups
42             # sub valign_input
43             # CODE SECTION 4: Code to process comment lines
44             # sub _flush_comment_lines
45             # CODE SECTION 5: Code to process groups of code lines
46             # sub _flush_group_lines
47             # CODE SECTION 6: Output Step A
48             # sub valign_output_step_A
49             # CODE SECTION 7: Output Step B
50             # sub valign_output_step_B
51             # CODE SECTION 8: Output Step C
52             # sub valign_output_step_C
53             # CODE SECTION 9: Output Step D
54             # sub valign_output_step_D
55             # CODE SECTION 10: Summary
56             # sub report_anything_unusual
57              
58             ##################################################################
59             # CODE SECTION 1: Preliminary code, global definitions and sub new
60             ##################################################################
61              
62             sub AUTOLOAD {
63              
64             # Catch any undefined sub calls so that we are sure to get
65             # some diagnostic information. This sub should never be called
66             # except for a programming error.
67 0     0   0 our $AUTOLOAD;
68 0 0       0 return if ( $AUTOLOAD =~ /\bDESTROY$/ );
69 0         0 my ( $pkg, $fname, $lno ) = caller();
70 0         0 my $my_package = __PACKAGE__;
71 0         0 print {*STDERR} <<EOM;
  0         0  
72             ======================================================================
73             Error detected in package '$my_package', version $VERSION
74             Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
75             Called from package: '$pkg'
76             Called from File '$fname' at line '$lno'
77             This error is probably due to a recent programming change
78             ======================================================================
79             EOM
80 0         0 exit 1;
81             } ## end sub AUTOLOAD
82              
83       0     sub DESTROY {
84              
85             # required to avoid call to AUTOLOAD in some versions of perl
86             }
87              
88             sub Die {
89 0     0 0 0 my ($msg) = @_;
90 0         0 Perl::Tidy::Die($msg);
91 0         0 croak "unexpected return from Perl::Tidy::Die";
92             }
93              
94             sub Fault {
95 0     0 0 0 my ($msg) = @_;
96              
97             # This routine is called for errors that really should not occur
98             # except if there has been a bug introduced by a recent program change.
99             # Please add comments at calls to Fault to explain why the call
100             # should not occur, and where to look to fix it.
101 0         0 my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
102 0         0 my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
103 0         0 my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
104 0         0 my $pkg = __PACKAGE__;
105              
106 0         0 my $input_stream_name = get_input_stream_name();
107              
108 0         0 Die(<<EOM);
109             ==============================================================================
110             While operating on input stream with name: '$input_stream_name'
111             A fault was detected at line $line0 of sub '$subroutine1'
112             in file '$filename1'
113             which was called from line $line1 of sub '$subroutine2'
114             Message: '$msg'
115             This is probably an error introduced by a recent programming change.
116             $pkg reports VERSION='$VERSION'.
117             ==============================================================================
118             EOM
119              
120             # We shouldn't get here, but this return is to keep Perl-Critic from
121             # complaining.
122 0         0 return;
123             } ## end sub Fault
124              
125             my %valid_LINE_keys;
126              
127             BEGIN {
128              
129             # define valid keys in a line object
130 39     39   314 my @q = qw(
131             jmax
132             rtokens
133             rfields
134             rfield_lengths
135             rpatterns
136             indentation
137             leading_space_count
138             outdent_long_lines
139             list_type
140             list_seqno
141             is_hanging_side_comment
142             maximum_line_length
143             rvertical_tightness_flags
144             is_terminal_ternary
145             j_terminal_match
146             end_group
147             Kend
148             ci_level
149             level
150             level_end
151             imax_pair
152              
153             ralignments
154             );
155              
156 39         5070 @valid_LINE_keys{@q} = (1) x scalar(@q);
157             } ## end BEGIN
158              
159             BEGIN {
160              
161             # Define the fixed indexes for variables in $self, which is an array
162             # reference. Note the convention of leading and trailing underscores to
163             # keep them unique.
164             # Do not combine with other BEGIN blocks (c101).
165 39     39   162 my $i = 0;
166             use constant {
167 39         10970 _file_writer_object_ => $i++,
168             _logger_object_ => $i++,
169             _diagnostics_object_ => $i++,
170              
171             _rOpts_ => $i++,
172             _rOpts_indent_columns_ => $i++,
173             _rOpts_tabs_ => $i++,
174             _rOpts_entab_leading_whitespace_ => $i++,
175             _rOpts_fixed_position_side_comment_ => $i++,
176             _rOpts_minimum_space_to_comment_ => $i++,
177             _rOpts_valign_code_ => $i++,
178             _rOpts_valign_block_comments_ => $i++,
179             _rOpts_valign_side_comments_ => $i++,
180              
181             _last_level_written_ => $i++,
182             _last_side_comment_column_ => $i++,
183             _last_side_comment_line_number_ => $i++,
184             _last_side_comment_length_ => $i++,
185             _last_side_comment_level_ => $i++,
186             _outdented_line_count_ => $i++,
187             _first_outdented_line_at_ => $i++,
188             _last_outdented_line_at_ => $i++,
189             _consecutive_block_comments_ => $i++,
190              
191             _rgroup_lines_ => $i++,
192             _group_level_ => $i++,
193             _group_type_ => $i++,
194             _group_maximum_line_length_ => $i++,
195             _zero_count_ => $i++,
196             _last_leading_space_count_ => $i++,
197             _comment_leading_space_count_ => $i++,
198 39     39   376 };
  39         85  
199              
200             # Debug flag. This is a relic from the original program development
201             # looking for problems with tab characters. Caution: this debug flag can
202             # produce a lot of output It should be 0 except when debugging small
203             # scripts.
204              
205 39     39   306 use constant DEBUG_TABS => 0;
  39         97  
  39         4639  
206              
207             my $debug_warning = sub {
208 0         0 print {*STDOUT} "VALIGN_DEBUGGING with key $_[0]\n";
  0         0  
209 0         0 return;
210 39         461 };
211              
212 39         53026 DEBUG_TABS && $debug_warning->('TABS');
213             } ## end BEGIN
214              
215             # GLOBAL variables
216             my (
217              
218             %valign_control_hash,
219             $valign_control_default,
220              
221             );
222              
223             sub check_options {
224              
225             # This routine is called to check the user-supplied run parameters
226             # and to configure the control hashes to them.
227 560     560 0 1936 my ($rOpts) = @_;
228              
229             # All alignments are done by default
230 560         1589 %valign_control_hash = ();
231 560         1276 $valign_control_default = 1;
232              
233             # If -vil=s is entered without -vxl, assume -vxl='*'
234 560 50 66     4599 if ( !$rOpts->{'valign-exclusion-list'}
235             && $rOpts->{'valign-inclusion-list'} )
236             {
237 0         0 $rOpts->{'valign-exclusion-list'} = '*';
238             }
239              
240             # See if the user wants to exclude any alignment types ...
241 560 100       2441 if ( $rOpts->{'valign-exclusion-list'} ) {
242              
243             # The inclusion list is only relevant if there is an exclusion list
244 3 100       23 if ( $rOpts->{'valign-inclusion-list'} ) {
245 1         7 my @vil = split /\s+/, $rOpts->{'valign-inclusion-list'};
246 1         5 @valign_control_hash{@vil} = (1) x scalar(@vil);
247             }
248              
249             # Note that the -vxl list is done after -vil, so -vxl has priority
250             # in the event of duplicate entries.
251 3         18 my @vxl = split /\s+/, $rOpts->{'valign-exclusion-list'};
252 3         25 @valign_control_hash{@vxl} = (0) x scalar(@vxl);
253              
254             # Optimization: revert to defaults if no exclusions.
255             # This could happen with -vxl=' ' and any -vil list
256 3 50       17 if ( !@vxl ) {
257 0         0 %valign_control_hash = ();
258             }
259              
260             # '$valign_control_default' applies to types not in the hash:
261             # - If a '*' was entered then set it to be that default type
262             # - Otherwise, leave it set it to 1
263 3 100       18 if ( defined( $valign_control_hash{'*'} ) ) {
264 1         4 $valign_control_default = $valign_control_hash{'*'};
265             }
266              
267             # Side comments are controlled separately and must be removed
268             # if given in a list.
269 3 50       16 if (%valign_control_hash) {
270 3         10 $valign_control_hash{'#'} = 1;
271             }
272             }
273              
274 560         1543 return;
275             } ## end sub check_options
276              
277             sub check_keys {
278 0     0 0 0 my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
279              
280             # Check the keys of a hash:
281             # $rtest = ref to hash to test
282             # $rvalid = ref to hash with valid keys
283              
284             # $msg = a message to write in case of error
285             # $exact_match defines the type of check:
286             # = false: test hash must not have unknown key
287             # = true: test hash must have exactly same keys as known hash
288             my @unknown_keys =
289 0         0 grep { !exists $rvalid->{$_} } keys %{$rtest};
  0         0  
  0         0  
290             my @missing_keys =
291 0         0 grep { !exists $rtest->{$_} } keys %{$rvalid};
  0         0  
  0         0  
292 0         0 my $error = @unknown_keys;
293 0 0 0     0 if ($exact_match) { $error ||= @missing_keys }
  0         0  
294 0 0       0 if ($error) {
295 0         0 local $LIST_SEPARATOR = ')(';
296 0         0 my @expected_keys = sort keys %{$rvalid};
  0         0  
297 0         0 @unknown_keys = sort @unknown_keys;
298 0         0 Fault(<<EOM);
299             ------------------------------------------------------------------------
300             Program error detected checking hash keys
301             Message is: '$msg'
302             Expected keys: (@expected_keys)
303             Unknown key(s): (@unknown_keys)
304             Missing key(s): (@missing_keys)
305             ------------------------------------------------------------------------
306             EOM
307             }
308 0         0 return;
309             } ## end sub check_keys
310              
311             sub new {
312              
313 561     561 0 2892 my ( $class, @args ) = @_;
314              
315 561         4100 my %defaults = (
316             rOpts => undef,
317             file_writer_object => undef,
318             logger_object => undef,
319             diagnostics_object => undef,
320             );
321 561         3187 my %args = ( %defaults, @args );
322              
323             # Initialize other caches and buffers
324 561         3935 initialize_step_B_cache();
325 561         2872 initialize_valign_buffer();
326 561         3054 initialize_decode();
327 561         3103 set_logger_object( $args{logger_object} );
328              
329             # Initialize all variables in $self.
330             # To add an item to $self, first define a new constant index in the BEGIN
331             # section.
332 561         1638 my $self = [];
333              
334             # objects
335 561         1976 $self->[_file_writer_object_] = $args{file_writer_object};
336 561         1787 $self->[_logger_object_] = $args{logger_object};
337 561         1490 $self->[_diagnostics_object_] = $args{diagnostics_object};
338              
339             # shortcuts to user options
340 561         1452 my $rOpts = $args{rOpts};
341              
342 561         1373 $self->[_rOpts_] = $rOpts;
343 561         1700 $self->[_rOpts_indent_columns_] = $rOpts->{'indent-columns'};
344 561         1780 $self->[_rOpts_tabs_] = $rOpts->{'tabs'};
345             $self->[_rOpts_entab_leading_whitespace_] =
346 561         1759 $rOpts->{'entab-leading-whitespace'};
347             $self->[_rOpts_fixed_position_side_comment_] =
348 561         1599 $rOpts->{'fixed-position-side-comment'};
349             $self->[_rOpts_minimum_space_to_comment_] =
350 561         1622 $rOpts->{'minimum-space-to-comment'};
351 561         1444 $self->[_rOpts_valign_code_] = $rOpts->{'valign-code'};
352 561         1556 $self->[_rOpts_valign_block_comments_] = $rOpts->{'valign-block-comments'};
353 561         1571 $self->[_rOpts_valign_side_comments_] = $rOpts->{'valign-side-comments'};
354              
355             # Batch of lines being collected
356 561         1848 $self->[_rgroup_lines_] = [];
357 561         1397 $self->[_group_level_] = 0;
358 561         1407 $self->[_group_type_] = EMPTY_STRING;
359 561         1613 $self->[_group_maximum_line_length_] = undef;
360 561         1510 $self->[_zero_count_] = 0;
361 561         1451 $self->[_comment_leading_space_count_] = 0;
362 561         1431 $self->[_last_leading_space_count_] = 0;
363              
364             # Memory of what has been processed
365 561         1368 $self->[_last_level_written_] = -1;
366 561         1336 $self->[_last_side_comment_column_] = 0;
367 561         1286 $self->[_last_side_comment_line_number_] = 0;
368 561         1257 $self->[_last_side_comment_length_] = 0;
369 561         1291 $self->[_last_side_comment_level_] = -1;
370 561         1228 $self->[_outdented_line_count_] = 0;
371 561         1359 $self->[_first_outdented_line_at_] = 0;
372 561         1190 $self->[_last_outdented_line_at_] = 0;
373 561         1135 $self->[_consecutive_block_comments_] = 0;
374              
375 561         1372 bless $self, $class;
376 561         3142 return $self;
377             } ## end sub new
378              
379             #################################
380             # CODE SECTION 2: Basic Utilities
381             #################################
382              
383             sub flush {
384              
385             # flush() is the external call to completely empty the pipeline.
386 1818     1818 0 3877 my ($self) = @_;
387              
388             # push things out the pipeline...
389              
390             # push out any current group lines
391 1818         5910 $self->_flush_group_lines();
392              
393             # then anything left in the cache of step_B
394 1818         6981 $self->_flush_step_B_cache();
395              
396             # then anything left in the buffer of step_C
397 1818         5850 $self->dump_valign_buffer();
398              
399 1818         3588 return;
400             } ## end sub flush
401              
402             sub initialize_for_new_group {
403 2237     2237 0 5188 my ($self) = @_;
404              
405 2237         5108 $self->[_rgroup_lines_] = [];
406 2237         4730 $self->[_group_type_] = EMPTY_STRING;
407 2237         3875 $self->[_zero_count_] = 0;
408 2237         3725 $self->[_comment_leading_space_count_] = 0;
409 2237         3600 $self->[_last_leading_space_count_] = 0;
410 2237         3925 $self->[_group_maximum_line_length_] = undef;
411              
412             # Note that the value for _group_level_ is
413             # handled separately in sub valign_input
414 2237         3842 return;
415             } ## end sub initialize_for_new_group
416              
417             sub group_line_count {
418 73     73 0 125 return +@{ $_[0]->[_rgroup_lines_] };
  73         352  
419             }
420              
421             # interface to Perl::Tidy::Diagnostics routines
422             # For debugging; not currently used
423             sub write_diagnostics {
424 0     0 0 0 my ( $self, $msg ) = @_;
425 0         0 my $diagnostics_object = $self->[_diagnostics_object_];
426 0 0       0 if ($diagnostics_object) {
427 0         0 $diagnostics_object->write_diagnostics($msg);
428             }
429 0         0 return;
430             } ## end sub write_diagnostics
431              
432             { ## begin closure for logger routines
433             my $logger_object;
434              
435             # Called once per file to initialize the logger object
436             sub set_logger_object {
437 561     561 0 21726 $logger_object = shift;
438 561         1257 return;
439             }
440              
441             sub get_logger_object {
442 0     0 0 0 return $logger_object;
443             }
444              
445             sub get_input_stream_name {
446 0     0 0 0 my $input_stream_name = EMPTY_STRING;
447 0 0       0 if ($logger_object) {
448 0         0 $input_stream_name = $logger_object->get_input_stream_name();
449             }
450 0         0 return $input_stream_name;
451             } ## end sub get_input_stream_name
452              
453             sub warning {
454 0     0 0 0 my ($msg) = @_;
455 0 0       0 if ($logger_object) {
456 0         0 $logger_object->warning($msg);
457             }
458 0         0 return;
459             } ## end sub warning
460              
461             sub write_logfile_entry {
462 91     91 0 191 my ($msg) = @_;
463 91 50       214 if ($logger_object) {
464 91         219 $logger_object->write_logfile_entry($msg);
465             }
466 91         199 return;
467             } ## end sub write_logfile_entry
468             }
469              
470             sub get_cached_line_count {
471 1     1 0 3 my $self = shift;
472 1 50       6 return $self->group_line_count() + ( get_cached_line_type() ? 1 : 0 );
473             }
474              
475             sub get_recoverable_spaces {
476              
477             # return the number of spaces (+ means shift right, - means shift left)
478             # that we would like to shift a group of lines with the same indentation
479             # to get them to line up with their opening parens
480 4121     4121 0 7074 my $indentation = shift;
481 4121 100       14550 return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
482             } ## end sub get_recoverable_spaces
483              
484             ######################################################
485             # CODE SECTION 3: Code to accept input and form groups
486             ######################################################
487              
488 39     39   390 use constant DEBUG_VALIGN => 0;
  39         131  
  39         2551  
489 39     39   287 use constant SC_LONG_LINE_DIFF => 12;
  39         116  
  39         3650  
490              
491             my %is_closing_token;
492              
493             BEGIN {
494 39     39   227 my @q = qw< } ) ] >;
495 39         65470 @is_closing_token{@q} = (1) x scalar(@q);
496             }
497              
498             #--------------------------------------------
499             # VTFLAGS: Vertical tightness types and flags
500             #--------------------------------------------
501             # Vertical tightness is controlled by a 'type' and associated 'flags' for each
502             # line. These values are set by sub Formatter::set_vertical_tightness_flags.
503             # These are defined as follows:
504              
505             # Vertical Tightness Line Type Codes:
506             # Type 0, no vertical tightness condition
507             # Type 1, last token of this line is a non-block opening token
508             # Type 2, first token of next line is a non-block closing
509             # Type 3, isolated opening block brace
510             # type 4, isolated closing block brace
511              
512             # Opening token flag values are the vertical tightness flags
513             # 0 do not join with next line
514             # 1 just one join per line
515             # 2 any number of joins
516              
517             # Closing token flag values indicate spacing:
518             # 0 = no space added before closing token
519             # 1 = single space added before closing token
520              
521             sub valign_input {
522              
523             #---------------------------------------------------------------------
524             # This is the front door of the vertical aligner. On each call
525             # we receive one line of specially marked text for vertical alignment.
526             # We compare the line with the current group, and either:
527             # - the line joins the current group if alignments match, or
528             # - the current group is flushed and a new group is started otherwise
529             #---------------------------------------------------------------------
530             #
531             # The key input parameters describing each line are:
532             # $level = indentation level of this line
533             # $rfields = ref to array of fields
534             # $rpatterns = ref to array of patterns, one per field
535             # $rtokens = ref to array of tokens starting fields 1,2,..
536             # $rfield_lengths = ref to array of field display widths
537             #
538             # Here is an example of what this package does. In this example,
539             # we are trying to line up both the '=>' and the '#'.
540             #
541             # '18' => 'grave', # \`
542             # '19' => 'acute', # `'
543             # '20' => 'caron', # \v
544             # <-tabs-><f1-><--field 2 ---><-f3->
545             # | | | |
546             # | | | |
547             # col1 col2 col3 col4
548             #
549             # The calling routine has already broken the entire line into 3 fields as
550             # indicated. (So the work of identifying promising common tokens has
551             # already been done).
552             #
553             # In this example, there will be 2 tokens being matched: '=>' and '#'.
554             # They are the leading parts of fields 2 and 3, but we do need to know
555             # what they are so that we can dump a group of lines when these tokens
556             # change.
557             #
558             # The fields contain the actual characters of each field. The patterns
559             # are like the fields, but they contain mainly token types instead
560             # of tokens, so they have fewer characters. They are used to be
561             # sure we are matching fields of similar type.
562             #
563             # In this example, there will be 4 column indexes being adjusted. The
564             # first one is always at zero. The interior columns are at the start of
565             # the matching tokens, and the last one tracks the maximum line length.
566             #
567             # Each time a new line comes in, it joins the current vertical
568             # group if possible. Otherwise it causes the current group to be flushed
569             # and a new group is started.
570             #
571             # For each new group member, the column locations are increased, as
572             # necessary, to make room for the new fields. When the group is finally
573             # output, these column numbers are used to compute the amount of spaces of
574             # padding needed for each field.
575             #
576             # Programming note: the fields are assumed not to have any tab characters.
577             # Tabs have been previously removed except for tabs in quoted strings and
578             # side comments. Tabs in these fields can mess up the column counting.
579             # The log file warns the user if there are any such tabs.
580              
581 7384     7384 0 16255 my ( $self, $rcall_hash ) = @_;
582              
583             # Unpack the call args. This form is significantly faster than getting them
584             # one-by-one.
585             my (
586              
587             $Kend,
588             $break_alignment_after,
589             $break_alignment_before,
590             $ci_level,
591             $forget_side_comment,
592             $indentation,
593             $is_terminal_ternary,
594             $level,
595             $level_end,
596             $list_seqno,
597             $maximum_line_length,
598             $outdent_long_lines,
599             $rline_alignment,
600             $rvertical_tightness_flags,
601              
602             ) =
603              
604 7384         28727 @{$rcall_hash}{
605 7384         14361 qw(
606             Kend
607             break_alignment_after
608             break_alignment_before
609             ci_level
610             forget_side_comment
611             indentation
612             is_terminal_ternary
613             level
614             level_end
615             list_seqno
616             maximum_line_length
617             outdent_long_lines
618             rline_alignment
619             rvertical_tightness_flags
620             )
621             };
622              
623             my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
624 7384         12114 @{$rline_alignment};
  7384         14847  
625              
626             # The index '$Kend' is a value which passed along with the line text to sub
627             # 'write_code_line' for a convergence check.
628              
629             # number of fields is $jmax
630             # number of tokens between fields is $jmax-1
631 7384         10699 my $jmax = @{$rfields} - 1;
  7384         12536  
632              
633 7384 100       16486 my $leading_space_count =
634             ref($indentation) ? $indentation->get_spaces() : $indentation;
635              
636             # set outdented flag to be sure we either align within statements or
637             # across statement boundaries, but not both.
638 7384         14118 my $is_outdented =
639             $self->[_last_leading_space_count_] > $leading_space_count;
640 7384         12316 $self->[_last_leading_space_count_] = $leading_space_count;
641              
642             # Identify a hanging side comment. Hanging side comments have an empty
643             # initial field.
644 7384   100     24767 my $is_hanging_side_comment =
645             ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
646              
647             # Undo outdented flag for a hanging side comment
648 7384 100       14888 $is_outdented = 0 if $is_hanging_side_comment;
649              
650             # Identify a block comment.
651 7384   100     23857 my $is_block_comment = $jmax == 0 && substr( $rfields->[0], 0, 1 ) eq '#';
652              
653             # Block comment .. update count
654 7384 100       13863 if ($is_block_comment) {
655 632         1274 $self->[_consecutive_block_comments_]++;
656             }
657              
658             # Not a block comment ..
659             # Forget side comment column if we saw 2 or more block comments,
660             # and reset the count
661             else {
662              
663 6752 100       15338 if ( $self->[_consecutive_block_comments_] > 1 ) {
664 67         453 $self->forget_side_comment();
665             }
666 6752         11033 $self->[_consecutive_block_comments_] = 0;
667             }
668              
669             # Reset side comment location if we are entering a new block from level 0.
670             # This is intended to keep them from drifting too far to the right.
671 7384 100       14290 if ($forget_side_comment) {
672 44         230 $self->forget_side_comment();
673             }
674              
675 7384         12136 my $is_balanced_line = $level_end == $level;
676              
677 7384         12017 my $group_level = $self->[_group_level_];
678 7384         11989 my $group_maximum_line_length = $self->[_group_maximum_line_length_];
679              
680 7384         10328 DEBUG_VALIGN && do {
681             my $nlines = $self->group_line_count();
682             print {*STDOUT}
683             "Entering valign_input: lines=$nlines new #fields= $jmax, leading_count=$leading_space_count, level=$level, group_level=$group_level, level_end=$level_end\n";
684             };
685              
686             # Validate cached line if necessary: If we can produce a container
687             # with just 2 lines total by combining an existing cached opening
688             # token with the closing token to follow, then we will mark both
689             # cached flags as valid.
690 7384         18244 my $cached_line_type = get_cached_line_type();
691 7384 100       16120 if ($cached_line_type) {
692 224         831 my $cached_line_opening_flag = get_cached_line_opening_flag();
693 224 50       569 if ($rvertical_tightness_flags) {
694 224         562 my $cached_seqno = get_cached_seqno();
695 224 100 100     1096 if ( $cached_seqno
      100        
696             && $rvertical_tightness_flags->{_vt_seqno}
697             && $rvertical_tightness_flags->{_vt_seqno} == $cached_seqno )
698             {
699              
700             # Fix for b1187 and b1188: Normally this step is only done
701             # if the number of existing lines is 0 or 1. But to prevent
702             # blinking, this range can be controlled by the caller.
703             # If zero values are given we fall back on the range 0 to 1.
704 4         49 my $line_count = $self->group_line_count();
705 4         12 my $min_lines = $rvertical_tightness_flags->{_vt_min_lines};
706 4         10 my $max_lines = $rvertical_tightness_flags->{_vt_max_lines};
707 4 50       28 $min_lines = 0 if ( !$min_lines );
708 4 50       19 $max_lines = 1 if ( !$max_lines );
709 4 100 66     30 if ( ( $line_count >= $min_lines )
710             && ( $line_count <= $max_lines ) )
711             {
712 3   50     22 $rvertical_tightness_flags->{_vt_valid_flag} ||= 1;
713 3         12 set_cached_line_valid(1);
714             }
715             }
716             }
717              
718             # do not join an opening block brace (type 3, see VTFLAGS)
719             # with an unbalanced line unless requested with a flag value of 2
720 224 50 100     699 if ( $cached_line_type == 3
      66        
      66        
721             && !$self->group_line_count()
722             && $cached_line_opening_flag < 2
723             && !$is_balanced_line )
724             {
725 0         0 set_cached_line_valid(0);
726             }
727             }
728              
729             # shouldn't happen:
730 7384 50       15974 if ( $level < 0 ) { $level = 0 }
  0         0  
731              
732             # do not align code across indentation level changes
733             # or changes in the maximum line length
734             # or if vertical alignment is turned off
735 7384 100 66     57979 if (
      66        
      66        
      100        
      100        
      100        
      100        
736             $level != $group_level
737             || ( $group_maximum_line_length
738             && $maximum_line_length != $group_maximum_line_length )
739             || $is_outdented
740             || ( $is_block_comment && !$self->[_rOpts_valign_block_comments_] )
741             || ( !$is_block_comment
742             && !$self->[_rOpts_valign_side_comments_]
743             && !$self->[_rOpts_valign_code_] )
744             )
745             {
746              
747 2843         11347 $self->_flush_group_lines( $level - $group_level );
748              
749 2843         5191 $group_level = $level;
750 2843         5017 $self->[_group_level_] = $group_level;
751 2843         4527 $self->[_group_maximum_line_length_] = $maximum_line_length;
752              
753             # Update leading spaces after the above flush because the leading space
754             # count may have been changed if the -icp flag is in effect
755 2843 100       6312 $leading_space_count =
756             ref($indentation) ? $indentation->get_spaces() : $indentation;
757             }
758              
759             # --------------------------------------------------------------------
760             # Collect outdentable block COMMENTS
761             # --------------------------------------------------------------------
762 7384 100       17945 if ( $self->[_group_type_] eq 'COMMENT' ) {
763 558 100 66     3182 if ( $is_block_comment
      66        
764             && $outdent_long_lines
765             && $leading_space_count == $self->[_comment_leading_space_count_] )
766             {
767              
768             # Note that for a comment group we are not storing a line
769             # but rather just the text and its length.
770 77         198 push @{ $self->[_rgroup_lines_] },
  77         348  
771             [ $rfields->[0], $rfield_lengths->[0], $Kend ];
772 77         275 return;
773             }
774             else {
775 481         2056 $self->_flush_group_lines();
776             }
777             }
778              
779 7307         11734 my $rgroup_lines = $self->[_rgroup_lines_];
780 7307 100 100     16237 if ( $break_alignment_before && @{$rgroup_lines} ) {
  111         481  
781 27         84 $rgroup_lines->[-1]->{'end_group'} = 1;
782             }
783              
784             # --------------------------------------------------------------------
785             # add dummy fields for terminal ternary
786             # --------------------------------------------------------------------
787 7307         10543 my $j_terminal_match;
788              
789 7307 100 100     16208 if ( $is_terminal_ternary && @{$rgroup_lines} ) {
  16         76  
790 13         83 $j_terminal_match =
791             fix_terminal_ternary( $rgroup_lines->[-1], $rfields, $rtokens,
792             $rpatterns, $rfield_lengths, $group_level, );
793 13         36 $jmax = @{$rfields} - 1;
  13         33  
794             }
795              
796             # --------------------------------------------------------------------
797             # add dummy fields for else statement
798             # --------------------------------------------------------------------
799              
800             # Note the trailing space after 'else' here. If there were no space between
801             # the else and the next '{' then we would not be able to do vertical
802             # alignment of the '{'.
803 7307 100 100     18244 if ( $rfields->[0] eq 'else '
      66        
804 12         121 && @{$rgroup_lines}
805             && $is_balanced_line )
806             {
807              
808 9         203 $j_terminal_match =
809             fix_terminal_else( $rgroup_lines->[-1], $rfields, $rtokens,
810             $rpatterns, $rfield_lengths );
811 9         26 $jmax = @{$rfields} - 1;
  9         26  
812             }
813              
814             # --------------------------------------------------------------------
815             # Handle simple line of code with no fields to match.
816             # --------------------------------------------------------------------
817 7307 100       14918 if ( $jmax <= 0 ) {
818 4285         7493 $self->[_zero_count_]++;
819              
820 4285 100 100     6708 if ( @{$rgroup_lines}
  4285         12699  
821             && !get_recoverable_spaces( $rgroup_lines->[0]->{'indentation'} ) )
822             {
823              
824             # flush the current group if it has some aligned columns..
825             # or we haven't seen a comment lately
826 338 100 100     1718 if ( $rgroup_lines->[0]->{'jmax'} > 1
827             || $self->[_zero_count_] > 3 )
828             {
829 309         1221 $self->_flush_group_lines();
830              
831             # Update '$rgroup_lines' - it will become a ref to empty array.
832             # This allows avoiding a call to get_group_line_count below.
833 309         958 $rgroup_lines = $self->[_rgroup_lines_];
834             }
835             }
836              
837             # start new COMMENT group if this comment may be outdented
838 4285 100 100     12291 if ( $is_block_comment
      66        
839             && $outdent_long_lines
840 531         1861 && !@{$rgroup_lines} )
841             {
842 531         1363 $self->[_group_type_] = 'COMMENT';
843 531         1030 $self->[_comment_leading_space_count_] = $leading_space_count;
844 531         1011 $self->[_group_maximum_line_length_] = $maximum_line_length;
845 531         950 push @{$rgroup_lines},
  531         1957  
846             [ $rfields->[0], $rfield_lengths->[0], $Kend ];
847 531         1902 return;
848             }
849              
850             # just write this line directly if no current group, no side comment,
851             # and no space recovery is needed.
852 3754 100 100     5831 if ( !@{$rgroup_lines}
  3754         12698  
853             && !get_recoverable_spaces($indentation) )
854             {
855              
856 3710         33530 $self->valign_output_step_B(
857             {
858             leading_space_count => $leading_space_count,
859             line => $rfields->[0],
860             line_length => $rfield_lengths->[0],
861             side_comment_length => 0,
862             outdent_long_lines => $outdent_long_lines,
863             rvertical_tightness_flags => $rvertical_tightness_flags,
864             level => $level,
865             level_end => $level_end,
866             Kend => $Kend,
867             maximum_line_length => $maximum_line_length,
868             }
869             );
870 3710         16408 return;
871             }
872             }
873             else {
874 3022         6194 $self->[_zero_count_] = 0;
875             }
876              
877             # --------------------------------------------------------------------
878             # It simplifies things to create a zero length side comment
879             # if none exists.
880             # --------------------------------------------------------------------
881 3066 100 100     14386 if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) {
882 2741         4635 $jmax += 1;
883 2741         5971 $rtokens->[ $jmax - 1 ] = '#';
884 2741         5555 $rfields->[$jmax] = EMPTY_STRING;
885 2741         4923 $rfield_lengths->[$jmax] = 0;
886 2741         5419 $rpatterns->[$jmax] = '#';
887             }
888              
889             # --------------------------------------------------------------------
890             # create an object to hold this line
891             # --------------------------------------------------------------------
892              
893             # The hash keys below must match the list of keys in %valid_LINE_keys.
894             # Values in this hash are accessed directly, except for 'ralignments',
895             # rather than with get/set calls for efficiency.
896 3066         52541 my $new_line = Perl::Tidy::VerticalAligner::Line->new(
897             {
898             jmax => $jmax,
899             rtokens => $rtokens,
900             rfields => $rfields,
901             rpatterns => $rpatterns,
902             rfield_lengths => $rfield_lengths,
903             indentation => $indentation,
904             leading_space_count => $leading_space_count,
905             outdent_long_lines => $outdent_long_lines,
906             list_seqno => $list_seqno,
907             list_type => EMPTY_STRING,
908             is_hanging_side_comment => $is_hanging_side_comment,
909             rvertical_tightness_flags => $rvertical_tightness_flags,
910             is_terminal_ternary => $is_terminal_ternary,
911             j_terminal_match => $j_terminal_match,
912             end_group => $break_alignment_after,
913             Kend => $Kend,
914             ci_level => $ci_level,
915             level => $level,
916             level_end => $level_end,
917             imax_pair => -1,
918             maximum_line_length => $maximum_line_length,
919              
920             ralignments => [],
921             }
922             );
923              
924 3066         5548 DEVEL_MODE
925             && check_keys( $new_line, \%valid_LINE_keys,
926             "Checking line keys at line definition", 1 );
927              
928             # --------------------------------------------------------------------
929             # Decide if this is a simple list of items.
930             # We use this to be less restrictive in deciding what to align.
931             # --------------------------------------------------------------------
932 3066 100       9016 decide_if_list($new_line) if ($list_seqno);
933              
934             # --------------------------------------------------------------------
935             # Append this line to the current group (or start new group)
936             # --------------------------------------------------------------------
937              
938 3066         4810 push @{ $self->[_rgroup_lines_] }, $new_line;
  3066         7700  
939 3066         5905 $self->[_group_maximum_line_length_] = $maximum_line_length;
940              
941             # output this group if it ends in a terminal else or ternary line
942 3066 100 100     17839 if ( defined($j_terminal_match) ) {
    100          
943 20         116 $self->_flush_group_lines();
944             }
945              
946             # Force break after jump to lower level
947             elsif ($level_end < $level
948             || $is_closing_token{ substr( $rfields->[0], 0, 1 ) } )
949             {
950 119         501 $self->_flush_group_lines(-1);
951             }
952              
953             else {
954             ##ok: no output needed
955             }
956              
957             # --------------------------------------------------------------------
958             # Some old debugging stuff
959             # --------------------------------------------------------------------
960 3066         4856 DEBUG_VALIGN && do {
961             print {*STDOUT} "exiting valign_input fields:";
962             dump_array( @{$rfields} );
963             print {*STDOUT} "exiting valign_input tokens:";
964             dump_array( @{$rtokens} );
965             print {*STDOUT} "exiting valign_input patterns:";
966             dump_array( @{$rpatterns} );
967             };
968              
969 3066         9379 return;
970             } ## end sub valign_input
971              
972             sub join_hanging_comment {
973              
974             # Add dummy fields to a hanging side comment to make it look
975             # like the first line in its potential group. This simplifies
976             # the coding.
977 38     38 0 104 my ( $new_line, $old_line ) = @_;
978              
979 38         83 my $jmax = $new_line->{'jmax'};
980              
981             # must be 2 fields
982 38 50       130 return 0 unless $jmax == 1;
983 38         83 my $rtokens = $new_line->{'rtokens'};
984              
985             # the second field must be a comment
986 38 50       116 return 0 unless $rtokens->[0] eq '#';
987 38         90 my $rfields = $new_line->{'rfields'};
988              
989             # the first field must be empty
990 38 50       222 return 0 if ( $rfields->[0] !~ /^\s*$/ );
991              
992             # the current line must have fewer fields
993 38         167 my $maximum_field_index = $old_line->{'jmax'};
994 38 100       130 return 0
995             if ( $maximum_field_index <= $jmax );
996              
997             # looks ok..
998 3         6 my $rpatterns = $new_line->{'rpatterns'};
999 3         9 my $rfield_lengths = $new_line->{'rfield_lengths'};
1000              
1001 3         9 $new_line->{'is_hanging_side_comment'} = 1;
1002              
1003 3         5 $jmax = $maximum_field_index;
1004 3         8 $new_line->{'jmax'} = $jmax;
1005 3         9 $rfields->[$jmax] = $rfields->[1];
1006 3         6 $rfield_lengths->[$jmax] = $rfield_lengths->[1];
1007 3         13 $rtokens->[ $jmax - 1 ] = $rtokens->[0];
1008 3         10 $rpatterns->[ $jmax - 1 ] = $rpatterns->[0];
1009              
1010 3         11 foreach my $j ( 1 .. $jmax - 1 ) {
1011 3         8 $rfields->[$j] = EMPTY_STRING;
1012 3         6 $rfield_lengths->[$j] = 0;
1013 3         9 $rtokens->[ $j - 1 ] = EMPTY_STRING;
1014 3         9 $rpatterns->[ $j - 1 ] = EMPTY_STRING;
1015             }
1016 3         8 return 1;
1017             } ## end sub join_hanging_comment
1018              
1019             { ## closure for sub decide_if_list
1020              
1021             my %is_comma_token;
1022              
1023             BEGIN {
1024              
1025 39     39   259 my @q = qw( => );
1026 39         124 push @q, ',';
1027 39         10129 @is_comma_token{@q} = (1) x scalar(@q);
1028             } ## end BEGIN
1029              
1030             sub decide_if_list {
1031              
1032 1032     1032 0 2038 my $line = shift;
1033              
1034             # A list will be taken to be a line with a forced break in which all
1035             # of the field separators are commas or comma-arrows (except for the
1036             # trailing #)
1037              
1038 1032         2212 my $rtokens = $line->{'rtokens'};
1039 1032         2050 my $test_token = $rtokens->[0];
1040 1032         2838 my ( $raw_tok, $lev, $tag, $tok_count ) =
1041             decode_alignment_token($test_token);
1042 1032 100       3282 if ( $is_comma_token{$raw_tok} ) {
1043 930         1602 my $list_type = $test_token;
1044 930         1674 my $jmax = $line->{'jmax'};
1045              
1046 930         2814 foreach ( 1 .. $jmax - 2 ) {
1047 871         1755 ( $raw_tok, $lev, $tag, $tok_count ) =
1048             decode_alignment_token( $rtokens->[$_] );
1049 871 100       2367 if ( !$is_comma_token{$raw_tok} ) {
1050 26         85 $list_type = EMPTY_STRING;
1051 26         115 last;
1052             }
1053             }
1054 930         2150 $line->{'list_type'} = $list_type;
1055             }
1056 1032         1966 return;
1057             } ## end sub decide_if_list
1058             }
1059              
1060             sub fix_terminal_ternary {
1061              
1062             # Add empty fields as necessary to align a ternary term
1063             # like this:
1064             #
1065             # my $leapyear =
1066             # $year % 4 ? 0
1067             # : $year % 100 ? 1
1068             # : $year % 400 ? 0
1069             # : 1;
1070             #
1071             # returns the index of the terminal question token, if any
1072              
1073 13     13 0 56 my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths,
1074             $group_level )
1075             = @_;
1076              
1077 13 50       54 return if ( !$old_line );
1078 39     39   327 use constant EXPLAIN_TERNARY => 0;
  39         115  
  39         56523  
1079              
1080 13 50       68 if (%valign_control_hash) {
1081 0         0 my $align_ok = $valign_control_hash{'?'};
1082 0 0       0 $align_ok = $valign_control_default unless defined($align_ok);
1083 0 0       0 return if ( !$align_ok );
1084             }
1085              
1086 13         30 my $jmax = @{$rfields} - 1;
  13         44  
1087 13         46 my $rfields_old = $old_line->{'rfields'};
1088              
1089 13         39 my $rpatterns_old = $old_line->{'rpatterns'};
1090 13         32 my $rtokens_old = $old_line->{'rtokens'};
1091 13         35 my $maximum_field_index = $old_line->{'jmax'};
1092              
1093             # look for the question mark after the :
1094 13         29 my ($jquestion);
1095             my $depth_question;
1096 13         33 my $pad = EMPTY_STRING;
1097 13         32 my $pad_length = 0;
1098 13         58 foreach my $j ( 0 .. $maximum_field_index - 1 ) {
1099 14         44 my $tok = $rtokens_old->[$j];
1100 14         86 my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
1101 14 100       73 if ( $raw_tok eq '?' ) {
1102 13         36 $depth_question = $lev;
1103              
1104             # depth must be correct
1105 13 50       58 next if ( $depth_question ne $group_level );
1106              
1107 13         32 $jquestion = $j;
1108 13 50       106 if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
1109 13         40 $pad_length = length($1);
1110 13         50 $pad = SPACE x $pad_length;
1111             }
1112             else {
1113 0         0 return; # shouldn't happen
1114             }
1115 13         38 last;
1116             }
1117             }
1118 13 50       65 return if ( !defined($jquestion) ); # shouldn't happen
1119              
1120             # Now splice the tokens and patterns of the previous line
1121             # into the else line to insure a match. Add empty fields
1122             # as necessary.
1123 13         31 my $jadd = $jquestion;
1124              
1125             # Work on copies of the actual arrays in case we have
1126             # to return due to an error
1127 13         41 my @fields = @{$rfields};
  13         46  
1128 13         33 my @patterns = @{$rpatterns};
  13         38  
1129 13         27 my @tokens = @{$rtokens};
  13         46  
1130 13         53 my @field_lengths = @{$rfield_lengths};
  13         40  
1131              
1132 13         30 EXPLAIN_TERNARY && do {
1133             local $LIST_SEPARATOR = '><';
1134             print {*STDOUT} "CURRENT FIELDS=<@{$rfields_old}>\n";
1135             print {*STDOUT} "CURRENT TOKENS=<@{$rtokens_old}>\n";
1136             print {*STDOUT} "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
1137             print {*STDOUT} "UNMODIFIED FIELDS=<@{$rfields}>\n";
1138             print {*STDOUT} "UNMODIFIED TOKENS=<@{$rtokens}>\n";
1139             print {*STDOUT} "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
1140             };
1141              
1142             # handle cases of leading colon on this line
1143 13 50       88 if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
1144              
1145 13         66 my ( $colon, $therest ) = ( $1, $2 );
1146              
1147             # Handle sub-case of first field with leading colon plus additional code
1148             # This is the usual situation as at the '1' below:
1149             # ...
1150             # : $year % 400 ? 0
1151             # : 1;
1152 13 50       46 if ($therest) {
1153              
1154             # Split the first field after the leading colon and insert padding.
1155             # Note that this padding will remain even if the terminal value goes
1156             # out on a separate line. This does not seem to look to bad, so no
1157             # mechanism has been included to undo it.
1158 13         47 my $field1 = shift @fields;
1159 13         32 my $field_length1 = shift @field_lengths;
1160 13         41 my $len_colon = length($colon);
1161 13         64 unshift @fields, ( $colon, $pad . $therest );
1162 13         38 unshift @field_lengths,
1163             ( $len_colon, $pad_length + $field_length1 - $len_colon );
1164              
1165             # change the leading pattern from : to ?
1166 13 50       140 return if ( $patterns[0] !~ s/^\:/?/ );
1167              
1168             # install leading tokens and patterns of existing line
1169 13         96 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
  13         53  
1170 13         40 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
  13         42  
1171              
1172             # insert appropriate number of empty fields
1173 13 100       55 splice( @fields, 1, 0, (EMPTY_STRING) x $jadd ) if $jadd;
1174 13 100       1045 splice( @field_lengths, 1, 0, (0) x $jadd ) if $jadd;
1175             }
1176              
1177             # handle sub-case of first field just equal to leading colon.
1178             # This can happen for example in the example below where
1179             # the leading '(' would create a new alignment token
1180             # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
1181             # : ( $mname = $name . '->' );
1182             else {
1183              
1184 0 0 0     0 return if ( $jmax <= 0 || $tokens[0] eq '#' ); # shouldn't happen
1185              
1186             # prepend a leading ? onto the second pattern
1187 0         0 $patterns[1] = "?b" . $patterns[1];
1188              
1189             # pad the second field
1190 0         0 $fields[1] = $pad . $fields[1];
1191 0         0 $field_lengths[1] = $pad_length + $field_lengths[1];
1192              
1193             # install leading tokens and patterns of existing line, replacing
1194             # leading token and inserting appropriate number of empty fields
1195 0         0 splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
  0         0  
1196 0         0 splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
  0         0  
1197 0 0       0 splice( @fields, 1, 0, (EMPTY_STRING) x $jadd ) if $jadd;
1198 0 0       0 splice( @field_lengths, 1, 0, (0) x $jadd ) if $jadd;
1199             }
1200             }
1201              
1202             # Handle case of no leading colon on this line. This will
1203             # be the case when -wba=':' is used. For example,
1204             # $year % 400 ? 0 :
1205             # 1;
1206             else {
1207              
1208             # install leading tokens and patterns of existing line
1209 0         0 $patterns[0] = '?' . 'b' . $patterns[0];
1210 0         0 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
  0         0  
1211 0         0 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
  0         0  
1212              
1213             # insert appropriate number of empty fields
1214 0         0 $jadd = $jquestion + 1;
1215 0         0 $fields[0] = $pad . $fields[0];
1216 0         0 $field_lengths[0] = $pad_length + $field_lengths[0];
1217 0 0       0 splice( @fields, 0, 0, (EMPTY_STRING) x $jadd ) if $jadd;
1218 0 0       0 splice( @field_lengths, 0, 0, (0) x $jadd ) if $jadd;
1219             }
1220              
1221 13         34 EXPLAIN_TERNARY && do {
1222             local $LIST_SEPARATOR = '><';
1223             print {*STDOUT} "MODIFIED TOKENS=<@tokens>\n";
1224             print {*STDOUT} "MODIFIED PATTERNS=<@patterns>\n";
1225             print {*STDOUT} "MODIFIED FIELDS=<@fields>\n";
1226             };
1227              
1228             # all ok .. update the arrays
1229 13         36 @{$rfields} = @fields;
  13         55  
1230 13         42 @{$rtokens} = @tokens;
  13         44  
1231 13         28 @{$rpatterns} = @patterns;
  13         69  
1232 13         39 @{$rfield_lengths} = @field_lengths;
  13         47  
1233              
1234             # force a flush after this line
1235 13         52 return $jquestion;
1236             } ## end sub fix_terminal_ternary
1237              
1238             sub fix_terminal_else {
1239              
1240             # Add empty fields as necessary to align a balanced terminal
1241             # else block to a previous if/elsif/unless block,
1242             # like this:
1243             #
1244             # if ( 1 || $x ) { print "ok 13\n"; }
1245             # else { print "not ok 13\n"; }
1246             #
1247             # returns a positive value if the else block should be indented
1248             #
1249 9     9 0 48 my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths ) = @_;
1250              
1251 9 50       48 return if ( !$old_line );
1252 9         18 my $jmax = @{$rfields} - 1;
  9         47  
1253 9 50       40 return if ( $jmax <= 0 );
1254              
1255 9 50       35 if (%valign_control_hash) {
1256 0         0 my $align_ok = $valign_control_hash{'{'};
1257 0 0       0 $align_ok = $valign_control_default unless defined($align_ok);
1258 0 0       0 return if ( !$align_ok );
1259             }
1260              
1261             # check for balanced else block following if/elsif/unless
1262 9         27 my $rfields_old = $old_line->{'rfields'};
1263              
1264             # TBD: add handling for 'case'
1265 9 100       144 return if ( $rfields_old->[0] !~ /^(?:if|elsif|unless)\s*$/ );
1266              
1267             # look for the opening brace after the else, and extract the depth
1268 7         26 my $tok_brace = $rtokens->[0];
1269 7         17 my $depth_brace;
1270 7 50       59 if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
  7         33  
1271              
1272             # probably: "else # side_comment"
1273 0         0 else { return }
1274              
1275 7         22 my $rpatterns_old = $old_line->{'rpatterns'};
1276 7         30 my $rtokens_old = $old_line->{'rtokens'};
1277 7         24 my $maximum_field_index = $old_line->{'jmax'};
1278              
1279             # be sure the previous if/elsif is followed by an opening paren
1280 7         19 my $jparen = 0;
1281 7         23 my $tok_paren = '(' . $depth_brace;
1282 7         21 my $tok_test = $rtokens_old->[$jparen];
1283 7 50       28 return if ( $tok_test ne $tok_paren ); # shouldn't happen
1284              
1285             # Now find the opening block brace
1286 7         18 my ($jbrace);
1287 7         30 foreach my $j ( 1 .. $maximum_field_index - 1 ) {
1288 8         22 my $tok = $rtokens_old->[$j];
1289 8 100       31 if ( $tok eq $tok_brace ) {
1290 7         16 $jbrace = $j;
1291 7         19 last;
1292             }
1293             }
1294 7 50       28 return if ( !defined($jbrace) ); # shouldn't happen
1295              
1296             # Now splice the tokens and patterns of the previous line
1297             # into the else line to insure a match. Add empty fields
1298             # as necessary.
1299 7         19 my $jadd = $jbrace - $jparen;
1300 7         17 splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
  7         37  
  7         39  
1301 7         22 splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
  7         25  
  7         20  
1302 7         17 splice( @{$rfields}, 1, 0, (EMPTY_STRING) x $jadd );
  7         25  
1303 7         42 splice( @{$rfield_lengths}, 1, 0, (0) x $jadd );
  7         28  
1304              
1305             # force a flush after this line if it does not follow a case
1306 7 50       45 if ( $rfields_old->[0] =~ /^case\s*$/ ) { return }
  0         0  
1307 7         26 else { return $jbrace }
1308             } ## end sub fix_terminal_else
1309              
1310             my %is_closing_block_type;
1311              
1312             BEGIN {
1313 39     39   281 my @q = qw< } ] >;
1314 39         1243 @is_closing_block_type{@q} = (1) x scalar(@q);
1315             }
1316              
1317             # This is a flag for testing alignment by sub sweep_left_to_right only.
1318             # This test can help find problems with the alignment logic.
1319             # This flag should normally be zero.
1320 39     39   299 use constant TEST_SWEEP_ONLY => 0;
  39         173  
  39         2620  
1321              
1322 39     39   293 use constant EXPLAIN_CHECK_MATCH => 0;
  39         91  
  39         3620  
1323              
1324             sub check_match {
1325              
1326             # See if the current line matches the current vertical alignment group.
1327              
1328 1139     1139 0 2760 my ( $self, $new_line, $base_line, $prev_line, $group_line_count ) = @_;
1329              
1330             # Given:
1331             # $new_line = the line being considered for group inclusion
1332             # $base_line = the first line of the current group
1333             # $prev_line = the line just before $new_line
1334             # $group_line_count = number of lines in the current group
1335              
1336             # returns a flag and a value as follows:
1337             # return (0, $imax_align) if the line does not match
1338             # return (1, $imax_align) if the line matches but does not fit
1339             # return (2, $imax_align) if the line matches and fits
1340              
1341 39     39   328 use constant NO_MATCH => 0;
  39         104  
  39         2404  
1342 39     39   291 use constant MATCH_NO_FIT => 1;
  39         115  
  39         2866  
1343 39     39   311 use constant MATCH_AND_FIT => 2;
  39         139  
  39         69679  
1344              
1345 1139         1700 my $return_value;
1346              
1347             # Returns '$imax_align' which is the index of the maximum matching token.
1348             # It will be used in the subsequent left-to-right sweep to align as many
1349             # tokens as possible for lines which partially match.
1350 1139         1888 my $imax_align = -1;
1351              
1352             # variable $GoToMsg explains reason for no match, for debugging
1353 1139         2108 my $GoToMsg = EMPTY_STRING;
1354              
1355 1139         2099 my $jmax = $new_line->{'jmax'};
1356 1139         2935 my $maximum_field_index = $base_line->{'jmax'};
1357              
1358 1139         1958 my $jlimit = $jmax - 2;
1359 1139 100       2903 if ( $jmax > $maximum_field_index ) {
1360 82         236 $jlimit = $maximum_field_index - 2;
1361             }
1362              
1363 1139 100       2552 if ( $new_line->{'is_hanging_side_comment'} ) {
1364              
1365             # HSC's can join the group if they fit
1366             }
1367              
1368             # Everything else
1369             else {
1370              
1371             # A group with hanging side comments ends with the first non hanging
1372             # side comment.
1373 1101 50       2576 if ( $base_line->{'is_hanging_side_comment'} ) {
1374 0         0 $GoToMsg = "end of hanging side comments";
1375 0         0 $return_value = NO_MATCH;
1376             }
1377             else {
1378              
1379             # The number of tokens that this line shares with the previous
1380             # line has been stored with the previous line. This value was
1381             # calculated and stored by sub 'match_line_pair'.
1382 1101         1934 $imax_align = $prev_line->{'imax_pair'};
1383              
1384             # Only the following ci sequences are accepted (issue c225):
1385             # 0 0 0 ... OK
1386             # 0 1 1 ... OK but marginal*
1387             # 1 1 1 ... OK
1388             # This check is rarely activated, but for example we want
1389             # to avoid something like this 'tail wag dog' situation:
1390             # $tag =~ s/\b([a-z]+)/\L\u$1/gio;
1391             # $tag =~ s/\b([b-df-hj-np-tv-z]+)\b/\U$1/gio
1392             # if $tag =~ /-/;
1393             # *Note: we could set a flag for the 0 1 marginal case and
1394             # use it to prevent alignment of selected token types.
1395 1101         1922 my $ci_prev = $prev_line->{'ci_level'};
1396 1101         1810 my $ci_new = $new_line->{'ci_level'};
1397 1101 50 100     4868 if ( $ci_prev != $ci_new
    100 33        
      66        
1398             && $imax_align >= 0
1399             && ( $ci_new == 0 || $group_line_count > 1 ) )
1400             {
1401 0         0 $imax_align = -1;
1402 0         0 $GoToMsg =
1403             "Rejected ci: ci_prev=$ci_prev ci_new=$ci_new num=$group_line_count\n";
1404 0         0 $return_value = NO_MATCH;
1405             }
1406             elsif ( $imax_align != $jlimit ) {
1407 27         191 $GoToMsg = "Not all tokens match: $imax_align != $jlimit\n";
1408 27         76 $return_value = NO_MATCH;
1409             }
1410             else {
1411             ##ok: continue
1412             }
1413             }
1414             }
1415              
1416 1139 100       2645 if ( !defined($return_value) ) {
1417              
1418             # The tokens match, but the lines must have identical number of
1419             # tokens to join the group.
1420 1112 100 100     3997 if ( $maximum_field_index != $jmax ) {
    100          
1421 118         368 $GoToMsg = "token count differs";
1422 118         289 $return_value = NO_MATCH;
1423             }
1424              
1425             # The tokens match. Now See if there is space for this line in the
1426             # current group.
1427             elsif ( $self->check_fit( $new_line, $base_line ) && !TEST_SWEEP_ONLY )
1428             {
1429              
1430 981         3024 $GoToMsg = "match and fit, imax_align=$imax_align, jmax=$jmax\n";
1431 981         1652 $return_value = MATCH_AND_FIT;
1432 981         1827 $imax_align = $jlimit;
1433             }
1434             else {
1435 13         53 $GoToMsg = "match but no fit, imax_align=$imax_align, jmax=$jmax\n";
1436 13         30 $return_value = MATCH_NO_FIT;
1437 13         27 $imax_align = $jlimit;
1438             }
1439             }
1440              
1441             EXPLAIN_CHECK_MATCH
1442 1139         1830 && print
1443             "returning $return_value because $GoToMsg, max match index =i $imax_align, jmax=$jmax\n";
1444              
1445 1139         3049 return ( $return_value, $imax_align );
1446             } ## end sub check_match
1447              
1448             sub check_fit {
1449              
1450 994     994 0 2149 my ( $self, $new_line, $old_line ) = @_;
1451              
1452             # The new line has alignments identical to the current group. Now we have
1453             # to fit the new line into the group without causing a field to exceed the
1454             # line length limit.
1455             # return true if successful
1456             # return false if not successful
1457              
1458 994         1834 my $jmax = $new_line->{'jmax'};
1459 994         1738 my $leading_space_count = $new_line->{'leading_space_count'};
1460 994         1695 my $rfield_lengths = $new_line->{'rfield_lengths'};
1461 994         3476 my $padding_available = $old_line->get_available_space_on_right();
1462 994         2023 my $jmax_old = $old_line->{'jmax'};
1463              
1464             # Safety check ... only lines with equal array sizes should arrive here
1465             # from sub check_match. So if this error occurs, look at recent changes in
1466             # sub check_match. It is only supposed to check the fit of lines with
1467             # identical numbers of alignment tokens.
1468 994 50       2921 if ( $jmax_old ne $jmax ) {
1469              
1470 0         0 warning(<<EOM);
1471             Program bug detected in Perl::Tidy::VerticalAligner sub check_fit
1472             unexpected difference in array lengths: $jmax != $jmax_old
1473             EOM
1474 0         0 return;
1475             }
1476              
1477             # Save current columns in case this line does not fit.
1478 994         1680 my @alignments = @{ $old_line->{'ralignments'} };
  994         2466  
1479 994         2078 foreach my $alignment (@alignments) {
1480 3451         7458 $alignment->save_column();
1481             }
1482              
1483             # Loop over all alignments ...
1484 994         3246 for my $j ( 0 .. $jmax ) {
1485              
1486 3435         8288 my $pad = $rfield_lengths->[$j] - $old_line->current_field_width($j);
1487              
1488 3435 100       6962 if ( $j == 0 ) {
1489 994         1667 $pad += $leading_space_count;
1490             }
1491              
1492             # Keep going if this field does not need any space.
1493 3435 100       6660 next if ( $pad < 0 );
1494              
1495             # Revert to the starting state if does not fit
1496 2376 100       4666 if ( $pad > $padding_available ) {
1497              
1498             #----------------------------------------------
1499             # Line does not fit -- revert to starting state
1500             #----------------------------------------------
1501 13         34 foreach my $alignment (@alignments) {
1502 39         112 $alignment->restore_column();
1503             }
1504 13         57 return;
1505             }
1506              
1507             # make room for this field
1508 2363         6550 $old_line->increase_field_width( $j, $pad );
1509 2363         3973 $padding_available -= $pad;
1510             }
1511              
1512             #-------------------------------------
1513             # The line fits, the match is accepted
1514             #-------------------------------------
1515 981         5052 return 1;
1516              
1517             } ## end sub check_fit
1518              
1519             sub install_new_alignments {
1520              
1521 2085     2085 0 3978 my ($new_line) = @_;
1522              
1523 2085         4220 my $jmax = $new_line->{'jmax'};
1524 2085         3648 my $rfield_lengths = $new_line->{'rfield_lengths'};
1525 2085         3585 my $col = $new_line->{'leading_space_count'};
1526              
1527 2085         3497 my @alignments;
1528 2085         4945 for my $j ( 0 .. $jmax ) {
1529 6967         10741 $col += $rfield_lengths->[$j];
1530              
1531             # create initial alignments for the new group
1532 6967         23742 my $alignment =
1533             Perl::Tidy::VerticalAligner::Alignment->new( { column => $col } );
1534 6967         14647 push @alignments, $alignment;
1535             }
1536 2085         5769 $new_line->{'ralignments'} = \@alignments;
1537 2085         4759 return;
1538             } ## end sub install_new_alignments
1539              
1540             sub copy_old_alignments {
1541 981     981 0 2415 my ( $new_line, $old_line ) = @_;
1542 981         1577 my @new_alignments = @{ $old_line->{'ralignments'} };
  981         2709  
1543 981         2349 $new_line->{'ralignments'} = \@new_alignments;
1544 981         2281 return;
1545             } ## end sub copy_old_alignments
1546              
1547             sub dump_array {
1548              
1549             # debug routine to dump array contents
1550 0     0 0 0 local $LIST_SEPARATOR = ')(';
1551 0         0 print {*STDOUT} "(@_)\n";
  0         0  
1552 0         0 return;
1553             } ## end sub dump_array
1554              
1555             sub level_change {
1556              
1557             # compute decrease in level when we remove $diff spaces from the
1558             # leading spaces
1559 10     10 0 25 my ( $self, $leading_space_count, $diff, $level ) = @_;
1560              
1561 10         18 my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
1562 10 50       23 if ($rOpts_indent_columns) {
1563 10         36 my $olev =
1564             int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
1565 10         18 my $nlev = int( $leading_space_count / $rOpts_indent_columns );
1566 10         14 $level -= ( $olev - $nlev );
1567 10 50       28 if ( $level < 0 ) { $level = 0 }
  0         0  
1568             }
1569 10         23 return $level;
1570             } ## end sub level_change
1571              
1572             ###############################################
1573             # CODE SECTION 4: Code to process comment lines
1574             ###############################################
1575              
1576             sub _flush_comment_lines {
1577              
1578             # Output a group consisting of COMMENT lines
1579              
1580 531     531   1294 my ($self) = @_;
1581 531         1168 my $rgroup_lines = $self->[_rgroup_lines_];
1582 531 50       875 return if ( !@{$rgroup_lines} );
  531         1539  
1583 531         1099 my $group_level = $self->[_group_level_];
1584 531         1073 my $group_maximum_line_length = $self->[_group_maximum_line_length_];
1585 531         1083 my $leading_space_count = $self->[_comment_leading_space_count_];
1586              
1587             # look for excessively long lines
1588 531         1045 my $max_excess = 0;
1589 531         1151 foreach my $item ( @{$rgroup_lines} ) {
  531         1353  
1590 608         1020 my ( $str, $str_len ) = @{$item};
  608         1588  
1591 608         2608 my $excess =
1592             $str_len + $leading_space_count - $group_maximum_line_length;
1593 608 100       2144 if ( $excess > $max_excess ) {
1594 38         123 $max_excess = $excess;
1595             }
1596             }
1597              
1598             # zero leading space count if any lines are too long
1599 531 100       1707 if ( $max_excess > 0 ) {
1600 36         89 $leading_space_count -= $max_excess;
1601 36 50       124 if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
  36         74  
1602 36         83 my $file_writer_object = $self->[_file_writer_object_];
1603 36         181 my $last_outdented_line_at =
1604             $file_writer_object->get_output_line_number();
1605 36         76 my $nlines = @{$rgroup_lines};
  36         112  
1606 36         110 $self->[_last_outdented_line_at_] =
1607             $last_outdented_line_at + $nlines - 1;
1608 36         77 my $outdented_line_count = $self->[_outdented_line_count_];
1609 36 100       118 if ( !$outdented_line_count ) {
1610 18         57 $self->[_first_outdented_line_at_] = $last_outdented_line_at;
1611             }
1612 36         73 $outdented_line_count += $nlines;
1613 36         81 $self->[_outdented_line_count_] = $outdented_line_count;
1614             }
1615              
1616             # write the lines
1617 531         1108 my $outdent_long_lines = 0;
1618              
1619 531         984 foreach my $item ( @{$rgroup_lines} ) {
  531         1256  
1620 608         1039 my ( $str, $str_len, $Kend ) = @{$item};
  608         1544  
1621 608         7043 $self->valign_output_step_B(
1622             {
1623             leading_space_count => $leading_space_count,
1624             line => $str,
1625             line_length => $str_len,
1626             side_comment_length => 0,
1627             outdent_long_lines => $outdent_long_lines,
1628             rvertical_tightness_flags => undef,
1629             level => $group_level,
1630             level_end => $group_level,
1631             Kend => $Kend,
1632             maximum_line_length => $group_maximum_line_length,
1633             }
1634             );
1635             }
1636              
1637 531         2644 $self->initialize_for_new_group();
1638 531         1072 return;
1639             } ## end sub _flush_comment_lines
1640              
1641             ######################################################
1642             # CODE SECTION 5: Code to process groups of code lines
1643             ######################################################
1644              
1645             sub _flush_group_lines {
1646              
1647             # This is the vertical aligner internal flush, which leaves the cache
1648             # intact
1649 5590     5590   11212 my ( $self, $level_jump ) = @_;
1650              
1651             # $level_jump = $next_level-$group_level, if known
1652             # = undef if not known
1653             # Note: only the sign of the jump is needed
1654              
1655 5590         9875 my $rgroup_lines = $self->[_rgroup_lines_];
1656 5590 100       8165 return if ( !@{$rgroup_lines} );
  5590         14558  
1657 2237         5034 my $group_type = $self->[_group_type_];
1658 2237         3931 my $group_level = $self->[_group_level_];
1659              
1660             # Debug
1661 2237         3514 0 && do {
1662             my ( $a, $b, $c ) = caller();
1663             my $nlines = @{$rgroup_lines};
1664             print {*STDOUT}
1665             "APPEND0: _flush_group_lines called from $a $b $c lines=$nlines, type=$group_type \n";
1666             };
1667              
1668             #-------------------------------------------
1669             # Section 1: Handle a group of COMMENT lines
1670             #-------------------------------------------
1671 2237 100       6154 if ( $group_type eq 'COMMENT' ) {
1672 531         2224 $self->_flush_comment_lines();
1673 531         1626 return;
1674             }
1675              
1676             #------------------------------------------------------------------------
1677             # Section 2: Handle line(s) of CODE. Most of the actual work of vertical
1678             # aligning happens here in the following steps:
1679             #------------------------------------------------------------------------
1680              
1681             # STEP 1: Remove most unmatched tokens. They block good alignments.
1682 1706         6035 my ( $max_lev_diff, $saw_side_comment ) =
1683             delete_unmatched_tokens( $rgroup_lines, $group_level );
1684              
1685             # STEP 2: Sweep top to bottom, forming subgroups of lines with exactly
1686             # matching common alignments. The indexes of these subgroups are in the
1687             # return variable.
1688 1706         7048 my $rgroups = $self->sweep_top_down( $rgroup_lines, $group_level );
1689              
1690             # STEP 3: Sweep left to right through the lines, looking for leading
1691             # alignment tokens shared by groups.
1692             sweep_left_to_right( $rgroup_lines, $rgroups, $group_level )
1693 1706 100       2921 if ( @{$rgroups} > 1 );
  1706         5886  
1694              
1695             # STEP 4: Move side comments to a common column if possible.
1696 1706 100       4441 if ($saw_side_comment) {
1697 199         1044 $self->align_side_comments( $rgroup_lines, $rgroups );
1698             }
1699              
1700             # STEP 5: For the -lp option, increase the indentation of lists
1701             # to the desired amount, but do not exceed the line length limit.
1702              
1703             # We are allowed to shift a group of lines to the right if:
1704             # (1) its level is greater than the level of the previous group, and
1705             # (2) its level is greater than the level of the next line to be written.
1706              
1707 1706         2810 my $extra_indent_ok;
1708 1706 100       4759 if ( $group_level > $self->[_last_level_written_] ) {
1709              
1710             # Use the level jump to next line to come, if given
1711 854 100       2591 if ( defined($level_jump) ) {
1712 571         1524 $extra_indent_ok = $level_jump < 0;
1713             }
1714              
1715             # Otherwise, assume the next line has the level of the end of last line.
1716             # This fixes case c008.
1717             else {
1718 283         827 my $level_end = $rgroup_lines->[-1]->{'level_end'};
1719 283         742 $extra_indent_ok = $group_level > $level_end;
1720             }
1721             }
1722              
1723 1706 100       5259 my $extra_leading_spaces =
1724             $extra_indent_ok
1725             ? get_extra_leading_spaces( $rgroup_lines, $rgroups )
1726             : 0;
1727              
1728             # STEP 6: Output the lines.
1729             # All lines in this group have the same leading spacing and maximum line
1730             # length
1731 1706         3482 my $group_leader_length = $rgroup_lines->[0]->{'leading_space_count'};
1732 1706         3310 my $group_maximum_line_length = $rgroup_lines->[0]->{'maximum_line_length'};
1733              
1734 1706         2841 foreach my $line ( @{$rgroup_lines} ) {
  1706         3755  
1735 3066         19009 $self->valign_output_step_A(
1736             {
1737             line => $line,
1738             min_ci_gap => 0,
1739             do_not_align => 0,
1740             group_leader_length => $group_leader_length,
1741             extra_leading_spaces => $extra_leading_spaces,
1742             level => $group_level,
1743             maximum_line_length => $group_maximum_line_length,
1744             }
1745             );
1746             }
1747              
1748             # Let the formatter know that this object has been processed and any
1749             # recoverable spaces have been handled. This is needed for setting the
1750             # closing paren location in -lp mode.
1751 1706         4218 my $object = $rgroup_lines->[0]->{'indentation'};
1752 1706 100       4782 if ( ref($object) ) { $object->set_recoverable_spaces(0) }
  92         363  
1753              
1754 1706         6443 $self->initialize_for_new_group();
1755 1706         4089 return;
1756             } ## end sub _flush_group_lines
1757              
1758             { ## closure for sub sweep_top_down
1759              
1760             my $rall_lines; # all of the lines
1761             my $grp_level; # level of all lines
1762             my $rgroups; # describes the partition of lines we will make here
1763             my $group_line_count; # number of lines in current partition
1764              
1765 39     39   80894 BEGIN { $rgroups = [] }
1766              
1767             sub initialize_for_new_rgroup {
1768 3791     3791 0 6144 $group_line_count = 0;
1769 3791         5837 return;
1770             }
1771              
1772             sub add_to_rgroup {
1773              
1774 3066     3066 0 5910 my ($jend) = @_;
1775 3066         5571 my $rline = $rall_lines->[$jend];
1776              
1777 3066         4686 my $jbeg = $jend;
1778 3066 100       6861 if ( $group_line_count == 0 ) {
1779 2085         5630 install_new_alignments($rline);
1780             }
1781             else {
1782 981         1674 my $rvals = pop @{$rgroups};
  981         2126  
1783 981         1903 $jbeg = $rvals->[0];
1784 981         2732 copy_old_alignments( $rline, $rall_lines->[$jbeg] );
1785             }
1786 3066         4903 push @{$rgroups}, [ $jbeg, $jend, undef ];
  3066         7367  
1787 3066         4877 $group_line_count++;
1788 3066         5113 return;
1789             } ## end sub add_to_rgroup
1790              
1791             sub get_rgroup_jrange {
1792              
1793 1288 50   1288 0 2026 return if ( !@{$rgroups} );
  1288         3407  
1794 1288 50       3334 return if ( $group_line_count <= 0 );
1795 1288         2488 my ( $jbeg, $jend ) = @{ $rgroups->[-1] };
  1288         3103  
1796 1288         2738 return ( $jbeg, $jend );
1797             } ## end sub get_rgroup_jrange
1798              
1799             sub end_rgroup {
1800              
1801 2104     2104 0 4425 my ($imax_align) = @_;
1802 2104 50       3013 return if ( !@{$rgroups} );
  2104         5203  
1803 2104 100       5156 return if ( $group_line_count <= 0 );
1804              
1805 2085         3189 my ( $jbeg, $jend ) = @{ pop @{$rgroups} };
  2085         3011  
  2085         5166  
1806 2085         3992 push @{$rgroups}, [ $jbeg, $jend, $imax_align ];
  2085         5232  
1807              
1808             # Undo some alignments of poor two-line combinations.
1809             # We had to wait until now to know the line count.
1810 2085 100       5968 if ( $jend - $jbeg == 1 ) {
1811 256         961 my $line_0 = $rall_lines->[$jbeg];
1812 256         715 my $line_1 = $rall_lines->[$jend];
1813              
1814 256         650 my $imax_pair = $line_1->{'imax_pair'};
1815 256 50       864 if ( $imax_pair > $imax_align ) { $imax_align = $imax_pair }
  0         0  
1816              
1817             ## flag for possible future use:
1818             ## my $is_isolated_pair = $imax_pair < 0
1819             ## && ( $jbeg == 0
1820             ## || $rall_lines->[ $jbeg - 1 ]->{'imax_pair'} < 0 );
1821              
1822             my $imax_prev =
1823 256 100       962 $jbeg > 0 ? $rall_lines->[ $jbeg - 1 ]->{'imax_pair'} : -1;
1824              
1825 256         1224 my ( $is_marginal, $imax_align_fix ) =
1826             is_marginal_match( $line_0, $line_1, $grp_level, $imax_align,
1827             $imax_prev );
1828 256 100       984 if ($is_marginal) {
1829 14         72 combine_fields( $line_0, $line_1, $imax_align_fix );
1830             }
1831             }
1832              
1833 2085         5462 initialize_for_new_rgroup();
1834 2085         3407 return;
1835             } ## end sub end_rgroup
1836              
1837             sub block_penultimate_match {
1838              
1839             # emergency reset to prevent sweep_left_to_right from trying to match a
1840             # failed terminal else match
1841 1 50   1 0 12 return if ( @{$rgroups} <= 1 );
  1         12  
1842 1         2 $rgroups->[-2]->[2] = -1;
1843 1         4 return;
1844             } ## end sub block_penultimate_match
1845              
1846             sub sweep_top_down {
1847 1706     1706 0 3963 my ( $self, $rlines, $group_level ) = @_;
1848              
1849             # Partition the set of lines into final alignment subgroups
1850             # and store the alignments with the lines.
1851              
1852             # The alignment subgroups we are making here are groups of consecutive
1853             # lines which have (1) identical alignment tokens and (2) do not
1854             # exceed the allowable maximum line length. A later sweep from
1855             # left-to-right ('sweep_lr') will handle additional alignments.
1856              
1857             # transfer args to closure variables
1858 1706         20234 $rall_lines = $rlines;
1859 1706         4230 $grp_level = $group_level;
1860 1706         5418 $rgroups = [];
1861 1706         5475 initialize_for_new_rgroup();
1862 1706 50       2695 return unless @{$rlines}; # shouldn't happen
  1706         4816  
1863              
1864             # Unset the _end_group flag for the last line if it it set because it
1865             # is not needed and can causes problems for -lp formatting
1866 1706         4173 $rall_lines->[-1]->{'end_group'} = 0;
1867              
1868             # Loop over all lines ...
1869 1706         3290 my $jline = -1;
1870 1706         2967 foreach my $new_line ( @{$rall_lines} ) {
  1706         4122  
1871 3066         4800 $jline++;
1872              
1873             # Start a new subgroup if necessary
1874 3066 100       7379 if ( !$group_line_count ) {
1875 1778         5664 add_to_rgroup($jline);
1876 1778 100       5506 if ( $new_line->{'end_group'} ) {
1877 22         101 end_rgroup(-1);
1878             }
1879 1778         3821 next;
1880             }
1881              
1882 1288         3071 my $j_terminal_match = $new_line->{'j_terminal_match'};
1883 1288         3379 my ( $jbeg, $jend ) = get_rgroup_jrange();
1884 1288 50       3466 if ( !defined($jbeg) ) {
1885              
1886             # safety check, shouldn't happen
1887 0         0 warning(<<EOM);
1888             Program bug detected in Perl::Tidy::VerticalAligner sub sweep_top_down
1889             undefined index for group line count $group_line_count
1890             EOM
1891 0         0 $jbeg = $jline;
1892             }
1893 1288         2397 my $base_line = $rall_lines->[$jbeg];
1894              
1895             # Initialize a global flag saying if the last line of the group
1896             # should match end of group and also terminate the group. There
1897             # should be no returns between here and where the flag is handled
1898             # at the bottom.
1899 1288         2098 my $col_matching_terminal = 0;
1900 1288 100       2934 if ( defined($j_terminal_match) ) {
1901              
1902             # remember the column of the terminal ? or { to match with
1903 19         125 $col_matching_terminal =
1904             $base_line->get_column($j_terminal_match);
1905              
1906             # Ignore an undefined value as a defensive step; shouldn't
1907             # normally happen.
1908 19 50       88 $col_matching_terminal = 0
1909             unless defined($col_matching_terminal);
1910             }
1911              
1912             # -------------------------------------------------------------
1913             # Allow hanging side comment to join current group, if any. The
1914             # only advantage is to keep the other tokens in the same group. For
1915             # example, this would make the '=' align here:
1916             # $ax = 1; # side comment
1917             # # hanging side comment
1918             # $boondoggle = 5; # side comment
1919             # $beetle = 5; # side comment
1920              
1921             # here is another example..
1922              
1923             # _rtoc_name_count => {}, # hash to track ..
1924             # _rpackage_stack => [], # stack to check ..
1925             # # name changes
1926             # _rlast_level => \$last_level, # brace indentation
1927             #
1928             #
1929             # If this were not desired, the next step could be skipped.
1930             # -------------------------------------------------------------
1931 1288 100       4433 if ( $new_line->{'is_hanging_side_comment'} ) {
    100          
1932 38         160 join_hanging_comment( $new_line, $base_line );
1933             }
1934              
1935             # If this line has no matching tokens, then flush out the lines
1936             # BEFORE this line unless both it and the previous line have side
1937             # comments. This prevents this line from pushing side comments out
1938             # to the right.
1939             elsif ( $new_line->{'jmax'} == 1 ) {
1940              
1941             # There are no matching tokens, so now check side comments.
1942             # Programming note: accessing arrays with index -1 is
1943             # risky in Perl, but we have verified there is at least one
1944             # line in the group and that there is at least one field.
1945             my $prev_comment =
1946 194         700 $rall_lines->[ $jline - 1 ]->{'rfields'}->[-1];
1947 194         483 my $side_comment = $new_line->{'rfields'}->[-1];
1948 194 100 100     1101 end_rgroup(-1) if ( !$side_comment || !$prev_comment );
1949             }
1950             else {
1951             ##ok: continue
1952             }
1953              
1954             # See if the new line matches and fits the current group,
1955             # if it still exists. Flush the current group if not.
1956 1288         2169 my $match_code;
1957 1288 100       3099 if ($group_line_count) {
1958 1139         4219 ( $match_code, my $imax_align ) =
1959             $self->check_match( $new_line, $base_line,
1960             $rall_lines->[ $jline - 1 ],
1961             $group_line_count );
1962 1139 100       3013 if ( $match_code != 2 ) { end_rgroup($imax_align) }
  158         501  
1963             }
1964              
1965             # Store the new line
1966 1288         3470 add_to_rgroup($jline);
1967              
1968 1288 100       5649 if ( defined($j_terminal_match) ) {
    100          
1969              
1970             # Decide if we should fix a terminal match. We can either:
1971             # 1. fix it and prevent the sweep_lr from changing it, or
1972             # 2. leave it alone and let sweep_lr try to fix it.
1973              
1974             # The current logic is to fix it if:
1975             # -it has not joined to previous lines,
1976             # -and either the previous subgroup has just 1 line, or
1977             # -this line matched but did not fit (so sweep won't work)
1978 19         74 my $fixit;
1979 19 100       100 if ( $group_line_count == 1 ) {
1980 3   66     21 $fixit ||= $match_code;
1981 3 100       12 if ( !$fixit ) {
1982 2 50       19 if ( @{$rgroups} > 1 ) {
  2         17  
1983 2         7 my ( $jbegx, $jendx ) = @{ $rgroups->[-2] };
  2         9  
1984 2         9 my $nlines = $jendx - $jbegx + 1;
1985 2   66     19 $fixit ||= $nlines <= 1;
1986             }
1987             }
1988             }
1989              
1990 19 100       76 if ($fixit) {
1991 2         8 $base_line = $new_line;
1992 2         11 my $col_now = $base_line->get_column($j_terminal_match);
1993              
1994             # Ignore an undefined value as a defensive step; shouldn't
1995             # normally happen.
1996 2 50       9 $col_now = 0 unless defined($col_now);
1997              
1998 2         7 my $pad = $col_matching_terminal - $col_now;
1999 2         9 my $padding_available =
2000             $base_line->get_available_space_on_right();
2001 2 100 33     29 if ( $col_now && $pad > 0 && $pad <= $padding_available ) {
      66        
2002 1         4 $base_line->increase_field_width( $j_terminal_match,
2003             $pad );
2004             }
2005              
2006             # do not let sweep_left_to_right change an isolated 'else'
2007 2 100       17 if ( !$new_line->{'is_terminal_ternary'} ) {
2008 1         4 block_penultimate_match();
2009             }
2010             }
2011 19         86 end_rgroup(-1);
2012             }
2013              
2014             # end the group if we know we cannot match next line.
2015             elsif ( $new_line->{'end_group'} ) {
2016 50         278 end_rgroup(-1);
2017             }
2018              
2019             else {
2020             ##ok: continue
2021             }
2022             } ## end loop over lines
2023              
2024 1706         6251 end_rgroup(-1);
2025 1706         3926 return ($rgroups);
2026             } ## end sub sweep_top_down
2027             }
2028              
2029             sub two_line_pad {
2030              
2031 18     18 0 126 my ( $line_m, $line, $imax_min ) = @_;
2032              
2033             # Given:
2034             # two isolated (list) lines
2035             # imax_min = number of common alignment tokens
2036             # Return:
2037             # $pad_max = maximum suggested pad distance
2038             # = 0 if alignment not recommended
2039             # Note that this is only for two lines which do not have alignment tokens
2040             # in common with any other lines. It is intended for lists, but it might
2041             # also be used for two non-list lines with a common leading '='.
2042              
2043             # Allow alignment if the difference in the two unpadded line lengths
2044             # is not more than either line length. The idea is to avoid
2045             # aligning lines with very different field lengths, like these two:
2046              
2047             # [
2048             # 'VARCHAR', DBI::SQL_VARCHAR, undef, "'", "'", undef, 0, 1,
2049             # 1, 0, 0, 0, undef, 0, 0
2050             # ];
2051 18         57 my $rfield_lengths = $line->{'rfield_lengths'};
2052 18         46 my $rfield_lengths_m = $line_m->{'rfield_lengths'};
2053              
2054             # Safety check - shouldn't happen
2055             return 0
2056 18         89 if ( $imax_min >= @{$rfield_lengths}
2057 18 50 33     50 || $imax_min >= @{$rfield_lengths_m} );
  18         76  
2058              
2059 18         49 my $lensum_m = 0;
2060 18         42 my $lensum = 0;
2061 18         68 foreach my $i ( 0 .. $imax_min ) {
2062 49         86 $lensum_m += $rfield_lengths_m->[$i];
2063 49         94 $lensum += $rfield_lengths->[$i];
2064             }
2065              
2066 18 100       122 my ( $lenmin, $lenmax ) =
2067             $lensum >= $lensum_m ? ( $lensum_m, $lensum ) : ( $lensum, $lensum_m );
2068              
2069 18         66 my $patterns_match;
2070 18 50 66     135 if ( $line_m->{'list_type'} && $line->{'list_type'} ) {
2071 16         44 $patterns_match = 1;
2072 16         52 my $rpatterns_m = $line_m->{'rpatterns'};
2073 16         48 my $rpatterns = $line->{'rpatterns'};
2074 16         60 foreach my $i ( 0 .. $imax_min ) {
2075 46         93 my $pat = $rpatterns->[$i];
2076 46         85 my $pat_m = $rpatterns_m->[$i];
2077 46 100       150 if ( $pat ne $pat_m ) { $patterns_match = 0; last }
  2         4  
  2         8  
2078             }
2079             }
2080              
2081 18         59 my $pad_max = $lenmax;
2082 18 50 66     122 if ( !$patterns_match && $lenmax > 2 * $lenmin ) { $pad_max = 0 }
  0         0  
2083              
2084 18         61 return $pad_max;
2085             } ## end sub two_line_pad
2086              
2087             sub sweep_left_to_right {
2088              
2089 255     255 0 886 my ( $rlines, $rgroups, $group_level ) = @_;
2090              
2091             # So far we have divided the lines into groups having an equal number of
2092             # identical alignments. Here we are going to look for common leading
2093             # alignments between the different groups and align them when possible.
2094             # For example, the three lines below are in three groups because each line
2095             # has a different number of commas. In this routine we will sweep from
2096             # left to right, aligning the leading commas as we go, but stopping if we
2097             # hit the line length limit.
2098              
2099             # my ( $num, $numi, $numj, $xyza, $ka, $xyzb, $kb, $aff, $error );
2100             # my ( $i, $j, $error, $aff, $asum, $avec );
2101             # my ( $km, $area, $varea );
2102              
2103             # nothing to do if just one group
2104 255         494 my $ng_max = @{$rgroups} - 1;
  255         604  
2105 255 50       827 return if ( $ng_max <= 0 );
2106              
2107             #---------------------------------------------------------------------
2108             # Step 1: Loop over groups to find all common leading alignment tokens
2109             #---------------------------------------------------------------------
2110              
2111 255         3699 my $line;
2112             my $rtokens;
2113 255         0 my $imax; # index of maximum non-side-comment alignment token
2114 255         0 my $istop; # an optional stopping index
2115 255         0 my $jbeg; # starting line index
2116 255         0 my $jend; # ending line index
2117              
2118 255         0 my $line_m;
2119 255         0 my $rtokens_m;
2120 255         0 my $imax_m;
2121 255         0 my $istop_m;
2122 255         0 my $jbeg_m;
2123 255         0 my $jend_m;
2124              
2125 255         0 my $istop_mm;
2126              
2127             # Look at neighboring pairs of groups and form a simple list
2128             # of all common leading alignment tokens. Foreach such match we
2129             # store [$i, $ng], where
2130             # $i = index of the token in the line (0,1,...)
2131             # $ng is the second of the two groups with this common token
2132 255         0 my @icommon;
2133              
2134             # Hash to hold the maximum alignment change for any group
2135 255         0 my %max_move;
2136              
2137             # a small number of columns
2138 255         512 my $short_pad = 4;
2139              
2140 255         525 my $ng = -1;
2141 255         533 foreach my $item ( @{$rgroups} ) {
  255         669  
2142 634         998 $ng++;
2143              
2144 634         1048 $istop_mm = $istop_m;
2145              
2146             # save _m values of previous group
2147 634         996 $line_m = $line;
2148 634         959 $rtokens_m = $rtokens;
2149 634         902 $imax_m = $imax;
2150 634         1041 $istop_m = $istop;
2151 634         1001 $jbeg_m = $jbeg;
2152 634         995 $jend_m = $jend;
2153              
2154             # Get values for this group. Note that we just have to use values for
2155             # one of the lines of the group since all members have the same
2156             # alignments.
2157 634         996 ( $jbeg, $jend, $istop ) = @{$item};
  634         1253  
2158              
2159 634         1078 $line = $rlines->[$jbeg];
2160 634         1180 $rtokens = $line->{'rtokens'};
2161 634         1081 $imax = $line->{'jmax'} - 2;
2162 634 50       1506 $istop = -1 if ( !defined($istop) );
2163 634 50       1386 $istop = $imax if ( $istop > $imax );
2164              
2165             # Initialize on first group
2166 634 100       1625 next if ( $ng == 0 );
2167              
2168             # Use the minimum index limit of the two groups
2169 379 100       1436 my $imax_min = $imax > $imax_m ? $imax_m : $imax;
2170              
2171             # Also impose a limit if given.
2172 379 100       1158 if ( $istop_m < $imax_min ) {
2173 51         120 $imax_min = $istop_m;
2174             }
2175              
2176             # Special treatment of two one-line groups isolated from other lines,
2177             # unless they form a simple list or a terminal match. Otherwise the
2178             # alignment can look strange in some cases.
2179 379         894 my $list_type = $rlines->[$jbeg]->{'list_type'};
2180 379 100 100     4918 if (
      100        
      100        
      100        
      100        
      100        
      100        
      100        
2181             $jend == $jbeg
2182             && $jend_m == $jbeg_m
2183             && ( $ng == 1 || $istop_mm < 0 )
2184             && ( $ng == $ng_max || $istop < 0 )
2185             && !$line->{'j_terminal_match'}
2186              
2187             # Only do this for imperfect matches. This is normally true except
2188             # when two perfect matches cannot form a group because the line
2189             # length limit would be exceeded. In that case we can still try
2190             # to match as many alignments as possible.
2191             && ( $imax != $imax_m || $istop_m != $imax_m )
2192             )
2193             {
2194              
2195             # We will just align assignments and simple lists
2196 73 100       311 next if ( $imax_min < 0 );
2197             next
2198 21 100 100     192 if ( $rtokens->[0] !~ /^=\d/
2199             && !$list_type );
2200              
2201             # In this case we will limit padding to a short distance. This
2202             # is a compromise to keep some vertical alignment but prevent large
2203             # gaps, which do not look good for just two lines.
2204 18         331 my $pad_max =
2205             two_line_pad( $rlines->[$jbeg], $rlines->[$jbeg_m], $imax_min );
2206 18 50       70 next if ( !$pad_max );
2207 18         58 my $ng_m = $ng - 1;
2208 18         65 $max_move{"$ng_m"} = $pad_max;
2209 18         54 $max_move{"$ng"} = $pad_max;
2210             }
2211              
2212             # Loop to find all common leading tokens.
2213 324 100       1179 if ( $imax_min >= 0 ) {
2214 78         285 foreach my $i ( 0 .. $imax_min ) {
2215 144         313 my $tok = $rtokens->[$i];
2216 144         283 my $tok_m = $rtokens_m->[$i];
2217 144 50       381 last if ( $tok ne $tok_m );
2218 144         522 push @icommon, [ $i, $ng, $tok ];
2219             }
2220             }
2221             }
2222 255 100       1250 return unless @icommon;
2223              
2224             #----------------------------------------------------------
2225             # Step 2: Reorder and consolidate the list into a task list
2226             #----------------------------------------------------------
2227              
2228             # We have to work first from lowest token index to highest, then by group,
2229             # sort our list first on token index then group number
2230 64 50       373 @icommon = sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @icommon;
  160         438  
2231              
2232             # Make a task list of the form
2233             # [$i, ng_beg, $ng_end, $tok], ..
2234             # where
2235             # $i is the index of the token to be aligned
2236             # $ng_beg..$ng_end is the group range for this action
2237 64         146 my @todo;
2238 64         185 my ( $i, $ng_end, $tok );
2239 64         185 foreach my $item (@icommon) {
2240 144         260 my $ng_last = $ng_end;
2241 144         241 my $i_last = $i;
2242 144         249 ( $i, $ng_end, $tok ) = @{$item};
  144         357  
2243 144         298 my $ng_beg = $ng_end - 1;
2244 144 100 100     687 if ( defined($ng_last) && $ng_beg == $ng_last && $i == $i_last ) {
      66        
2245 29         59 my $var = pop(@todo);
2246 29         69 $ng_beg = $var->[1];
2247             }
2248 144         372 my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
2249 144         603 push @todo, [ $i, $ng_beg, $ng_end, $raw_tok, $lev ];
2250             }
2251              
2252             #------------------------------
2253             # Step 3: Execute the task list
2254             #------------------------------
2255 64         840 do_left_to_right_sweep( $rlines, $rgroups, \@todo, \%max_move, $short_pad,
2256             $group_level );
2257 64         341 return;
2258             } ## end sub sweep_left_to_right
2259              
2260             { ## closure for sub do_left_to_right_sweep
2261              
2262             my %is_good_alignment_token;
2263              
2264             BEGIN {
2265              
2266             # One of the most difficult aspects of vertical alignment is knowing
2267             # when not to align. Alignment can go from looking very nice to very
2268             # bad when overdone. In the sweep algorithm there are two special
2269             # cases where we may need to limit padding to a '$short_pad' distance
2270             # to avoid some very ugly formatting:
2271              
2272             # 1. Two isolated lines with partial alignment
2273             # 2. A 'tail-wag-dog' situation, in which a single terminal
2274             # line with partial alignment could cause a significant pad
2275             # increase in many previous lines if allowed to join the alignment.
2276              
2277             # For most alignment tokens, we will allow only a small pad to be
2278             # introduced (the hardwired $short_pad variable) . But for some 'good'
2279             # alignments we can be less restrictive.
2280              
2281             # These are 'good' alignments, which are allowed more padding:
2282 39     39   280 my @q = qw(
2283             => = ? if unless or || {
2284             );
2285 39         152 push @q, ',';
2286 39         299 @is_good_alignment_token{@q} = (0) x scalar(@q);
2287              
2288             # Promote a few of these to 'best', with essentially no pad limit:
2289 39         114 $is_good_alignment_token{'='} = 1;
2290 39         111 $is_good_alignment_token{'if'} = 1;
2291 39         88 $is_good_alignment_token{'unless'} = 1;
2292 39         34213 $is_good_alignment_token{'=>'} = 1;
2293              
2294             # Note the hash values are set so that:
2295             # if ($is_good_alignment_token{$raw_tok}) => best
2296             # if defined ($is_good_alignment_token{$raw_tok}) => good or best
2297              
2298             } ## end BEGIN
2299              
2300             sub move_to_common_column {
2301              
2302             # This is a sub called by sub do_left_to_right_sweep to
2303             # move the alignment column of token $itok to $col_want for a
2304             # sequence of groups.
2305 118     118 0 462 my ( $rlines, $rgroups, $rmax_move, $ngb, $nge, $itok, $col_want,
2306             $raw_tok )
2307             = @_;
2308 118 100 66     558 return if ( !defined($ngb) || $nge <= $ngb );
2309 108         298 foreach my $ng ( $ngb .. $nge ) {
2310              
2311 242         370 my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
  242         534  
2312 242         441 my $line = $rlines->[$jbeg];
2313 242         632 my $col = $line->get_column($itok);
2314 242         513 my $move = $col_want - $col;
2315 242 100       930 if ( $move > 0 ) {
    50          
2316              
2317             # limit padding increase in isolated two lines
2318             next
2319             if ( defined( $rmax_move->{$ng} )
2320             && $move > $rmax_move->{$ng}
2321 77 50 66     449 && !$is_good_alignment_token{$raw_tok} );
      33        
2322              
2323 77         281 $line->increase_field_width( $itok, $move );
2324             }
2325             elsif ( $move < 0 ) {
2326              
2327             # spot to take special action on failure to move
2328             }
2329             else {
2330             ##ok: (move==0)
2331             }
2332             }
2333 108         354 return;
2334             } ## end sub move_to_common_column
2335              
2336             sub do_left_to_right_sweep {
2337 64     64 0 272 my ( $rlines, $rgroups, $rtodo, $rmax_move, $short_pad, $group_level )
2338             = @_;
2339              
2340             # $blocking_level[$nj is the level at a match failure between groups
2341             # $ng-1 and $ng
2342 64         118 my @blocking_level;
2343 64         204 my $group_list_type = $rlines->[0]->{'list_type'};
2344              
2345 64         125 foreach my $task ( @{$rtodo} ) {
  64         224  
2346 115         218 my ( $itok, $ng_beg, $ng_end, $raw_tok, $lev ) = @{$task};
  115         362  
2347              
2348             # Nothing to do for a single group
2349 115 50       343 next if ( $ng_end <= $ng_beg );
2350              
2351 115         1344 my $ng_first; # index of the first group of a continuous sequence
2352             my $col_want; # the common alignment column of a sequence of groups
2353 115         0 my $col_limit; # maximum column before bumping into max line length
2354 115         210 my $line_count_ng_m = 0;
2355 115         209 my $jmax_m;
2356             my $it_stop_m;
2357              
2358             # Loop over the groups
2359             # 'ix_' = index in the array of lines
2360             # 'ng_' = index in the array of groups
2361             # 'it_' = index in the array of tokens
2362 115         238 my $ix_min = $rgroups->[$ng_beg]->[0];
2363 115         223 my $ix_max = $rgroups->[$ng_end]->[1];
2364 115         242 my $lines_total = $ix_max - $ix_min + 1;
2365 115         329 foreach my $ng ( $ng_beg .. $ng_end ) {
2366 259         428 my ( $ix_beg, $ix_end, $it_stop ) = @{ $rgroups->[$ng] };
  259         620  
2367 259         447 my $line_count_ng = $ix_end - $ix_beg + 1;
2368              
2369             # Important: note that since all lines in a group have a common
2370             # alignments object, we just have to work on one of the lines
2371             # (the first line). All of the rest will be changed
2372             # automatically.
2373 259         399 my $line = $rlines->[$ix_beg];
2374 259         449 my $jmax = $line->{'jmax'};
2375              
2376             # the maximum space without exceeding the line length:
2377 259         718 my $avail = $line->get_available_space_on_right();
2378 259         695 my $col = $line->get_column($itok);
2379 259         909 my $col_max = $col + $avail;
2380              
2381             # Initialize on first group
2382 259 100       685 if ( !defined($col_want) ) {
2383 115         235 $ng_first = $ng;
2384 115         215 $col_want = $col;
2385 115         213 $col_limit = $col_max;
2386 115         184 $line_count_ng_m = $line_count_ng;
2387 115         199 $jmax_m = $jmax;
2388 115         203 $it_stop_m = $it_stop;
2389 115         242 next;
2390             }
2391              
2392             # RULE: Throw a blocking flag upon encountering a token level
2393             # different from the level of the first blocking token. For
2394             # example, in the following example, if the = matches get
2395             # blocked between two groups as shown, then we want to start
2396             # blocking matches at the commas, which are at deeper level, so
2397             # that we do not get the big gaps shown here:
2398              
2399             # my $unknown3 = pack( "v", -2 );
2400             # my $unknown4 = pack( "v", 0x09 );
2401             # my $unknown5 = pack( "VVV", 0x06, 0x00, 0x00 );
2402             # my $num_bbd_blocks = pack( "V", $num_lists );
2403             # my $root_startblock = pack( "V", $root_start );
2404             # my $unknown6 = pack( "VV", 0x00, 0x1000 );
2405              
2406             # On the other hand, it is okay to keep matching at the same
2407             # level such as in a simple list of commas and/or fat commas.
2408              
2409 144   66     660 my $is_blocked = defined( $blocking_level[$ng] )
2410             && $lev > $blocking_level[$ng];
2411              
2412             # TAIL-WAG-DOG RULE: prevent a 'tail-wag-dog' syndrom, meaning:
2413             # Do not let one or two lines with a **different number of
2414             # alignments** open up a big gap in a large block. For
2415             # example, we will prevent something like this, where the first
2416             # line pries open the rest:
2417              
2418             # $worksheet->write( "B7", "http://www.perl.com", undef, $format );
2419             # $worksheet->write( "C7", "", $format );
2420             # $worksheet->write( "D7", "", $format );
2421             # $worksheet->write( "D8", "", $format );
2422             # $worksheet->write( "D8", "", $format );
2423              
2424             # We should exclude from consideration two groups which are
2425             # effectively the same but separated because one does not
2426             # fit in the maximum allowed line length.
2427 144   100     489 my $is_same_group =
2428             $jmax == $jmax_m && $it_stop_m == $jmax_m - 2;
2429              
2430 144         312 my $lines_above = $ix_beg - $ix_min;
2431 144         276 my $lines_below = $lines_total - $lines_above;
2432              
2433             # Increase the tolerable gap for certain favorable factors
2434 144         264 my $factor = 1;
2435 144         296 my $top_level = $lev == $group_level;
2436              
2437             # Align best top level alignment tokens like '=', 'if', ...
2438             # A factor of 10 allows a gap of up to 40 spaces
2439 144 100 100     1352 if ( $top_level && $is_good_alignment_token{$raw_tok} ) {
2440 31         64 $factor = 10;
2441             }
2442              
2443             # Otherwise allow some minimal padding of good alignments
2444             else {
2445              
2446 113 100 100     816 if (
      100        
2447              
2448             defined( $is_good_alignment_token{$raw_tok} )
2449              
2450             # We have to be careful if there are just 2 lines.
2451             # This two-line factor allows large gaps only for 2
2452             # lines which are simple lists with fewer items on the
2453             # second line. It gives results similar to previous
2454             # versions of perltidy.
2455             && (
2456             $lines_total > 2
2457             || ( $group_list_type
2458             && $jmax < $jmax_m
2459             && $top_level )
2460             )
2461             )
2462             {
2463 102         184 $factor += 1;
2464 102 100       272 if ($top_level) {
2465 66         132 $factor += 1;
2466             }
2467             }
2468             }
2469              
2470 144         276 my $is_big_gap;
2471 144 100       379 if ( !$is_same_group ) {
2472 118   66     1090 $is_big_gap ||=
      33        
2473             ( $lines_above == 1
2474             || $lines_above == 2 && $lines_below >= 4 )
2475             && $col_want > $col + $short_pad * $factor;
2476 118   66     849 $is_big_gap ||=
      33        
2477             ( $lines_below == 1
2478             || $lines_below == 2 && $lines_above >= 4 )
2479             && $col > $col_want + $short_pad * $factor;
2480             }
2481              
2482             # if match is limited by gap size, stop aligning at this level
2483 144 50       373 if ($is_big_gap) {
2484 0         0 $blocking_level[$ng] = $lev - 1;
2485             }
2486              
2487             # quit and restart if it cannot join this batch
2488 144 50 100     1017 if ( $col_want > $col_max
      66        
      66        
2489             || $col > $col_limit
2490             || $is_big_gap
2491             || $is_blocked )
2492             {
2493              
2494             # remember the level of the first blocking token
2495 10 100       27 if ( !defined( $blocking_level[$ng] ) ) {
2496 9         21 $blocking_level[$ng] = $lev;
2497             }
2498              
2499             move_to_common_column(
2500 10         46 $rlines, $rgroups, $rmax_move, $ng_first,
2501             $ng - 1, $itok, $col_want, $raw_tok
2502             );
2503 10         19 $ng_first = $ng;
2504 10         16 $col_want = $col;
2505 10         17 $col_limit = $col_max;
2506 10         17 $line_count_ng_m = $line_count_ng;
2507 10         14 $jmax_m = $jmax;
2508 10         19 $it_stop_m = $it_stop;
2509 10         37 next;
2510             }
2511              
2512 134         264 $line_count_ng_m += $line_count_ng;
2513              
2514             # update the common column and limit
2515 134 100       368 if ( $col > $col_want ) { $col_want = $col }
  42         90  
2516 134 100       402 if ( $col_max < $col_limit ) { $col_limit = $col_max }
  35         95  
2517              
2518             } ## end loop over groups
2519              
2520 115 100       414 if ( $ng_end > $ng_first ) {
2521 108         386 move_to_common_column(
2522             $rlines, $rgroups, $rmax_move, $ng_first,
2523             $ng_end, $itok, $col_want, $raw_tok
2524             );
2525             } ## end loop over groups for one task
2526             } ## end loop over tasks
2527              
2528 64         168 return;
2529             } ## end sub do_left_to_right_sweep
2530             }
2531              
2532             sub delete_selected_tokens {
2533              
2534 469     469 0 1143 my ( $line_obj, $ridel ) = @_;
2535              
2536             # $line_obj is the line to be modified
2537             # $ridel is a ref to list of indexes to be deleted
2538              
2539             # remove an unused alignment token(s) to improve alignment chances
2540              
2541 469 50 33     2294 return if ( !defined($line_obj) || !defined($ridel) || !@{$ridel} );
  469   33     1548  
2542              
2543 469         1097 my $jmax_old = $line_obj->{'jmax'};
2544 469         978 my $rfields_old = $line_obj->{'rfields'};
2545 469         862 my $rfield_lengths_old = $line_obj->{'rfield_lengths'};
2546 469         922 my $rpatterns_old = $line_obj->{'rpatterns'};
2547 469         887 my $rtokens_old = $line_obj->{'rtokens'};
2548 469         911 my $j_terminal_match = $line_obj->{'j_terminal_match'};
2549              
2550 39     39   377 use constant EXPLAIN_DELETE_SELECTED => 0;
  39         113  
  39         35367  
2551              
2552 469         1170 local $LIST_SEPARATOR = '> <';
2553 469         687 EXPLAIN_DELETE_SELECTED && print <<EOM;
2554             delete indexes: <@{$ridel}>
2555             old jmax: $jmax_old
2556             old tokens: <@{$rtokens_old}>
2557             old patterns: <@{$rpatterns_old}>
2558             old fields: <@{$rfields_old}>
2559             old field_lengths: <@{$rfield_lengths_old}>
2560             EOM
2561              
2562 469         1033 my $rfields_new = [];
2563 469         946 my $rpatterns_new = [];
2564 469         945 my $rtokens_new = [];
2565 469         930 my $rfield_lengths_new = [];
2566              
2567             # Convert deletion list to a hash to allow any order, multiple entries,
2568             # and avoid problems with index values out of range
2569 469         828 my %delete_me;
2570 469         826 @delete_me{ @{$ridel} } = (1) x scalar( @{$ridel} );
  469         1603  
  469         1030  
2571              
2572 469         1152 my $pattern_0 = $rpatterns_old->[0];
2573 469         965 my $field_0 = $rfields_old->[0];
2574 469         902 my $field_length_0 = $rfield_lengths_old->[0];
2575 469         764 push @{$rfields_new}, $field_0;
  469         1124  
2576 469         835 push @{$rfield_lengths_new}, $field_length_0;
  469         1027  
2577 469         783 push @{$rpatterns_new}, $pattern_0;
  469         1015  
2578              
2579             # Loop to either copy items or concatenate fields and patterns
2580 469         903 my $jmin_del;
2581 469         1448 foreach my $j ( 0 .. $jmax_old - 1 ) {
2582 1515         2580 my $token = $rtokens_old->[$j];
2583 1515         2865 my $field = $rfields_old->[ $j + 1 ];
2584 1515         2428 my $field_length = $rfield_lengths_old->[ $j + 1 ];
2585 1515         2603 my $pattern = $rpatterns_old->[ $j + 1 ];
2586 1515 100       3382 if ( !$delete_me{$j} ) {
2587 743         1276 push @{$rtokens_new}, $token;
  743         1620  
2588 743         1271 push @{$rfields_new}, $field;
  743         1310  
2589 743         1135 push @{$rpatterns_new}, $pattern;
  743         1333  
2590 743         1126 push @{$rfield_lengths_new}, $field_length;
  743         2004  
2591             }
2592             else {
2593 772 100       1961 if ( !defined($jmin_del) ) { $jmin_del = $j }
  469         868  
2594 772         2205 $rfields_new->[-1] .= $field;
2595 772         1365 $rfield_lengths_new->[-1] += $field_length;
2596 772         1745 $rpatterns_new->[-1] .= $pattern;
2597             }
2598             }
2599              
2600             # ----- x ------ x ------ x ------
2601             #t 0 1 2 <- token indexing
2602             #f 0 1 2 3 <- field and pattern
2603              
2604 469         903 my $jmax_new = @{$rfields_new} - 1;
  469         1176  
2605 469         1031 $line_obj->{'rtokens'} = $rtokens_new;
2606 469         911 $line_obj->{'rpatterns'} = $rpatterns_new;
2607 469         904 $line_obj->{'rfields'} = $rfields_new;
2608 469         848 $line_obj->{'rfield_lengths'} = $rfield_lengths_new;
2609 469         855 $line_obj->{'jmax'} = $jmax_new;
2610              
2611             # The value of j_terminal_match will be incorrect if we delete tokens prior
2612             # to it. We will have to give up on aligning the terminal tokens if this
2613             # happens.
2614 469 100 100     1483 if ( defined($j_terminal_match) && $jmin_del <= $j_terminal_match ) {
2615 1         3 $line_obj->{'j_terminal_match'} = undef;
2616             }
2617              
2618             # update list type -
2619 469 100       1334 if ( $line_obj->{'list_seqno'} ) {
2620              
2621             ## This works, but for efficiency see if we need to make a change:
2622             ## decide_if_list($line_obj);
2623              
2624             # An existing list will still be a list but with possibly different
2625             # leading token
2626 76         193 my $old_list_type = $line_obj->{'list_type'};
2627 76         157 my $new_list_type = EMPTY_STRING;
2628 76 100       517 if ( $rtokens_new->[0] =~ /^(=>|,)/ ) {
2629 49         156 $new_list_type = $rtokens_new->[0];
2630             }
2631 76 100 100     407 if ( !$old_list_type || $old_list_type ne $new_list_type ) {
2632 44         138 decide_if_list($line_obj);
2633             }
2634             }
2635              
2636 469         798 EXPLAIN_DELETE_SELECTED && print <<EOM;
2637              
2638             new jmax: $jmax_new
2639             new tokens: <@{$rtokens_new}>
2640             new patterns: <@{$rpatterns_new}>
2641             new fields: <@{$rfields_new}>
2642             EOM
2643 469         2888 return;
2644             } ## end sub delete_selected_tokens
2645              
2646             { ## closure for sub decode_alignment_token
2647              
2648             # This routine is called repeatedly for each token, so it needs to be
2649             # efficient. We can speed things up by remembering the inputs and outputs
2650             # in a hash.
2651             my %decoded_token;
2652              
2653             sub initialize_decode {
2654              
2655             # We will re-initialize the hash for each file. Otherwise, there is
2656             # a danger that the hash can become arbitrarily large if a very large
2657             # number of files is processed at once.
2658 561     561 0 4043 %decoded_token = ();
2659 561         1166 return;
2660             } ## end sub initialize_decode
2661              
2662             sub decode_alignment_token {
2663              
2664             # Unpack the values packed in an alignment token
2665             #
2666             # Usage:
2667             # my ( $raw_tok, $lev, $tag, $tok_count ) =
2668             # decode_alignment_token($token);
2669              
2670             # Alignment tokens have a trailing decimal level and optional tag (for
2671             # commas):
2672             # For example, the first comma in the following line
2673             # sub banner { crlf; report( shift, '/', shift ); crlf }
2674             # is decorated as follows:
2675             # ,2+report-6 => (tok,lev,tag) =qw( , 2 +report-6)
2676              
2677             # An optional token count may be appended with a leading dot.
2678             # Currently this is only done for '=' tokens but this could change.
2679             # For example, consider the following line:
2680             # $nport = $port = shift || $name;
2681             # The first '=' may either be '=0' or '=0.1' [level 0, first equals]
2682             # The second '=' will be '=0.2' [level 0, second equals]
2683 9364     9364 0 16348 my ($tok) = @_;
2684              
2685 9364 100       19611 if ( defined( $decoded_token{$tok} ) ) {
2686 7923         11261 return @{ $decoded_token{$tok} };
  7923         31400  
2687             }
2688              
2689 1441         3618 my ( $raw_tok, $lev, $tag, $tok_count ) = ( $tok, 0, EMPTY_STRING, 1 );
2690 1441 100       8941 if ( $tok =~ /^(\D+)(\d+)([^\.]*)(\.(\d+))?$/ ) {
2691 1135         3467 $raw_tok = $1;
2692 1135         2314 $lev = $2;
2693 1135 100       3403 $tag = $3 if ($3);
2694 1135 100       3154 $tok_count = $5 if ($5);
2695             }
2696 1441         5062 my @vals = ( $raw_tok, $lev, $tag, $tok_count );
2697 1441         4164 $decoded_token{$tok} = \@vals;
2698 1441         6793 return @vals;
2699             } ## end sub decode_alignment_token
2700             }
2701              
2702             { ## closure for sub delete_unmatched_tokens
2703              
2704             my %is_assignment;
2705             my %keep_after_deleted_assignment;
2706              
2707             BEGIN {
2708 39     39   207 my @q;
2709              
2710 39         222 @q = qw(
2711             = **= += *= &= <<= &&=
2712             -= /= |= >>= ||= //=
2713             .= %= ^=
2714             x=
2715             );
2716 39         584 @is_assignment{@q} = (1) x scalar(@q);
2717              
2718             # These tokens may be kept following an = deletion
2719 39         199 @q = qw(
2720             if unless or ||
2721             );
2722 39         89206 @keep_after_deleted_assignment{@q} = (1) x scalar(@q);
2723              
2724             } ## end BEGIN
2725              
2726             sub delete_unmatched_tokens {
2727 1706     1706 0 4058 my ( $rlines, $group_level ) = @_;
2728              
2729             # This is a important first step in vertical alignment in which
2730             # we remove as many obviously un-needed alignment tokens as possible.
2731             # This will prevent them from interfering with the final alignment.
2732              
2733             # Returns:
2734 1706         2896 my $max_lev_diff = 0; # used to avoid a call to prune_tree
2735 1706         2938 my $saw_side_comment = 0; # used to avoid a call for side comments
2736              
2737             # Handle no lines -- shouldn't happen
2738 1706 50       2758 return unless @{$rlines};
  1706         4364  
2739              
2740             # Handle a single line
2741 1706 100       2834 if ( @{$rlines} == 1 ) {
  1706         4490  
2742 1122         2628 my $line = $rlines->[0];
2743 1122         2377 my $jmax = $line->{'jmax'};
2744 1122         2485 my $length = $line->{'rfield_lengths'}->[$jmax];
2745 1122         2322 $saw_side_comment = $length > 0;
2746 1122         5082 return ( $max_lev_diff, $saw_side_comment );
2747             }
2748              
2749             # ignore hanging side comments in these operations
2750 584         1545 my @filtered = grep { !$_->{'is_hanging_side_comment'} } @{$rlines};
  1944         6157  
  584         1697  
2751 584         1674 my $rnew_lines = \@filtered;
2752              
2753 584         1184 $saw_side_comment = @filtered != @{$rlines};
  584         1522  
2754 584         1153 $max_lev_diff = 0;
2755              
2756             # nothing to do if all lines were hanging side comments
2757 584         1016 my $jmax = @{$rnew_lines} - 1;
  584         1316  
2758 584 100       1829 return ( $max_lev_diff, $saw_side_comment ) if ( $jmax < 0 );
2759              
2760             #----------------------------------------------------
2761             # Create a hash of alignment token info for each line
2762             #----------------------------------------------------
2763 583         2184 ( my $rline_hashes, my $requals_info, $saw_side_comment, $max_lev_diff )
2764             = make_alignment_info( $group_level, $rnew_lines, $saw_side_comment );
2765              
2766             #------------------------------------------------------------
2767             # Find independent subgroups of lines. Neighboring subgroups
2768             # do not have a common alignment token.
2769             #------------------------------------------------------------
2770 583         1313 my @subgroups;
2771 583         1571 push @subgroups, [ 0, $jmax ];
2772 583         1818 foreach my $jl ( 0 .. $jmax - 1 ) {
2773 1315 100       3768 if ( $rnew_lines->[$jl]->{'end_group'} ) {
2774 72         217 $subgroups[-1]->[1] = $jl;
2775 72         259 push @subgroups, [ $jl + 1, $jmax ];
2776             }
2777             }
2778              
2779             #-----------------------------------------------------------
2780             # PASS 1 over subgroups to remove unmatched alignment tokens
2781             #-----------------------------------------------------------
2782             delete_unmatched_tokens_main_loop(
2783 583         3074 $group_level, $rnew_lines, \@subgroups,
2784             $rline_hashes, $requals_info
2785             );
2786              
2787             #----------------------------------------------------------------
2788             # PASS 2: Construct a tree of matched lines and delete some small
2789             # deeper levels of tokens. They also block good alignments.
2790             #----------------------------------------------------------------
2791 583 100       2889 prune_alignment_tree($rnew_lines) if ($max_lev_diff);
2792              
2793             #--------------------------------------------
2794             # PASS 3: compare all lines for common tokens
2795             #--------------------------------------------
2796 583         2913 match_line_pairs( $rlines, $rnew_lines, \@subgroups, $group_level );
2797              
2798 583         6344 return ( $max_lev_diff, $saw_side_comment );
2799             } ## end sub delete_unmatched_tokens
2800              
2801             sub make_alignment_info {
2802              
2803 583     583 0 1639 my ( $group_level, $rnew_lines, $saw_side_comment ) = @_;
2804              
2805             #------------------------------------------------------------
2806             # Loop to create a hash of alignment token info for each line
2807             #------------------------------------------------------------
2808 583         1325 my $rline_hashes = [];
2809 583         1191 my @equals_info;
2810             my @line_info; # no longer used
2811 583         1050 my $jmax = @{$rnew_lines} - 1;
  583         1367  
2812 583         1178 my $max_lev_diff = 0;
2813 583         1114 foreach my $line ( @{$rnew_lines} ) {
  583         1539  
2814 1898         3442 my $rhash = {};
2815 1898         3715 my $rtokens = $line->{'rtokens'};
2816 1898         3238 my $rpatterns = $line->{'rpatterns'};
2817 1898         3044 my $i = 0;
2818 1898         4629 my ( $i_eq, $tok_eq, $pat_eq );
2819 1898         0 my ( $lev_min, $lev_max );
2820 1898         2814 foreach my $tok ( @{$rtokens} ) {
  1898         3627  
2821 5174         9362 my ( $raw_tok, $lev, $tag, $tok_count ) =
2822             decode_alignment_token($tok);
2823              
2824 5174 100       11409 if ( $tok ne '#' ) {
2825 3276 100       7148 if ( !defined($lev_min) ) {
2826 1779         2905 $lev_min = $lev;
2827 1779         3095 $lev_max = $lev;
2828             }
2829             else {
2830 1497 100       3719 if ( $lev < $lev_min ) { $lev_min = $lev }
  75         225  
2831 1497 100       3510 if ( $lev > $lev_max ) { $lev_max = $lev }
  260         520  
2832             }
2833             }
2834             else {
2835 1898 100       4748 if ( !$saw_side_comment ) {
2836 1709         5605 my $length = $line->{'rfield_lengths'}->[ $i + 1 ];
2837 1709   66     5556 $saw_side_comment ||= $length;
2838             }
2839             }
2840              
2841             # Possible future upgrade: for multiple matches,
2842             # record [$i1, $i2, ..] instead of $i
2843 5174         16762 $rhash->{$tok} =
2844             [ $i, undef, undef, $raw_tok, $lev, $tag, $tok_count ];
2845              
2846             # remember the first equals at line level
2847 5174 100 100     16912 if ( !defined($i_eq) && $raw_tok eq '=' ) {
2848              
2849 520 100       1431 if ( $lev eq $group_level ) {
2850 405         699 $i_eq = $i;
2851 405         698 $tok_eq = $tok;
2852 405         902 $pat_eq = $rpatterns->[$i];
2853             }
2854             }
2855 5174         9183 $i++;
2856             }
2857 1898         3169 push @{$rline_hashes}, $rhash;
  1898         3648  
2858 1898         5793 push @equals_info, [ $i_eq, $tok_eq, $pat_eq ];
2859 1898         5625 push @line_info, [ $lev_min, $lev_max ];
2860 1898 100       4402 if ( defined($lev_min) ) {
2861 1779         3176 my $lev_diff = $lev_max - $lev_min;
2862 1779 100       4765 if ( $lev_diff > $max_lev_diff ) { $max_lev_diff = $lev_diff }
  162         457  
2863             }
2864             }
2865              
2866             #----------------------------------------------------
2867             # Loop to compare each line pair and remember matches
2868             #----------------------------------------------------
2869 583         1805 my $rtok_hash = {};
2870 583         1370 my $nr = 0;
2871 583         2046 foreach my $jl ( 0 .. $jmax - 1 ) {
2872 1315         2180 my $nl = $nr;
2873 1315         2110 $nr = 0;
2874 1315         2178 my $jr = $jl + 1;
2875 1315         2246 my $rhash_l = $rline_hashes->[$jl];
2876 1315         2165 my $rhash_r = $rline_hashes->[$jr];
2877 1315         2034 foreach my $tok ( keys %{$rhash_l} ) {
  1315         4708  
2878 3154 100       6713 if ( defined( $rhash_r->{$tok} ) ) {
2879 2670         4188 my $il = $rhash_l->{$tok}->[0];
2880 2670         4096 my $ir = $rhash_r->{$tok}->[0];
2881 2670         4079 $rhash_l->{$tok}->[2] = $ir;
2882 2670         3982 $rhash_r->{$tok}->[1] = $il;
2883 2670 100       5976 if ( $tok ne '#' ) {
2884 1355         2094 push @{ $rtok_hash->{$tok} }, ( $jl, $jr );
  1355         3586  
2885 1355         2554 $nr++;
2886             }
2887             }
2888             }
2889              
2890             # Set a line break if no matching tokens between these lines
2891             # (this is not strictly necessary now but does not hurt)
2892 1315 100 100     5227 if ( $nr == 0 && $nl > 0 ) {
2893 36         187 $rnew_lines->[$jl]->{'end_group'} = 1;
2894             }
2895              
2896             # Also set a line break if both lines have simple equals but with
2897             # different leading characters in patterns. This check is similar
2898             # to one in sub check_match, and will prevent sub
2899             # prune_alignment_tree from removing alignments which otherwise
2900             # should be kept. This fix is rarely needed, but it can
2901             # occasionally improve formatting.
2902             # For example:
2903             # my $name = $this->{Name};
2904             # $type = $this->ctype($genlooptype) if defined $genlooptype;
2905             # my $declini = ( $asgnonly ? "" : "\t$type *" );
2906             # my $cast = ( $type ? "($type *)" : "" );
2907             # The last two lines start with 'my' and will not match the
2908             # previous line starting with $type, so we do not want
2909             # prune_alignment tree to delete their ? : alignments at a deeper
2910             # level.
2911 1315         2167 my ( $i_eq_l, $tok_eq_l, $pat_eq_l ) = @{ $equals_info[$jl] };
  1315         3325  
2912 1315         2485 my ( $i_eq_r, $tok_eq_r, $pat_eq_r ) = @{ $equals_info[$jr] };
  1315         2671  
2913 1315 100 100     4907 if ( defined($i_eq_l) && defined($i_eq_r) ) {
2914              
2915             # Also, do not align equals across a change in ci level
2916             my $ci_jump = $rnew_lines->[$jl]->{'ci_level'} !=
2917 199         659 $rnew_lines->[$jr]->{'ci_level'};
2918              
2919 199 100 66     2247 if (
      66        
      100        
      100        
2920             $tok_eq_l eq $tok_eq_r
2921             && $i_eq_l == 0
2922             && $i_eq_r == 0
2923             && ( substr( $pat_eq_l, 0, 1 ) ne substr( $pat_eq_r, 0, 1 )
2924             || $ci_jump )
2925             )
2926             {
2927 12         51 $rnew_lines->[$jl]->{'end_group'} = 1;
2928             }
2929             }
2930             }
2931 583         4278 return ( $rline_hashes, \@equals_info, $saw_side_comment,
2932             $max_lev_diff );
2933             } ## end sub make_alignment_info
2934              
2935             sub delete_unmatched_tokens_main_loop {
2936              
2937             my (
2938 583     583 0 1832 $group_level, $rnew_lines, $rsubgroups,
2939             $rline_hashes, $requals_info
2940             ) = @_;
2941              
2942             #--------------------------------------------------------------
2943             # Main loop over subgroups to remove unmatched alignment tokens
2944             #--------------------------------------------------------------
2945              
2946             # flag to allow skipping pass 2 - not currently used
2947 583         1015 my $saw_large_group;
2948              
2949 583         1548 my $has_terminal_match = $rnew_lines->[-1]->{'j_terminal_match'};
2950              
2951 583         1127 foreach my $item ( @{$rsubgroups} ) {
  583         1425  
2952 655         1187 my ( $jbeg, $jend ) = @{$item};
  655         1617  
2953              
2954 655         1631 my $nlines = $jend - $jbeg + 1;
2955              
2956             #---------------------------------------------------
2957             # Look for complete if/elsif/else and ternary blocks
2958             #---------------------------------------------------
2959              
2960             # We are looking for a common '$dividing_token' like these:
2961              
2962             # if ( $b and $s ) { $p->{'type'} = 'a'; }
2963             # elsif ($b) { $p->{'type'} = 'b'; }
2964             # elsif ($s) { $p->{'type'} = 's'; }
2965             # else { $p->{'type'} = ''; }
2966             # ^----------- dividing_token
2967              
2968             # my $severity =
2969             # !$routine ? '[PFX]'
2970             # : $routine =~ /warn.*_d\z/ ? '[DS]'
2971             # : $routine =~ /ck_warn/ ? 'W'
2972             # : $routine =~ /ckWARN\d*reg_d/ ? 'S'
2973             # : $routine =~ /ckWARN\d*reg/ ? 'W'
2974             # : $routine =~ /vWARN\d/ ? '[WDS]'
2975             # : '[PFX]';
2976             # ^----------- dividing_token
2977              
2978             # Only look for groups which are more than 2 lines long. Two lines
2979             # can get messed up doing this, probably due to the various
2980             # two-line rules.
2981              
2982 655         1371 my $dividing_token;
2983             my %token_line_count;
2984 655 100       2053 if ( $nlines > 2 ) {
2985              
2986 301         953 foreach my $jj ( $jbeg .. $jend ) {
2987 1281         1869 my %seen;
2988 1281         2086 my $line = $rnew_lines->[$jj];
2989 1281         2008 my $rtokens = $line->{'rtokens'};
2990 1281         1788 foreach my $tok ( @{$rtokens} ) {
  1281         2286  
2991 3581 100       6794 if ( !$seen{$tok} ) {
2992 3065         4681 $seen{$tok}++;
2993 3065         5923 $token_line_count{$tok}++;
2994             }
2995             }
2996             }
2997              
2998 301         1663 foreach my $tok ( keys %token_line_count ) {
2999 931 100       2536 if ( $token_line_count{$tok} == $nlines ) {
3000 564 100 100     3391 if ( substr( $tok, 0, 1 ) eq '?'
      100        
3001             || substr( $tok, 0, 1 ) eq '{'
3002             && $tok =~ /^\{\d+if/ )
3003             {
3004 21         64 $dividing_token = $tok;
3005 21         70 last;
3006             }
3007             }
3008             }
3009             }
3010              
3011             #-------------------------------------------------------------
3012             # Loop over subgroup lines to remove unwanted alignment tokens
3013             #-------------------------------------------------------------
3014 655         2427 foreach my $jj ( $jbeg .. $jend ) {
3015 1898         3296 my $line = $rnew_lines->[$jj];
3016 1898         3139 my $rtokens = $line->{'rtokens'};
3017 1898         2929 my $rhash = $rline_hashes->[$jj];
3018 1898         3016 my $i_eq = $requals_info->[$jj]->[0];
3019 1898         2789 my @idel;
3020 1898         2751 my $imax = @{$rtokens} - 2;
  1898         3502  
3021 1898         3037 my $delete_above_level;
3022             my $deleted_assignment_token;
3023              
3024 1898         3041 my $saw_dividing_token = EMPTY_STRING;
3025 1898   100     8752 $saw_large_group ||= $nlines > 2 && $imax > 1;
      100        
3026              
3027             # Loop over all alignment tokens
3028 1898         3793 foreach my $i ( 0 .. $imax ) {
3029 3276         5353 my $tok = $rtokens->[$i];
3030 3276 50       6551 next if ( $tok eq '#' ); # shouldn't happen
3031             my ( $iii, $il, $ir, $raw_tok, $lev, $tag, $tok_count ) =
3032 3276         4611 @{ $rhash->{$tok} };
  3276         8146  
3033              
3034             #------------------------------------------------------
3035             # Here is the basic RULE: remove an unmatched alignment
3036             # which does not occur in the surrounding lines.
3037             #------------------------------------------------------
3038 3276   100     8619 my $delete_me = !defined($il) && !defined($ir);
3039              
3040             # Apply any user controls. Note that not all lines pass
3041             # this way so they have to be applied elsewhere too.
3042 3276         4558 my $align_ok = 1;
3043 3276 100       6203 if (%valign_control_hash) {
3044 31         55 $align_ok = $valign_control_hash{$raw_tok};
3045 31 100       60 $align_ok = $valign_control_default
3046             unless defined($align_ok);
3047 31   100     95 $delete_me ||= !$align_ok;
3048             }
3049              
3050             # But now we modify this with exceptions...
3051              
3052             # EXCEPTION 1: If we are in a complete ternary or
3053             # if/elsif/else group, and this token is not on every line
3054             # of the group, should we delete it to preserve overall
3055             # alignment?
3056 3276 100       6165 if ($dividing_token) {
3057 147 100       314 if ( $token_line_count{$tok} >= $nlines ) {
3058 120   100     377 $saw_dividing_token ||= $tok eq $dividing_token;
3059             }
3060             else {
3061              
3062             # For shorter runs, delete toks to save alignment.
3063             # For longer runs, keep toks after the '{' or '?'
3064             # to allow sub-alignments within braces. The
3065             # number 5 lines is arbitrary but seems to work ok.
3066 27   66     109 $delete_me ||=
      100        
3067             ( $nlines < 5 || !$saw_dividing_token );
3068             }
3069             }
3070              
3071             # EXCEPTION 2: Remove all tokens above a certain level
3072             # following a previous deletion. For example, we have to
3073             # remove tagged higher level alignment tokens following a
3074             # '=>' deletion because the tags of higher level tokens
3075             # will now be incorrect. For example, this will prevent
3076             # aligning commas as follows after deleting the second '=>'
3077             # $w->insert(
3078             # ListBox => origin => [ 270, 160 ],
3079             # size => [ 200, 55 ],
3080             # );
3081 3276 100       6127 if ( defined($delete_above_level) ) {
3082 280 100       1157 if ( $lev > $delete_above_level ) {
3083 132   100     454 $delete_me ||= 1;
3084             }
3085 148         357 else { $delete_above_level = undef }
3086             }
3087              
3088             # EXCEPTION 3: Remove all but certain tokens after an
3089             # assignment deletion.
3090 3276 100 100     6256 if (
      100        
3091             $deleted_assignment_token
3092             && ( $lev > $group_level
3093             || !$keep_after_deleted_assignment{$raw_tok} )
3094             )
3095             {
3096 41   100     133 $delete_me ||= 1;
3097             }
3098              
3099             # EXCEPTION 4: Do not touch the first line of a 2 line
3100             # terminal match, such as below, because j_terminal has
3101             # already been set.
3102             # if ($tag) { $tago = "<$tag>"; $tagc = "</$tag>"; }
3103             # else { $tago = $tagc = ''; }
3104             # But see snippets 'else1.t' and 'else2.t'
3105 3276 100 100     8646 $delete_me = 0
      100        
3106             if ( $jj == $jbeg
3107             && $has_terminal_match
3108             && $nlines == 2 );
3109              
3110             # EXCEPTION 5: misc additional rules for commas and equals
3111 3276 100 100     8000 if ( $delete_me && $tok_count == 1 ) {
3112              
3113             # okay to delete second and higher copies of a token
3114              
3115             # for a comma...
3116 721 100       2015 if ( $raw_tok eq ',' ) {
3117              
3118             # Do not delete commas before an equals
3119 262 100 100     1135 $delete_me = 0
3120             if ( defined($i_eq) && $i < $i_eq );
3121              
3122             # Do not delete line-level commas
3123 262 100       726 $delete_me = 0 if ( $lev <= $group_level );
3124             }
3125              
3126             # For an assignment at group level..
3127 721 100 100     2835 if ( $is_assignment{$raw_tok}
3128             && $lev == $group_level )
3129             {
3130              
3131             # Do not delete if it is the last alignment of
3132             # multiple tokens; this will prevent some
3133             # undesirable alignments
3134 106 100 100     747 if ( $imax > 0 && $i == $imax ) {
3135 12         35 $delete_me = 0;
3136             }
3137              
3138             # Otherwise, set a flag to delete most
3139             # remaining tokens
3140 94         246 else { $deleted_assignment_token = $raw_tok }
3141             }
3142             }
3143              
3144             # Do not let a user exclusion be reactivated by above rules
3145 3276   66     10261 $delete_me ||= !$align_ok;
3146              
3147             #------------------------------------
3148             # Add this token to the deletion list
3149             #------------------------------------
3150 3276 100       6987 if ($delete_me) {
3151 661         1235 push @idel, $i;
3152              
3153             # update deletion propagation flags
3154 661 100 66     2281 if ( !defined($delete_above_level)
3155             || $lev < $delete_above_level )
3156             {
3157              
3158             # delete all following higher level alignments
3159 529         952 $delete_above_level = $lev;
3160              
3161             # but keep deleting after => to next lower level
3162             # to avoid some bizarre alignments
3163 529 100       1592 if ( $raw_tok eq '=>' ) {
3164 53         168 $delete_above_level = $lev - 1;
3165             }
3166             }
3167             }
3168             } # End loop over alignment tokens
3169              
3170             # Process all deletion requests for this line
3171 1898 100       6299 if (@idel) {
3172 413         1726 delete_selected_tokens( $line, \@idel );
3173             }
3174             } # End loop over lines
3175             } ## end main loop over subgroups
3176              
3177 583         1553 return;
3178             } ## end sub delete_unmatched_tokens_main_loop
3179             }
3180              
3181             sub match_line_pairs {
3182 583     583 0 3102 my ( $rlines, $rnew_lines, $rsubgroups, $group_level ) = @_;
3183              
3184             # Compare each pair of lines and save information about common matches
3185             # $rlines = list of lines including hanging side comments
3186             # $rnew_lines = list of lines without any hanging side comments
3187             # $rsubgroups = list of subgroups of the new lines
3188              
3189             # TODO:
3190             # Maybe change: imax_pair => pair_match_info = ref to array
3191             # = [$imax_align, $rMsg, ... ]
3192             # This may eventually have multi-level match info
3193              
3194             # Previous line vars
3195 583         2300 my ( $line_m, $rtokens_m, $rpatterns_m, $rfield_lengths_m, $imax_m,
3196             $list_type_m, $ci_level_m );
3197              
3198             # Current line vars
3199 583         0 my ( $line, $rtokens, $rpatterns, $rfield_lengths, $imax, $list_type,
3200             $ci_level );
3201              
3202             # loop over subgroups
3203 583         1046 foreach my $item ( @{$rsubgroups} ) {
  583         1481  
3204 655         1102 my ( $jbeg, $jend ) = @{$item};
  655         1507  
3205 655         1725 my $nlines = $jend - $jbeg + 1;
3206 655 100       1928 next if ( $nlines <= 1 );
3207              
3208             # loop over lines in a subgroup
3209 564         1680 foreach my $jj ( $jbeg .. $jend ) {
3210              
3211 1807         2837 $line_m = $line;
3212 1807         2715 $rtokens_m = $rtokens;
3213 1807         2545 $rpatterns_m = $rpatterns;
3214 1807         2457 $rfield_lengths_m = $rfield_lengths;
3215 1807         2510 $imax_m = $imax;
3216 1807         2719 $list_type_m = $list_type;
3217 1807         3630 $ci_level_m = $ci_level;
3218              
3219 1807         3063 $line = $rnew_lines->[$jj];
3220 1807         3043 $rtokens = $line->{'rtokens'};
3221 1807         2942 $rpatterns = $line->{'rpatterns'};
3222 1807         2887 $rfield_lengths = $line->{'rfield_lengths'};
3223 1807         2510 $imax = @{$rtokens} - 2;
  1807         2869  
3224 1807         3115 $list_type = $line->{'list_type'};
3225 1807         2890 $ci_level = $line->{'ci_level'};
3226              
3227             # nothing to do for first line
3228 1807 100       4088 next if ( $jj == $jbeg );
3229              
3230 1243         2915 my $ci_jump = $ci_level - $ci_level_m;
3231              
3232 1243 100       3225 my $imax_min = $imax_m < $imax ? $imax_m : $imax;
3233              
3234 1243         2120 my $imax_align = -1;
3235              
3236             # find number of leading common tokens
3237              
3238             #---------------------------------
3239             # No match to hanging side comment
3240             #---------------------------------
3241 1243 50 100     5011 if ( $line->{'is_hanging_side_comment'} ) {
    100          
3242              
3243             # Should not get here; HSC's have been filtered out
3244 0         0 $imax_align = -1;
3245             }
3246              
3247             #-----------------------------
3248             # Handle comma-separated lists
3249             #-----------------------------
3250             elsif ( $list_type && $list_type eq $list_type_m ) {
3251              
3252             # do not align lists across a ci jump with new list method
3253 488 50       1224 if ($ci_jump) { $imax_min = -1 }
  0         0  
3254              
3255 488         898 my $i_nomatch = $imax_min + 1;
3256 488         1024 foreach my $i ( 0 .. $imax_min ) {
3257 883         1475 my $tok = $rtokens->[$i];
3258 883         1407 my $tok_m = $rtokens_m->[$i];
3259 883 50       2111 if ( $tok ne $tok_m ) {
3260 0         0 $i_nomatch = $i;
3261 0         0 last;
3262             }
3263             }
3264              
3265 488         884 $imax_align = $i_nomatch - 1;
3266             }
3267              
3268             #-----------------
3269             # Handle non-lists
3270             #-----------------
3271             else {
3272 755         1486 my $i_nomatch = $imax_min + 1;
3273 755         1662 foreach my $i ( 0 .. $imax_min ) {
3274 745         1455 my $tok = $rtokens->[$i];
3275 745         1243 my $tok_m = $rtokens_m->[$i];
3276 745 100       1637 if ( $tok ne $tok_m ) {
3277 19         71 $i_nomatch = $i;
3278 19         67 last;
3279             }
3280              
3281 726         1385 my $pat = $rpatterns->[$i];
3282 726         1172 my $pat_m = $rpatterns_m->[$i];
3283              
3284             # If patterns don't match, we have to be careful...
3285 726 100       1828 if ( $pat_m ne $pat ) {
3286 166         409 my $pad =
3287             $rfield_lengths->[$i] - $rfield_lengths_m->[$i];
3288 166         517 my ( $match_code, $rmsg ) =
3289             compare_patterns( $group_level,
3290             $tok, $tok_m, $pat, $pat_m, $pad );
3291 166 100       580 if ($match_code) {
3292 8 100       26 if ( $match_code == 1 ) { $i_nomatch = $i }
  7 50       15  
3293 1         3 elsif ( $match_code == 2 ) { $i_nomatch = 0 }
3294             else { } ##ok
3295 8         22 last;
3296             }
3297             }
3298             }
3299 755         1353 $imax_align = $i_nomatch - 1;
3300             }
3301              
3302 1243         3241 $line_m->{'imax_pair'} = $imax_align;
3303              
3304             } ## end loop over lines
3305              
3306             # Put fence at end of subgroup
3307 564         1946 $line->{'imax_pair'} = -1;
3308              
3309             } ## end loop over subgroups
3310              
3311             # if there are hanging side comments, propagate the pair info down to them
3312             # so that lines can just look back one line for their pair info.
3313 583 100       1098 if ( @{$rlines} > @{$rnew_lines} ) {
  583         1245  
  583         1854  
3314 24         59 my $last_pair_info = -1;
3315 24         54 foreach my $line ( @{$rlines} ) {
  24         90  
3316 95 100       204 if ( $line->{'is_hanging_side_comment'} ) {
3317 39         106 $line->{'imax_pair'} = $last_pair_info;
3318             }
3319             else {
3320 56         123 $last_pair_info = $line->{'imax_pair'};
3321             }
3322             }
3323             }
3324 583         1392 return;
3325             } ## end sub match_line_pairs
3326              
3327             sub compare_patterns {
3328              
3329 166     166 0 562 my ( $group_level, $tok, $tok_m, $pat, $pat_m, $pad ) = @_;
3330              
3331             # helper routine for sub match_line_pairs to decide if patterns in two
3332             # lines match well enough..Given
3333             # $tok_m, $pat_m = token and pattern of first line
3334             # $tok, $pat = token and pattern of second line
3335             # $pad = 0 if no padding is needed, !=0 otherwise
3336             # return code:
3337             # 0 = patterns match, continue
3338             # 1 = no match
3339             # 2 = no match, and lines do not match at all
3340              
3341 166         317 my $GoToMsg = EMPTY_STRING;
3342 166         278 my $return_code = 0;
3343              
3344 39     39   426 use constant EXPLAIN_COMPARE_PATTERNS => 0;
  39         128  
  39         49707  
3345              
3346 166         602 my ( $alignment_token, $lev, $tag, $tok_count ) =
3347             decode_alignment_token($tok);
3348              
3349             # We have to be very careful about aligning commas
3350             # when the pattern's don't match, because it can be
3351             # worse to create an alignment where none is needed
3352             # than to omit one. Here's an example where the ','s
3353             # are not in named containers. The first line below
3354             # should not match the next two:
3355             # ( $a, $b ) = ( $b, $r );
3356             # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
3357             # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
3358 166 100       891 if ( $alignment_token eq ',' ) {
    100          
    100          
3359              
3360             # do not align commas unless they are in named
3361             # containers
3362 26 100       145 if ( $tok !~ /[A-Za-z]/ ) {
3363 3         8 $return_code = 1;
3364 3         6 $GoToMsg = "do not align commas in unnamed containers";
3365             }
3366             else {
3367 23         50 $return_code = 0;
3368             }
3369             }
3370              
3371             # do not align parens unless patterns match;
3372             # large ugly spaces can occur in math expressions.
3373             elsif ( $alignment_token eq '(' ) {
3374              
3375             # But we can allow a match if the parens don't
3376             # require any padding.
3377 4 50       14 if ( $pad != 0 ) {
3378 4         9 $return_code = 1;
3379 4         11 $GoToMsg = "do not align '(' unless patterns match or pad=0";
3380             }
3381             else {
3382 0         0 $return_code = 0;
3383             }
3384             }
3385              
3386             # Handle an '=' alignment with different patterns to
3387             # the left.
3388             elsif ( $alignment_token eq '=' ) {
3389              
3390             # It is best to be a little restrictive when
3391             # aligning '=' tokens. Here is an example of
3392             # two lines that we will not align:
3393             # my $variable=6;
3394             # $bb=4;
3395             # The problem is that one is a 'my' declaration,
3396             # and the other isn't, so they're not very similar.
3397             # We will filter these out by comparing the first
3398             # letter of the pattern. This is crude, but works
3399             # well enough.
3400 16 50       151 if ( substr( $pat_m, 0, 1 ) ne substr( $pat, 0, 1 ) ) {
    100          
3401 0         0 $GoToMsg = "first character before equals differ";
3402 0         0 $return_code = 1;
3403             }
3404              
3405             # The introduction of sub 'prune_alignment_tree'
3406             # enabled alignment of lists left of the equals with
3407             # other scalar variables. For example:
3408             # my ( $D, $s, $e ) = @_;
3409             # my $d = length $D;
3410             # my $c = $e - $s - $d;
3411              
3412             # But this would change formatting of a lot of scripts,
3413             # so for now we prevent alignment of comma lists on the
3414             # left with scalars on the left. We will also prevent
3415             # any partial alignments.
3416              
3417             # set return code 2 if the = is at line level, but
3418             # set return code 1 if the = is below line level, i.e.
3419             # sub new { my ( $p, $v ) = @_; bless \$v, $p }
3420             # sub iter { my ($x) = @_; return undef if $$x < 0; return $$x--; }
3421              
3422             elsif ( ( index( $pat_m, ',' ) >= 0 ) ne ( index( $pat, ',' ) >= 0 ) ) {
3423 1         3 $GoToMsg = "mixed commas/no-commas before equals";
3424 1         2 $return_code = 1;
3425 1 50       5 if ( $lev eq $group_level ) {
3426 1         2 $return_code = 2;
3427             }
3428             }
3429             else {
3430 15         41 $return_code = 0;
3431             }
3432             }
3433             else {
3434 120         234 $return_code = 0;
3435             }
3436              
3437             EXPLAIN_COMPARE_PATTERNS
3438             && $return_code
3439 166         263 && print {*STDOUT} "no match because $GoToMsg\n";
3440              
3441 166         454 return ( $return_code, \$GoToMsg );
3442              
3443             } ## end sub compare_patterns
3444              
3445             sub fat_comma_to_comma {
3446 765     765 0 1477 my ($str) = @_;
3447              
3448             # We are changing '=>' to ',' and removing any trailing decimal count
3449             # because currently fat commas have a count and commas do not.
3450             # For example, we will change '=>2+{-3.2' into ',2+{-3'
3451 765 100       2205 if ( $str =~ /^=>([^\.]*)/ ) { $str = ',' . $1 }
  181         507  
3452 765         1921 return $str;
3453             } ## end sub fat_comma_to_comma
3454              
3455             sub get_line_token_info {
3456              
3457             # scan lines of tokens and return summary information about the range of
3458             # levels and patterns.
3459 154     154 0 428 my ($rlines) = @_;
3460              
3461             # First scan to check monotonicity. Here is an example of several
3462             # lines which are monotonic. The = is the lowest level, and
3463             # the commas are all one level deeper. So this is not nonmonotonic.
3464             # $$d{"weeks"} = [ "w", "wk", "wks", "week", "weeks" ];
3465             # $$d{"days"} = [ "d", "day", "days" ];
3466             # $$d{"hours"} = [ "h", "hr", "hrs", "hour", "hours" ];
3467 154         317 my @all_token_info;
3468 154         336 my $all_monotonic = 1;
3469 154         351 foreach my $jj ( 0 .. @{$rlines} - 1 ) {
  154         510  
3470 627         1213 my ($line) = $rlines->[$jj];
3471 627         1116 my $rtokens = $line->{'rtokens'};
3472 627         888 my $last_lev;
3473 627         971 my $is_monotonic = 1;
3474 627         961 my $i = -1;
3475 627         913 foreach my $tok ( @{$rtokens} ) {
  627         1196  
3476 1649         2260 $i++;
3477 1649         2940 my ( $raw_tok, $lev, $tag, $tok_count ) =
3478             decode_alignment_token($tok);
3479 1649         2683 push @{ $all_token_info[$jj] },
  1649         5461  
3480             [ $raw_tok, $lev, $tag, $tok_count ];
3481 1649 100       3769 last if ( $tok eq '#' );
3482 1022 100 100     3060 if ( $i > 0 && $lev < $last_lev ) { $is_monotonic = 0 }
  81         189  
3483 1022         1773 $last_lev = $lev;
3484             }
3485 627 100       1966 if ( !$is_monotonic ) { $all_monotonic = 0 }
  78         206  
3486             }
3487              
3488 154         663 my $rline_values = [];
3489 154         497 foreach my $jj ( 0 .. @{$rlines} - 1 ) {
  154         558  
3490 627         1228 my ($line) = $rlines->[$jj];
3491              
3492 627         1145 my $rtokens = $line->{'rtokens'};
3493 627         937 my $i = -1;
3494 627         999 my ( $lev_min, $lev_max );
3495 627         1717 my $token_pattern_max = EMPTY_STRING;
3496 627         928 my %saw_level;
3497 627         931 my $is_monotonic = 1;
3498              
3499             # find the index of the last token before the side comment
3500 627         893 my $imax = @{$rtokens} - 2;
  627         1122  
3501 627         1005 my $imax_true = $imax;
3502              
3503             # If the entire group is monotonic, and the line ends in a comma list,
3504             # walk it back to the first such comma. this will have the effect of
3505             # making all trailing ragged comma lists match in the prune tree
3506             # routine. these trailing comma lists can better be handled by later
3507             # alignment rules.
3508              
3509             # Treat fat commas the same as commas here by converting them to
3510             # commas. This will improve the chance of aligning the leading parts
3511             # of ragged lists.
3512              
3513 627         1637 my $tok_end = fat_comma_to_comma( $rtokens->[$imax] );
3514 627 100 100     2593 if ( $all_monotonic && $tok_end =~ /^,/ ) {
3515 142         294 my $ii = $imax - 1;
3516 142   100     529 while ( $ii >= 0
3517             && fat_comma_to_comma( $rtokens->[$ii] ) eq $tok_end )
3518             {
3519 93         184 $imax = $ii;
3520 93         207 $ii--;
3521             }
3522             }
3523              
3524             # make a first pass to find level range
3525 627         1008 my $last_lev;
3526 627         1048 foreach my $tok ( @{$rtokens} ) {
  627         1220  
3527 1556         2474 $i++;
3528 1556 100       3005 last if ( $i > $imax );
3529 929 50       1845 last if ( $tok eq '#' );
3530             my ( $raw_tok, $lev, $tag, $tok_count ) =
3531 929         1329 @{ $all_token_info[$jj]->[$i] };
  929         2127  
3532              
3533 929 50       1834 last if ( $tok eq '#' );
3534 929         1506 $token_pattern_max .= $tok;
3535 929         1639 $saw_level{$lev}++;
3536 929 100       1818 if ( !defined($lev_min) ) {
3537 527         871 $lev_min = $lev;
3538 527         786 $lev_max = $lev;
3539             }
3540             else {
3541 402 100       1002 if ( $lev < $lev_min ) { $lev_min = $lev; }
  51         129  
3542 402 100       827 if ( $lev > $lev_max ) { $lev_max = $lev; }
  122         240  
3543 402 100       783 if ( $lev < $last_lev ) { $is_monotonic = 0 }
  81         136  
3544             }
3545 929         1592 $last_lev = $lev;
3546             }
3547              
3548             # handle no levels
3549 627         1286 my $rtoken_patterns = {};
3550 627         1047 my $rtoken_indexes = {};
3551 627         2485 my @levs = sort keys %saw_level;
3552 627 100       2028 if ( !defined($lev_min) ) {
    100          
3553 100         226 $lev_min = -1;
3554 100         195 $lev_max = -1;
3555 100         254 $levs[0] = -1;
3556 100         377 $rtoken_patterns->{$lev_min} = EMPTY_STRING;
3557 100         337 $rtoken_indexes->{$lev_min} = [];
3558             }
3559              
3560             # handle one level
3561             elsif ( $lev_max == $lev_min ) {
3562 359         945 $rtoken_patterns->{$lev_max} = $token_pattern_max;
3563 359         1163 $rtoken_indexes->{$lev_max} = [ ( 0 .. $imax ) ];
3564             }
3565              
3566             # handle multiple levels
3567             else {
3568 168         1158 $rtoken_patterns->{$lev_max} = $token_pattern_max;
3569 168         692 $rtoken_indexes->{$lev_max} = [ ( 0 .. $imax ) ];
3570              
3571 168         420 my $lev_top = pop @levs; # already did max level
3572 168         319 my $itok = -1;
3573 168         272 foreach my $tok ( @{$rtokens} ) {
  168         361  
3574 704         970 $itok++;
3575 704 100       1390 last if ( $itok > $imax );
3576             my ( $raw_tok, $lev, $tag, $tok_count ) =
3577 536         732 @{ $all_token_info[$jj]->[$itok] };
  536         1182  
3578 536 50       1075 last if ( $raw_tok eq '#' );
3579 536         865 foreach my $lev_test (@levs) {
3580 564 100       1274 next if ( $lev > $lev_test );
3581 280         622 $rtoken_patterns->{$lev_test} .= $tok;
3582 280         422 push @{ $rtoken_indexes->{$lev_test} }, $itok;
  280         825  
3583             }
3584             }
3585 168         449 push @levs, $lev_top;
3586             }
3587              
3588 627         1075 push @{$rline_values},
  627         3505  
3589             [
3590             $lev_min, $lev_max, $rtoken_patterns, \@levs,
3591             $rtoken_indexes, $is_monotonic, $imax_true, $imax,
3592             ];
3593              
3594             # debug
3595 627         1888 0 && do {
3596             local $LIST_SEPARATOR = ')(';
3597             print "lev_min=$lev_min, lev_max=$lev_max, levels=(@levs)\n";
3598             foreach my $key ( sort keys %{$rtoken_patterns} ) {
3599             print "$key => $rtoken_patterns->{$key}\n";
3600             print "$key => @{$rtoken_indexes->{$key}}\n";
3601             }
3602             };
3603             } ## end loop over lines
3604 154         1330 return ( $rline_values, $all_monotonic );
3605             } ## end sub get_line_token_info
3606              
3607             sub prune_alignment_tree {
3608 154     154 0 475 my ($rlines) = @_;
3609 154         351 my $jmax = @{$rlines} - 1;
  154         437  
3610 154 50       574 return if ( $jmax <= 0 );
3611              
3612             # Vertical alignment in perltidy is done as an iterative process. The
3613             # starting point is to mark all possible alignment tokens ('=', ',', '=>',
3614             # etc) for vertical alignment. Then we have to delete all alignments
3615             # which, if actually made, would detract from overall alignment. This
3616             # is done in several phases of which this is one.
3617              
3618             # In this routine we look at the alignments of a group of lines as a
3619             # hierarchical tree. We will 'prune' the tree to limited depths if that
3620             # will improve overall alignment at the lower depths.
3621             # For each line we will be looking at its alignment patterns down to
3622             # different fixed depths. For each depth, we include all lower depths and
3623             # ignore all higher depths. We want to see if we can get alignment of a
3624             # larger group of lines if we ignore alignments at some lower depth.
3625             # Here is an # example:
3626              
3627             # for (
3628             # [ '$var', sub { join $_, "bar" }, 0, "bar" ],
3629             # [ 'CONSTANT', sub { join "foo", "bar" }, 0, "bar" ],
3630             # [ 'CONSTANT', sub { join "foo", "bar", 3 }, 1, "barfoo3" ],
3631             # [ '$myvar', sub { my $var; join $var, "bar" }, 0, "bar" ],
3632             # );
3633              
3634             # In the above example, all lines have three commas at the lowest depth
3635             # (zero), so if there were no other alignments, these lines would all
3636             # align considering only the zero depth alignment token. But some lines
3637             # have additional comma alignments at the next depth, so we need to decide
3638             # if we should drop those to keep the top level alignments, or keep those
3639             # for some additional low level alignments at the expense losing some top
3640             # level alignments. In this case we will drop the deeper level commas to
3641             # keep the entire collection aligned. But in some cases the decision could
3642             # go the other way.
3643              
3644             # The tree for this example at the zero depth has one node containing
3645             # all four lines, since they are identical at zero level (three commas).
3646             # At depth one, there are three 'children' nodes, namely:
3647             # - lines 1 and 2, which have a single comma in the 'sub' at depth 1
3648             # - line 3, which has 2 commas at depth 1
3649             # - line4, which has a ';' and a ',' at depth 1
3650             # There are no deeper alignments in this example.
3651             # so the tree structure for this example is:
3652             #
3653             # depth 0 depth 1 depth 2
3654             # [lines 1-4] -- [line 1-2] - (empty)
3655             # | [line 3] - (empty)
3656             # | [line 4] - (empty)
3657              
3658             # We can carry this to any depth, but it is not really useful to go below
3659             # depth 2. To cleanly stop there, we will consider depth 2 to contain all
3660             # alignments at depth >=2.
3661              
3662 39     39   372 use constant EXPLAIN_PRUNE => 0;
  39         87  
  39         54050  
3663              
3664             #-------------------------------------------------------------------
3665             # Prune Tree Step 1. Start by scanning the lines and collecting info
3666             #-------------------------------------------------------------------
3667              
3668             # Note that the caller had this info but we have to redo this now because
3669             # alignment tokens may have been deleted.
3670 154         655 my ( $rline_values, $all_monotonic ) = get_line_token_info($rlines);
3671              
3672             # If all the lines have levels which increase monotonically from left to
3673             # right, then the sweep-left-to-right pass can do a better job of alignment
3674             # than pruning, and without deleting alignments.
3675 154 100       1007 return if ($all_monotonic);
3676              
3677             # Contents of $rline_values
3678             # [
3679             # $lev_min, $lev_max, $rtoken_patterns, \@levs,
3680             # $rtoken_indexes, $is_monotonic, $imax_true, $imax,
3681             # ];
3682              
3683             # We can work to any depth, but there is little advantage to working
3684             # to a a depth greater than 2
3685 31         106 my $MAX_DEPTH = 2;
3686              
3687             # This arrays will hold the tree of alignment tokens at different depths
3688             # for these lines.
3689 31         66 my @match_tree;
3690              
3691             # Tree nodes contain these values:
3692             # $match_tree[$depth] = [$jbeg, $jend, $n_parent, $level, $pattern,
3693             # $nc_beg_p, $nc_end_p, $rindexes];
3694             # where
3695             # $depth = 0,1,2 = index of depth of the match
3696              
3697             # $jbeg beginning index j of the range of lines in this match
3698             # $jend ending index j of the range of lines in this match
3699             # $n_parent = index of the containing group at $depth-1, if it exists
3700             # $level = actual level of code being matched in this group
3701             # $pattern = alignment pattern being matched
3702             # $nc_beg_p = first child
3703             # $nc_end_p = last child
3704             # $rindexes = ref to token indexes
3705              
3706             # the patterns and levels of the current group being formed at each depth
3707 31         165 my ( @token_patterns_current, @levels_current, @token_indexes_current );
3708              
3709             # the patterns and levels of the next line being tested at each depth
3710 31         0 my ( @token_patterns_next, @levels_next, @token_indexes_next );
3711              
3712             #-----------------------------------------------------------
3713             # define a recursive worker subroutine for tree construction
3714             #-----------------------------------------------------------
3715              
3716             # This is a recursive routine which is called if a match condition changes
3717             # at any depth when a new line is encountered. It ends the match node
3718             # which changed plus all deeper nodes attached to it.
3719 31         0 my $end_node;
3720             $end_node = sub {
3721 321     321   616 my ( $depth, $jl, $n_parent ) = @_;
3722              
3723             # $depth is the tree depth
3724             # $jl is the index of the line
3725             # $n_parent is index of the parent node of this node
3726              
3727 321 100       633 return if ( $depth > $MAX_DEPTH );
3728              
3729             # end any current group at this depth
3730 234 100 100     784 if ( $jl >= 0
      66        
      100        
3731             && defined( $match_tree[$depth] )
3732 75         370 && @{ $match_tree[$depth] }
3733             && defined( $levels_current[$depth] ) )
3734             {
3735 69         166 $match_tree[$depth]->[-1]->[1] = $jl;
3736             }
3737              
3738             # Define the index of the node we will create below
3739 234         366 my $ng_self = 0;
3740 234 100       470 if ( defined( $match_tree[$depth] ) ) {
3741 75         134 $ng_self = @{ $match_tree[$depth] };
  75         145  
3742             }
3743              
3744             # end any next deeper child node(s)
3745 234         783 $end_node->( $depth + 1, $jl, $ng_self );
3746              
3747             # update the levels being matched
3748 234         489 $token_patterns_current[$depth] = $token_patterns_next[$depth];
3749 234         395 $token_indexes_current[$depth] = $token_indexes_next[$depth];
3750 234         427 $levels_current[$depth] = $levels_next[$depth];
3751              
3752             # Do not start a new group at this level if it is not being used
3753 234 100 66     1031 if ( !defined( $levels_next[$depth] )
      66        
3754             || $depth > 0
3755             && $levels_next[$depth] <= $levels_next[ $depth - 1 ] )
3756             {
3757 120         200 return;
3758             }
3759              
3760             # Create a node for the next group at this depth. We initially assume
3761             # that it will continue to $jmax, and correct that later if the node
3762             # ends earlier.
3763 114         195 push @{ $match_tree[$depth] },
  114         530  
3764             [
3765             $jl + 1, $jmax, $n_parent, $levels_current[$depth],
3766             $token_patterns_current[$depth],
3767             undef, undef, $token_indexes_current[$depth],
3768             ];
3769              
3770 114         246 return;
3771 31         296 }; ## end sub end_node
3772              
3773             #-----------------------------------------------------
3774             # Prune Tree Step 2. Loop to form the tree of matches.
3775             #-----------------------------------------------------
3776 31         140 foreach my $jp ( 0 .. $jmax ) {
3777              
3778             # working with two adjacent line indexes, 'm'=minus, 'p'=plus
3779 236         378 my $jm = $jp - 1;
3780              
3781             # Pull out needed values for the next line
3782             my ( $lev_min, $lev_max, $rtoken_patterns, $rlevs, $rtoken_indexes,
3783             $is_monotonic, $imax_true, $imax )
3784 236         330 = @{ $rline_values->[$jp] };
  236         619  
3785              
3786             # Transfer levels and patterns for this line to the working arrays.
3787             # If the number of levels differs from our chosen MAX_DEPTH ...
3788             # if fewer than MAX_DEPTH: leave levels at missing depths undefined
3789             # if more than MAX_DEPTH: set the MAX_DEPTH level to be the maximum
3790 236         421 @levels_next = @{$rlevs}[ 0 .. $MAX_DEPTH ];
  236         531  
3791 236 100       363 if ( @{$rlevs} > $MAX_DEPTH ) {
  236         500  
3792 5         12 $levels_next[$MAX_DEPTH] = $rlevs->[-1];
3793             }
3794 236         376 my $depth = 0;
3795 236         388 foreach my $item (@levels_next) {
3796             $token_patterns_next[$depth] =
3797 708 100       1327 defined($item) ? $rtoken_patterns->{$item} : undef;
3798             $token_indexes_next[$depth] =
3799 708 100       1206 defined($item) ? $rtoken_indexes->{$item} : undef;
3800 708         1008 $depth++;
3801             }
3802              
3803             # Look for a change in match groups...
3804              
3805             # Initialize on the first line
3806 236 100       880 if ( $jp == 0 ) {
    100          
    50          
3807 31         69 my $n_parent;
3808 31         114 $end_node->( 0, $jm, $n_parent );
3809             }
3810              
3811             # End groups if a hard flag has been set
3812             elsif ( $rlines->[$jm]->{'end_group'} ) {
3813 10         35 my $n_parent;
3814 10         42 $end_node->( 0, $jm, $n_parent );
3815             }
3816              
3817             # Continue at hanging side comment
3818             elsif ( $rlines->[$jp]->{'is_hanging_side_comment'} ) {
3819 0         0 next;
3820             }
3821              
3822             # Otherwise see if anything changed and update the tree if so
3823             else {
3824 195         497 foreach my $depth ( 0 .. $MAX_DEPTH ) {
3825              
3826 401         665 my $def_current = defined( $token_patterns_current[$depth] );
3827 401         557 my $def_next = defined( $token_patterns_next[$depth] );
3828 401 100 100     1034 last if ( !$def_current && !$def_next );
3829 253 100 100     1148 if ( !$def_current
      100        
3830             || !$def_next
3831             || $token_patterns_current[$depth] ne
3832             $token_patterns_next[$depth] )
3833             {
3834 46         89 my $n_parent;
3835 46 100 66     231 if ( $depth > 0 && defined( $match_tree[ $depth - 1 ] ) ) {
3836 23         98 $n_parent = @{ $match_tree[ $depth - 1 ] } - 1;
  23         58  
3837             }
3838 46         146 $end_node->( $depth, $jm, $n_parent );
3839 46         107 last;
3840             }
3841             }
3842             }
3843             } ## end loop to form tree of matches
3844              
3845             #---------------------------------------------------------
3846             # Prune Tree Step 3. Make links from parent to child nodes
3847             #---------------------------------------------------------
3848              
3849             # It seemed cleaner to do this as a separate step rather than during tree
3850             # construction. The children nodes have links up to the parent node which
3851             # created them. Now make links in the opposite direction, so the parents
3852             # can find the children. We store the range of children nodes ($nc_beg,
3853             # $nc_end) of each parent with two additional indexes in the original array.
3854             # These will be undef if no children.
3855 31         245 foreach my $depth ( reverse( 1 .. $MAX_DEPTH ) ) {
3856 62 100       198 next unless defined( $match_tree[$depth] );
3857 32         71 my $nc_max = @{ $match_tree[$depth] } - 1;
  32         124  
3858 32         70 my $np_now;
3859 32         110 foreach my $nc ( 0 .. $nc_max ) {
3860 50         106 my $np = $match_tree[$depth]->[$nc]->[2];
3861 50 50       156 if ( !defined($np) ) {
3862              
3863             # shouldn't happen
3864             #print STDERR "lost child $np at depth $depth\n";
3865 0         0 next;
3866             }
3867 50 100 100     192 if ( !defined($np_now) || $np != $np_now ) {
3868 35         76 $np_now = $np;
3869 35         107 $match_tree[ $depth - 1 ]->[$np]->[5] = $nc;
3870             }
3871 50         146 $match_tree[ $depth - 1 ]->[$np]->[6] = $nc;
3872             }
3873             } ## end loop to make links down to the child nodes
3874              
3875 31         64 EXPLAIN_PRUNE > 0 && do {
3876             print "Tree complete. Found these groups:\n";
3877             foreach my $depth ( 0 .. $MAX_DEPTH ) {
3878             Dump_tree_groups( \@{ $match_tree[$depth] }, "depth=$depth" );
3879             }
3880             };
3881              
3882             #------------------------------------------------------
3883             # Prune Tree Step 4. Make a list of nodes to be deleted
3884             #------------------------------------------------------
3885              
3886             # list of lines with tokens to be deleted:
3887             # [$jbeg, $jend, $level_keep]
3888             # $jbeg..$jend is the range of line indexes,
3889             # $level_keep is the minimum level to keep
3890 31         126 my @delete_list;
3891              
3892             # Not currently used:
3893             # Groups with ending comma lists and their range of sizes:
3894             # $ragged_comma_group{$id} = [ imax_group_min, imax_group_max ]
3895             ## my %ragged_comma_group;
3896              
3897             # We work with a list of nodes to visit at the next deeper depth.
3898             my @todo_list;
3899 31 50       124 if ( defined( $match_tree[0] ) ) {
3900 31         77 @todo_list = ( 0 .. @{ $match_tree[0] } - 1 );
  31         110  
3901             }
3902              
3903 31         98 foreach my $depth ( 0 .. $MAX_DEPTH ) {
3904 86 100       241 last if ( !@todo_list );
3905 55         93 my @todo_next;
3906 55         139 foreach my $np (@todo_list) {
3907             my ( $jbeg_p, $jend_p, $np_p, $lev_p, $pat_p, $nc_beg_p, $nc_end_p,
3908             $rindexes_p )
3909 92         151 = @{ $match_tree[$depth]->[$np] };
  92         278  
3910 92         177 my $nlines_p = $jend_p - $jbeg_p + 1;
3911              
3912             # nothing to do if no children
3913 92 100       243 next unless defined($nc_beg_p);
3914              
3915             # Define the number of lines to either keep or delete a child node.
3916             # This is the key decision we have to make. We want to delete
3917             # short runs of matched lines, and keep long runs. It seems easier
3918             # for the eye to follow breaks in monotonic level changes than
3919             # non-monotonic level changes. For example, the following looks
3920             # best if we delete the lower level alignments:
3921              
3922             # [1] ~~ [];
3923             # [ ["foo"], ["bar"] ] ~~ [ qr/o/, qr/a/ ];
3924             # [ qr/o/, qr/a/ ] ~~ [ ["foo"], ["bar"] ];
3925             # [ "foo", "bar" ] ~~ [ qr/o/, qr/a/ ];
3926             # [ qr/o/, qr/a/ ] ~~ [ "foo", "bar" ];
3927             # $deep1 ~~ $deep1;
3928              
3929             # So we will use two thresholds.
3930 35         84 my $nmin_mono = $depth + 2;
3931 35         80 my $nmin_non_mono = $depth + 6;
3932 35 100       116 if ( $nmin_mono > $nlines_p - 1 ) {
3933 21         44 $nmin_mono = $nlines_p - 1;
3934             }
3935 35 100       131 if ( $nmin_non_mono > $nlines_p - 1 ) {
3936 31         69 $nmin_non_mono = $nlines_p - 1;
3937             }
3938              
3939             # loop to keep or delete each child node
3940 35         124 foreach my $nc ( $nc_beg_p .. $nc_end_p ) {
3941             my ( $jbeg_c, $jend_c, $np_c, $lev_c, $pat_c, $nc_beg_c,
3942             $nc_end_c )
3943 50         100 = @{ $match_tree[ $depth + 1 ]->[$nc] };
  50         187  
3944 50         121 my $nlines_c = $jend_c - $jbeg_c + 1;
3945 50         99 my $is_monotonic = $rline_values->[$jbeg_c]->[5];
3946 50 100       136 my $nmin = $is_monotonic ? $nmin_mono : $nmin_non_mono;
3947 50 100       160 if ( $nlines_c < $nmin ) {
3948             ##print "deleting child, nlines=$nlines_c, nmin=$nmin\n";
3949 22         80 push @delete_list, [ $jbeg_c, $jend_c, $lev_p ];
3950             }
3951             else {
3952             ##print "keeping child, nlines=$nlines_c, nmin=$nmin\n";
3953 28         112 push @todo_next, $nc;
3954             }
3955             }
3956             }
3957 55         154 @todo_list = @todo_next;
3958             } ## end loop to mark nodes to delete
3959              
3960             #------------------------------------------------------------
3961             # Prune Tree Step 5. Loop to delete selected alignment tokens
3962             #------------------------------------------------------------
3963 31         194 foreach my $item (@delete_list) {
3964 22         36 my ( $jbeg, $jend, $level_keep ) = @{$item};
  22         61  
3965 22         55 foreach my $jj ( $jbeg .. $jend ) {
3966 28         44 my $line = $rlines->[$jj];
3967 28         49 my @idel;
3968 28         55 my $rtokens = $line->{'rtokens'};
3969 28         41 my $imax = @{$rtokens} - 2;
  28         54  
3970 28         71 foreach my $i ( 0 .. $imax ) {
3971 152         226 my $tok = $rtokens->[$i];
3972 152         244 my ( $raw_tok, $lev, $tag, $tok_count ) =
3973             decode_alignment_token($tok);
3974 152 100       344 if ( $lev > $level_keep ) {
3975 83         170 push @idel, $i;
3976             }
3977             }
3978 28 50       78 if (@idel) {
3979 28         79 delete_selected_tokens( $line, \@idel );
3980             }
3981             }
3982             } ## end loop to delete selected alignment tokens
3983              
3984 31         377 return;
3985             } ## end sub prune_alignment_tree
3986              
3987             sub Dump_tree_groups {
3988 0     0 0 0 my ( $rgroup, $msg ) = @_;
3989              
3990             # Debug routine
3991 0         0 print "$msg\n";
3992 0         0 local $LIST_SEPARATOR = ')(';
3993 0         0 foreach my $item ( @{$rgroup} ) {
  0         0  
3994 0         0 my @fix = @{$item};
  0         0  
3995 0 0       0 foreach my $val (@fix) { $val = "undef" unless defined $val; }
  0         0  
3996 0         0 $fix[4] = "...";
3997 0         0 print "(@fix)\n";
3998             }
3999 0         0 return;
4000             } ## end sub Dump_tree_groups
4001              
4002             { ## closure for sub is_marginal_match
4003              
4004             my %is_if_or;
4005             my %is_assignment;
4006             my %is_good_alignment;
4007              
4008             # This test did not give sufficiently better results to use as an update,
4009             # but the flag is worth keeping as a starting point for future testing.
4010 39     39   392 use constant TEST_MARGINAL_EQ_ALIGNMENT => 0;
  39         150  
  39         6292  
4011              
4012             BEGIN {
4013              
4014 39     39   227 my @q = qw(
4015             if unless or ||
4016             );
4017 39         204 @is_if_or{@q} = (1) x scalar(@q);
4018              
4019 39         232 @q = qw(
4020             = **= += *= &= <<= &&=
4021             -= /= |= >>= ||= //=
4022             .= %= ^=
4023             x=
4024             );
4025 39         385 @is_assignment{@q} = (1) x scalar(@q);
4026              
4027             # Vertically aligning on certain "good" tokens is usually okay
4028             # so we can be less restrictive in marginal cases.
4029 39         135 @q = qw( { ? => = );
4030 39         108 push @q, (',');
4031 39         222829 @is_good_alignment{@q} = (1) x scalar(@q);
4032             } ## end BEGIN
4033              
4034             sub is_marginal_match {
4035              
4036 256     256 0 818 my ( $line_0, $line_1, $group_level, $imax_align, $imax_prev ) = @_;
4037              
4038             # Decide if we should undo some or all of the common alignments of a
4039             # group of just two lines.
4040              
4041             # Given:
4042             # $line_0 and $line_1 - the two lines
4043             # $group_level = the indentation level of the group being processed
4044             # $imax_align = the maximum index of the common alignment tokens
4045             # of the two lines
4046             # $imax_prev = the maximum index of the common alignment tokens
4047             # with the line before $line_0 (=-1 of does not exist)
4048              
4049             # Return:
4050             # $is_marginal = true if the two lines should NOT be fully aligned
4051             # = false if the two lines can remain fully aligned
4052             # $imax_align = the index of the highest alignment token shared by
4053             # these two lines to keep if the match is marginal.
4054              
4055             # When we have an alignment group of just two lines like this, we are
4056             # working in the twilight zone of what looks good and what looks bad.
4057             # This routine is a collection of rules which work have been found to
4058             # work fairly well, but it will need to be updated from time to time.
4059              
4060 256         508 my $is_marginal = 0;
4061              
4062             #---------------------------------------
4063             # Always align certain special cases ...
4064             #---------------------------------------
4065 256 100 100     2170 if (
      100        
4066              
4067             # always keep alignments of a terminal else or ternary
4068             defined( $line_1->{'j_terminal_match'} )
4069              
4070             # always align lists
4071             || $line_0->{'list_type'}
4072              
4073             # always align hanging side comments
4074             || $line_1->{'is_hanging_side_comment'}
4075              
4076             )
4077             {
4078 127         430 return ( $is_marginal, $imax_align );
4079             }
4080              
4081 129         338 my $jmax_0 = $line_0->{'jmax'};
4082 129         352 my $jmax_1 = $line_1->{'jmax'};
4083 129         318 my $rtokens_1 = $line_1->{'rtokens'};
4084 129         273 my $rtokens_0 = $line_0->{'rtokens'};
4085 129         285 my $rfield_lengths_0 = $line_0->{'rfield_lengths'};
4086 129         257 my $rfield_lengths_1 = $line_1->{'rfield_lengths'};
4087 129         904 my $rpatterns_0 = $line_0->{'rpatterns'};
4088 129         297 my $rpatterns_1 = $line_1->{'rpatterns'};
4089 129         300 my $imax_next = $line_1->{'imax_pair'};
4090              
4091             # We will scan the alignment tokens and set a flag '$is_marginal' if
4092             # it seems that the an alignment would look bad.
4093 129         313 my $max_pad = 0;
4094 129         288 my $saw_good_alignment = 0;
4095 129         234 my $saw_if_or; # if we saw an 'if' or 'or' at group level
4096 129         281 my $raw_tokb = EMPTY_STRING; # first token seen at group level
4097 129         426 my $jfirst_bad;
4098             my $line_ending_fat_comma; # is last token just a '=>' ?
4099 129         0 my $j0_eq_pad;
4100 129         265 my $j0_max_pad = 0;
4101              
4102 129         481 foreach my $j ( 0 .. $jmax_1 - 2 ) {
4103 162         550 my ( $raw_tok, $lev, $tag, $tok_count ) =
4104             decode_alignment_token( $rtokens_1->[$j] );
4105 162 100 66     1084 if ( $raw_tok && $lev == $group_level ) {
4106 140 100       494 if ( !$raw_tokb ) { $raw_tokb = $raw_tok }
  119         269  
4107 140   100     697 $saw_if_or ||= $is_if_or{$raw_tok};
4108             }
4109              
4110             # When the first of the two lines ends in a bare '=>' this will
4111             # probably be marginal match. (For a bare =>, the next field length
4112             # will be 2 or 3, depending on side comment)
4113             $line_ending_fat_comma =
4114 162   100     979 $j == $jmax_1 - 2
4115             && $raw_tok eq '=>'
4116             && $rfield_lengths_0->[ $j + 1 ] <= 3;
4117              
4118 162         443 my $pad = $rfield_lengths_1->[$j] - $rfield_lengths_0->[$j];
4119 162 100       521 if ( $j == 0 ) {
4120             $pad += $line_1->{'leading_space_count'} -
4121 124         407 $line_0->{'leading_space_count'};
4122              
4123             # Remember the pad at a leading equals
4124 124 100 66     733 if ( $raw_tok eq '=' && $lev == $group_level ) {
4125 73         182 $j0_eq_pad = $pad;
4126 73         317 $j0_max_pad =
4127             0.5 * ( $rfield_lengths_1->[0] + $rfield_lengths_0->[0] );
4128 73 100       306 $j0_max_pad = 4 if ( $j0_max_pad < 4 );
4129             }
4130             }
4131              
4132 162 100       535 if ( $pad < 0 ) { $pad = -$pad }
  36         111  
4133 162 100       508 if ( $pad > $max_pad ) { $max_pad = $pad }
  89         189  
4134 162 100 100     871 if ( $is_good_alignment{$raw_tok} && !$line_ending_fat_comma ) {
4135 128         317 $saw_good_alignment = 1;
4136             }
4137             else {
4138 34 100       117 $jfirst_bad = $j unless defined($jfirst_bad);
4139             }
4140 162 100       663 if ( $rpatterns_0->[$j] ne $rpatterns_1->[$j] ) {
4141              
4142             # Flag this as a marginal match since patterns differ.
4143             # Normally, we will not allow just two lines to match if
4144             # marginal. But we can allow matching in some specific cases.
4145              
4146 33 100       150 $jfirst_bad = $j if ( !defined($jfirst_bad) );
4147 33 50       148 $is_marginal = 1 if ( $is_marginal == 0 );
4148 33 100       171 if ( $raw_tok eq '=' ) {
4149              
4150             # Here is an example of a marginal match:
4151             # $done{$$op} = 1;
4152             # $op = compile_bblock($op);
4153             # The left tokens are both identifiers, but
4154             # one accesses a hash and the other doesn't.
4155             # We'll let this be a tentative match and undo
4156             # it later if we don't find more than 2 lines
4157             # in the group.
4158 12         40 $is_marginal = 2;
4159             }
4160             }
4161             }
4162              
4163 129 50 66     735 $is_marginal = 1 if ( $is_marginal == 0 && $line_ending_fat_comma );
4164              
4165             # Turn off the "marginal match" flag in some cases...
4166             # A "marginal match" occurs when the alignment tokens agree
4167             # but there are differences in the other tokens (patterns).
4168             # If we leave the marginal match flag set, then the rule is that we
4169             # will align only if there are more than two lines in the group.
4170             # We will turn of the flag if we almost have a match
4171             # and either we have seen a good alignment token or we
4172             # just need a small pad (2 spaces) to fit. These rules are
4173             # the result of experimentation. Tokens which misaligned by just
4174             # one or two characters are annoying. On the other hand,
4175             # large gaps to less important alignment tokens are also annoying.
4176 129 100 100     555 if ( $is_marginal == 1
      100        
4177             && ( $saw_good_alignment || $max_pad < 3 ) )
4178             {
4179 17         39 $is_marginal = 0;
4180             }
4181              
4182             # We will use the line endings to help decide on alignments...
4183             # See if the lines end with semicolons...
4184 129         337 my $sc_term0;
4185             my $sc_term1;
4186 129 50 33     737 if ( $jmax_0 < 1 || $jmax_1 < 1 ) {
4187              
4188             # shouldn't happen
4189             }
4190             else {
4191 129         396 my $pat0 = $rpatterns_0->[ $jmax_0 - 1 ];
4192 129         362 my $pat1 = $rpatterns_1->[ $jmax_1 - 1 ];
4193 129         818 $sc_term0 = $pat0 =~ /;b?$/;
4194 129         605 $sc_term1 = $pat1 =~ /;b?$/;
4195             }
4196              
4197 129 100 100     759 if ( !$is_marginal && !$sc_term0 ) {
4198              
4199             # First line of assignment should be semicolon terminated.
4200             # For example, do not align here:
4201             # $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
4202             # $$href{-NUM_DIRS} = 0;
4203 30 100       153 if ( $is_assignment{$raw_tokb} ) {
4204 1         4 $is_marginal = 1;
4205             }
4206             }
4207              
4208             # Try to avoid some undesirable alignments of opening tokens
4209             # for example, the space between grep and { here:
4210             # return map { ( $_ => $_ ) }
4211             # grep { /$handles/ } $self->_get_delegate_method_list;
4212             $is_marginal ||=
4213 129   100     1314 ( $raw_tokb eq '(' || $raw_tokb eq '{' )
      100        
4214             && $jmax_1 == 2
4215             && $sc_term0 ne $sc_term1;
4216              
4217             #---------------------------------------
4218             # return if this is not a marginal match
4219             #---------------------------------------
4220 129 100       451 if ( !$is_marginal ) {
4221 111         557 return ( $is_marginal, $imax_align );
4222             }
4223              
4224             # Undo the marginal match flag in certain cases,
4225              
4226             # Two lines with a leading equals-like operator are allowed to
4227             # align if the patterns to the left of the equals are the same.
4228             # For example the following two lines are a marginal match but have
4229             # the same left side patterns, so we will align the equals.
4230             # my $orig = my $format = "^<<<<< ~~\n";
4231             # my $abc = "abc";
4232             # But these have a different left pattern so they will not be
4233             # aligned
4234             # $xmldoc .= $`;
4235             # $self->{'leftovers'} .= "<bx-seq:seq" . $';
4236              
4237             # First line semicolon terminated but second not, usually ok:
4238             # my $want = "'ab', 'a', 'b'";
4239             # my $got = join( ", ",
4240             # map { defined($_) ? "'$_'" : "undef" }
4241             # @got );
4242             # First line not semicolon terminated, Not OK to match:
4243             # $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
4244             # $$href{-NUM_DIRS} = 0;
4245 18         54 my $pat0 = $rpatterns_0->[0];
4246 18         50 my $pat1 = $rpatterns_1->[0];
4247              
4248             #---------------------------------------------------------
4249             # Turn off the marginal flag for some types of assignments
4250             #---------------------------------------------------------
4251 18 100       103 if ( $is_assignment{$raw_tokb} ) {
    50          
    50          
4252              
4253             # undo marginal flag if first line is semicolon terminated
4254             # and leading patters match
4255 13 100       47 if ($sc_term0) { # && $sc_term1) {
4256 12         39 $is_marginal = $pat0 ne $pat1;
4257             }
4258             }
4259             elsif ( $raw_tokb eq '=>' ) {
4260              
4261             # undo marginal flag if patterns match
4262 0   0     0 $is_marginal = $pat0 ne $pat1 || $line_ending_fat_comma;
4263             }
4264             elsif ( $raw_tokb eq '=~' ) {
4265              
4266             # undo marginal flag if both lines are semicolon terminated
4267             # and leading patters match
4268 0 0 0     0 if ( $sc_term1 && $sc_term0 ) {
4269 0         0 $is_marginal = $pat0 ne $pat1;
4270             }
4271             }
4272             else {
4273             ##ok: (none of the above)
4274             }
4275              
4276             #-----------------------------------------------------
4277             # Turn off the marginal flag if we saw an 'if' or 'or'
4278             #-----------------------------------------------------
4279              
4280             # A trailing 'if' and 'or' often gives a good alignment
4281             # For example, we can align these:
4282             # return -1 if $_[0] =~ m/^CHAPT|APPENDIX/;
4283             # return $1 + 0 if $_[0] =~ m/^SECT(\d*)$/;
4284              
4285             # or
4286             # $d_in_m[2] = 29 if ( &Date_LeapYear($y) );
4287             # $d = $d_in_m[$m] if ( $d > $d_in_m[$m] );
4288              
4289 18 100       76 if ($saw_if_or) {
4290              
4291             # undo marginal flag if both lines are semicolon terminated
4292 4 50 33     24 if ( $sc_term0 && $sc_term1 ) {
4293 4         12 $is_marginal = 0;
4294             }
4295             }
4296              
4297             # For a marginal match, only keep matches before the first 'bad' match
4298 18 50 100     175 if ( $is_marginal
      66        
4299             && defined($jfirst_bad)
4300             && $imax_align > $jfirst_bad - 1 )
4301             {
4302 0         0 $imax_align = $jfirst_bad - 1;
4303             }
4304              
4305             #----------------------------------------------------------
4306             # Allow sweep to match lines with leading '=' in some cases
4307             #----------------------------------------------------------
4308 18 100 66     146 if ( $imax_align < 0 && defined($j0_eq_pad) ) {
4309              
4310 13 0 50     137 if (
      33        
      33        
4311              
4312             # If there is a following line with leading equals, or
4313             # preceding line with leading equals, then let the sweep align
4314             # them without restriction. For example, the first two lines
4315             # here are a marginal match, but they are followed by a line
4316             # with leading equals, so the sweep-lr logic can align all of
4317             # the lines:
4318              
4319             # $date[1] = $month_to_num{ $date[1] }; # <--line_0
4320             # @xdate = split( /[:\/\s]/, $log->field('t') ); # <--line_1
4321             # $day = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
4322             # $time = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
4323              
4324             # Likewise, if we reverse the two pairs we want the same result
4325              
4326             # $day = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
4327             # $time = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
4328             # $date[1] = $month_to_num{ $date[1] }; # <--line_0
4329             # @xdate = split( /[:\/\s]/, $log->field('t') ); # <--line_1
4330              
4331             (
4332             $imax_next >= 0
4333             || $imax_prev >= 0
4334             || TEST_MARGINAL_EQ_ALIGNMENT
4335             )
4336             && $j0_eq_pad >= -$j0_max_pad
4337             && $j0_eq_pad <= $j0_max_pad
4338             )
4339             {
4340              
4341             # But do not do this if there is a comma before the '='.
4342             # For example, the first two lines below have commas and
4343             # therefore are not allowed to align with lines 3 & 4:
4344              
4345             # my ( $x, $y ) = $self->Size(); #<--line_0
4346             # my ( $left, $top, $right, $bottom ) = $self->Window(); #<--l_1
4347             # my $vx = $right - $left;
4348             # my $vy = $bottom - $top;
4349              
4350 0 0 0     0 if ( $rpatterns_0->[0] !~ /,/ && $rpatterns_1->[0] !~ /,/ ) {
4351 0         0 $imax_align = 0;
4352             }
4353             }
4354             }
4355              
4356 18         94 return ( $is_marginal, $imax_align );
4357             } ## end sub is_marginal_match
4358             } ## end closure for sub is_marginal_match
4359              
4360             sub get_extra_leading_spaces {
4361              
4362 376     376 0 1290 my ( $rlines, $rgroups ) = @_;
4363              
4364             #----------------------------------------------------------
4365             # Define any extra indentation space (for the -lp option).
4366             # Here is why:
4367             # If a list has side comments, sub scan_list must dump the
4368             # list before it sees everything. When this happens, it sets
4369             # the indentation to the standard scheme, but notes how
4370             # many spaces it would have liked to use. We may be able
4371             # to recover that space here in the event that all of the
4372             # lines of a list are back together again.
4373             #----------------------------------------------------------
4374              
4375 376 50 33     724 return 0 if ( !@{$rlines} || !@{$rgroups} );
  376         1445  
  376         1398  
4376              
4377 376         1087 my $object = $rlines->[0]->{'indentation'};
4378 376 100       1452 return 0 if ( !ref($object) );
4379 58         139 my $extra_leading_spaces = 0;
4380 58         242 my $extra_indentation_spaces_wanted = get_recoverable_spaces($object);
4381 58 100       224 return ($extra_leading_spaces) if ( !$extra_indentation_spaces_wanted );
4382              
4383 13         45 my $min_spaces = $extra_indentation_spaces_wanted;
4384 13 50       106 if ( $min_spaces > 0 ) { $min_spaces = 0 }
  13         29  
4385              
4386             # loop over all groups
4387 13         39 my $ng = -1;
4388 13         24 my $ngroups = @{$rgroups};
  13         30  
4389 13         30 foreach my $item ( @{$rgroups} ) {
  13         42  
4390 33         58 $ng++;
4391 33         64 my ( $jbeg, $jend ) = @{$item};
  33         71  
4392 33         92 foreach my $j ( $jbeg .. $jend ) {
4393 44 100       117 next if ( $j == 0 );
4394              
4395             # all indentation objects must be the same
4396 31 100       148 if ( $object != $rlines->[$j]->{'indentation'} ) {
4397 1         4 return 0;
4398             }
4399             }
4400              
4401             # find the maximum space without exceeding the line length for this group
4402 32         127 my $avail = $rlines->[$jbeg]->get_available_space_on_right();
4403 32 100       119 my $spaces =
4404             ( $avail > $extra_indentation_spaces_wanted )
4405             ? $extra_indentation_spaces_wanted
4406             : $avail;
4407              
4408             #--------------------------------------------------------
4409             # Note: min spaces can be negative; for example with -gnu
4410             # f(
4411             # do { 1; !!(my $x = bless []); }
4412             # );
4413             #--------------------------------------------------------
4414             # The following rule is needed to match older formatting:
4415             # For multiple groups, we will keep spaces non-negative.
4416             # For a single group, we will allow a negative space.
4417 32 50 66     153 if ( $ngroups > 1 && $spaces < 0 ) { $spaces = 0 }
  0         0  
4418              
4419             # update the minimum spacing
4420 32 100 66     166 if ( $ng == 0 || $spaces < $extra_leading_spaces ) {
4421 13         38 $extra_leading_spaces = $spaces;
4422             }
4423             }
4424              
4425             # update the indentation object because with -icp the terminal
4426             # ');' will use the same adjustment.
4427 12         184 $object->permanently_decrease_available_spaces( -$extra_leading_spaces );
4428 12         62 return $extra_leading_spaces;
4429             } ## end sub get_extra_leading_spaces
4430              
4431             sub forget_side_comment {
4432 111     111 0 354 my ($self) = @_;
4433 111         295 $self->[_last_side_comment_column_] = 0;
4434 111         253 return;
4435             }
4436              
4437             sub is_good_side_comment_column {
4438 199     199 0 637 my ( $self, $line, $line_number, $level, $num5 ) = @_;
4439              
4440             # Upon encountering the first side comment of a group, decide if
4441             # a previous side comment should be forgotten. This involves
4442             # checking several rules.
4443              
4444             # Return true to KEEP old comment location
4445             # Return false to FORGET old comment location
4446 199         440 my $KEEP = 1;
4447 199         349 my $FORGET = 0;
4448              
4449 199         450 my $rfields = $line->{'rfields'};
4450 199         474 my $is_hanging_side_comment = $line->{'is_hanging_side_comment'};
4451              
4452             # RULE1: Never forget comment before a hanging side comment
4453 199 100       1638 return $KEEP if ($is_hanging_side_comment);
4454              
4455             # RULE2: Forget a side comment after a short line difference,
4456             # where 'short line difference' is computed from a formula.
4457             # Using a smooth formula helps minimize sudden large changes.
4458 189         453 my $line_diff = $line_number - $self->[_last_side_comment_line_number_];
4459 189         574 my $alev_diff = abs( $level - $self->[_last_side_comment_level_] );
4460              
4461             # '$num5' is the number of comments in the first 5 lines after the first
4462             # comment. It is needed to keep a compact group of side comments from
4463             # being influenced by a more distant side comment.
4464 189 50       509 $num5 = 1 if ( !$num5 );
4465              
4466             # Some values:
4467              
4468             # $adiff $num5 $short_diff
4469             # 0 * 12
4470             # 1 1 6
4471             # 1 2 4
4472             # 1 3 3
4473             # 1 4 2
4474             # 2 1 4
4475             # 2 2 2
4476             # 2 3 1
4477             # 3 1 3
4478             # 3 2 1
4479              
4480 189         591 my $short_diff = SC_LONG_LINE_DIFF / ( 1 + $alev_diff * $num5 );
4481              
4482 189 100 100     1071 return $FORGET
4483             if ( $line_diff > $short_diff
4484             || !$self->[_rOpts_valign_side_comments_] );
4485              
4486             # RULE3: Forget a side comment if this line is at lower level and
4487             # ends a block
4488 122         270 my $last_sc_level = $self->[_last_side_comment_level_];
4489             return $FORGET
4490             if ( $level < $last_sc_level
4491 122 100 100     654 && $is_closing_block_type{ substr( $rfields->[0], 0, 1 ) } );
4492              
4493             # RULE 4: Forget the last side comment if this comment might join a cached
4494             # line ...
4495 104 100       501 if ( my $cached_line_type = get_cached_line_type() ) {
4496              
4497             # ... otherwise side comment alignment will get messed up.
4498             # For example, in the following test script
4499             # with using 'perltidy -sct -act=2', the last comment would try to
4500             # align with the previous and then be in the wrong column when
4501             # the lines are combined:
4502              
4503             # foreach $line (
4504             # [0, 1, 2], [3, 4, 5], [6, 7, 8], # rows
4505             # [0, 3, 6], [1, 4, 7], [2, 5, 8], # columns
4506             # [0, 4, 8], [2, 4, 6]
4507             # ) # diagonals
4508 4 50 33     33 return $FORGET
4509             if ( $cached_line_type == 2 || $cached_line_type == 4 );
4510             }
4511              
4512             # Otherwise, keep it alive
4513 104         280 return $KEEP;
4514             } ## end sub is_good_side_comment_column
4515              
4516             sub align_side_comments {
4517              
4518 199     199 0 574 my ( $self, $rlines, $rgroups ) = @_;
4519              
4520             # Align any side comments in this batch of lines
4521              
4522             # Given:
4523             # $rlines - the lines
4524             # $rgroups - the partition of the lines into groups
4525             #
4526             # We will be working group-by-group because all side comments
4527             # (real or fake) in each group are already aligned. So we just have
4528             # to make alignments between groups wherever possible.
4529              
4530             # An unusual aspect is that within each group we have aligned both real
4531             # and fake side comments. This has the consequence that the lengths of
4532             # long lines without real side comments can cause 'push' all side comments
4533             # to the right. This seems unusual, but testing with and without this
4534             # feature shows that it is usually better this way. Otherwise, side
4535             # comments can be hidden between long lines without side comments and
4536             # thus be harder to read.
4537              
4538 199         492 my $group_level = $self->[_group_level_];
4539 199   100     871 my $continuing_sc_flow = $self->[_last_side_comment_length_] > 0
4540             && $group_level == $self->[_last_level_written_];
4541              
4542             # Find groups with side comments, and remember the first nonblank comment
4543 199         425 my $j_sc_beg;
4544             my @todo;
4545 199         395 my $ng = -1;
4546 199         401 foreach my $item ( @{$rgroups} ) {
  199         553  
4547 312         520 $ng++;
4548 312         484 my ( $jbeg, $jend ) = @{$item};
  312         681  
4549 312         743 foreach my $j ( $jbeg .. $jend ) {
4550 346         657 my $line = $rlines->[$j];
4551 346         627 my $jmax = $line->{'jmax'};
4552 346 100       1075 if ( $line->{'rfield_lengths'}->[$jmax] ) {
4553              
4554             # this group has a line with a side comment
4555 228         550 push @todo, $ng;
4556 228 100       723 if ( !defined($j_sc_beg) ) {
4557 199         381 $j_sc_beg = $j;
4558             }
4559 228         583 last;
4560             }
4561             }
4562             }
4563              
4564             # done if no groups with side comments
4565 199 50       816 return unless @todo;
4566              
4567             # Count $num5 = number of comments in the 5 lines after the first comment
4568             # This is an important factor in a decision formula
4569 199         473 my $num5 = 1;
4570 199         499 foreach my $jj ( $j_sc_beg + 1 .. @{$rlines} - 1 ) {
  199         561  
4571 194         399 my $ldiff = $jj - $j_sc_beg;
4572 194 100       469 last if ( $ldiff > 5 );
4573 190         357 my $line = $rlines->[$jj];
4574 190         349 my $jmax = $line->{'jmax'};
4575 190         335 my $sc_len = $line->{'rfield_lengths'}->[$jmax];
4576 190 100       497 next if ( !$sc_len );
4577 121         233 $num5++;
4578             }
4579              
4580             # Forget the old side comment location if necessary
4581 199         566 my $line_0 = $rlines->[$j_sc_beg];
4582 199         1430 my $lnum =
4583             $j_sc_beg + $self->[_file_writer_object_]->get_output_line_number();
4584 199         967 my $keep_it =
4585             $self->is_good_side_comment_column( $line_0, $lnum, $group_level, $num5 );
4586 199 100       679 my $last_side_comment_column =
4587             $keep_it ? $self->[_last_side_comment_column_] : 0;
4588              
4589             # If there are multiple groups we will do two passes
4590             # so that we can find a common alignment for all groups.
4591 199 100       588 my $MAX_PASS = @todo > 1 ? 2 : 1;
4592              
4593             # Loop over passes
4594 199         403 my $max_comment_column = $last_side_comment_column;
4595 199         559 foreach my $PASS ( 1 .. $MAX_PASS ) {
4596              
4597             # If there are two passes, then on the last pass make the old column
4598             # equal to the largest of the group. This will result in the comments
4599             # being aligned if possible.
4600 223 100       609 if ( $PASS == $MAX_PASS ) {
4601 199         381 $last_side_comment_column = $max_comment_column;
4602             }
4603              
4604             # Loop over the groups with side comments
4605 223         399 my $column_limit;
4606 223         581 foreach my $ng (@todo) {
4607 281         491 my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
  281         674  
4608              
4609             # Note that since all lines in a group have common alignments, we
4610             # just have to work on one of the lines (the first line).
4611 281         573 my $line = $rlines->[$jbeg];
4612 281         541 my $jmax = $line->{'jmax'};
4613 281         551 my $is_hanging_side_comment = $line->{'is_hanging_side_comment'};
4614             last
4615 281 100 100     925 if ( $PASS < $MAX_PASS && $is_hanging_side_comment );
4616              
4617             # the maximum space without exceeding the line length:
4618 277         1011 my $avail = $line->get_available_space_on_right();
4619              
4620             # try to use the previous comment column
4621 277         1029 my $side_comment_column = $line->get_column( $jmax - 1 );
4622 277         743 my $move = $last_side_comment_column - $side_comment_column;
4623              
4624             # Remember the maximum possible column of the first line with
4625             # side comment
4626 277 100       847 if ( !defined($column_limit) ) {
4627 223         441 $column_limit = $side_comment_column + $avail;
4628             }
4629              
4630 277 50       760 next if ( $jmax <= 0 );
4631              
4632             # but if this doesn't work, give up and use the minimum space
4633 277         589 my $min_move = $self->[_rOpts_minimum_space_to_comment_] - 1;
4634 277 100       834 if ( $move > $avail ) {
4635 13         33 $move = $min_move;
4636             }
4637              
4638             # but we want some minimum space to the comment
4639 277 100 100     1330 if ( $move >= 0
      100        
4640             && $j_sc_beg == 0
4641             && $continuing_sc_flow )
4642             {
4643 3         11 $min_move = 0;
4644             }
4645              
4646             # remove constraints on hanging side comments
4647 277 100       713 if ($is_hanging_side_comment) { $min_move = 0 }
  14         25  
4648              
4649 277 100       756 if ( $move < $min_move ) {
4650 194         357 $move = $min_move;
4651             }
4652              
4653             # don't exceed the available space
4654 277 100       637 if ( $move > $avail ) { $move = $avail }
  11         36  
4655              
4656             # We can only increase space, never decrease.
4657 277 100       706 if ( $move < 0 ) { $move = 0 }
  8         14  
4658              
4659             # Discover the largest column on the preliminary pass
4660 277 100       675 if ( $PASS < $MAX_PASS ) {
4661 49         140 my $col = $line->get_column( $jmax - 1 ) + $move;
4662              
4663             # but ignore columns too large for the starting line
4664 49 100 66     326 if ( $col > $max_comment_column && $col < $column_limit ) {
4665 23         63 $max_comment_column = $col;
4666             }
4667             }
4668              
4669             # Make the changes on the final pass
4670             else {
4671 228         1047 $line->increase_field_width( $jmax - 1, $move );
4672              
4673             # remember this column for the next group
4674 228         993 $last_side_comment_column = $line->get_column( $jmax - 1 );
4675             }
4676             } ## end loop over groups
4677             } ## end loop over passes
4678              
4679             # Find the last side comment
4680 199         489 my $j_sc_last;
4681 199         458 my $ng_last = $todo[-1];
4682 199         361 my ( $jbeg, $jend ) = @{ $rgroups->[$ng_last] };
  199         566  
4683 199         683 foreach my $jj ( reverse( $jbeg .. $jend ) ) {
4684 201         474 my $line = $rlines->[$jj];
4685 201         407 my $jmax = $line->{'jmax'};
4686 201 100       613 if ( $line->{'rfield_lengths'}->[$jmax] ) {
4687 199         394 $j_sc_last = $jj;
4688 199         419 last;
4689             }
4690             }
4691              
4692             # Save final side comment info for possible use by the next batch
4693 199 50       604 if ( defined($j_sc_last) ) {
4694 199         758 my $line_number =
4695             $self->[_file_writer_object_]->get_output_line_number() + $j_sc_last;
4696 199         509 $self->[_last_side_comment_column_] = $last_side_comment_column;
4697 199         387 $self->[_last_side_comment_line_number_] = $line_number;
4698 199         422 $self->[_last_side_comment_level_] = $group_level;
4699             }
4700 199         500 return;
4701             } ## end sub align_side_comments
4702              
4703             ###############################
4704             # CODE SECTION 6: Output Step A
4705             ###############################
4706              
4707             sub valign_output_step_A {
4708              
4709             #------------------------------------------------------------
4710             # This is Step A in writing vertically aligned lines.
4711             # The line is prepared according to the alignments which have
4712             # been found. Then it is shipped to the next step.
4713             #------------------------------------------------------------
4714              
4715 3066     3066 0 6742 my ( $self, $rinput_hash ) = @_;
4716              
4717 3066         5910 my $line = $rinput_hash->{line};
4718 3066         5059 my $min_ci_gap = $rinput_hash->{min_ci_gap};
4719 3066         5181 my $do_not_align = $rinput_hash->{do_not_align};
4720 3066         4918 my $group_leader_length = $rinput_hash->{group_leader_length};
4721 3066         4963 my $extra_leading_spaces = $rinput_hash->{extra_leading_spaces};
4722 3066         5043 my $level = $rinput_hash->{level};
4723 3066         4944 my $maximum_line_length = $rinput_hash->{maximum_line_length};
4724              
4725 3066         5324 my $rfields = $line->{'rfields'};
4726 3066         5031 my $rfield_lengths = $line->{'rfield_lengths'};
4727 3066         4929 my $leading_space_count = $line->{'leading_space_count'};
4728 3066         4889 my $outdent_long_lines = $line->{'outdent_long_lines'};
4729 3066         5206 my $maximum_field_index = $line->{'jmax'};
4730 3066         5026 my $rvertical_tightness_flags = $line->{'rvertical_tightness_flags'};
4731 3066         5407 my $Kend = $line->{'Kend'};
4732 3066         5424 my $level_end = $line->{'level_end'};
4733              
4734             # Check for valid hash keys at end of lifetime of $line during development
4735 3066         4353 DEVEL_MODE
4736             && check_keys( $line, \%valid_LINE_keys,
4737             "Checking line keys at valign_output_step_A", 1 );
4738              
4739             # add any extra spaces
4740 3066 100       6951 if ( $leading_space_count > $group_leader_length ) {
4741 47         208 $leading_space_count += $min_ci_gap;
4742             }
4743              
4744 3066         6365 my $str = $rfields->[0];
4745 3066         5041 my $str_len = $rfield_lengths->[0];
4746              
4747 3066         4801 my @alignments = @{ $line->{'ralignments'} };
  3066         7365  
4748 3066 50       8137 if ( @alignments != $maximum_field_index + 1 ) {
4749              
4750             # Shouldn't happen: sub install_new_alignments makes jmax alignments
4751 0         0 my $jmax_alignments = @alignments - 1;
4752 0         0 if (DEVEL_MODE) {
4753             Fault(
4754             "alignment jmax=$jmax_alignments should equal $maximum_field_index\n"
4755             );
4756             }
4757 0         0 $do_not_align = 1;
4758             }
4759              
4760             # loop to concatenate all fields of this line and needed padding
4761 3066         5408 my $total_pad_count = 0;
4762 3066         6869 for my $j ( 1 .. $maximum_field_index ) {
4763              
4764             # skip zero-length side comments
4765             last
4766             if (
4767 7285 100 66     25744 ( $j == $maximum_field_index )
      100        
4768             && ( !defined( $rfields->[$j] )
4769             || ( $rfield_lengths->[$j] == 0 ) )
4770             );
4771              
4772             # compute spaces of padding before this field
4773 4544         9194 my $col = $alignments[ $j - 1 ]->{'column'};
4774 4544         7798 my $pad = $col - ( $str_len + $leading_space_count );
4775              
4776 4544 50       8691 if ($do_not_align) {
4777 0 0       0 $pad =
4778             ( $j < $maximum_field_index )
4779             ? 0
4780             : $self->[_rOpts_minimum_space_to_comment_] - 1;
4781             }
4782              
4783             # if the -fpsc flag is set, move the side comment to the selected
4784             # column if and only if it is possible, ignoring constraints on
4785             # line length and minimum space to comment
4786 4544 100 100     10201 if ( $self->[_rOpts_fixed_position_side_comment_]
4787             && $j == $maximum_field_index )
4788             {
4789 9         22 my $newpad =
4790             $pad + $self->[_rOpts_fixed_position_side_comment_] - $col - 1;
4791 9 50       20 if ( $newpad >= 0 ) { $pad = $newpad; }
  9         15  
4792             }
4793              
4794             # accumulate the padding
4795 4544 100       9245 if ( $pad > 0 ) { $total_pad_count += $pad; }
  1321         2190  
4796              
4797             # only add padding when we have a finite field;
4798             # this avoids extra terminal spaces if we have empty fields
4799 4544 100       8789 if ( $rfield_lengths->[$j] > 0 ) {
4800 4533         9211 $str .= SPACE x $total_pad_count;
4801 4533         6403 $str_len += $total_pad_count;
4802 4533         6382 $total_pad_count = 0;
4803 4533         7857 $str .= $rfields->[$j];
4804 4533         8005 $str_len += $rfield_lengths->[$j];
4805             }
4806             else {
4807 11         35 $total_pad_count = 0;
4808             }
4809             }
4810              
4811 3066         5721 my $side_comment_length = $rfield_lengths->[$maximum_field_index];
4812              
4813             # ship this line off
4814 3066         26109 $self->valign_output_step_B(
4815             {
4816             leading_space_count => $leading_space_count + $extra_leading_spaces,
4817             line => $str,
4818             line_length => $str_len,
4819             side_comment_length => $side_comment_length,
4820             outdent_long_lines => $outdent_long_lines,
4821             rvertical_tightness_flags => $rvertical_tightness_flags,
4822             level => $level,
4823             level_end => $level_end,
4824             Kend => $Kend,
4825             maximum_line_length => $maximum_line_length,
4826             }
4827             );
4828 3066         15274 return;
4829             } ## end sub valign_output_step_A
4830              
4831             sub combine_fields {
4832              
4833             # We have a group of two lines for which we do not want to align tokens
4834             # between index $imax_align and the side comment. So we will delete fields
4835             # between $imax_align and the side comment. Alignments have already
4836             # been set so we have to adjust them.
4837              
4838 14     14 0 57 my ( $line_0, $line_1, $imax_align ) = @_;
4839              
4840 14 50       50 if ( !defined($imax_align) ) { $imax_align = -1 }
  0         0  
4841              
4842             # First delete the unwanted tokens
4843 14         51 my $jmax_old = $line_0->{'jmax'};
4844 14         73 my @idel = ( $imax_align + 1 .. $jmax_old - 2 );
4845 14 50       58 return if ( !@idel );
4846              
4847             # Get old alignments before any changes are made
4848 14         36 my @old_alignments = @{ $line_0->{'ralignments'} };
  14         62  
4849              
4850 14         53 foreach my $line ( $line_0, $line_1 ) {
4851 28         86 delete_selected_tokens( $line, \@idel );
4852             }
4853              
4854             # Now adjust the alignments. Note that the side comment alignment
4855             # is always at jmax-1, and there is an ending alignment at jmax.
4856 14         61 my @new_alignments;
4857 14 50       128 if ( $imax_align >= 0 ) {
4858 0         0 @new_alignments[ 0 .. $imax_align ] =
4859             @old_alignments[ 0 .. $imax_align ];
4860             }
4861              
4862 14         70 my $jmax_new = $line_0->{'jmax'};
4863              
4864 14         60 $new_alignments[ $jmax_new - 1 ] = $old_alignments[ $jmax_old - 1 ];
4865 14         43 $new_alignments[$jmax_new] = $old_alignments[$jmax_old];
4866 14         61 $line_0->{'ralignments'} = \@new_alignments;
4867 14         43 $line_1->{'ralignments'} = \@new_alignments;
4868 14         68 return;
4869             } ## end sub combine_fields
4870              
4871             sub get_output_line_number {
4872              
4873             # The output line number reported to a caller =
4874             # the number of items still in the buffer +
4875             # the number of items written.
4876 49     49 0 160 return $_[0]->group_line_count() +
4877             $_[0]->[_file_writer_object_]->get_output_line_number();
4878             } ## end sub get_output_line_number
4879              
4880             ###############################
4881             # CODE SECTION 7: Output Step B
4882             ###############################
4883              
4884             { ## closure for sub valign_output_step_B
4885              
4886             # These are values for a cache used by valign_output_step_B.
4887             my $cached_line_text;
4888             my $cached_line_text_length;
4889             my $cached_line_type;
4890             my $cached_line_opening_flag;
4891             my $cached_line_closing_flag;
4892             my $cached_seqno;
4893             my $cached_line_valid;
4894             my $cached_line_leading_space_count;
4895             my $cached_seqno_string;
4896             my $cached_line_Kend;
4897             my $cached_line_maximum_length;
4898              
4899             # These are passed to step_C:
4900             my $seqno_string;
4901             my $last_nonblank_seqno_string;
4902              
4903             sub set_last_nonblank_seqno_string {
4904 394     394 0 873 my ($val) = @_;
4905 394         617 $last_nonblank_seqno_string = $val;
4906 394         676 return;
4907             }
4908              
4909             sub get_cached_line_opening_flag {
4910 224     224 0 471 return $cached_line_opening_flag;
4911             }
4912              
4913             sub get_cached_line_type {
4914 7489     7489 0 14004 return $cached_line_type;
4915             }
4916              
4917             sub set_cached_line_valid {
4918 3     3 0 9 my ($val) = @_;
4919 3         7 $cached_line_valid = $val;
4920 3         7 return;
4921             }
4922              
4923             sub get_cached_seqno {
4924 224     224 0 568 return $cached_seqno;
4925             }
4926              
4927             sub initialize_step_B_cache {
4928              
4929             # valign_output_step_B cache:
4930 561     561 0 1927 $cached_line_text = EMPTY_STRING;
4931 561         1343 $cached_line_text_length = 0;
4932 561         1324 $cached_line_type = 0;
4933 561         1280 $cached_line_opening_flag = 0;
4934 561         1244 $cached_line_closing_flag = 0;
4935 561         1298 $cached_seqno = 0;
4936 561         1210 $cached_line_valid = 0;
4937 561         1127 $cached_line_leading_space_count = 0;
4938 561         1169 $cached_seqno_string = EMPTY_STRING;
4939 561         1103 $cached_line_Kend = undef;
4940 561         1208 $cached_line_maximum_length = undef;
4941              
4942             # These vars hold a string of sequence numbers joined together used by
4943             # the cache
4944 561         1441 $seqno_string = EMPTY_STRING;
4945 561         1229 $last_nonblank_seqno_string = EMPTY_STRING;
4946 561         1113 return;
4947             } ## end sub initialize_step_B_cache
4948              
4949             sub _flush_step_B_cache {
4950 1818     1818   3980 my ($self) = @_;
4951              
4952             # Send any text in the step_B cache on to step_C
4953 1818 100       4525 if ($cached_line_type) {
4954 1         3 $seqno_string = $cached_seqno_string;
4955 1         5 $self->valign_output_step_C(
4956             $seqno_string,
4957             $last_nonblank_seqno_string,
4958              
4959             $cached_line_text,
4960             $cached_line_leading_space_count,
4961             $self->[_last_level_written_],
4962             $cached_line_Kend,
4963             );
4964 1         2 $cached_line_type = 0;
4965 1         3 $cached_line_text = EMPTY_STRING;
4966 1         1 $cached_line_text_length = 0;
4967 1         3 $cached_seqno_string = EMPTY_STRING;
4968 1         10 $cached_line_Kend = undef;
4969 1         2 $cached_line_maximum_length = undef;
4970             }
4971 1818         3101 return;
4972             } ## end sub _flush_step_B_cache
4973              
4974             sub handle_cached_line {
4975              
4976 158     158 0 469 my ( $self, $rinput, $leading_string, $leading_string_length ) = @_;
4977              
4978             # The cached line will either be:
4979             # - passed along to step_C, or
4980             # - or combined with the current line
4981              
4982 158         334 my $last_level_written = $self->[_last_level_written_];
4983              
4984 158         316 my $leading_space_count = $rinput->{leading_space_count};
4985 158         329 my $str = $rinput->{line};
4986 158         282 my $str_length = $rinput->{line_length};
4987 158         313 my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
4988 158         310 my $level = $rinput->{level};
4989 158         326 my $level_end = $rinput->{level_end};
4990 158         286 my $maximum_line_length = $rinput->{maximum_line_length};
4991              
4992 158         331 my ( $open_or_close, $opening_flag, $closing_flag, $seqno, $valid,
4993             $seqno_beg, $seqno_end );
4994 158 50       426 if ($rvertical_tightness_flags) {
4995              
4996 158         278 $open_or_close = $rvertical_tightness_flags->{_vt_type};
4997 158         292 $seqno_beg = $rvertical_tightness_flags->{_vt_seqno_beg};
4998             }
4999              
5000             # Dump an invalid cached line
5001 158 100 100     1036 if ( !$cached_line_valid ) {
    100          
5002 91         277 $self->valign_output_step_C(
5003             $seqno_string,
5004             $last_nonblank_seqno_string,
5005              
5006             $cached_line_text,
5007             $cached_line_leading_space_count,
5008             $last_level_written,
5009             $cached_line_Kend,
5010             );
5011             }
5012              
5013             # Handle cached line ending in OPENING tokens
5014             elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
5015              
5016 30         77 my $gap = $leading_space_count - $cached_line_text_length;
5017              
5018             # handle option of just one tight opening per line:
5019 30 100       106 if ( $cached_line_opening_flag == 1 ) {
5020 14 50 33     77 if ( defined($open_or_close) && $open_or_close == 1 ) {
5021 0         0 $gap = -1;
5022             }
5023             }
5024              
5025             # Do not join the lines if this might produce a one-line
5026             # container which exceeds the maximum line length. This is
5027             # necessary prevent blinking, particularly with the combination
5028             # -xci -pvt=2. In that case a one-line block alternately forms
5029             # and breaks, causing -xci to alternately turn on and off (case
5030             # b765).
5031             # Patched to fix cases b656 b862 b971 b972: always do the check
5032             # if the maximum line length changes (due to -vmll).
5033 30 50 33     228 if (
      66        
5034             $gap >= 0
5035             && ( $maximum_line_length != $cached_line_maximum_length
5036             || ( defined($level_end) && $level > $level_end ) )
5037             )
5038             {
5039 0         0 my $test_line_length =
5040             $cached_line_text_length + $gap + $str_length;
5041              
5042             # Add a small tolerance in the length test (fixes case b862)
5043 0 0       0 if ( $test_line_length > $cached_line_maximum_length - 2 ) {
5044 0         0 $gap = -1;
5045             }
5046             }
5047              
5048 30 100 66     156 if ( $gap >= 0 && defined($seqno_beg) ) {
5049 18         44 $maximum_line_length = $cached_line_maximum_length;
5050 18         64 $leading_string = $cached_line_text . SPACE x $gap;
5051 18         36 $leading_string_length = $cached_line_text_length + $gap;
5052 18         49 $leading_space_count = $cached_line_leading_space_count;
5053 18         65 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
5054 18         52 $level = $last_level_written;
5055             }
5056             else {
5057 12         54 $self->valign_output_step_C(
5058             $seqno_string,
5059             $last_nonblank_seqno_string,
5060              
5061             $cached_line_text,
5062             $cached_line_leading_space_count,
5063             $last_level_written,
5064             $cached_line_Kend,
5065             );
5066             }
5067             }
5068              
5069             # Handle cached line ending in CLOSING tokens
5070             else {
5071 37         178 my $test_line =
5072             $cached_line_text . SPACE x $cached_line_closing_flag . $str;
5073 37         105 my $test_line_length =
5074             $cached_line_text_length +
5075             $cached_line_closing_flag +
5076             $str_length;
5077 37 100 66     471 if (
      66        
      100        
5078              
5079             # The new line must start with container
5080             $seqno_beg
5081              
5082             # The container combination must be okay..
5083             && (
5084              
5085             # okay to combine like types
5086             ( $open_or_close == $cached_line_type )
5087              
5088             # closing block brace may append to non-block
5089             || ( $cached_line_type == 2 && $open_or_close == 4 )
5090              
5091             # something like ');'
5092             || ( !$open_or_close && $cached_line_type == 2 )
5093              
5094             )
5095              
5096             # The combined line must fit
5097             && ( $test_line_length <= $cached_line_maximum_length )
5098             )
5099             {
5100              
5101 33         96 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
5102              
5103             # Patch to outdent closing tokens ending # in ');' If we
5104             # are joining a line like ');' to a previous stacked set of
5105             # closing tokens, then decide if we may outdent the
5106             # combined stack to the indentation of the ');'. Since we
5107             # should not normally outdent any of the other tokens more
5108             # than the indentation of the lines that contained them, we
5109             # will only do this if all of the corresponding opening
5110             # tokens were on the same line. This can happen with -sot
5111             # and -sct.
5112              
5113             # For example, it is ok here:
5114             # __PACKAGE__->load_components( qw(
5115             # PK::Auto
5116             # Core
5117             # ));
5118             #
5119             # But, for example, we do not outdent in this example
5120             # because that would put the closing sub brace out farther
5121             # than the opening sub brace:
5122             #
5123             # perltidy -sot -sct
5124             # $c->Tk::bind(
5125             # '<Control-f>' => sub {
5126             # my ($c) = @_;
5127             # my $e = $c->XEvent;
5128             # itemsUnderArea $c;
5129             # } );
5130             #
5131 33 100 100     301 if ( $str =~ /^\);/
5132             && $cached_line_text =~ /^[\)\}\]\s]*$/ )
5133             {
5134              
5135             # The way to tell this is if the stacked sequence
5136             # numbers of this output line are the reverse of the
5137             # stacked sequence numbers of the previous non-blank
5138             # line of sequence numbers. So we can join if the
5139             # previous nonblank string of tokens is the mirror
5140             # image. For example if stack )}] is 13:8:6 then we
5141             # are looking for a leading stack like [{( which
5142             # is 6:8:13. We only need to check the two ends,
5143             # because the intermediate tokens must fall in order.
5144             # Note on speed: having to split on colons and
5145             # eliminate multiple colons might appear to be slow,
5146             # but it's not an issue because we almost never come
5147             # through here. In a typical file we don't.
5148              
5149 4         13 $seqno_string =~ s/^:+//;
5150 4         11 $last_nonblank_seqno_string =~ s/^:+//;
5151 4         18 $seqno_string =~ s/:+/:/g;
5152 4         19 $last_nonblank_seqno_string =~ s/:+/:/g;
5153              
5154             # how many spaces can we outdent?
5155 4         19 my $diff =
5156             $cached_line_leading_space_count - $leading_space_count;
5157 4 100 33     52 if ( $diff > 0
      66        
5158             && length($seqno_string)
5159             && length($last_nonblank_seqno_string) ==
5160             length($seqno_string) )
5161             {
5162 3         31 my @seqno_last =
5163             ( split /:/, $last_nonblank_seqno_string );
5164 3         12 my @seqno_now = ( split /:/, $seqno_string );
5165 3 50 33     41 if ( @seqno_now
      33        
      33        
5166             && @seqno_last
5167             && $seqno_now[-1] == $seqno_last[0]
5168             && $seqno_now[0] == $seqno_last[-1] )
5169             {
5170              
5171             # OK to outdent ..
5172             # for absolute safety, be sure we only remove
5173             # whitespace
5174 3         9 my $ws = substr( $test_line, 0, $diff );
5175 3 50 33     32 if ( ( length($ws) == $diff )
5176             && $ws =~ /^\s+$/ )
5177             {
5178              
5179 3         9 $test_line = substr( $test_line, $diff );
5180 3         7 $cached_line_leading_space_count -= $diff;
5181 3         14 $last_level_written =
5182             $self->level_change(
5183             $cached_line_leading_space_count,
5184             $diff, $last_level_written );
5185 3         13 $self->reduce_valign_buffer_indentation($diff);
5186             }
5187              
5188             # shouldn't happen, but not critical:
5189             ##else {
5190             ## ERROR transferring indentation here
5191             ##}
5192             }
5193             }
5194             }
5195              
5196             # Change the args to look like we received the combined line
5197 33         72 $str = $test_line;
5198 33         59 $str_length = $test_line_length;
5199 33         65 $leading_string = EMPTY_STRING;
5200 33         61 $leading_string_length = 0;
5201 33         59 $leading_space_count = $cached_line_leading_space_count;
5202 33         62 $level = $last_level_written;
5203 33         79 $maximum_line_length = $cached_line_maximum_length;
5204             }
5205             else {
5206 4         19 $self->valign_output_step_C(
5207             $seqno_string,
5208             $last_nonblank_seqno_string,
5209              
5210             $cached_line_text,
5211             $cached_line_leading_space_count,
5212             $last_level_written,
5213             $cached_line_Kend,
5214             );
5215             }
5216             }
5217 158         873 return ( $str, $str_length, $leading_string, $leading_string_length,
5218             $leading_space_count, $level, $maximum_line_length, );
5219              
5220             } ## end sub handle_cached_line
5221              
5222             sub valign_output_step_B {
5223              
5224             #---------------------------------------------------------
5225             # This is Step B in writing vertically aligned lines.
5226             # Vertical tightness is applied according to preset flags.
5227             # In particular this routine handles stacking of opening
5228             # and closing tokens.
5229             #---------------------------------------------------------
5230              
5231 7384     7384 0 15218 my ( $self, $rinput ) = @_;
5232              
5233 7384         13203 my $leading_space_count = $rinput->{leading_space_count};
5234 7384         13455 my $str = $rinput->{line};
5235 7384         11368 my $str_length = $rinput->{line_length};
5236 7384         11431 my $side_comment_length = $rinput->{side_comment_length};
5237 7384         11697 my $outdent_long_lines = $rinput->{outdent_long_lines};
5238 7384         11155 my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
5239 7384         11400 my $level = $rinput->{level};
5240 7384         11411 my $level_end = $rinput->{level_end};
5241 7384         11524 my $Kend = $rinput->{Kend};
5242 7384         11688 my $maximum_line_length = $rinput->{maximum_line_length};
5243              
5244             # Useful -gcs test cases for wide characters are
5245             # perl527/(method.t.2, reg_mesg.t, mime-header.t)
5246              
5247             # handle outdenting of long lines:
5248 7384         10687 my $is_outdented_line;
5249 7384 100       15338 if ($outdent_long_lines) {
5250 276         745 my $excess =
5251             $str_length -
5252             $side_comment_length +
5253             $leading_space_count -
5254             $maximum_line_length;
5255 276 100       1020 if ( $excess > 0 ) {
5256 10         21 $leading_space_count = 0;
5257 10         28 my $file_writer_object = $self->[_file_writer_object_];
5258 10         39 my $last_outdented_line_at =
5259             $file_writer_object->get_output_line_number();
5260 10         24 $self->[_last_outdented_line_at_] = $last_outdented_line_at;
5261              
5262 10         25 my $outdented_line_count = $self->[_outdented_line_count_];
5263 10 100       30 if ( !$outdented_line_count ) {
5264 3         8 $self->[_first_outdented_line_at_] =
5265             $last_outdented_line_at;
5266             }
5267 10         19 $outdented_line_count++;
5268 10         19 $self->[_outdented_line_count_] = $outdented_line_count;
5269 10         19 $is_outdented_line = 1;
5270             }
5271             }
5272              
5273             # Make preliminary leading whitespace. It could get changed
5274             # later by entabbing, so we have to keep track of any changes
5275             # to the leading_space_count from here on.
5276 7384 100       19017 my $leading_string =
5277             $leading_space_count > 0
5278             ? ( SPACE x $leading_space_count )
5279             : EMPTY_STRING;
5280 7384         12040 my $leading_string_length = length($leading_string);
5281              
5282             # Unpack any recombination data; it was packed by
5283             # sub 'Formatter::set_vertical_tightness_flags'
5284              
5285             # old hash Meaning
5286             # index key
5287             #
5288             # 0 _vt_type: 1=opening non-block 2=closing non-block
5289             # 3=opening block brace 4=closing block brace
5290             #
5291             # 1a _vt_opening_flag: 1=no multiple steps, 2=multiple steps ok
5292             # 1b _vt_closing_flag: spaces of padding to use if closing
5293             # 2 _vt_seqno: sequence number of container
5294             # 3 _vt_valid flag: do not append if this flag is false. Will be
5295             # true if appropriate -vt flag is set. Otherwise, Will be
5296             # made true only for 2 line container in parens with -lp
5297             # 4 _vt_seqno_beg: sequence number of first token of line
5298             # 5 _vt_seqno_end: sequence number of last token of line
5299             # 6 _vt_min_lines: min number of lines for joining opening cache,
5300             # 0=no constraint
5301             # 7 _vt_max_lines: max number of lines for joining opening cache,
5302             # 0=no constraint
5303              
5304 7384         12860 my ( $open_or_close, $opening_flag, $closing_flag, $seqno, $valid,
5305             $seqno_beg, $seqno_end );
5306 7384 100       14946 if ($rvertical_tightness_flags) {
5307              
5308 1308         2344 $open_or_close = $rvertical_tightness_flags->{_vt_type};
5309 1308         2134 $opening_flag = $rvertical_tightness_flags->{_vt_opening_flag};
5310 1308         2182 $closing_flag = $rvertical_tightness_flags->{_vt_closing_flag};
5311 1308         2055 $seqno = $rvertical_tightness_flags->{_vt_seqno};
5312 1308         2115 $valid = $rvertical_tightness_flags->{_vt_valid_flag};
5313 1308         2262 $seqno_beg = $rvertical_tightness_flags->{_vt_seqno_beg};
5314 1308         2155 $seqno_end = $rvertical_tightness_flags->{_vt_seqno_end};
5315             }
5316              
5317 7384         11544 $seqno_string = $seqno_end;
5318              
5319             # handle any cached line ..
5320             # either append this line to it or write it out
5321             # Note: the function length() is used in this next test out of caution.
5322             # All testing has shown that the variable $cached_line_text_length is
5323             # correct, but its calculation is complex and a loss of cached text
5324             # would be a disaster.
5325 7384 100       15523 if ( length($cached_line_text) ) {
5326              
5327             (
5328 158         802 $str,
5329             $str_length,
5330             $leading_string,
5331             $leading_string_length,
5332             $leading_space_count,
5333             $level,
5334             $maximum_line_length
5335              
5336             ) = $self->handle_cached_line( $rinput, $leading_string,
5337             $leading_string_length );
5338              
5339 158         376 $cached_line_type = 0;
5340 158         296 $cached_line_text = EMPTY_STRING;
5341 158         319 $cached_line_text_length = 0;
5342 158         276 $cached_line_Kend = undef;
5343 158         274 $cached_line_maximum_length = undef;
5344              
5345             }
5346              
5347             # make the line to be written
5348 7384         15489 my $line = $leading_string . $str;
5349 7384         12258 my $line_length = $leading_string_length + $str_length;
5350              
5351             # Safety check: be sure that a line to be cached as a stacked block
5352             # brace line ends in the appropriate opening or closing block brace.
5353             # This should always be the case if the caller set flags correctly.
5354             # Code '3' is for -sobb, code '4' is for -scbb.
5355 7384 100       14461 if ($open_or_close) {
5356 159 50 66     1231 if ( $open_or_close == 3 && $line !~ /\{\s*$/
      66        
      33        
5357             || $open_or_close == 4 && $line !~ /\}\s*$/ )
5358             {
5359 0         0 $open_or_close = 0;
5360             }
5361             }
5362              
5363             # write or cache this line ...
5364             # fix for case b999: do not cache an outdented line
5365             # fix for b1378: do not cache an empty line
5366 7384 100 66     21108 if ( !$open_or_close
      66        
      33        
5367             || $side_comment_length > 0
5368             || $is_outdented_line
5369             || !$line_length )
5370             {
5371 7225         17823 $self->valign_output_step_C(
5372             $seqno_string,
5373             $last_nonblank_seqno_string,
5374              
5375             $line,
5376             $leading_space_count,
5377             $level,
5378             $Kend,
5379             );
5380             }
5381             else {
5382 159         350 $cached_line_text = $line;
5383 159         278 $cached_line_text_length = $line_length;
5384 159         320 $cached_line_type = $open_or_close;
5385 159         287 $cached_line_opening_flag = $opening_flag;
5386 159         266 $cached_line_closing_flag = $closing_flag;
5387 159         284 $cached_seqno = $seqno;
5388 159         293 $cached_line_valid = $valid;
5389 159         280 $cached_line_leading_space_count = $leading_space_count;
5390 159         316 $cached_seqno_string = $seqno_string;
5391 159         258 $cached_line_Kend = $Kend;
5392 159         268 $cached_line_maximum_length = $maximum_line_length;
5393             }
5394              
5395 7384         12679 $self->[_last_level_written_] = $level;
5396 7384         11223 $self->[_last_side_comment_length_] = $side_comment_length;
5397 7384         16406 return;
5398             } ## end sub valign_output_step_B
5399             }
5400              
5401             ###############################
5402             # CODE SECTION 8: Output Step C
5403             ###############################
5404              
5405             { ## closure for sub valign_output_step_C
5406              
5407             # Vertical alignment buffer used by valign_output_step_C
5408             my $valign_buffer_filling;
5409             my @valign_buffer;
5410              
5411             sub initialize_valign_buffer {
5412 561     561 0 1536 @valign_buffer = ();
5413 561         1338 $valign_buffer_filling = EMPTY_STRING;
5414 561         969 return;
5415             }
5416              
5417             sub dump_valign_buffer {
5418 1820     1820 0 3643 my ($self) = @_;
5419              
5420             # Send all lines in the current buffer on to step_D
5421 1820 100       4786 if (@valign_buffer) {
5422 2         8 foreach (@valign_buffer) {
5423 7         13 $self->valign_output_step_D( @{$_} );
  7         18  
5424             }
5425 2         11 @valign_buffer = ();
5426             }
5427 1820         3567 $valign_buffer_filling = EMPTY_STRING;
5428 1820         3059 return;
5429             } ## end sub dump_valign_buffer
5430              
5431             sub reduce_valign_buffer_indentation {
5432              
5433 3     3 0 8 my ( $self, $diff ) = @_;
5434              
5435             # Reduce the leading indentation of lines in the current
5436             # buffer by $diff spaces
5437 3 100 66     17 if ( $valign_buffer_filling && $diff ) {
5438 2         5 my $max_valign_buffer = @valign_buffer;
5439 2         8 foreach my $i ( 0 .. $max_valign_buffer - 1 ) {
5440             my ( $line, $leading_space_count, $level, $Kend ) =
5441 7         12 @{ $valign_buffer[$i] };
  7         18  
5442 7         16 my $ws = substr( $line, 0, $diff );
5443 7 50 33     43 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
5444 7         18 $line = substr( $line, $diff );
5445             }
5446 7 50       21 if ( $leading_space_count >= $diff ) {
5447 7         11 $leading_space_count -= $diff;
5448 7         19 $level =
5449             $self->level_change( $leading_space_count, $diff,
5450             $level );
5451             }
5452 7         38 $valign_buffer[$i] =
5453             [ $line, $leading_space_count, $level, $Kend ];
5454             }
5455             }
5456 3         9 return;
5457             } ## end sub reduce_valign_buffer_indentation
5458              
5459             sub valign_output_step_C {
5460              
5461             #-----------------------------------------------------------------------
5462             # This is Step C in writing vertically aligned lines.
5463             # Lines are either stored in a buffer or passed along to the next step.
5464             # The reason for storing lines is that we may later want to reduce their
5465             # indentation when -sot and -sct are both used.
5466             #-----------------------------------------------------------------------
5467             my (
5468 7333     7333 0 21232 $self,
5469             $seqno_string,
5470             $last_nonblank_seqno_string,
5471              
5472             @args_to_D,
5473             ) = @_;
5474              
5475             # Dump any saved lines if we see a line with an unbalanced opening or
5476             # closing token.
5477 7333 100 100     17321 $self->dump_valign_buffer()
5478             if ( $seqno_string && $valign_buffer_filling );
5479              
5480             # Either store or write this line
5481 7333 100       13401 if ($valign_buffer_filling) {
5482 7         34 push @valign_buffer, [@args_to_D];
5483             }
5484             else {
5485 7326         16965 $self->valign_output_step_D(@args_to_D);
5486             }
5487              
5488             # For lines starting or ending with opening or closing tokens..
5489 7333 100       14530 if ($seqno_string) {
5490 394         826 $last_nonblank_seqno_string = $seqno_string;
5491 394         1253 set_last_nonblank_seqno_string($seqno_string);
5492              
5493             # Start storing lines when we see a line with multiple stacked
5494             # opening tokens.
5495             # patch for RT #94354, requested by Colin Williams
5496 394 100 100     1902 if ( index( $seqno_string, ':' ) >= 0
      100        
5497             && $seqno_string =~ /^\d+(\:+\d+)+$/
5498             && $args_to_D[0] !~ /^[\}\)\]\:\?]/ )
5499             {
5500              
5501             # This test is efficient but a little subtle: The first test
5502             # says that we have multiple sequence numbers and hence
5503             # multiple opening or closing tokens in this line. The second
5504             # part of the test rejects stacked closing and ternary tokens.
5505             # So if we get here then we should have stacked unbalanced
5506             # opening tokens.
5507              
5508             # Here is a complex example:
5509              
5510             # Foo($Bar[0], { # (side comment)
5511             # baz => 1,
5512             # });
5513              
5514             # The first line has sequence 6::4. It does not begin with
5515             # a closing token or ternary, so it passes the test and must be
5516             # stacked opening tokens.
5517              
5518             # The last line has sequence 4:6 but is a stack of closing
5519             # tokens, so it gets rejected.
5520              
5521             # Note that the sequence number of an opening token for a qw
5522             # quote is a negative number and will be rejected. For
5523             # example, for the following line: skip_symbols([qw(
5524             # $seqno_string='10:5:-1'. It would be okay to accept it but I
5525             # decided not to do this after testing.
5526              
5527 8         24 $valign_buffer_filling = $seqno_string;
5528              
5529             }
5530             }
5531 7333         13922 return;
5532             } ## end sub valign_output_step_C
5533             }
5534              
5535             ###############################
5536             # CODE SECTION 9: Output Step D
5537             ###############################
5538              
5539             sub valign_output_step_D {
5540              
5541             #----------------------------------------------------------------
5542             # This is Step D in writing vertically aligned lines.
5543             # It is the end of the vertical alignment pipeline.
5544             # Write one vertically aligned line of code to the output object.
5545             #----------------------------------------------------------------
5546              
5547 7333     7333 0 16391 my ( $self, $line, $leading_space_count, $level, $Kend ) = @_;
5548              
5549             # The line is currently correct if there is no tabbing (recommended!)
5550             # We may have to lop off some leading spaces and replace with tabs.
5551 7333 100       15925 if ( $leading_space_count > 0 ) {
5552              
5553 4326         8502 my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
5554 4326         7362 my $rOpts_tabs = $self->[_rOpts_tabs_];
5555 4326         6950 my $rOpts_entab_leading_whitespace =
5556             $self->[_rOpts_entab_leading_whitespace_];
5557              
5558             # Nothing to do if no tabs
5559 4326 100 66     18257 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
    50 66        
5560             || $rOpts_indent_columns <= 0 )
5561             {
5562              
5563             # nothing to do
5564             }
5565              
5566             # Handle entab option
5567             elsif ($rOpts_entab_leading_whitespace) {
5568              
5569             # Patch 12-nov-2018 based on report from Glenn. Extra padding was
5570             # not correctly entabbed, nor were side comments: Increase leading
5571             # space count for a padded line to get correct tabbing
5572 45 50       269 if ( $line =~ /^(\s+)(.*)$/ ) {
5573 45         135 my $spaces = length($1);
5574 45 50       107 if ( $spaces > $leading_space_count ) {
5575 0         0 $leading_space_count = $spaces;
5576             }
5577             }
5578              
5579 45         90 my $space_count =
5580             $leading_space_count % $rOpts_entab_leading_whitespace;
5581 45         97 my $tab_count =
5582             int( $leading_space_count / $rOpts_entab_leading_whitespace );
5583 45         112 my $leading_string = "\t" x $tab_count . SPACE x $space_count;
5584 45 50       587 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
5585 45         156 substr( $line, 0, $leading_space_count, $leading_string );
5586             }
5587             else {
5588              
5589             # shouldn't happen - program error counting whitespace
5590             # - skip entabbing
5591 0         0 DEBUG_TABS
5592             && warning(
5593             "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
5594             );
5595             }
5596             }
5597              
5598             # Handle option of one tab per level
5599             else {
5600 0         0 my $leading_string = ( "\t" x $level );
5601 0         0 my $space_count =
5602             $leading_space_count - $level * $rOpts_indent_columns;
5603              
5604             # shouldn't happen:
5605 0 0       0 if ( $space_count < 0 ) {
5606              
5607             # But it could be an outdented comment
5608 0 0       0 if ( $line !~ /^\s*#/ ) {
5609 0         0 DEBUG_TABS
5610             && warning(
5611             "Error entabbing in valign_output_step_D: for level=$level count=$leading_space_count\n"
5612             );
5613             }
5614 0         0 $leading_string = ( SPACE x $leading_space_count );
5615             }
5616             else {
5617 0         0 $leading_string .= ( SPACE x $space_count );
5618             }
5619 0 0       0 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
5620 0         0 substr( $line, 0, $leading_space_count, $leading_string );
5621             }
5622             else {
5623              
5624             # shouldn't happen - program error counting whitespace
5625             # we'll skip entabbing
5626 0         0 DEBUG_TABS
5627             && warning(
5628             "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
5629             );
5630             }
5631             }
5632             }
5633 7333         13137 my $file_writer_object = $self->[_file_writer_object_];
5634 7333         33271 $file_writer_object->write_code_line( $line . "\n", $Kend );
5635              
5636 7333         14904 return;
5637             } ## end sub valign_output_step_D
5638              
5639             ##########################
5640             # CODE SECTION 10: Summary
5641             ##########################
5642              
5643             sub report_anything_unusual {
5644 561     561 0 1431 my $self = shift;
5645              
5646 561         1650 my $outdented_line_count = $self->[_outdented_line_count_];
5647 561 100       2155 if ( $outdented_line_count > 0 ) {
5648 21         145 write_logfile_entry(
5649             "$outdented_line_count long lines were outdented:\n");
5650 21         74 my $first_outdented_line_at = $self->[_first_outdented_line_at_];
5651 21         128 write_logfile_entry(
5652             " First at output line $first_outdented_line_at\n");
5653              
5654 21 100       180 if ( $outdented_line_count > 1 ) {
5655 7         25 my $last_outdented_line_at = $self->[_last_outdented_line_at_];
5656 7         36 write_logfile_entry(
5657             " Last at output line $last_outdented_line_at\n");
5658             }
5659             write_logfile_entry(
5660 21         125 " use -noll to prevent outdenting, -l=n to increase line length\n"
5661             );
5662 21         115 write_logfile_entry("\n");
5663             }
5664 561         1478 return;
5665             } ## end sub report_anything_unusual
5666             1;