package Perl::Tidy::VerticalAligner;
use strict;
use warnings;
our $VERSION = '20210111';
use Perl::Tidy::VerticalAligner::Alignment;
use Perl::Tidy::VerticalAligner::Line;
# The Perl::Tidy::VerticalAligner package collects output lines and
# attempts to line up certain common tokens, such as => and #, which are
# identified by the calling routine.
#
# Usage:
# - Initiate an object with a call to new().
# - Write lines one-by-one with calls to valign_input().
# - Make a final call to flush() to empty the pipeline.
#
# The sub valign_input collects lines into groups. When a group reaches
# the maximum possible size it is processed for alignment and output.
# The maximum group size is reached whenerver there is a change in indentation
# level, a blank line, a block comment, or an external flush call. The calling
# routine may also force a break in alignment at any time.
#
# If the calling routine needs to interrupt the output and send other text to
# the output, it must first call flush() to empty the output pipeline. This
# might occur for example if a block of pod text needs to be sent to the output
# between blocks of code.
# It is essential that a final call to flush() be made. Otherwise some
# final lines of text will be lost.
# Index...
# CODE SECTION 1: Preliminary code, global definitions and sub new
# sub new
# CODE SECTION 2: Some Basic Utilities
# CODE SECTION 3: Code to accept input and form groups
# sub valign_input
# CODE SECTION 4: Code to process comment lines
# sub _flush_comment_lines
# CODE SECTION 5: Code to process groups of code lines
# sub _flush_group_lines
# CODE SECTION 6: Output Step A
# sub valign_output_step_A
# CODE SECTION 7: Output Step B
# sub valign_output_step_B
# CODE SECTION 8: Output Step C
# sub valign_output_step_C
# CODE SECTION 9: Output Step D
# sub valign_output_step_D
# CODE SECTION 10: Summary
# sub report_anything_unusual
##################################################################
# CODE SECTION 1: Preliminary code, global definitions and sub new
##################################################################
sub AUTOLOAD {
# Catch any undefined sub calls so that we are sure to get
# some diagnostic information. This sub should never be called
# except for a programming error.
our $AUTOLOAD;
return if ( $AUTOLOAD =~ /\bDESTROY$/ );
my ( $pkg, $fname, $lno ) = caller();
my $my_package = __PACKAGE__;
print STDERR <<EOM;
======================================================================
Error detected in package '$my_package', version $VERSION
Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
Called from package: '$pkg'
Called from File '$fname' at line '$lno'
This error is probably due to a recent programming change
======================================================================
EOM
exit 1;
}
sub DESTROY {
# required to avoid call to AUTOLOAD in some versions of perl
}
BEGIN {
# Define the fixed indexes for variables in $self, which is an array
# reference. Note the convention of leading and trailing underscores to
# keep them unique.
my $i = 0;
use constant {
_file_writer_object_ => $i++,
_logger_object_ => $i++,
_diagnostics_object_ => $i++,
_length_function_ => $i++,
_rOpts_ => $i++,
_rOpts_indent_columns_ => $i++,
_rOpts_tabs_ => $i++,
_rOpts_entab_leading_whitespace_ => $i++,
_rOpts_fixed_position_side_comment_ => $i++,
_rOpts_minimum_space_to_comment_ => $i++,
_rOpts_maximum_line_length_ => $i++,
_rOpts_variable_maximum_line_length_ => $i++,
_rOpts_valign_ => $i++,
_last_level_written_ => $i++,
_last_side_comment_column_ => $i++,
_last_side_comment_line_number_ => $i++,
_last_side_comment_length_ => $i++,
_last_side_comment_level_ => $i++,
_outdented_line_count_ => $i++,
_first_outdented_line_at_ => $i++,
_last_outdented_line_at_ => $i++,
_consecutive_block_comments_ => $i++,
_rgroup_lines_ => $i++,
_group_level_ => $i++,
_group_type_ => $i++,
_zero_count_ => $i++,
_last_leading_space_count_ => $i++,
_comment_leading_space_count_ => $i++,
_extra_indent_ok_ => $i++,
};
# Debug flag. This is a relic from the original program development
# looking for problems with tab characters. Caution: this debug flag can
# produce a lot of output It should be 0 except when debugging small
# scripts.
use constant DEBUG_TABS => 0;
my $debug_warning = sub {
print STDOUT "VALIGN_DEBUGGING with key $_[0]\n";
return;
};
DEBUG_TABS && $debug_warning->('TABS');
}
sub new {
my ( $class, @args ) = @_;
my %defaults = (
rOpts => undef,
file_writer_object => undef,
logger_object => undef,
diagnostics_object => undef,
length_function => sub { return length( $_[0] ) },
);
my %args = ( %defaults, @args );
# Initialize other caches and buffers
initialize_step_B_cache();
initialize_valign_buffer();
initialize_leading_string_cache();
initialize_decode();
# Initialize all variables in $self.
# To add an item to $self, first define a new constant index in the BEGIN
# section.
my $self = [];
# objects
$self->[_file_writer_object_] = $args{file_writer_object};
$self->[_logger_object_] = $args{logger_object};
$self->[_diagnostics_object_] = $args{diagnostics_object};
$self->[_length_function_] = $args{length_function};
# shortcuts to user options
my $rOpts = $args{rOpts};
$self->[_rOpts_] = $rOpts;
$self->[_rOpts_indent_columns_] = $rOpts->{'indent-columns'};
$self->[_rOpts_tabs_] = $rOpts->{'tabs'};
$self->[_rOpts_entab_leading_whitespace_] =
$rOpts->{'entab-leading-whitespace'};
$self->[_rOpts_fixed_position_side_comment_] =
$rOpts->{'fixed-position-side-comment'};
$self->[_rOpts_minimum_space_to_comment_] =
$rOpts->{'minimum-space-to-comment'};
$self->[_rOpts_maximum_line_length_] = $rOpts->{'maximum-line-length'};
$self->[_rOpts_variable_maximum_line_length_] =
$rOpts->{'variable-maximum-line-length'};
$self->[_rOpts_valign_] = $rOpts->{'valign'};
# Batch of lines being collected
$self->[_rgroup_lines_] = [];
$self->[_group_level_] = 0;
$self->[_group_type_] = "";
$self->[_zero_count_] = 0;
$self->[_comment_leading_space_count_] = 0;
$self->[_last_leading_space_count_] = 0;
$self->[_extra_indent_ok_] = 0;
# Memory of what has been processed
$self->[_last_level_written_] = -1;
$self->[_last_side_comment_column_] = 0;
$self->[_last_side_comment_line_number_] = 0;
$self->[_last_side_comment_length_] = 0;
$self->[_last_side_comment_level_] = -1;
$self->[_outdented_line_count_] = 0;
$self->[_first_outdented_line_at_] = 0;
$self->[_last_outdented_line_at_] = 0;
$self->[_consecutive_block_comments_] = 0;
bless $self, $class;
return $self;
}
#################################
# CODE SECTION 2: Basic Utilities
#################################
sub flush {
# flush() is the external call to completely empty the pipeline.
my ($self) = @_;
# push things out the pipline...
# push out any current group lines
$self->_flush_group_lines();
# then anything left in the cache of step_B
$self->_flush_cache();
# then anything left in the buffer of step_C
$self->dump_valign_buffer();
return;
}
sub initialize_for_new_group {
my ($self) = @_;
$self->[_rgroup_lines_] = [];
$self->[_group_type_] = "";
$self->[_zero_count_] = 0;
$self->[_comment_leading_space_count_] = 0;
$self->[_last_leading_space_count_] = 0;
# Note that the value for _group_level_ is
# handled separately in sub valign_input
return;
}
sub group_line_count {
return +@{ $_[0]->[_rgroup_lines_] };
}
# interface to Perl::Tidy::Diagnostics routines
# For debugging; not currently used
sub write_diagnostics {
my ( $self, $msg ) = @_;
my $diagnostics_object = $self->[_diagnostics_object_];
if ($diagnostics_object) {
$diagnostics_object->write_diagnostics($msg);
}
return;
}
# interface to Perl::Tidy::Logger routines
sub warning {
my ( $self, $msg ) = @_;
my $logger_object = $self->[_logger_object_];
if ($logger_object) {
$logger_object->warning($msg);
}
return;
}
sub write_logfile_entry {
my ( $self, $msg ) = @_;
my $logger_object = $self->[_logger_object_];
if ($logger_object) {
$logger_object->write_logfile_entry($msg);
}
return;
}
sub report_definite_bug {
my ( $self, $msg ) = @_;
my $logger_object = $self->[_logger_object_];
if ($logger_object) {
$logger_object->report_definite_bug();
}
return;
}
sub get_cached_line_count {
my $self = shift;
return $self->group_line_count() + ( get_cached_line_type() ? 1 : 0 );
}
sub get_spaces {
# return the number of leading spaces associated with an indentation
# variable $indentation is either a constant number of spaces or an
# object with a get_spaces method.
my $indentation = shift;
return ref($indentation) ? $indentation->get_spaces() : $indentation;
}
sub get_recoverable_spaces {
# return the number of spaces (+ means shift right, - means shift left)
# that we would like to shift a group of lines with the same indentation
# to get them to line up with their opening parens
my $indentation = shift;
return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
}
sub maximum_line_length_for_level {
# return maximum line length for line starting with a given level
my ( $self, $level ) = @_;
my $maximum_line_length = $self->[_rOpts_maximum_line_length_];
if ( $self->[_rOpts_variable_maximum_line_length_] ) {
if ( $level < 0 ) { $level = 0 }
$maximum_line_length += $level * $self->[_rOpts_indent_columns_];
}
return $maximum_line_length;
}
######################################################
# CODE SECTION 3: Code to accept input and form groups
######################################################
sub push_group_line {
my ( $self, $new_line ) = @_;
my $rgroup_lines = $self->[_rgroup_lines_];
push @{$rgroup_lines}, $new_line;
return;
}
use constant DEBUG_VALIGN => 0;
use constant SC_LONG_LINE_DIFF => 12;
sub valign_input {
# Place one line in the current vertical group.
#
# The input parameters are:
# $level = indentation level of this line
# $rfields = reference to array of fields
# $rpatterns = reference to array of patterns, one per field
# $rtokens = reference to array of tokens starting fields 1,2,..
#
# Here is an example of what this package does. In this example,
# we are trying to line up both the '=>' and the '#'.
#
# '18' => 'grave', # \`
# '19' => 'acute', # `'
# '20' => 'caron', # \v
# <-tabs-><f1-><--field 2 ---><-f3->
# | | | |
# | | | |
# col1 col2 col3 col4
#
# The calling routine has already broken the entire line into 3 fields as
# indicated. (So the work of identifying promising common tokens has
# already been done).
#
# In this example, there will be 2 tokens being matched: '=>' and '#'.
# They are the leading parts of fields 2 and 3, but we do need to know
# what they are so that we can dump a group of lines when these tokens
# change.
#
# The fields contain the actual characters of each field. The patterns
# are like the fields, but they contain mainly token types instead
# of tokens, so they have fewer characters. They are used to be
# sure we are matching fields of similar type.
#
# In this example, there will be 4 column indexes being adjusted. The
# first one is always at zero. The interior columns are at the start of
# the matching tokens, and the last one tracks the maximum line length.
#
# Each time a new line comes in, it joins the current vertical
# group if possible. Otherwise it causes the current group to be flushed
# and a new group is started.
#
# For each new group member, the column locations are increased, as
# necessary, to make room for the new fields. When the group is finally
# output, these column numbers are used to compute the amount of spaces of
# padding needed for each field.
#
# Programming note: the fields are assumed not to have any tab characters.
# Tabs have been previously removed except for tabs in quoted strings and
# side comments. Tabs in these fields can mess up the column counting.
# The log file warns the user if there are any such tabs.
my ( $self, $rline_hash ) = @_;
my $level = $rline_hash->{level};
my $level_end = $rline_hash->{level_end};
my $level_adj = $rline_hash->{level_adj};
my $indentation = $rline_hash->{indentation};
my $list_seqno = $rline_hash->{list_seqno};
my $outdent_long_lines = $rline_hash->{outdent_long_lines};
my $is_terminal_ternary = $rline_hash->{is_terminal_ternary};
my $is_terminal_statement = $rline_hash->{is_terminal_statement};
my $rvertical_tightness_flags = $rline_hash->{rvertical_tightness_flags};
my $level_jump = $rline_hash->{level_jump};
my $rfields = $rline_hash->{rfields};
my $rtokens = $rline_hash->{rtokens};
my $rpatterns = $rline_hash->{rpatterns};
my $rfield_lengths = $rline_hash->{rfield_lengths};
my $terminal_block_type = $rline_hash->{terminal_block_type};
my $batch_count = $rline_hash->{batch_count};
my $break_alignment_before = $rline_hash->{break_alignment_before};
my $break_alignment_after = $rline_hash->{break_alignment_after};
my $Kend = $rline_hash->{Kend};
my $ci_level = $rline_hash->{ci_level};
# The index '$Kend' is a value which passed along with the line text to sub
# 'write_code_line' for a convergence check.
# number of fields is $jmax
# number of tokens between fields is $jmax-1
my $jmax = @{$rfields} - 1;
my $leading_space_count = get_spaces($indentation);
# set outdented flag to be sure we either align within statements or
# across statement boundaries, but not both.
my $is_outdented =
$self->[_last_leading_space_count_] > $leading_space_count;
$self->[_last_leading_space_count_] = $leading_space_count;
# Identify a hanging side comment. Hanging side comments have an empty
# initial field.
my $is_hanging_side_comment =
( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
# Undo outdented flag for a hanging side comment
$is_outdented = 0 if $is_hanging_side_comment;
# Identify a block comment.
my $is_block_comment = $jmax == 0 && substr( $rfields->[0], 0, 1 ) eq '#';
# Block comment .. update count
if ($is_block_comment) {
$self->[_consecutive_block_comments_]++;
}
# Not a block comment ..
# Forget side comment column if we saw 2 or more block comments,
# and reset the count
else {
if ( $self->[_consecutive_block_comments_] > 1 ) {
$self->forget_side_comment();
}
$self->[_consecutive_block_comments_] = 0;
}
# Reset side comment location if we are entering a new block from level 0.
# This is intended to keep them from drifting too far to the right.
if ( $terminal_block_type && $level_adj == 0 && $level_end > $level ) {
$self->forget_side_comment();
}
my $group_level = $self->[_group_level_];
DEBUG_VALIGN && do {
my $nlines = $self->group_line_count();
print STDOUT
"Entering valign_input: lines=$nlines new #fields= $jmax, leading_count=$leading_space_count, level_jump=$level_jump, level=$level, group_level=$group_level, level_jump=$level_jump\n";
};
# Validate cached line if necessary: If we can produce a container
# with just 2 lines total by combining an existing cached opening
# token with the closing token to follow, then we will mark both
# cached flags as valid.
my $cached_line_type = get_cached_line_type();
if ($cached_line_type) {
my $cached_line_flag = get_cached_line_flag();
if ($rvertical_tightness_flags) {
my $cached_seqno = get_cached_seqno();
if ( $cached_seqno
&& $self->group_line_count() <= 1
&& $rvertical_tightness_flags->[2]
&& $rvertical_tightness_flags->[2] == $cached_seqno )
{
$rvertical_tightness_flags->[3] ||= 1;
set_cached_line_valid(1);
}
}
# do not join an opening block brace with an unbalanced line
# unless requested with a flag value of 2
if ( $cached_line_type == 3
&& !$self->group_line_count()
&& $cached_line_flag < 2
&& $level_jump != 0 )
{
set_cached_line_valid(0);
}
}
# shouldn't happen:
if ( $level < 0 ) { $level = 0 }
# do not align code across indentation level changes
# or if vertical alignment is turned off for debugging
if ( $level != $group_level || $is_outdented || !$self->[_rOpts_valign_] ) {
# we are allowed to shift a group of lines to the right if its
# level is greater than the previous and next group
$self->[_extra_indent_ok_] =
( $level < $group_level
&& $self->[_last_level_written_] < $group_level );
$self->_flush_group_lines();
# If we know that this line will get flushed out by itself because
# of level changes, we can leave the extra_indent_ok flag set.
# That way, if we get an external flush call, we will still be
# able to do some -lp alignment if necessary.
$self->[_extra_indent_ok_] =
( $is_terminal_statement && $level > $group_level );
$group_level = $level;
$self->[_group_level_] = $group_level;
# wait until after the above flush to get the leading space
# count because it may have been changed if the -icp flag is in
# effect
$leading_space_count = get_spaces($indentation);
}
# --------------------------------------------------------------------
# Collect outdentable block COMMENTS
# --------------------------------------------------------------------
my $is_blank_line = "";
if ( $self->[_group_type_] eq 'COMMENT' ) {
if (
(
$is_block_comment
&& $outdent_long_lines
&& $leading_space_count ==
$self->[_comment_leading_space_count_]
)
|| $is_blank_line
)
{
# Note that for a comment group we are not storing a line
# but rather just the text and its length.
$self->push_group_line(
[ $rfields->[0], $rfield_lengths->[0], $Kend ] );
return;
}
else {
$self->_flush_group_lines();
}
}
my $rgroup_lines = $self->[_rgroup_lines_];
if ( $break_alignment_before && @{$rgroup_lines} ) {
$rgroup_lines->[-1]->set_end_group(1);
}
# --------------------------------------------------------------------
# add dummy fields for terminal ternary
# --------------------------------------------------------------------
my $j_terminal_match;
if ( $is_terminal_ternary && @{$rgroup_lines} ) {
$j_terminal_match =
fix_terminal_ternary( $rgroup_lines->[-1], $rfields, $rtokens,
$rpatterns, $rfield_lengths, $group_level, );
$jmax = @{$rfields} - 1;
}
# --------------------------------------------------------------------
# add dummy fields for else statement
# --------------------------------------------------------------------
# Note the trailing space after 'else' here. If there were no space between
# the else and the next '{' then we would not be able to do vertical
# alignment of the '{'.
if ( $rfields->[0] eq 'else '
&& @{$rgroup_lines}
&& $level_jump == 0 )
{
$j_terminal_match =
fix_terminal_else( $rgroup_lines->[-1], $rfields, $rtokens,
$rpatterns, $rfield_lengths );
$jmax = @{$rfields} - 1;
}
# --------------------------------------------------------------------
# Handle simple line of code with no fields to match.
# --------------------------------------------------------------------
if ( $jmax <= 0 ) {
$self->[_zero_count_]++;
if ( @{$rgroup_lines}
&& !get_recoverable_spaces( $rgroup_lines->[0]->get_indentation() )
)
{
# flush the current group if it has some aligned columns..
# or we haven't seen a comment lately
if ( $rgroup_lines->[0]->get_jmax() > 1
|| $self->[_zero_count_] > 3 )
{
$self->_flush_group_lines();
}
}
# start new COMMENT group if this comment may be outdented
if ( $is_block_comment
&& $outdent_long_lines
&& !$self->group_line_count() )
{
$self->[_group_type_] = 'COMMENT';
$self->[_comment_leading_space_count_] = $leading_space_count;
$self->push_group_line(
[ $rfields->[0], $rfield_lengths->[0], $Kend ] );
return;
}
# just write this line directly if no current group, no side comment,
# and no space recovery is needed.
if ( !$self->group_line_count()
&& !get_recoverable_spaces($indentation) )
{
$self->valign_output_step_B(
{
leading_space_count => $leading_space_count,
line => $rfields->[0],
line_length => $rfield_lengths->[0],
side_comment_length => 0,
outdent_long_lines => $outdent_long_lines,
rvertical_tightness_flags => $rvertical_tightness_flags,
level => $level,
Kend => $Kend,
}
);
return;
}
}
else {
$self->[_zero_count_] = 0;
}
# programming check: (shouldn't happen)
# The number of tokens which separate the fields must always be
# one less than the number of fields. If this is not true then
# an error has been made by the Formatter in defining these
# quantities. See Formatter.pm/sub make_alignment_patterns.
if ( @{$rfields} && ( @{$rtokens} != ( @{$rfields} - 1 ) ) ) {
my $nt = @{$rtokens};
my $nf = @{$rfields};
my $msg = <<EOM;
"Program bug in Perl::Tidy::VerticalAligner - number of tokens = $nt should be one less than number of fields: $nf)\n"
EOM
$self->warning($msg);
$self->report_definite_bug();
# TODO: this has never happened, but we should probably call Die here.
# Needs some testing
# Perl::Tidy::Die($msg);
}
my $maximum_line_length_for_level =
$self->maximum_line_length_for_level($level);
# --------------------------------------------------------------------
# It simplifies things to create a zero length side comment
# if none exists.
# --------------------------------------------------------------------
$self->make_side_comment( $rtokens, $rfields, $rpatterns, $rfield_lengths );
$jmax = @{$rfields} - 1;
# --------------------------------------------------------------------
# create an object to hold this line
# --------------------------------------------------------------------
my $new_line = Perl::Tidy::VerticalAligner::Line->new(
{
jmax => $jmax,
rtokens => $rtokens,
rfields => $rfields,
rpatterns => $rpatterns,
rfield_lengths => $rfield_lengths,
indentation => $indentation,
leading_space_count => $leading_space_count,
outdent_long_lines => $outdent_long_lines,
list_seqno => $list_seqno,
list_type => "",
is_hanging_side_comment => $is_hanging_side_comment,
maximum_line_length => $maximum_line_length_for_level,
rvertical_tightness_flags => $rvertical_tightness_flags,
is_terminal_ternary => $is_terminal_ternary,
j_terminal_match => $j_terminal_match,
end_group => $break_alignment_after,
Kend => $Kend,
ci_level => $ci_level,
imax_pair => -1,
}
);
# --------------------------------------------------------------------
# Decide if this is a simple list of items.
# We use this to be less restrictive in deciding what to align.
# --------------------------------------------------------------------
decide_if_list($new_line) if ($list_seqno);
# --------------------------------------------------------------------
# Append this line to the current group (or start new group)
# --------------------------------------------------------------------
$self->push_group_line($new_line);
# output this group if it ends in a terminal else or ternary line
if ( defined($j_terminal_match) ) {
$self->_flush_group_lines();
}
# Force break after jump to lower level
if ( $level_jump < 0 ) {
$self->_flush_group_lines();
}
# --------------------------------------------------------------------
# Some old debugging stuff
# --------------------------------------------------------------------
DEBUG_VALIGN && do {
print STDOUT "exiting valign_input fields:";
dump_array( @{$rfields} );
print STDOUT "exiting valign_input tokens:";
dump_array( @{$rtokens} );
print STDOUT "exiting valign_input patterns:";
dump_array( @{$rpatterns} );
};
return;
}
sub join_hanging_comment {
# Add dummy fields to a hanging side comment to make it look
# like the first line in its potential group. This simplifies
# the coding.
my ( $new_line, $old_line ) = @_;
my $jmax = $new_line->get_jmax();
# must be 2 fields
return 0 unless $jmax == 1;
my $rtokens = $new_line->get_rtokens();
# the second field must be a comment
return 0 unless $rtokens->[0] eq '#';
my $rfields = $new_line->get_rfields();
# the first field must be empty
return 0 unless $rfields->[0] =~ /^\s*$/;
# the current line must have fewer fields
my $maximum_field_index = $old_line->get_jmax();
return 0
unless $maximum_field_index > $jmax;
# looks ok..
my $rpatterns = $new_line->get_rpatterns();
my $rfield_lengths = $new_line->get_rfield_lengths();
$new_line->set_is_hanging_side_comment(1);
$jmax = $maximum_field_index;
$new_line->set_jmax($jmax);
$rfields->[$jmax] = $rfields->[1];
$rfield_lengths->[$jmax] = $rfield_lengths->[1];
$rtokens->[ $jmax - 1 ] = $rtokens->[0];
$rpatterns->[ $jmax - 1 ] = $rpatterns->[0];
foreach my $j ( 1 .. $jmax - 1 ) {
$rfields->[$j] = '';
$rfield_lengths->[$j] = 0;
$rtokens->[ $j - 1 ] = "";
$rpatterns->[ $j - 1 ] = "";
}
return 1;
}
sub make_side_comment {
# create an empty side comment if none exists
my ( $self, $rtokens, $rfields, $rpatterns, $rfield_lengths ) = @_;
my $jmax = @{$rfields} - 1;
# if line does not have a side comment...
if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) {
$jmax += 1;
$rtokens->[ $jmax - 1 ] = '#';
$rfields->[$jmax] = '';
$rfield_lengths->[$jmax] = 0;
$rpatterns->[$jmax] = '#';
}
return;
}
{ ## closure for sub decide_if_list
my %is_comma_token;
BEGIN {
my @q = qw( => );
push @q, ',';
@is_comma_token{@q} = (1) x scalar(@q);
}
sub decide_if_list {
my $line = shift;
# A list will be taken to be a line with a forced break in which all
# of the field separators are commas or comma-arrows (except for the
# trailing #)
my $rtokens = $line->get_rtokens();
my $test_token = $rtokens->[0];
my ( $raw_tok, $lev, $tag, $tok_count ) =
decode_alignment_token($test_token);
if ( $is_comma_token{$raw_tok} ) {
my $list_type = $test_token;
my $jmax = $line->get_jmax();
foreach ( 1 .. $jmax - 2 ) {
( $raw_tok, $lev, $tag, $tok_count ) =
decode_alignment_token( $rtokens->[$_] );
if ( !$is_comma_token{$raw_tok} ) {
$list_type = "";
last;
}
}
$line->set_list_type($list_type);
}
return;
}
}
sub fix_terminal_ternary {
# Add empty fields as necessary to align a ternary term
# like this:
#
# my $leapyear =
# $year % 4 ? 0
# : $year % 100 ? 1
# : $year % 400 ? 0
# : 1;
#
# returns the index of the terminal question token, if any
my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths,
$group_level )
= @_;
return unless ($old_line);
use constant EXPLAIN_TERNARY => 0;
my $jmax = @{$rfields} - 1;
my $rfields_old = $old_line->get_rfields();
my $rpatterns_old = $old_line->get_rpatterns();
my $rtokens_old = $old_line->get_rtokens();
my $maximum_field_index = $old_line->get_jmax();
# look for the question mark after the :
my ($jquestion);
my $depth_question;
my $pad = "";
my $pad_length = 0;
foreach my $j ( 0 .. $maximum_field_index - 1 ) {
my $tok = $rtokens_old->[$j];
my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
if ( $raw_tok eq '?' ) {
$depth_question = $lev;
# depth must be correct
next unless ( $depth_question eq $group_level );
$jquestion = $j;
if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
$pad_length = length($1);
$pad = " " x $pad_length;
}
else {
return; # shouldn't happen
}
last;
}
}
return unless ( defined($jquestion) ); # shouldn't happen
# Now splice the tokens and patterns of the previous line
# into the else line to insure a match. Add empty fields
# as necessary.
my $jadd = $jquestion;
# Work on copies of the actual arrays in case we have
# to return due to an error
my @fields = @{$rfields};
my @patterns = @{$rpatterns};
my @tokens = @{$rtokens};
my @field_lengths = @{$rfield_lengths};
EXPLAIN_TERNARY && do {
local $" = '><';
print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n";
print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n";
print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
};
# handle cases of leading colon on this line
if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
my ( $colon, $therest ) = ( $1, $2 );
# Handle sub-case of first field with leading colon plus additional code
# This is the usual situation as at the '1' below:
# ...
# : $year % 400 ? 0
# : 1;
if ($therest) {
# Split the first field after the leading colon and insert padding.
# Note that this padding will remain even if the terminal value goes
# out on a separate line. This does not seem to look to bad, so no
# mechanism has been included to undo it.
my $field1 = shift @fields;
my $field_length1 = shift @field_lengths;
my $len_colon = length($colon);
unshift @fields, ( $colon, $pad . $therest );
unshift @field_lengths,
( $len_colon, $pad_length + $field_length1 - $len_colon );
# change the leading pattern from : to ?
return unless ( $patterns[0] =~ s/^\:/?/ );
# install leading tokens and patterns of existing line
unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
# insert appropriate number of empty fields
splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
splice( @field_lengths, 1, 0, (0) x $jadd ) if $jadd;
}
# handle sub-case of first field just equal to leading colon.
# This can happen for example in the example below where
# the leading '(' would create a new alignment token
# : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
# : ( $mname = $name . '->' );
else {
return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
# prepend a leading ? onto the second pattern
$patterns[1] = "?b" . $patterns[1];
# pad the second field
$fields[1] = $pad . $fields[1];
$field_lengths[1] = $pad_length + $field_lengths[1];
# install leading tokens and patterns of existing line, replacing
# leading token and inserting appropriate number of empty fields
splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
splice( @field_lengths, 1, 0, (0) x $jadd ) if $jadd;
}
}
# Handle case of no leading colon on this line. This will
# be the case when -wba=':' is used. For example,
# $year % 400 ? 0 :
# 1;
else {
# install leading tokens and patterns of existing line
$patterns[0] = '?' . 'b' . $patterns[0];
unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
# insert appropriate number of empty fields
$jadd = $jquestion + 1;
$fields[0] = $pad . $fields[0];
$field_lengths[0] = $pad_length + $field_lengths[0];
splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
splice( @field_lengths, 0, 0, (0) x $jadd ) if $jadd;
}
EXPLAIN_TERNARY && do {
local $" = '><';
print STDOUT "MODIFIED TOKENS=<@tokens>\n";
print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
print STDOUT "MODIFIED FIELDS=<@fields>\n";
};
# all ok .. update the arrays
@{$rfields} = @fields;
@{$rtokens} = @tokens;
@{$rpatterns} = @patterns;
@{$rfield_lengths} = @field_lengths;
# force a flush after this line
return $jquestion;
}
sub fix_terminal_else {
# Add empty fields as necessary to align a balanced terminal
# else block to a previous if/elsif/unless block,
# like this:
#
# if ( 1 || $x ) { print "ok 13\n"; }
# else { print "not ok 13\n"; }
#
# returns a positive value if the else block should be indented
#
my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths ) = @_;
return unless ($old_line);
my $jmax = @{$rfields} - 1;
return unless ( $jmax > 0 );
# check for balanced else block following if/elsif/unless
my $rfields_old = $old_line->get_rfields();
# TBD: add handling for 'case'
return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
# look for the opening brace after the else, and extract the depth
my $tok_brace = $rtokens->[0];
my $depth_brace;
if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
# probably: "else # side_comment"
else { return }
my $rpatterns_old = $old_line->get_rpatterns();
my $rtokens_old = $old_line->get_rtokens();
my $maximum_field_index = $old_line->get_jmax();
# be sure the previous if/elsif is followed by an opening paren
my $jparen = 0;
my $tok_paren = '(' . $depth_brace;
my $tok_test = $rtokens_old->[$jparen];
return unless ( $tok_test eq $tok_paren ); # shouldn't happen
# Now find the opening block brace
my ($jbrace);
foreach my $j ( 1 .. $maximum_field_index - 1 ) {
my $tok = $rtokens_old->[$j];
if ( $tok eq $tok_brace ) {
$jbrace = $j;
last;
}
}
return unless ( defined($jbrace) ); # shouldn't happen
# Now splice the tokens and patterns of the previous line
# into the else line to insure a match. Add empty fields
# as necessary.
my $jadd = $jbrace - $jparen;
splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
splice( @{$rfields}, 1, 0, ('') x $jadd );
splice( @{$rfield_lengths}, 1, 0, (0) x $jadd );
# force a flush after this line if it does not follow a case
if ( $rfields_old->[0] =~ /^case\s*$/ ) { return }
else { return $jbrace }
}
my %is_closing_block_type;
BEGIN {
@_ = qw< } ] >;
@is_closing_block_type{@_} = (1) x scalar(@_);
}
sub check_match {
# See if the current line matches the current vertical alignment group.
my ( $self, $new_line, $base_line, $prev_line ) = @_;
# Given:
# $new_line = the line being considered for group inclusion
# $base_line = the first line of the current group
# $prev_line = the line just before $new_line
# returns a flag and a value as follows:
# return (0, $imax_align) if the line does not match
# return (1, $imax_align) if the line matches but does not fit
# return (2, $imax_align) if the line matches and fits
# Returns '$imax_align' which is the index of the maximum matching token.
# It will be used in the subsequent left-to-right sweep to align as many
# tokens as possible for lines which partially match.
my $imax_align = -1;
# variable $GoToMsg explains reason for no match, for debugging
my $GoToMsg = "";
use constant EXPLAIN_CHECK_MATCH => 0;
# This is a flag for testing alignment by sub sweep_left_to_right only.
# This test can help find problems with the alignment logic.
# This flag should normally be zero.
use constant TEST_SWEEP_ONLY => 0;
my $jmax = $new_line->get_jmax();
my $maximum_field_index = $base_line->get_jmax();
my $jlimit = $jmax - 2;
if ( $jmax > $maximum_field_index ) {
$jlimit = $maximum_field_index - 2;
}
if ( $new_line->get_is_hanging_side_comment() ) {
# HSC's can join the group if they fit
}
# Everything else
else {
# A group with hanging side comments ends with the first non hanging
# side comment.
if ( $base_line->get_is_hanging_side_comment() ) {
$GoToMsg = "end of hanging side comments";
goto NO_MATCH;
}
# The number of tokens that this line shares with the previous line
# has been stored with the previous line. This value was calculated
# and stored by sub 'match_line_pair'.
$imax_align = $prev_line->get_imax_pair();
if ( $imax_align != $jlimit ) {
$GoToMsg = "Not all tokens match: $imax_align != $jlimit\n";
goto NO_MATCH;
}
}
# The tokens match, but the lines must have identical number of
# tokens to join the group.
if ( $maximum_field_index != $jmax ) {
$GoToMsg = "token count differs";
goto NO_MATCH;
}
# The tokens match. Now See if there is space for this line in the
# current group.
if ( $self->check_fit( $new_line, $base_line ) && !TEST_SWEEP_ONLY ) {
EXPLAIN_CHECK_MATCH
&& print "match and fit, imax_align=$imax_align, jmax=$jmax\n";
return ( 2, $jlimit );
}
else {
EXPLAIN_CHECK_MATCH
&& print "match but no fit, imax_align=$imax_align, jmax=$jmax\n";
return ( 1, $jlimit );
}
NO_MATCH:
EXPLAIN_CHECK_MATCH
&& print
"no match because $GoToMsg, max match index =i $imax_align, jmax=$jmax\n";
return ( 0, $imax_align );
}
sub check_fit {
my ( $self, $new_line, $old_line ) = @_;
# The new line has alignments identical to the current group. Now we have
# to fit the new line into the group without causing a field to exceed the
# line length limit.
# return true if successful
# return false if not successful
my $jmax = $new_line->get_jmax();
my $leading_space_count = $new_line->get_leading_space_count();
my $rfield_lengths = $new_line->get_rfield_lengths();
my $padding_available = $old_line->get_available_space_on_right();
my $jmax_old = $old_line->get_jmax();
# Safety check ... only lines with equal array sizes should arrive here
# from sub check_match. So if this error occurs, look at recent changes in
# sub check_match. It is only supposed to check the fit of lines with
# identical numbers of alignment tokens.
if ( $jmax_old ne $jmax ) {
$self->warning(<<EOM);
Program bug detected in Perl::Tidy::VerticalAligner sub check_fit
unexpected difference in array lengths: $jmax != $jmax_old
EOM
return;
}
# Save current columns in case this line does not fit.
my @alignments = $old_line->get_alignments();
foreach my $alignment (@alignments) {
$alignment->save_column();
}
my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
# Loop over all alignments ...
my $maximum_field_index = $old_line->get_jmax();
for my $j ( 0 .. $jmax ) {
my $pad = $rfield_lengths->[$j] - $old_line->current_field_width($j);
if ( $j == 0 ) {
$pad += $leading_space_count;
}
# Keep going if this field does not need any space.
next if $pad < 0;
# See if it needs too much space.
if ( $pad > $padding_available ) {
################################################
# Line does not fit -- revert to starting state
################################################
foreach my $alignment (@alignments) {
$alignment->restore_column();
}
return;
}
# make room for this field
$old_line->increase_field_width( $j, $pad );
$padding_available -= $pad;
}
######################################
# The line fits, the match is accepted
######################################
return 1;
}
sub install_new_alignments {
my ($new_line) = @_;
my $jmax = $new_line->get_jmax();
my $rfield_lengths = $new_line->get_rfield_lengths();
my $col = $new_line->get_leading_space_count();
for my $j ( 0 .. $jmax ) {
$col += $rfield_lengths->[$j];
# create initial alignments for the new group
my $alignment =
Perl::Tidy::VerticalAligner::Alignment->new( { column => $col } );
$new_line->set_alignment( $j, $alignment );
}
return;
}
sub copy_old_alignments {
my ( $new_line, $old_line ) = @_;
my @new_alignments = $old_line->get_alignments();
$new_line->set_alignments(@new_alignments);
return;
}
sub dump_array {
# debug routine to dump array contents
local $" = ')(';
print STDOUT "(@_)\n";
return;
}
sub level_change {
# compute decrease in level when we remove $diff spaces from the
# leading spaces
my ( $self, $leading_space_count, $diff, $level ) = @_;
my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
if ($rOpts_indent_columns) {
my $olev =
int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
my $nlev = int( $leading_space_count / $rOpts_indent_columns );
$level -= ( $olev - $nlev );
if ( $level < 0 ) { $level = 0 }
}
return $level;
}
###############################################
# CODE SECTION 4: Code to process comment lines
###############################################
sub _flush_comment_lines {
# Output a group consisting of COMMENT lines
my ($self) = @_;
my $rgroup_lines = $self->[_rgroup_lines_];
return unless ( @{$rgroup_lines} );
my $group_level = $self->[_group_level_];
my $leading_space_count = $self->[_comment_leading_space_count_];
my $leading_string =
$self->get_leading_string( $leading_space_count, $group_level );
# look for excessively long lines
my $max_excess = 0;
foreach my $item ( @{$rgroup_lines} ) {
my ( $str, $str_len ) = @{$item};
my $excess =
$str_len +
$leading_space_count -
$self->maximum_line_length_for_level($group_level);
if ( $excess > $max_excess ) {
$max_excess = $excess;
}
}
# zero leading space count if any lines are too long
if ( $max_excess > 0 ) {
$leading_space_count -= $max_excess;
if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
my $file_writer_object = $self->[_file_writer_object_];
my $last_outdented_line_at =
$file_writer_object->get_output_line_number();
$self->[_last_outdented_line_at_] = $last_outdented_line_at;
my $outdented_line_count = $self->[_outdented_line_count_];
unless ($outdented_line_count) {
$self->[_first_outdented_line_at_] = $last_outdented_line_at;
}
my $nlines = @{$rgroup_lines};
$outdented_line_count += $nlines;
$self->[_outdented_line_count_] = $outdented_line_count;
}
# write the lines
my $outdent_long_lines = 0;
foreach my $item ( @{$rgroup_lines} ) {
my ( $line, $line_len, $Kend ) = @{$item};
$self->valign_output_step_B(
{
leading_space_count => $leading_space_count,
line => $line,
line_length => $line_len,
side_comment_length => 0,
outdent_long_lines => $outdent_long_lines,
rvertical_tightness_flags => "",
level => $group_level,
Kend => $Kend,
}
);
}
$self->initialize_for_new_group();
return;
}
######################################################
# CODE SECTION 5: Code to process groups of code lines
######################################################
sub _flush_group_lines {
# This is the vertical aligner internal flush, which leaves the cache
# intact
my ($self) = @_;
my $rgroup_lines = $self->[_rgroup_lines_];
return unless ( @{$rgroup_lines} );
my $group_type = $self->[_group_type_];
my $group_level = $self->[_group_level_];
# Debug
0 && do {
my ( $a, $b, $c ) = caller();
my $nlines = @{$rgroup_lines};
print STDOUT
"APPEND0: _flush_group_lines called from $a $b $c lines=$nlines, type=$group_type \n";
};
############################################
# Section 1: Handle a group of COMMENT lines
############################################
if ( $group_type eq 'COMMENT' ) {
$self->_flush_comment_lines();
return;
}
#########################################################################
# Section 2: Handle line(s) of CODE. Most of the actual work of vertical
# aligning happens here in the following steps:
#########################################################################
# STEP 1: Remove most unmatched tokens. They block good alignments.
my ( $max_lev_diff, $saw_side_comment ) =
delete_unmatched_tokens( $rgroup_lines, $group_level );
# STEP 2: Sweep top to bottom, forming subgroups of lines with exactly
# matching common alignments. The indexes of these subgroups are in the
# return variable.
my $rgroups = $self->sweep_top_down( $rgroup_lines, $group_level );
# STEP 3: Sweep left to right through the lines, looking for leading
# alignment tokens shared by groups.
sweep_left_to_right( $rgroup_lines, $rgroups, $group_level )
if ( @{$rgroups} > 1 );
# STEP 4: Move side comments to a common column if possible.
if ($saw_side_comment) {
$self->align_side_comments( $rgroup_lines, $rgroups );
}
# STEP 5: For the -lp option, increase the indentation of lists
# to the desired amount, but do not exceed the line length limit.
my $extra_leading_spaces =
$self->[_extra_indent_ok_]
? get_extra_leading_spaces( $rgroup_lines, $rgroups )
: 0;
# STEP 6: Output the lines.
# All lines in this batch have the same basic leading spacing:
my $group_leader_length = $rgroup_lines->[0]->get_leading_space_count();
foreach my $line ( @{$rgroup_lines} ) {
$self->valign_output_step_A(
{
line => $line,
min_ci_gap => 0,
do_not_align => 0,
group_leader_length => $group_leader_length,
extra_leading_spaces => $extra_leading_spaces,
level => $group_level,
}
);
}
$self->initialize_for_new_group();
return;
}
{ ## closure for sub sweep_top_down
my $rall_lines; # all of the lines
my $grp_level; # level of all lines
my $rgroups; # describes the partition of lines we will make here
my $group_line_count; # number of lines in current partition
BEGIN { $rgroups = [] }
sub initialize_for_new_rgroup {
$group_line_count = 0;
return;
}
sub add_to_rgroup {
my ($jend) = @_;
my $rline = $rall_lines->[$jend];
my $jbeg = $jend;
if ( $group_line_count == 0 ) {
install_new_alignments($rline);
}
else {
my $rvals = pop @{$rgroups};
$jbeg = $rvals->[0];
copy_old_alignments( $rline, $rall_lines->[$jbeg] );
}
push @{$rgroups}, [ $jbeg, $jend, undef ];
$group_line_count++;
return;
}
sub get_rgroup_jrange {
return unless @{$rgroups};
return unless ( $group_line_count > 0 );
my ( $jbeg, $jend ) = @{ $rgroups->[-1] };
return ( $jbeg, $jend );
}
sub end_rgroup {
my ($imax_align) = @_;
return unless @{$rgroups};
return unless ( $group_line_count > 0 );
my ( $jbeg, $jend ) = @{ pop @{$rgroups} };
push @{$rgroups}, [ $jbeg, $jend, $imax_align ];
# Undo some alignments of poor two-line combinations.
# We had to wait until now to know the line count.
if ( $jend - $jbeg == 1 ) {
my $line_0 = $rall_lines->[$jbeg];
my $line_1 = $rall_lines->[$jend];
my $imax_pair = $line_1->get_imax_pair();
if ( $imax_pair > $imax_align ) { $imax_align = $imax_pair }
## flag for possible future use:
## my $is_isolated_pair = $imax_pair < 0
## && ( $jbeg == 0
## || $rall_lines->[ $jbeg - 1 ]->get_imax_pair() < 0 );
my $imax_prev =
$jbeg > 0 ? $rall_lines->[ $jbeg - 1 ]->get_imax_pair() : -1;
my ( $is_marginal, $imax_align_fix ) =
is_marginal_match( $line_0, $line_1, $grp_level, $imax_align,
$imax_prev );
if ($is_marginal) {
combine_fields( $line_0, $line_1, $imax_align_fix );
}
}
initialize_for_new_rgroup();
return;
}
sub block_penultimate_match {
# emergency reset to prevent sweep_left_to_right from trying to match a
# failed terminal else match
return unless @{$rgroups} > 1;
$rgroups->[-2]->[2] = -1;
return;
}
sub sweep_top_down {
my ( $self, $rlines, $group_level ) = @_;
# Partition the set of lines into final alignment subgroups
# and store the alignments with the lines.
# The alignment subgroups we are making here are groups of consecutive
# lines which have (1) identical alignment tokens and (2) do not
# exceed the allowable maximum line length. A later sweep from
# left-to-right ('sweep_lr') will handle additional alignments.
# transfer args to closure variables
$rall_lines = $rlines;
$grp_level = $group_level;
$rgroups = [];
initialize_for_new_rgroup();
return unless @{$rlines}; # shouldn't happen
# Unset the _end_group flag for the last line if it it set because it
# is not needed and can causes problems for -lp formatting
$rall_lines->[-1]->set_end_group(0);
# Loop over all lines ...
my $jline = -1;
foreach my $new_line ( @{$rall_lines} ) {
$jline++;
# Start a new subgroup if necessary
if ( !$group_line_count ) {
add_to_rgroup($jline);
if ( $new_line->get_end_group() ) {
end_rgroup(-1);
}
next;
}
my $j_terminal_match = $new_line->get_j_terminal_match();
my ( $jbeg, $jend ) = get_rgroup_jrange();
if ( !defined($jbeg) ) {
# safety check, shouldn't happen
$self->warning(<<EOM);
Program bug detected in Perl::Tidy::VerticalAligner sub sweep_top_down
undefined index for group line count $group_line_count
EOM
$jbeg = $jline;
}
my $base_line = $rall_lines->[$jbeg];
# Initialize a global flag saying if the last line of the group
# should match end of group and also terminate the group. There
# should be no returns between here and where the flag is handled
# at the bottom.
my $col_matching_terminal = 0;
if ( defined($j_terminal_match) ) {
# remember the column of the terminal ? or { to match with
$col_matching_terminal =
$base_line->get_column($j_terminal_match);
# Ignore an undefined value as a defensive step; shouldn't
# normally happen.
$col_matching_terminal = 0
unless defined($col_matching_terminal);
}
# -------------------------------------------------------------
# Allow hanging side comment to join current group, if any. The
# only advantage is to keep the other tokens in the same group. For
# example, this would make the '=' align here:
# $ax = 1; # side comment
# # hanging side comment
# $boondoggle = 5; # side comment
# $beetle = 5; # side comment
# here is another example..
# _rtoc_name_count => {}, # hash to track ..
# _rpackage_stack => [], # stack to check ..
# # name changes
# _rlast_level => \$last_level, # brace indentation
#
#
# If this were not desired, the next step could be skipped.
# -------------------------------------------------------------
if ( $new_line->get_is_hanging_side_comment() ) {
join_hanging_comment( $new_line, $base_line );
}
# If this line has no matching tokens, then flush out the lines
# BEFORE this line unless both it and the previous line have side
# comments. This prevents this line from pushing side coments out
# to the right.
elsif ( $new_line->get_jmax() == 1 ) {
# There are no matching tokens, so now check side comments.
# Programming note: accessing arrays with index -1 is
# risky in Perl, but we have verified there is at least one
# line in the group and that there is at least one field.
my $prev_comment =
$rall_lines->[ $jline - 1 ]->get_rfields()->[-1];
my $side_comment = $new_line->get_rfields()->[-1];
end_rgroup(-1) unless ( $side_comment && $prev_comment );
}
# See if the new line matches and fits the current group,
# if it still exists. Flush the current group if not.
my $match_code;
if ($group_line_count) {
( $match_code, my $imax_align ) =
$self->check_match( $new_line, $base_line,
$rall_lines->[ $jline - 1 ] );
if ( $match_code != 2 ) { end_rgroup($imax_align) }
}
# Store the new line
add_to_rgroup($jline);
if ( defined($j_terminal_match) ) {
# Decide if we should fix a terminal match. We can either:
# 1. fix it and prevent the sweep_lr from changing it, or
# 2. leave it alone and let sweep_lr try to fix it.
# The current logic is to fix it if:
# -it has not joined to previous lines,
# -and either the previous subgroup has just 1 line, or
# -this line matched but did not fit (so sweep won't work)
my $fixit;
if ( $group_line_count == 1 ) {
$fixit ||= $match_code;
if ( !$fixit ) {
if ( @{$rgroups} > 1 ) {
my ( $jbegx, $jendx ) = @{ $rgroups->[-2] };
my $nlines = $jendx - $jbegx + 1;
$fixit ||= $nlines <= 1;
}
}
}
if ($fixit) {
$base_line = $new_line;
my $col_now = $base_line->get_column($j_terminal_match);
# Ignore an undefined value as a defensive step; shouldn't
# normally happen.
$col_now = 0 unless defined($col_now);
my $pad = $col_matching_terminal - $col_now;
my $padding_available =
$base_line->get_available_space_on_right();
if ( $col_now && $pad > 0 && $pad <= $padding_available ) {
$base_line->increase_field_width( $j_terminal_match,
$pad );
}
# do not let sweep_left_to_right change an isolated 'else'
if ( !$new_line->get_is_terminal_ternary() ) {
block_penultimate_match();
}
}
end_rgroup(-1);
}
# end the group if we know we cannot match next line.
elsif ( $new_line->get_end_group() ) {
end_rgroup(-1);
}
} ## end loop over lines
end_rgroup(-1);
return ($rgroups);
}
}
sub two_line_pad {
my ( $line_m, $line, $imax_min ) = @_;
# Given:
# two isolated (list) lines
# imax_min = number of common alignment tokens
# Return:
# $pad_max = maximum suggested pad distnce
# = 0 if alignment not recommended
# Note that this is only for two lines which do not have alignment tokens
# in common with any other lines. It is intended for lists, but it might
# also be used for two non-list lines with a common leading '='.
# Allow alignment if the difference in the two unpadded line lengths
# is not more than either line length. The idea is to avoid
# aligning lines with very different field lengths, like these two:
# [
# 'VARCHAR', DBI::SQL_VARCHAR, undef, "'", "'", undef, 0, 1,
# 1, 0, 0, 0, undef, 0, 0
# ];
my $rfield_lengths = $line->get_rfield_lengths();
my $rfield_lengths_m = $line_m->get_rfield_lengths();
# Safety check - shouldn't happen
return 0
unless $imax_min < @{$rfield_lengths} && $imax_min < @{$rfield_lengths_m};
my $lensum_m = 0;
my $lensum = 0;
for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
$lensum_m += $rfield_lengths_m->[$i];
$lensum += $rfield_lengths->[$i];
}
my ( $lenmin, $lenmax ) =
$lensum >= $lensum_m ? ( $lensum_m, $lensum ) : ( $lensum, $lensum_m );
my $patterns_match;
if ( $line_m->get_list_type() && $line->get_list_type() ) {
$patterns_match = 1;
my $rpatterns_m = $line_m->get_rpatterns();
my $rpatterns = $line->get_rpatterns();
for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
my $pat = $rpatterns->[$i];
my $pat_m = $rpatterns_m->[$i];
if ( $pat ne $pat_m ) { $patterns_match = 0; last }
}
}
my $pad_max = $lenmax;
if ( !$patterns_match && $lenmax > 2 * $lenmin ) { $pad_max = 0 }
return $pad_max;
}
sub sweep_left_to_right {
my ( $rlines, $rgroups, $group_level ) = @_;
# So far we have divided the lines into groups having an equal number of
# identical alignments. Here we are going to look for common leading
# alignments between the different groups and align them when possible.
# For example, the three lines below are in three groups because each line
# has a different number of commas. In this routine we will sweep from
# left to right, aligning the leading commas as we go, but stopping if we
# hit the line length limit.
# my ( $num, $numi, $numj, $xyza, $ka, $xyzb, $kb, $aff, $error );
# my ( $i, $j, $error, $aff, $asum, $avec );
# my ( $km, $area, $varea );
# nothing to do if just one group
my $ng_max = @{$rgroups} - 1;
return unless ( $ng_max > 0 );
############################################################################
# Step 1: Loop over groups to find all common leading alignment tokens
############################################################################
my $line;
my $rtokens;
my $imax; # index of maximum non-side-comment alignment token
my $istop; # an optional stopping index
my $jbeg; # starting line index
my $jend; # ending line index
my $line_m;
my $rtokens_m;
my $imax_m;
my $istop_m;
my $jbeg_m;
my $jend_m;
my $istop_mm;
# Look at neighboring pairs of groups and form a simple list
# of all common leading alignment tokens. Foreach such match we
# store [$i, $ng], where
# $i = index of the token in the line (0,1,...)
# $ng is the second of the two groups with this common token
my @icommon;
# Hash to hold the maximum alignment change for any group
my %max_move;
# a small number of columns
my $short_pad = 4;
my $ng = -1;
foreach my $item ( @{$rgroups} ) {
$ng++;
$istop_mm = $istop_m;
# save _m values of previous group
$line_m = $line;
$rtokens_m = $rtokens;
$imax_m = $imax;
$istop_m = $istop;
$jbeg_m = $jbeg;
$jend_m = $jend;
# Get values for this group. Note that we just have to use values for
# one of the lines of the group since all members have the same
# alignments.
( $jbeg, $jend, $istop ) = @{$item};
$line = $rlines->[$jbeg];
$rtokens = $line->get_rtokens();
$imax = $line->get_jmax() - 2;
$istop = -1 unless ( defined($istop) );
$istop = $imax if ( $istop > $imax );
# Initialize on first group
next if ( $ng == 0 );
# Use the minimum index limit of the two groups
my $imax_min = $imax > $imax_m ? $imax_m : $imax;
# Also impose a limit if given.
if ( $istop_m < $imax_min ) {
$imax_min = $istop_m;
}
# Special treatment of two one-line groups isolated from other lines,
# unless they form a simple list or a terminal match. Otherwise the
# alignment can look strange in some cases.
my $list_type = $rlines->[$jbeg]->get_list_type();
if (
$jend == $jbeg
&& $jend_m == $jbeg_m
&& ( $ng == 1 || $istop_mm < 0 )
&& ( $ng == $ng_max || $istop < 0 )
&& !$line->get_j_terminal_match()
# Only do this for imperfect matches. This is normally true except
# when two perfect matches cannot form a group because the line
# length limit would be exceeded. In that case we can still try
# to match as many alignments as possible.
&& ( $imax != $imax_m || $istop_m != $imax_m )
)
{
# We will just align assignments and simple lists
next unless ( $imax_min >= 0 );
next
unless ( $rtokens->[0] =~ /^=\d/
|| $list_type );
# In this case we will limit padding to a short distance. This
# is a compromise to keep some vertical alignment but prevent large
# gaps, which do not look good for just two lines.
my $pad_max =
two_line_pad( $rlines->[$jbeg], $rlines->[$jbeg_m], $imax_min );
next unless ($pad_max);
my $ng_m = $ng - 1;
$max_move{"$ng_m"} = $pad_max;
$max_move{"$ng"} = $pad_max;
}
# Loop to find all common leading tokens.
if ( $imax_min >= 0 ) {
foreach my $i ( 0 .. $imax_min ) {
my $tok = $rtokens->[$i];
my $tok_m = $rtokens_m->[$i];
last if ( $tok ne $tok_m );
push @icommon, [ $i, $ng, $tok ];
}
}
}
return unless @icommon;
###########################################################
# Step 2: Reorder and consolidate the list into a task list
###########################################################
# We have to work first from lowest token index to highest, then by group,
# sort our list first on token index then group number
@icommon = sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @icommon;
# Make a task list of the form
# [$i, ng_beg, $ng_end, $tok], ..
# where
# $i is the index of the token to be aligned
# $ng_beg..$ng_end is the group range for this action
my @todo;
my ( $i, $ng_end, $tok );
foreach my $item (@icommon) {
my $ng_last = $ng_end;
my $i_last = $i;
( $i, $ng_end, $tok ) = @{$item};
my $ng_beg = $ng_end - 1;
if ( defined($ng_last) && $ng_beg == $ng_last && $i == $i_last ) {
my $var = pop(@todo);
$ng_beg = $var->[1];
}
my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
push @todo, [ $i, $ng_beg, $ng_end, $raw_tok, $lev ];
}
###############################
# Step 3: Execute the task list
###############################
do_left_to_right_sweep( $rlines, $rgroups, \@todo, \%max_move, $short_pad,
$group_level );
return;
}
{ ## closure for sub do_left_to_right_sweep
my %is_good_alignment_token;
BEGIN {
# One of the most difficult aspects of vertical alignment is knowing
# when not to align. Alignment can go from looking very nice to very
# bad when overdone. In the sweep algorithm there are two special
# cases where we may need to limit padding to a '$short_pad' distance
# to avoid some very ugly formatting:
# 1. Two isolated lines with partial alignment
# 2. A 'tail-wag-dog' situation, in which a single terminal
# line with partial alignment could cause a significant pad
# increase in many previous lines if allowed to join the alignment.
# For most alignment tokens, we will allow only a small pad to be
# introduced (the hardwired $short_pad variable) . But for some 'good'
# alignments we can be less restrictive.
# These are 'good' alignments, which are allowed more padding:
my @q = qw(
=> = ? if unless or || {
);
push @q, ',';
@is_good_alignment_token{@q} = (0) x scalar(@q);
# Promote a few of these to 'best', with essentially no pad limit:
$is_good_alignment_token{'='} = 1;
$is_good_alignment_token{'if'} = 1;
$is_good_alignment_token{'unless'} = 1;
$is_good_alignment_token{'=>'} = 1
# Note the hash values are set so that:
# if ($is_good_alignment_token{$raw_tok}) => best
# if defined ($is_good_alignment_token{$raw_tok}) => good or best
}
sub do_left_to_right_sweep {
my ( $rlines, $rgroups, $rtodo, $rmax_move, $short_pad, $group_level )
= @_;
# $blocking_level[$nj is the level at a match failure between groups
# $ng-1 and $ng
my @blocking_level;
my $group_list_type = $rlines->[0]->get_list_type();
my $move_to_common_column = sub {
# Move the alignment column of token $itok to $col_want for a
# sequence of groups.
my ( $ngb, $nge, $itok, $col_want, $raw_tok ) = @_;
return unless ( defined($ngb) && $nge > $ngb );
foreach my $ng ( $ngb .. $nge ) {
my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
my $line = $rlines->[$jbeg];
my $col = $line->get_column($itok);
my $avail = $line->get_available_space_on_right();
my $move = $col_want - $col;
if ( $move > 0 ) {
# limit padding increase in isolated two lines
next
if ( defined( $rmax_move->{$ng} )
&& $move > $rmax_move->{$ng}
&& !$is_good_alignment_token{$raw_tok} );
$line->increase_field_width( $itok, $move );
}
elsif ( $move < 0 ) {
# spot to take special action on failure to move
}
}
};
foreach my $task ( @{$rtodo} ) {
my ( $itok, $ng_beg, $ng_end, $raw_tok, $lev ) = @{$task};
# Nothing to do for a single group
next unless ( $ng_end > $ng_beg );
my $ng_first; # index of the first group of a continuous sequence
my $col_want; # the common alignment column of a sequence of groups
my $col_limit; # maximum column before bumping into max line length
my $line_count_ng_m = 0;
my $jmax_m;
my $it_stop_m;
# Loop over the groups
# 'ix_' = index in the array of lines
# 'ng_' = index in the array of groups
# 'it_' = index in the array of tokens
my $ix_min = $rgroups->[$ng_beg]->[0];
my $ix_max = $rgroups->[$ng_end]->[1];
my $lines_total = $ix_max - $ix_min + 1;
foreach my $ng ( $ng_beg .. $ng_end ) {
my ( $ix_beg, $ix_end, $it_stop ) = @{ $rgroups->[$ng] };
my $line_count_ng = $ix_end - $ix_beg + 1;
# Important: note that since all lines in a group have a common
# alignments object, we just have to work on one of the lines
# (the first line). All of the rest will be changed
# automatically.
my $line = $rlines->[$ix_beg];
my $jmax = $line->get_jmax();
# the maximum space without exceeding the line length:
my $avail = $line->get_available_space_on_right();
my $col = $line->get_column($itok);
my $col_max = $col + $avail;
# Initialize on first group
if ( !defined($col_want) ) {
$ng_first = $ng;
$col_want = $col;
$col_limit = $col_max;
$line_count_ng_m = $line_count_ng;
$jmax_m = $jmax;
$it_stop_m = $it_stop;
next;
}
# RULE: Throw a blocking flag upon encountering a token level
# different from the level of the first blocking token. For
# example, in the following example, if the = matches get
# blocked between two groups as shown, then we want to start
# blocking matches at the commas, which are at deeper level, so
# that we do not get the big gaps shown here:
# my $unknown3 = pack( "v", -2 );
# my $unknown4 = pack( "v", 0x09 );
# my $unknown5 = pack( "VVV", 0x06, 0x00, 0x00 );
# my $num_bbd_blocks = pack( "V", $num_lists );
# my $root_startblock = pack( "V", $root_start );
# my $unknown6 = pack( "VV", 0x00, 0x1000 );
# On the other hand, it is okay to keep matching at the same
# level such as in a simple list of commas and/or fat arrors.
my $is_blocked = defined( $blocking_level[$ng] )
&& $lev > $blocking_level[$ng];
# TAIL-WAG-DOG RULE: prevent a 'tail-wag-dog' syndrom, meaning:
# Do not let one or two lines with a **different number of
# alignments** open up a big gap in a large block. For
# example, we will prevent something like this, where the first
# line prys open the rest:
# $worksheet->write( "B7", "http://www.perl.com", undef, $format );
# $worksheet->write( "C7", "", $format );
# $worksheet->write( "D7", "", $format );
# $worksheet->write( "D8", "", $format );
# $worksheet->write( "D8", "", $format );
# We should exclude from consideration two groups which are
# effectively the same but separated because one does not
# fit in the maximum allowed line length.
my $is_same_group =
$jmax == $jmax_m && $it_stop_m == $jmax_m - 2;
my $lines_above = $ix_beg - $ix_min;
my $lines_below = $lines_total - $lines_above;
# Increase the tolerable gap for certain favorable factors
my $factor = 1;
my $top_level = $lev == $group_level;
# Align best top level alignment tokens like '=', 'if', ...
# A factor of 10 allows a gap of up to 40 spaces
if ( $top_level && $is_good_alignment_token{$raw_tok} ) {
$factor = 10;
}
# Otherwise allow some minimal padding of good alignments
elsif (
defined( $is_good_alignment_token{$raw_tok} )
# We have to be careful if there are just 2 lines. This
# two-line factor allows large gaps only for 2 lines which
# are simple lists with fewer items on the second line. It
# gives results similar to previous versions of perltidy.
&& ( $lines_total > 2
|| $group_list_type && $jmax < $jmax_m && $top_level )
)
{
$factor += 1;
if ($top_level) {
$factor += 1;
}
}
my $is_big_gap;
if ( !$is_same_group ) {
$is_big_gap ||=
( $lines_above == 1
|| $lines_above == 2 && $lines_below >= 4 )
&& $col_want > $col + $short_pad * $factor;
$is_big_gap ||=
( $lines_below == 1
|| $lines_below == 2 && $lines_above >= 4 )
&& $col > $col_want + $short_pad * $factor;
}
# if match is limited by gap size, stop aligning at this level
if ($is_big_gap) {
$blocking_level[$ng] = $lev - 1;
}
# quit and restart if it cannot join this batch
if ( $col_want > $col_max
|| $col > $col_limit
|| $is_big_gap
|| $is_blocked )
{
# remember the level of the first blocking token
if ( !defined( $blocking_level[$ng] ) ) {
$blocking_level[$ng] = $lev;
}
$move_to_common_column->(
$ng_first, $ng - 1, $itok, $col_want, $raw_tok
);
$ng_first = $ng;
$col_want = $col;
$col_limit = $col_max;
$line_count_ng_m = $line_count_ng;
$jmax_m = $jmax;
$it_stop_m = $it_stop;
next;
}
$line_count_ng_m += $line_count_ng;
# update the common column and limit
if ( $col > $col_want ) { $col_want = $col }
if ( $col_max < $col_limit ) { $col_limit = $col_max }
} ## end loop over groups
if ( $ng_end > $ng_first ) {
$move_to_common_column->(
$ng_first, $ng_end, $itok, $col_want, $raw_tok
);
} ## end loop over groups for one task
} ## end loop over tasks
return;
}
}
sub delete_selected_tokens {
my ( $line_obj, $ridel ) = @_;
# $line_obj is the line to be modified
# $ridel is a ref to list of indexes to be deleted
# remove an unused alignment token(s) to improve alignment chances
return unless ( defined($line_obj) && defined($ridel) && @{$ridel} );
my $jmax_old = $line_obj->get_jmax();
my $rfields_old = $line_obj->get_rfields();
my $rfield_lengths_old = $line_obj->get_rfield_lengths();
my $rpatterns_old = $line_obj->get_rpatterns();
my $rtokens_old = $line_obj->get_rtokens();
my $j_terminal_match = $line_obj->get_j_terminal_match();
use constant EXPLAIN_DELETE_SELECTED => 0;
local $" = '> <';
EXPLAIN_DELETE_SELECTED && print <<EOM;
delete indexes: <@{$ridel}>
old jmax: $jmax_old
old tokens: <@{$rtokens_old}>
old patterns: <@{$rpatterns_old}>
old fields: <@{$rfields_old}>
old field_lengths: <@{$rfield_lengths_old}>
EOM
my $rfields_new = [];
my $rpatterns_new = [];
my $rtokens_new = [];
my $rfield_lengths_new = [];
# Convert deletion list to a hash to allow any order, multiple entries,
# and avoid problems with index values out of range
my %delete_me;
@delete_me{ @{$ridel} } = (1) x scalar( @{$ridel} );
my $pattern = $rpatterns_old->[0];
my $field = $rfields_old->[0];
my $field_length = $rfield_lengths_old->[0];
push @{$rfields_new}, $field;
push @{$rfield_lengths_new}, $field_length;
push @{$rpatterns_new}, $pattern;
# Loop to either copy items or concatenate fields and patterns
my $jmin_del;
for ( my $j = 0 ; $j < $jmax_old ; $j++ ) {
my $token = $rtokens_old->[$j];
my $field = $rfields_old->[ $j + 1 ];
my $field_length = $rfield_lengths_old->[ $j + 1 ];
my $pattern = $rpatterns_old->[ $j + 1 ];
if ( !$delete_me{$j} ) {
push @{$rtokens_new}, $token;
push @{$rfields_new}, $field;
push @{$rpatterns_new}, $pattern;
push @{$rfield_lengths_new}, $field_length;
}
else {
if ( !defined($jmin_del) ) { $jmin_del = $j }
$rfields_new->[-1] .= $field;
$rfield_lengths_new->[-1] += $field_length;
$rpatterns_new->[-1] .= $pattern;
}
}
# ----- x ------ x ------ x ------
#t 0 1 2 <- token indexing
#f 0 1 2 3 <- field and pattern
my $jmax_new = @{$rfields_new} - 1;
$line_obj->set_rtokens($rtokens_new);
$line_obj->set_rpatterns($rpatterns_new);
$line_obj->set_rfields($rfields_new);
$line_obj->set_rfield_lengths($rfield_lengths_new);
$line_obj->set_jmax($jmax_new);
# The value of j_terminal_match will be incorrect if we delete tokens prior
# to it. We will have to give up on aligning the terminal tokens if this
# happens.
if ( defined($j_terminal_match) && $jmin_del <= $j_terminal_match ) {
$line_obj->set_j_terminal_match(undef);
}
# update list type -
if ( $line_obj->get_list_seqno() ) {
## This works, but for efficiency see if we need to make a change:
## decide_if_list($line_obj);
# An existing list will still be a list but with possibly different
# leading token
my $old_list_type = $line_obj->get_list_type();
my $new_list_type = "";
if ( $rtokens_new->[0] =~ /^(=>|,)/ ) {
$new_list_type = $rtokens_new->[0];
}
if ( !$old_list_type || $old_list_type ne $new_list_type ) {
decide_if_list($line_obj);
}
}
EXPLAIN_DELETE_SELECTED && print <<EOM;
new jmax: $jmax_new
new tokens: <@{$rtokens_new}>
new patterns: <@{$rpatterns_new}>
new fields: <@{$rfields_new}>
EOM
return;
}
{ ## closure for sub decode_alignment_token
# This routine is called repeatedly for each token, so it needs to be
# efficient. We can speed things up by remembering the inputs and outputs
# in a hash.
my %decoded_token;
sub initialize_decode {
# We will re-initialize the hash for each file. Otherwise, there is
# a danger that the hash can become arbitrarily large if a very large
# number of files is processed at once.
%decoded_token = ();
return;
}
sub decode_alignment_token {
# Unpack the values packed in an alignment token
#
# Usage:
# my ( $raw_tok, $lev, $tag, $tok_count ) =
# decode_alignment_token($token);
# Alignment tokens have a trailing decimal level and optional tag (for
# commas):
# For example, the first comma in the following line
# sub banner { crlf; report( shift, '/', shift ); crlf }
# is decorated as follows:
# ,2+report-6 => (tok,lev,tag) =qw( , 2 +report-6)
# An optional token count may be appended with a leading dot.
# Currently this is only done for '=' tokens but this could change.
# For example, consider the following line:
# $nport = $port = shift || $name;
# The first '=' may either be '=0' or '=0.1' [level 0, first equals]
# The second '=' will be '=0.2' [level 0, second equals]
my ($tok) = @_;
if ( defined( $decoded_token{$tok} ) ) {
return @{ $decoded_token{$tok} };
}
my ( $raw_tok, $lev, $tag, $tok_count ) = ( $tok, 0, "", 1 );
if ( $tok =~ /^(\D+)(\d+)([^\.]*)(\.(\d+))?$/ ) {
$raw_tok = $1;
$lev = $2;
$tag = $3 if ($3);
$tok_count = $5 if ($5);
}
my @vals = ( $raw_tok, $lev, $tag, $tok_count );
$decoded_token{$tok} = \@vals;
return @vals;
}
}
{ ## closure for sub delete_unmatched_tokens
my %is_assignment;
my %keep_after_deleted_assignment;
BEGIN {
my @q;
@q = qw(
= **= += *= &= <<= &&=
-= /= |= >>= ||= //=
.= %= ^=
x=
);
@is_assignment{@q} = (1) x scalar(@q);
# These tokens may be kept following an = deletion
@q = qw(
if unless or ||
);
@keep_after_deleted_assignment{@q} = (1) x scalar(@q);
}
# This flag is for testing only and should normally be zero.
use constant TEST_DELETE_NULL => 0;
sub delete_unmatched_tokens {
my ( $rlines, $group_level ) = @_;
# This is a preliminary step in vertical alignment in which we remove
# as many obviously un-needed alignment tokens as possible. This will
# prevent them from interfering with the final alignment.
# These are the return values
my $max_lev_diff = 0; # used to avoid a call to prune_tree
my $saw_side_comment = 0; # used to avoid a call for side comments
# Handle no lines -- shouldn't happen
return unless @{$rlines};
# Handle a single line
if ( @{$rlines} == 1 ) {
my $line = $rlines->[0];
my $jmax = $line->get_jmax();
my $length = $line->get_rfield_lengths()->[$jmax];
$saw_side_comment = $length > 0;
return ( $max_lev_diff, $saw_side_comment );
}
my $has_terminal_match = $rlines->[-1]->get_j_terminal_match();
# ignore hanging side comments in these operations
my @filtered = grep { !$_->get_is_hanging_side_comment() } @{$rlines};
my $rnew_lines = \@filtered;
$saw_side_comment = @filtered != @{$rlines};
$max_lev_diff = 0;
# nothing to do if all lines were hanging side comments
my $jmax = @{$rnew_lines} - 1;
return ( $max_lev_diff, $saw_side_comment ) unless ( $jmax >= 0 );
my @equals_info;
my @line_info;
my %is_good_tok;
# create a hash of tokens for each line
my $rline_hashes = [];
foreach my $line ( @{$rnew_lines} ) {
my $rhash = {};
my $rtokens = $line->get_rtokens();
my $rpatterns = $line->get_rpatterns();
my $i = 0;
my ( $i_eq, $tok_eq, $pat_eq );
my ( $lev_min, $lev_max );
foreach my $tok ( @{$rtokens} ) {
my ( $raw_tok, $lev, $tag, $tok_count ) =
decode_alignment_token($tok);
if ( $tok ne '#' ) {
if ( !defined($lev_min) ) {
$lev_min = $lev;
$lev_max = $lev;
}
else {
if ( $lev < $lev_min ) { $lev_min = $lev }
if ( $lev > $lev_max ) { $lev_max = $lev }
}
}
else {
if ( !$saw_side_comment ) {
my $length = $line->get_rfield_lengths()->[ $i + 1 ];
$saw_side_comment ||= $length;
}
}
# Possible future upgrade: for multiple matches,
# record [$i1, $i2, ..] instead of $i
$rhash->{$tok} =
[ $i, undef, undef, $raw_tok, $lev, $tag, $tok_count ];
# remember the first equals at line level
if ( !defined($i_eq) && $raw_tok eq '=' ) {
if ( $lev eq $group_level ) {
$i_eq = $i;
$tok_eq = $tok;
$pat_eq = $rpatterns->[$i];
}
}
$i++;
}
push @{$rline_hashes}, $rhash;
push @equals_info, [ $i_eq, $tok_eq, $pat_eq ];
push @line_info, [ $lev_min, $lev_max ];
if ( defined($lev_min) ) {
my $lev_diff = $lev_max - $lev_min;
if ( $lev_diff > $max_lev_diff ) { $max_lev_diff = $lev_diff }
}
}
# compare each line pair and record matches
my $rtok_hash = {};
my $nr = 0;
for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) {
my $nl = $nr;
$nr = 0;
my $jr = $jl + 1;
my $rhash_l = $rline_hashes->[$jl];
my $rhash_r = $rline_hashes->[$jr];
my $count = 0; # UNUSED NOW?
my $ntoks = 0;
foreach my $tok ( keys %{$rhash_l} ) {
$ntoks++;
if ( defined( $rhash_r->{$tok} ) ) {
if ( $tok ne '#' ) { $count++; }
my $il = $rhash_l->{$tok}->[0];
my $ir = $rhash_r->{$tok}->[0];
$rhash_l->{$tok}->[2] = $ir;
$rhash_r->{$tok}->[1] = $il;
if ( $tok ne '#' ) {
push @{ $rtok_hash->{$tok} }, ( $jl, $jr );
$nr++;
}
}
}
# Set a line break if no matching tokens between these lines
# (this is not strictly necessary now but does not hurt)
if ( $nr == 0 && $nl > 0 ) {
$rnew_lines->[$jl]->set_end_group(1);
}
# Also set a line break if both lines have simple equals but with
# different leading characters in patterns. This check is similar
# to one in sub check_match, and will prevent sub
# prune_alignment_tree from removing alignments which otherwise
# should be kept. This fix is rarely needed, but it can
# occasionally improve formatting.
# For example:
# my $name = $this->{Name};
# $type = $this->ctype($genlooptype) if defined $genlooptype;
# my $declini = ( $asgnonly ? "" : "\t$type *" );
# my $cast = ( $type ? "($type *)" : "" );
# The last two lines start with 'my' and will not match the
# previous line starting with $type, so we do not want
# prune_alignment tree to delete their ? : alignments at a deeper
# level.
my ( $i_eq_l, $tok_eq_l, $pat_eq_l ) = @{ $equals_info[$jl] };
my ( $i_eq_r, $tok_eq_r, $pat_eq_r ) = @{ $equals_info[$jr] };
if ( defined($i_eq_l) && defined($i_eq_r) ) {
# Also, do not align equals across a change in ci level
my $ci_jump = $rnew_lines->[$jl]->get_ci_level() !=
$rnew_lines->[$jr]->get_ci_level();
if (
$tok_eq_l eq $tok_eq_r
&& $i_eq_l == 0
&& $i_eq_r == 0
&& ( substr( $pat_eq_l, 0, 1 ) ne substr( $pat_eq_r, 0, 1 )
|| $ci_jump )
)
{
$rnew_lines->[$jl]->set_end_group(1);
}
}
}
# find subgroups
my @subgroups;
push @subgroups, [ 0, $jmax ];
for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) {
if ( $rnew_lines->[$jl]->get_end_group() ) {
$subgroups[-1]->[1] = $jl;
push @subgroups, [ $jl + 1, $jmax ];
}
}
# flag to allow skipping pass 2
my $saw_large_group;
############################################################
# PASS 1 over subgroups to remove unmatched alignment tokens
############################################################
foreach my $item (@subgroups) {
my ( $jbeg, $jend ) = @{$item};
my $nlines = $jend - $jbeg + 1;
####################################################
# Look for complete if/elsif/else and ternary blocks
####################################################
# We are looking for a common '$dividing_token' like these:
# if ( $b and $s ) { $p->{'type'} = 'a'; }
# elsif ($b) { $p->{'type'} = 'b'; }
# elsif ($s) { $p->{'type'} = 's'; }
# else { $p->{'type'} = ''; }
# ^----------- dividing_token
# my $severity =
# !$routine ? '[PFX]'
# : $routine =~ /warn.*_d\z/ ? '[DS]'
# : $routine =~ /ck_warn/ ? 'W'
# : $routine =~ /ckWARN\d*reg_d/ ? 'S'
# : $routine =~ /ckWARN\d*reg/ ? 'W'
# : $routine =~ /vWARN\d/ ? '[WDS]'
# : '[PFX]';
# ^----------- dividing_token
# Only look for groups which are more than 2 lines long. Two lines
# can get messed up doing this, probably due to the various
# two-line rules.
my $dividing_token;
my %token_line_count;
if ( $nlines > 2 ) {
for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
my %seen;
my $line = $rnew_lines->[$jj];
my $rtokens = $line->get_rtokens();
foreach my $tok ( @{$rtokens} ) {
if ( !$seen{$tok} ) {
$seen{$tok}++;
$token_line_count{$tok}++;
}
}
}
foreach my $tok ( keys %token_line_count ) {
if ( $token_line_count{$tok} == $nlines ) {
if ( substr( $tok, 0, 1 ) eq '?'
|| substr( $tok, 0, 1 ) eq '{'
&& $tok =~ /^\{\d+if/ )
{
$dividing_token = $tok;
last;
}
}
}
}
#####################################################
# Loop over lines to remove unwanted alignment tokens
#####################################################
for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
my $line = $rnew_lines->[$jj];
my $rtokens = $line->get_rtokens();
my $rhash = $rline_hashes->[$jj];
my $i_eq = $equals_info[$jj]->[0];
my @idel;
my $imax = @{$rtokens} - 2;
my $delete_above_level;
my $deleted_assignment_token;
my $saw_dividing_token = "";
$saw_large_group ||= $nlines > 2 && $imax > 1;
# Loop over all alignment tokens
for ( my $i = 0 ; $i <= $imax ; $i++ ) {
my $tok = $rtokens->[$i];
next if ( $tok eq '#' ); # shouldn't happen
my ( $iii, $il, $ir, $raw_tok, $lev, $tag, $tok_count ) =
@{ $rhash->{$tok} };
#######################################################
# Here is the basic RULE: remove an unmatched alignment
# which does not occur in the surrounding lines.
#######################################################
my $delete_me = !defined($il) && !defined($ir);
# But now we modify this with exceptions...
# EXCEPTION 1: If we are in a complete ternary or
# if/elsif/else group, and this token is not on every line
# of the group, should we delete it to preserve overall
# alignment?
if ($dividing_token) {
if ( $token_line_count{$tok} >= $nlines ) {
$saw_dividing_token ||= $tok eq $dividing_token;
}
else {
# For shorter runs, delete toks to save alignment.
# For longer runs, keep toks after the '{' or '?'
# to allow sub-alignments within braces. The
# number 5 lines is arbitrary but seems to work ok.
$delete_me ||=
( $nlines < 5 || !$saw_dividing_token );
}
}
# EXCEPTION 2: Remove all tokens above a certain level
# following a previous deletion. For example, we have to
# remove tagged higher level alignment tokens following a
# '=>' deletion because the tags of higher level tokens
# will now be incorrect. For example, this will prevent
# aligning commas as follows after deleting the second '=>'
# $w->insert(
# ListBox => origin => [ 270, 160 ],
# size => [ 200, 55 ],
# );
if ( defined($delete_above_level) ) {
if ( $lev > $delete_above_level ) {
$delete_me ||= 1; #$tag;
}
else { $delete_above_level = undef }
}
# EXCEPTION 3: Remove all but certain tokens after an
# assignment deletion.
if (
$deleted_assignment_token
&& ( $lev > $group_level
|| !$keep_after_deleted_assignment{$raw_tok} )
)
{
$delete_me ||= 1;
}
# EXCEPTION 4: Do not touch the first line of a 2 line
# terminal match, such as below, because j_terminal has
# already been set.
# if ($tag) { $tago = "<$tag>"; $tagc = "</$tag>"; }
# else { $tago = $tagc = ''; }
# But see snippets 'else1.t' and 'else2.t'
$delete_me = 0
if ( $jj == $jbeg
&& $has_terminal_match
&& $nlines == 2 );
# EXCEPTION 5: misc additional rules for commas and equals
if ($delete_me) {
# okay to delete second and higher copies of a token
if ( $tok_count == 1 ) {
# for a comma...
if ( $raw_tok eq ',' ) {
# Do not delete commas before an equals
$delete_me = 0
if ( defined($i_eq) && $i < $i_eq );
# Do not delete line-level commas
$delete_me = 0 if ( $lev <= $group_level );
}
# For an assignment at group level..
if ( $is_assignment{$raw_tok}
&& $lev == $group_level )
{
# Do not delete if it is the last alignment of
# multiple tokens; this will prevent some
# undesirable alignments
if ( $imax > 0 && $i == $imax ) {
$delete_me = 0;
}
# Otherwise, set a flag to delete most
# remaining tokens
else { $deleted_assignment_token = $raw_tok }
}
}
}
#####################################
# Add this token to the deletion list
#####################################
if ($delete_me) {
push @idel, $i;
# update deletion propagation flags
if ( !defined($delete_above_level)
|| $lev < $delete_above_level )
{
# delete all following higher level alignments
$delete_above_level = $lev;
# but keep deleting after => to next lower level
# to avoid some bizarre alignments
if ( $raw_tok eq '=>' ) {
$delete_above_level = $lev - 1;
}
}
}
} # End loop over alignment tokens
# Process all deletion requests for this line
if (@idel) {
delete_selected_tokens( $line, \@idel );
}
} # End loopover lines
} # End loop over subgroups
#################################################
# PASS 2 over subgroups to remove null alignments
#################################################
# This pass is only used for testing. It is helping to identify
# alignment situations which might be improved with a future more
# general algorithm which adds a tail matching capability.
if (TEST_DELETE_NULL) {
delete_null_alignments( $rnew_lines, $rline_hashes, \@subgroups )
if ($saw_large_group);
}
# PASS 3: Construct a tree of matched lines and delete some small deeper
# levels of tokens. They also block good alignments.
prune_alignment_tree($rnew_lines) if ($max_lev_diff);
# PASS 4: compare all lines for common tokens
match_line_pairs( $rlines, $rnew_lines, \@subgroups, $group_level );
return ( $max_lev_diff, $saw_side_comment );
}
}
sub delete_null_alignments {
my ( $rnew_lines, $rline_hashes, $rsubgroups ) = @_;
# This is an optional second pass for deleting alignment tokens which can
# occasionally improve alignment. We look for and remove 'null
# alignments', which are alignments that require no padding. So we can
# 'cheat' and delete them. For example, notice the '=~' alignment in the
# first two lines of the following code:
# $sysname .= 'del' if $self->label =~ /deletion/;
# $sysname .= 'ins' if $self->label =~ /insertion/;
# $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq;
# These '=~' tokens are already aligned because they are both the same
# distance from the previous alignment token, the 'if'. So we can
# eliminate them as alignments. The advantage is that in some cases, such
# as this one, this will allow other tokens to be aligned. In this case we
# then get the 'if' tokens to align:
# $sysname .= 'del' if $self->label =~ /deletion/;
# $sysname .= 'ins' if $self->label =~ /insertion/;
# $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq;
# The following rules for limiting this operation have been found to
# work well and avoid problems:
# Rule 1. We only consider a sequence of lines which have the same
# sequence of alignment tokens.
# Rule 2. We never eliminate the first alignment token. One reason is that
# lines may have different leading indentation spaces, so keeping the
# first alignment token insures that our length measurements start at
# a well-defined point. Another reason is that nothing is gained because
# the left-to-right sweep can always handle alignment of this token.
# Rule 3. We require that the first alignment token exist in either
# a previous line or a subsequent line. The reason is that this avoids
# changing two-line matches which go through special logic.
# Rule 4. Do not delete a token which occurs in a previous or subsequent
# line. For example, in the above example, it was ok to eliminate the '=~'
# token from two lines because it did not occur in a surrounding line.
# If it did occur in a surrounding line, the result could be confusing
# or even incorrectly aligned.
# A consequence of these rules is that we only need to consider subgroups
# with at least 3 lines and 2 alignment tokens.
# The subgroup line index range
my ( $jbeg, $jend );
# Vars to keep track of the start of a current sequence of matching
# lines.
my $rtokens_match;
my $rfield_lengths_match;
my $j_match_beg;
my $j_match_end;
my $imax_match;
my $rneed_pad;
# Vars for a line being tested
my $rtokens;
my $rfield_lengths;
my $imax;
my $start_match = sub {
my ($jj) = @_;
$rtokens_match = $rtokens;
$rfield_lengths_match = $rfield_lengths;
$j_match_beg = $jj;
$j_match_end = $jj;
$imax_match = $imax;
$rneed_pad = [];
return;
};
my $add_to_match = sub {
my ($jj) = @_;
$j_match_end = $jj;
# Keep track of any padding that would be needed for each token
for ( my $i = 0 ; $i <= $imax ; $i++ ) {
next if ( $rneed_pad->[$i] );
my $length = $rfield_lengths->[$i];
my $length_match = $rfield_lengths_match->[$i];
if ( $length ne $length_match ) { $rneed_pad->[$i] = 1 }
}
};
my $end_match = sub {
return unless ( $j_match_end > $j_match_beg );
my $nlines = $j_match_end - $j_match_beg + 1;
my $rhash_beg = $rline_hashes->[$j_match_beg];
my $rhash_end = $rline_hashes->[$j_match_end];
my @idel;
# Do not delete unless the first token also occurs in a surrounding line
my $tok0 = $rtokens_match->[0];
return
unless (
(
$j_match_beg > $jbeg
&& $rnew_lines->[ $j_match_beg - 1 ]->get_rtokens()->[0] eq
$tok0
)
|| ( $j_match_end < $jend
&& $rnew_lines->[ $j_match_end + 1 ]->get_rtokens()->[0] eq
$tok0 )
);
# Note that we are skipping the token at i=0
for ( my $i = 1 ; $i <= $imax_match ; $i++ ) {
# do not delete a token which requires padding to align
next if ( $rneed_pad->[$i] );
my $tok = $rtokens_match->[$i];
# Do not delete a token which occurs in a surrounding line
next
if ( $j_match_beg > $jbeg
&& defined( $rline_hashes->[ $j_match_beg - 1 ]->{$tok} ) );
next
if ( $j_match_end < $jend
&& defined( $rline_hashes->[ $j_match_end + 1 ]->{$tok} ) );
# ok to delete
push @idel, $i;
##print "ok to delete tok=$tok\n";
}
if (@idel) {
foreach my $j ( $j_match_beg .. $j_match_end ) {
delete_selected_tokens( $rnew_lines->[$j], \@idel );
}
}
};
foreach my $item ( @{$rsubgroups} ) {
( $jbeg, $jend ) = @{$item};
my $nlines = $jend - $jbeg + 1;
next unless ( $nlines > 2 );
for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
my $line = $rnew_lines->[$jj];
$rtokens = $line->get_rtokens();
$rfield_lengths = $line->get_rfield_lengths();
$imax = @{$rtokens} - 2;
# start a new match group
if ( $jj == $jbeg ) {
$start_match->($jj);
next;
}
# see if all tokens of this line match the current group
my $match;
if ( $imax == $imax_match ) {
for ( my $i = 0 ; $i <= $imax ; $i++ ) {
my $tok = $rtokens->[$i];
my $tok_match = $rtokens_match->[$i];
last if ( $tok ne $tok_match );
}
$match = 1;
}
# yes, they all match
if ($match) {
$add_to_match->($jj);
}
# now, this line does not match
else {
$end_match->();
$start_match->($jj);
}
} # End loopover lines
$end_match->();
} # End loop over subgroups
return;
} ## end sub delete_null_alignments
sub match_line_pairs {
my ( $rlines, $rnew_lines, $rsubgroups, $group_level ) = @_;
# Compare each pair of lines and save information about common matches
# $rlines = list of lines including hanging side comments
# $rnew_lines = list of lines without any hanging side comments
# $rsubgroups = list of subgroups of the new lines
# TODO:
# Maybe change: imax_pair => pair_match_info = ref to array
# = [$imax_align, $rMsg, ... ]
# This may eventually have multi-level match info
# Previous line vars
my ( $line_m, $rtokens_m, $rpatterns_m, $rfield_lengths_m, $imax_m,
$list_type_m, $ci_level_m );
# Current line vars
my ( $line, $rtokens, $rpatterns, $rfield_lengths, $imax, $list_type,
$ci_level );
use constant EXPLAIN_COMPARE_PATTERNS => 0;
my $compare_patterns = sub {
# helper routine to decide if patterns match well enough..
# return code:
# 0 = patterns match, continue
# 1 = no match
# 2 = no match, and lines do not match at all
my ( $tok, $tok_m, $pat, $pat_m, $pad ) = @_;
my $GoToMsg = "";
my $return_code = 1;
my ( $alignment_token, $lev, $tag, $tok_count ) =
decode_alignment_token($tok);
# We have to be very careful about aligning commas
# when the pattern's don't match, because it can be
# worse to create an alignment where none is needed
# than to omit one. Here's an example where the ','s
# are not in named containers. The first line below
# should not match the next two:
# ( $a, $b ) = ( $b, $r );
# ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
# ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
if ( $alignment_token eq ',' ) {
# do not align commas unless they are in named
# containers
$GoToMsg = "do not align commas in unnamed containers";
goto NO_MATCH unless ( $tok =~ /[A-Za-z]/ );
}
# do not align parens unless patterns match;
# large ugly spaces can occur in math expressions.
elsif ( $alignment_token eq '(' ) {
# But we can allow a match if the parens don't
# require any padding.
$GoToMsg = "do not align '(' unless patterns match or pad=0";
if ( $pad != 0 ) { goto NO_MATCH }
}
# Handle an '=' alignment with different patterns to
# the left.
elsif ( $alignment_token eq '=' ) {
# It is best to be a little restrictive when
# aligning '=' tokens. Here is an example of
# two lines that we will not align:
# my $variable=6;
# $bb=4;
# The problem is that one is a 'my' declaration,
# and the other isn't, so they're not very similar.
# We will filter these out by comparing the first
# letter of the pattern. This is crude, but works
# well enough.
if ( substr( $pat_m, 0, 1 ) ne substr( $pat, 0, 1 ) ) {
$GoToMsg = "first character before equals differ";
goto NO_MATCH;
}
# The introduction of sub 'prune_alignment_tree'
# enabled alignment of lists left of the equals with
# other scalar variables. For example:
# my ( $D, $s, $e ) = @_;
# my $d = length $D;
# my $c = $e - $s - $d;
# But this would change formatting of a lot of scripts,
# so for now we prevent alignment of comma lists on the
# left with scalars on the left. We will also prevent
# any partial alignments.
# set return code 2 if the = is at line level, but
# set return code 1 if the = is below line level, i.e.
# sub new { my ( $p, $v ) = @_; bless \$v, $p }
# sub iter { my ($x) = @_; return undef if $$x < 0; return $$x--; }
elsif (
( index( $pat_m, ',' ) >= 0 ) ne ( index( $pat, ',' ) >= 0 ) )
{
$GoToMsg = "mixed commas/no-commas before equals";
if ( $lev eq $group_level ) {
$return_code = 2;
}
goto NO_MATCH;
}
}
MATCH:
return ( 0, \$GoToMsg );
NO_MATCH:
EXPLAIN_COMPARE_PATTERNS
&& print STDERR "no match because $GoToMsg\n";
return ( $return_code, \$GoToMsg );
}; ## end of $compare_patterns->()
# loop over subgroups
foreach my $item ( @{$rsubgroups} ) {
my ( $jbeg, $jend ) = @{$item};
my $nlines = $jend - $jbeg + 1;
next unless ( $nlines > 1 );
# loop over lines in a subgroup
for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
$line_m = $line;
$rtokens_m = $rtokens;
$rpatterns_m = $rpatterns;
$rfield_lengths_m = $rfield_lengths;
$imax_m = $imax;
$list_type_m = $list_type;
$ci_level_m = $ci_level;
$line = $rnew_lines->[$jj];
$rtokens = $line->get_rtokens();
$rpatterns = $line->get_rpatterns();
$rfield_lengths = $line->get_rfield_lengths();
$imax = @{$rtokens} - 2;
$list_type = $line->get_list_type();
$ci_level = $line->get_ci_level();
# nothing to do for first line
next if ( $jj == $jbeg );
my $ci_jump = $ci_level - $ci_level_m;
my $imax_min = $imax_m < $imax ? $imax_m : $imax;
my $imax_align = -1;
# find number of leading common tokens
#################################
# No match to hanging side comment
#################################
if ( $line->get_is_hanging_side_comment() ) {
# Should not get here; HSC's have been filtered out
$imax_align = -1;
}
##############################
# Handle comma-separated lists
##############################
elsif ( $list_type && $list_type eq $list_type_m ) {
# do not align lists across a ci jump with new list method
if ($ci_jump) { $imax_min = -1 }
my $i_nomatch = $imax_min + 1;
for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
my $tok = $rtokens->[$i];
my $tok_m = $rtokens_m->[$i];
if ( $tok ne $tok_m ) {
$i_nomatch = $i;
last;
}
}
$imax_align = $i_nomatch - 1;
}
##################
# Handle non-lists
##################
else {
my $i_nomatch = $imax_min + 1;
for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
my $tok = $rtokens->[$i];
my $tok_m = $rtokens_m->[$i];
if ( $tok ne $tok_m ) {
$i_nomatch = $i;
last;
}
my $pat = $rpatterns->[$i];
my $pat_m = $rpatterns_m->[$i];
# If patterns don't match, we have to be careful...
if ( $pat_m ne $pat ) {
my $pad =
$rfield_lengths->[$i] - $rfield_lengths_m->[$i];
my ( $match_code, $rmsg ) = $compare_patterns->(
$tok, $tok_m, $pat, $pat_m, $pad
);
if ($match_code) {
if ( $match_code eq 1 ) { $i_nomatch = $i }
elsif ( $match_code eq 2 ) { $i_nomatch = 0 }
last;
}
}
}
$imax_align = $i_nomatch - 1;
}
$line_m->set_imax_pair($imax_align);
} ## end loop over lines
# Put fence at end of subgroup
$line->set_imax_pair(-1);
} ## end loop over subgroups
# if there are hanging side comments, propagate the pair info down to them
# so that lines can just look back one line for their pair info.
if ( @{$rlines} > @{$rnew_lines} ) {
my $last_pair_info = -1;
foreach my $line ( @{$rlines} ) {
if ( $line->get_is_hanging_side_comment() ) {
$line->set_imax_pair($last_pair_info);
}
else {
$last_pair_info = $line->get_imax_pair();
}
}
}
return;
}
sub fat_comma_to_comma {
my ($str) = @_;
# We are changing '=>' to ',' and removing any trailing decimal count
# because currently fat commas have a count and commas do not.
# For example, we will change '=>2+{-3.2' into ',2+{-3'
if ( $str =~ /^=>([^\.]*)/ ) { $str = ',' . $1 }
return $str;
}
sub get_line_token_info {
# scan lines of tokens and return summary information about the range of
# levels and patterns.
my ($rlines) = @_;
# First scan to check monotonicity. Here is an example of several
# lines which are monotonic. The = is the lowest level, and
# the commas are all one level deeper. So this is not nonmonotonic.
# $$d{"weeks"} = [ "w", "wk", "wks", "week", "weeks" ];
# $$d{"days"} = [ "d", "day", "days" ];
# $$d{"hours"} = [ "h", "hr", "hrs", "hour", "hours" ];
my @all_token_info;
my $all_monotonic = 1;
for ( my $jj = 0 ; $jj < @{$rlines} ; $jj++ ) {
my ($line) = $rlines->[$jj];
my $rtokens = $line->get_rtokens();
my $last_lev;
my $is_monotonic = 1;
my $i = -1;
foreach my $tok ( @{$rtokens} ) {
$i++;
my ( $raw_tok, $lev, $tag, $tok_count ) =
decode_alignment_token($tok);
push @{ $all_token_info[$jj] },
[ $raw_tok, $lev, $tag, $tok_count ];
last if ( $tok eq '#' );
if ( $i > 0 && $lev < $last_lev ) { $is_monotonic = 0 }
$last_lev = $lev;
}
if ( !$is_monotonic ) { $all_monotonic = 0 }
}
my $rline_values = [];
for ( my $jj = 0 ; $jj < @{$rlines} ; $jj++ ) {
my ($line) = $rlines->[$jj];
my $rtokens = $line->get_rtokens();
my $i = -1;
my ( $lev_min, $lev_max );
my $token_pattern_max = "";
my %saw_level;
my @token_info;
my $is_monotonic = 1;
# find the index of the last token before the side comment
my $imax = @{$rtokens} - 2;
my $imax_true = $imax;
# If the entire group is monotonic, and the line ends in a comma list,
# walk it back to the first such comma. this will have the effect of
# making all trailing ragged comma lists match in the prune tree
# routine. these trailing comma lists can better be handled by later
# alignment rules.
# Treat fat commas the same as commas here by converting them to
# commas. This will improve the chance of aligning the leading parts
# of ragged lists.
my $tok_end = fat_comma_to_comma( $rtokens->[$imax] );
if ( $all_monotonic && $tok_end =~ /^,/ ) {
my $i = $imax - 1;
while ( $i >= 0
&& fat_comma_to_comma( $rtokens->[$i] ) eq $tok_end )
{
$imax = $i;
$i--;
}
}
# make a first pass to find level range
my $last_lev;
foreach my $tok ( @{$rtokens} ) {
$i++;
last if ( $i > $imax );
last if ( $tok eq '#' );
my ( $raw_tok, $lev, $tag, $tok_count ) =
@{ $all_token_info[$jj]->[$i] };
last if ( $tok eq '#' );
$token_pattern_max .= $tok;
$saw_level{$lev}++;
if ( !defined($lev_min) ) {
$lev_min = $lev;
$lev_max = $lev;
}
else {
if ( $lev < $lev_min ) { $lev_min = $lev; }
if ( $lev > $lev_max ) { $lev_max = $lev; }
if ( $lev < $last_lev ) { $is_monotonic = 0 }
}
$last_lev = $lev;
}
# handle no levels
my $rtoken_patterns = {};
my $rtoken_indexes = {};
my @levs = sort keys %saw_level;
if ( !defined($lev_min) ) {
$lev_min = -1;
$lev_max = -1;
$levs[0] = -1;
$rtoken_patterns->{$lev_min} = "";
$rtoken_indexes->{$lev_min} = [];
}
# handle one level
elsif ( $lev_max == $lev_min ) {
$rtoken_patterns->{$lev_max} = $token_pattern_max;
$rtoken_indexes->{$lev_max} = [ ( 0 .. $imax ) ];
}
# handle multiple levels
else {
$rtoken_patterns->{$lev_max} = $token_pattern_max;
$rtoken_indexes->{$lev_max} = [ ( 0 .. $imax ) ];
my $debug = 0;
my $lev_top = pop @levs; # alread did max level
my $itok = -1;
foreach my $tok ( @{$rtokens} ) {
$itok++;
last if ( $itok > $imax );
my ( $raw_tok, $lev, $tag, $tok_count ) =
@{ $all_token_info[$jj]->[$itok] };
last if ( $raw_tok eq '#' );
foreach my $lev_test (@levs) {
next if ( $lev > $lev_test );
$rtoken_patterns->{$lev_test} .= $tok;
push @{ $rtoken_indexes->{$lev_test} }, $itok;
}
}
push @levs, $lev_top;
}
push @{$rline_values},
[
$lev_min, $lev_max, $rtoken_patterns, \@levs,
$rtoken_indexes, $is_monotonic, $imax_true, $imax,
];
# debug
0 && do {
local $" = ')(';
print "lev_min=$lev_min, lev_max=$lev_max, levels=(@levs)\n";
foreach my $key ( sort keys %{$rtoken_patterns} ) {
print "$key => $rtoken_patterns->{$key}\n";
print "$key => @{$rtoken_indexes->{$key}}\n";
}
};
} ## end loop over lines
return ( $rline_values, $all_monotonic );
}
sub prune_alignment_tree {
my ($rlines) = @_;
my $jmax = @{$rlines} - 1;
return unless $jmax > 0;
# Vertical alignment in perltidy is done as an iterative process. The
# starting point is to mark all possible alignment tokens ('=', ',', '=>',
# etc) for vertical alignment. Then we have to delete all alignments
# which, if actually made, would detract from overall alignment. This
# is done in several phases of which this is one.
# In this routine we look at the alignments of a group of lines as a
# hierarchical tree. We will 'prune' the tree to limited depths if that
# will improve overall alignment at the lower depths.
# For each line we will be looking at its alignment patterns down to
# different fixed depths. For each depth, we include all lower depths and
# ignore all higher depths. We want to see if we can get alignment of a
# larger group of lines if we ignore alignments at some lower depth.
# Here is an # example:
# for (
# [ '$var', sub { join $_, "bar" }, 0, "bar" ],
# [ 'CONSTANT', sub { join "foo", "bar" }, 0, "bar" ],
# [ 'CONSTANT', sub { join "foo", "bar", 3 }, 1, "barfoo3" ],
# [ '$myvar', sub { my $var; join $var, "bar" }, 0, "bar" ],
# );
# In the above example, all lines have three commas at the lowest depth
# (zero), so if there were no other alignements, these lines would all
# align considering only the zero depth alignment token. But some lines
# have additional comma alignments at the next depth, so we need to decide
# if we should drop those to keep the top level alignments, or keep those
# for some additional low level alignments at the expense losing some top
# level alignments. In this case we will drop the deeper level commas to
# keep the entire collection aligned. But in some cases the decision could
# go the other way.
# The tree for this example at the zero depth has one node containing
# all four lines, since they are identical at zero level (three commas).
# At depth one, there are three 'children' nodes, namely:
# - lines 1 and 2, which have a single comma in the 'sub' at depth 1
# - line 3, which has 2 commas at depth 1
# - line4, which has a ';' and a ',' at depth 1
# There are no deeper alignments in this example.
# so the tree structure for this example is:
#
# depth 0 depth 1 depth 2
# [lines 1-4] -- [line 1-2] - (empty)
# | [line 3] - (empty)
# | [line 4] - (empty)
# We can carry this to any depth, but it is not really useful to go below
# depth 2. To cleanly stop there, we will consider depth 2 to contain all
# alignments at depth >=2.
use constant EXPLAIN_PRUNE => 0;
####################################################################
# Prune Tree Step 1. Start by scanning the lines and collecting info
####################################################################
# Note that the caller had this info but we have to redo this now because
# alignment tokens may have been deleted.
my ( $rline_values, $all_monotonic ) = get_line_token_info($rlines);
# If all the lines have levels which increase monotonically from left to
# right, then the sweep-left-to-right pass can do a better job of alignment
# than pruning, and without deleting alignments.
return if ($all_monotonic);
# Contents of $rline_values
# [
# $lev_min, $lev_max, $rtoken_patterns, \@levs,
# $rtoken_indexes, $is_monotonic, $imax_true, $imax,
# ];
# We can work to any depth, but there is little advantage to working
# to a a depth greater than 2
my $MAX_DEPTH = 2;
# This arrays will hold the tree of alignment tokens at different depths
# for these lines.
my @match_tree;
# Tree nodes contain these values:
# $match_tree[$depth] = [$jbeg, $jend, $n_parent, $level, $pattern,
# $nc_beg_p, $nc_end_p, $rindexes];
# where
# $depth = 0,1,2 = index of depth of the match
# $jbeg beginning index j of the range of lines in this match
# $jend ending index j of the range of lines in this match
# $n_parent = index of the containing group at $depth-1, if it exists
# $level = actual level of code being matched in this group
# $pattern = alignment pattern being matched
# $nc_beg_p = first child
# $nc_end_p = last child
# $rindexes = ref to token indexes
# the patterns and levels of the current group being formed at each depth
my ( @token_patterns_current, @levels_current, @token_indexes_current );
# the patterns and levels of the next line being tested at each depth
my ( @token_patterns_next, @levels_next, @token_indexes_next );
#########################################################
# define a recursive worker subroutine for tree construction
#########################################################
# This is a recursive routine which is called if a match condition changes
# at any depth when a new line is encountered. It ends the match node
# which changed plus all deeper nodes attached to it.
my $end_node;
$end_node = sub {
my ( $depth, $jl, $n_parent ) = @_;
# $depth is the tree depth
# $jl is the index of the line
# $n_parent is index of the parent node of this node
return if ( $depth > $MAX_DEPTH );
# end any current group at this depth
if ( $jl >= 0
&& defined( $match_tree[$depth] )
&& @{ $match_tree[$depth] }
&& defined( $levels_current[$depth] ) )
{
$match_tree[$depth]->[-1]->[1] = $jl;
}
# Define the index of the node we will create below
my $ng_self = 0;
if ( defined( $match_tree[$depth] ) ) {
$ng_self = @{ $match_tree[$depth] };
}
# end any next deeper child node(s)
$end_node->( $depth + 1, $jl, $ng_self );
# update the levels being matched
$token_patterns_current[$depth] = $token_patterns_next[$depth];
$token_indexes_current[$depth] = $token_indexes_next[$depth];
$levels_current[$depth] = $levels_next[$depth];
# Do not start a new group at this level if it is not being used
if ( !defined( $levels_next[$depth] )
|| $depth > 0
&& $levels_next[$depth] <= $levels_next[ $depth - 1 ] )
{
return;
}
# Create a node for the next group at this depth. We initially assume
# that it will continue to $jmax, and correct that later if the node
# ends earlier.
push @{ $match_tree[$depth] },
[
$jl + 1, $jmax, $n_parent, $levels_current[$depth],
$token_patterns_current[$depth],
undef, undef, $token_indexes_current[$depth],
];
return;
}; ## end sub end_node
######################################################
# Prune Tree Step 2. Loop to form the tree of matches.
######################################################
for ( my $jp = 0 ; $jp <= $jmax ; $jp++ ) {
# working with two adjacent line indexes, 'm'=minus, 'p'=plus
my $jm = $jp - 1;
# Pull out needed values for the next line
my ( $lev_min, $lev_max, $rtoken_patterns, $rlevs, $rtoken_indexes,
$is_monotonic, $imax_true, $imax )
= @{ $rline_values->[$jp] };
# Transfer levels and patterns for this line to the working arrays.
# If the number of levels differs from our chosen MAX_DEPTH ...
# if fewer than MAX_DEPTH: leave levels at missing depths undefined
# if more than MAX_DEPTH: set the MAX_DEPTH level to be the maximum
@levels_next = @{$rlevs}[ 0 .. $MAX_DEPTH ];
if ( @{$rlevs} > $MAX_DEPTH ) {
$levels_next[$MAX_DEPTH] = $rlevs->[-1];
}
my $depth = 0;
foreach (@levels_next) {
$token_patterns_next[$depth] =
defined($_) ? $rtoken_patterns->{$_} : undef;
$token_indexes_next[$depth] =
defined($_) ? $rtoken_indexes->{$_} : undef;
$depth++;
}
# Look for a change in match groups...
# Initialize on the first line
if ( $jp == 0 ) {
my $n_parent;
$end_node->( 0, $jm, $n_parent );
}
# End groups if a hard flag has been set
elsif ( $rlines->[$jm]->get_end_group() ) {
my $n_parent;
$end_node->( 0, $jm, $n_parent );
}
# Continue at hanging side comment
elsif ( $rlines->[$jp]->get_is_hanging_side_comment() ) {
next;
}
# Otherwise see if anything changed and update the tree if so
else {
foreach my $depth ( 0 .. $MAX_DEPTH ) {
my $def_current = defined( $token_patterns_current[$depth] );
my $def_next = defined( $token_patterns_next[$depth] );
last unless ( $def_current || $def_next );
if ( !$def_current
|| !$def_next
|| $token_patterns_current[$depth] ne
$token_patterns_next[$depth] )
{
my $n_parent;
if ( $depth > 0 && defined( $match_tree[ $depth - 1 ] ) ) {
$n_parent = @{ $match_tree[ $depth - 1 ] } - 1;
}
$end_node->( $depth, $jm, $n_parent );
last;
}
}
}
} ## end loop to form tree of matches
##########################################################
# Prune Tree Step 3. Make links from parent to child nodes
##########################################################
# It seemed cleaner to do this as a separate step rather than during tree
# construction. The children nodes have links up to the parent node which
# created them. Now make links in the opposite direction, so the parents
# can find the children. We store the range of children nodes ($nc_beg,
# $nc_end) of each parent with two additional indexes in the orignal array.
# These will be undef if no children.
for ( my $depth = $MAX_DEPTH ; $depth > 0 ; $depth-- ) {
next unless defined( $match_tree[$depth] );
my $nc_max = @{ $match_tree[$depth] } - 1;
my $np_now;
foreach my $nc ( 0 .. $nc_max ) {
my $np = $match_tree[$depth]->[$nc]->[2];
if ( !defined($np) ) {
# shouldn't happen
#print STDERR "lost child $np at depth $depth\n";
next;
}
if ( !defined($np_now) || $np != $np_now ) {
$np_now = $np;
$match_tree[ $depth - 1 ]->[$np]->[5] = $nc;
}
$match_tree[ $depth - 1 ]->[$np]->[6] = $nc;
}
} ## end loop to make links down to the child nodes
EXPLAIN_PRUNE > 0 && do {
print "Tree complete. Found these groups:\n";
foreach my $depth ( 0 .. $MAX_DEPTH ) {
Dump_tree_groups( \@{ $match_tree[$depth] }, "depth=$depth" );
}
};
#######################################################
# Prune Tree Step 4. Make a list of nodes to be deleted
#######################################################
# list of lines with tokens to be deleted:
# [$jbeg, $jend, $level_keep]
# $jbeg..$jend is the range of line indexes,
# $level_keep is the minimum level to keep
my @delete_list;
# Groups with ending comma lists and their range of sizes:
# $ragged_comma_group{$id} = [ imax_group_min, imax_group_max ]
my %ragged_comma_group;
# Define a threshold line count for forcing a break
my $nlines_break = 3;
# We work with a list of nodes to visit at the next deeper depth.
my @todo_list;
if ( defined( $match_tree[0] ) ) {
@todo_list = ( 0 .. @{ $match_tree[0] } - 1 );
}
for ( my $depth = 0 ; $depth <= $MAX_DEPTH ; $depth++ ) {
last unless (@todo_list);
my @todo_next;
foreach my $np (@todo_list) {
my ( $jbeg_p, $jend_p, $np_p, $lev_p, $pat_p, $nc_beg_p, $nc_end_p,
$rindexes_p )
= @{ $match_tree[$depth]->[$np] };
my $nlines_p = $jend_p - $jbeg_p + 1;
# nothing to do if no children
next unless defined($nc_beg_p);
# Define the number of lines to either keep or delete a child node.
# This is the key decision we have to make. We want to delete
# short runs of matched lines, and keep long runs. It seems easier
# for the eye to follow breaks in monotonic level changes than
# non-monotonic level changes. For example, the following looks
# best if we delete the lower level alignments:
# [1] ~~ [];
# [ ["foo"], ["bar"] ] ~~ [ qr/o/, qr/a/ ];
# [ qr/o/, qr/a/ ] ~~ [ ["foo"], ["bar"] ];
# [ "foo", "bar" ] ~~ [ qr/o/, qr/a/ ];
# [ qr/o/, qr/a/ ] ~~ [ "foo", "bar" ];
# $deep1 ~~ $deep1;
# So we will use two thresholds.
my $nmin_mono = $depth + 2;
my $nmin_non_mono = $depth + 6;
if ( $nmin_mono > $nlines_p - 1 ) {
$nmin_mono = $nlines_p - 1;
}
if ( $nmin_non_mono > $nlines_p - 1 ) {
$nmin_non_mono = $nlines_p - 1;
}
# loop to keep or delete each child node
foreach my $nc ( $nc_beg_p .. $nc_end_p ) {
my ( $jbeg_c, $jend_c, $np_c, $lev_c, $pat_c, $nc_beg_c,
$nc_end_c )
= @{ $match_tree[ $depth + 1 ]->[$nc] };
my $nlines_c = $jend_c - $jbeg_c + 1;
my $is_monotonic = $rline_values->[$jbeg_c]->[5];
my $nmin = $is_monotonic ? $nmin_mono : $nmin_non_mono;
if ( $nlines_c < $nmin ) {
##print "deleting child, nlines=$nlines_c, nmin=$nmin\n";
push @delete_list, [ $jbeg_c, $jend_c, $lev_p ];
}
else {
##print "keeping child, nlines=$nlines_c, nmin=$nmin\n";
push @todo_next, $nc;
}
}
}
@todo_list = @todo_next;
} ## end loop to mark nodes to delete
#############################################################
# Prune Tree Step 5. Loop to delete selected alignment tokens
#############################################################
foreach my $item (@delete_list) {
my ( $jbeg, $jend, $level_keep ) = @{$item};
foreach my $jj ( $jbeg .. $jend ) {
my $line = $rlines->[$jj];
my @idel;
my $rtokens = $line->get_rtokens();
my $imax = @{$rtokens} - 2;
for ( my $i = 0 ; $i <= $imax ; $i++ ) {
my $tok = $rtokens->[$i];
my ( $raw_tok, $lev, $tag, $tok_count ) =
decode_alignment_token($tok);
if ( $lev > $level_keep ) {
push @idel, $i;
}
}
if (@idel) {
delete_selected_tokens( $line, \@idel );
}
}
} ## end loop to delete selected alignment tokens
return;
} ## end sub prune_alignment_tree
sub Dump_tree_groups {
my ( $rgroup, $msg ) = @_;
print "$msg\n";
local $" = ')(';
foreach my $item ( @{$rgroup} ) {
my @fix = @{$item};
foreach (@fix) { $_ = "undef" unless defined $_; }
$fix[4] = "...";
print "(@fix)\n";
}
return;
}
{ ## closure for sub is_marginal_match
my %is_if_or;
my %is_assignment;
my %is_good_alignment;
# This test did not give sufficiently better results to use as an update,
# but the flag is worth keeping as a starting point for future testing.
use constant TEST_MARGINAL_EQ_ALIGNMENT => 0;
BEGIN {
my @q = qw(
if unless or ||
);
@is_if_or{@q} = (1) x scalar(@q);
@q = qw(
= **= += *= &= <<= &&=
-= /= |= >>= ||= //=
.= %= ^=
x=
);
@is_assignment{@q} = (1) x scalar(@q);
# Vertically aligning on certain "good" tokens is usually okay
# so we can be less restrictive in marginal cases.
@q = qw( { ? => = );
push @q, (',');
@is_good_alignment{@q} = (1) x scalar(@q);
}
sub is_marginal_match {
my ( $line_0, $line_1, $group_level, $imax_align, $imax_prev ) = @_;
# Decide if we should undo some or all of the common alignments of a
# group of just two lines.
# Given:
# $line_0 and $line_1 - the two lines
# $group_level = the indentation level of the group being processed
# $imax_align = the maximum index of the common alignment tokens
# of the two lines
# $imax_prev = the maximum index of the common alignment tokens
# with the line before $line_0 (=-1 of does not exist)
# Return:
# $is_marginal = true if the two lines should NOT be fully aligned
# = false if the two lines can remain fully aligned
# $imax_align = the index of the highest alignment token shared by
# these two lines to keep if the match is marginal.
# When we have an alignment group of just two lines like this, we are
# working in the twilight zone of what looks good and what looks bad.
# This routine is a collection of rules which work have been found to
# work fairly well, but it will need to be updated from time to time.
my $is_marginal = 0;
# always keep alignments of a terminal else or ternary
goto RETURN if ( defined( $line_1->get_j_terminal_match() ) );
# always align lists
my $group_list_type = $line_0->get_list_type();
goto RETURN if ($group_list_type);
# always align hanging side comments
my $is_hanging_side_comment = $line_1->get_is_hanging_side_comment();
goto RETURN if ($is_hanging_side_comment);
my $jmax_0 = $line_0->get_jmax();
my $jmax_1 = $line_1->get_jmax();
my $rtokens_1 = $line_1->get_rtokens();
my $rtokens_0 = $line_0->get_rtokens();
my $rfield_lengths_0 = $line_0->get_rfield_lengths();
my $rfield_lengths_1 = $line_1->get_rfield_lengths();
my $rpatterns_0 = $line_0->get_rpatterns();
my $rpatterns_1 = $line_1->get_rpatterns();
my $imax_next = $line_1->get_imax_pair();
# We will scan the alignment tokens and set a flag '$is_marginal' if
# it seems that the an alignment would look bad.
my $max_pad = 0;
my $saw_good_alignment = 0;
my $saw_if_or; # if we saw an 'if' or 'or' at group level
my $raw_tokb = ""; # first token seen at group level
my $jfirst_bad;
my $line_ending_fat_comma; # is last token just a '=>' ?
my $j0_eq_pad;
my $j0_max_pad = 0;
for ( my $j = 0 ; $j < $jmax_1 - 1 ; $j++ ) {
my ( $raw_tok, $lev, $tag, $tok_count ) =
decode_alignment_token( $rtokens_1->[$j] );
if ( $raw_tok && $lev == $group_level ) {
if ( !$raw_tokb ) { $raw_tokb = $raw_tok }
$saw_if_or ||= $is_if_or{$raw_tok};
}
# When the first of the two lines ends in a bare '=>' this will
# probably be marginal match. (For a bare =>, the next field length
# will be 2 or 3, depending on side comment)
$line_ending_fat_comma =
$j == $jmax_1 - 2
&& $raw_tok eq '=>'
&& $rfield_lengths_0->[ $j + 1 ] <= 3;
my $pad = $rfield_lengths_1->[$j] - $rfield_lengths_0->[$j];
if ( $j == 0 ) {
$pad += $line_1->get_leading_space_count() -
$line_0->get_leading_space_count();
# Remember the pad at a leading equals
if ( $raw_tok eq '=' && $lev == $group_level ) {
$j0_eq_pad = $pad;
$j0_max_pad =
0.5 * ( $rfield_lengths_1->[0] + $rfield_lengths_0->[0] );
$j0_max_pad = 4 if ( $j0_max_pad < 4 );
}
}
if ( $pad < 0 ) { $pad = -$pad }
if ( $pad > $max_pad ) { $max_pad = $pad }
if ( $is_good_alignment{$raw_tok} && !$line_ending_fat_comma ) {
$saw_good_alignment = 1;
}
else {
$jfirst_bad = $j unless defined($jfirst_bad);
}
if ( $rpatterns_0->[$j] ne $rpatterns_1->[$j] ) {
# Flag this as a marginal match since patterns differ.
# Normally, we will not allow just two lines to match if
# marginal. But we can allow matching in some specific cases.
$jfirst_bad = $j if ( !defined($jfirst_bad) );
$is_marginal = 1 if ( $is_marginal == 0 );
if ( $raw_tok eq '=' ) {
# Here is an example of a marginal match:
# $done{$$op} = 1;
# $op = compile_bblock($op);
# The left tokens are both identifiers, but
# one accesses a hash and the other doesn't.
# We'll let this be a tentative match and undo
# it later if we don't find more than 2 lines
# in the group.
$is_marginal = 2;
}
}
}
$is_marginal = 1 if ( $is_marginal == 0 && $line_ending_fat_comma );
# Turn off the "marginal match" flag in some cases...
# A "marginal match" occurs when the alignment tokens agree
# but there are differences in the other tokens (patterns).
# If we leave the marginal match flag set, then the rule is that we
# will align only if there are more than two lines in the group.
# We will turn of the flag if we almost have a match
# and either we have seen a good alignment token or we
# just need a small pad (2 spaces) to fit. These rules are
# the result of experimentation. Tokens which misaligned by just
# one or two characters are annoying. On the other hand,
# large gaps to less important alignment tokens are also annoying.
if ( $is_marginal == 1
&& ( $saw_good_alignment || $max_pad < 3 ) )
{
$is_marginal = 0;
}
# We will use the line endings to help decide on alignments...
# See if the lines end with semicolons...
my $sc_term0;
my $sc_term1;
if ( $jmax_0 < 1 || $jmax_1 < 1 ) {
# shouldn't happen
}
else {
my $pat0 = $rpatterns_0->[ $jmax_0 - 1 ];
my $pat1 = $rpatterns_1->[ $jmax_1 - 1 ];
$sc_term0 = $pat0 =~ /;b?$/;
$sc_term1 = $pat1 =~ /;b?$/;
}
if ( !$is_marginal && !$sc_term0 ) {
# First line of assignment should be semicolon terminated.
# For example, do not align here:
# $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
# $$href{-NUM_DIRS} = 0;
if ( $is_assignment{$raw_tokb} ) {
$is_marginal = 1;
}
}
# Try to avoid some undesirable alignments of opening tokens
# for example, the space between grep and { here:
# return map { ( $_ => $_ ) }
# grep { /$handles/ } $self->_get_delegate_method_list;
$is_marginal ||=
( $raw_tokb eq '(' || $raw_tokb eq '{' )
&& $jmax_1 == 2
&& $sc_term0 ne $sc_term1;
########################################
# return unless this is a marginal match
########################################
goto RETURN if ( !$is_marginal );
# Undo the marginal match flag in certain cases,
# Two lines with a leading equals-like operator are allowed to
# align if the patterns to the left of the equals are the same.
# For example the following two lines are a marginal match but have
# the same left side patterns, so we will align the equals.
# my $orig = my $format = "^<<<<< ~~\n";
# my $abc = "abc";
# But these have a different left pattern so they will not be
# aligned
# $xmldoc .= $`;
# $self->{'leftovers'} .= "<bx-seq:seq" . $';
# First line semicolon terminated but second not, usually ok:
# my $want = "'ab', 'a', 'b'";
# my $got = join( ", ",
# map { defined($_) ? "'$_'" : "undef" }
# @got );
# First line not semicolon terminated, Not OK to match:
# $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
# $$href{-NUM_DIRS} = 0;
my $pat0 = $rpatterns_0->[0];
my $pat1 = $rpatterns_1->[0];
##########################################################
# Turn off the marginal flag for some types of assignments
##########################################################
if ( $is_assignment{$raw_tokb} ) {
# undo marginal flag if first line is semicolon terminated
# and leading patters match
if ($sc_term0) { # && $sc_term1) {
$is_marginal = $pat0 ne $pat1;
}
}
elsif ( $raw_tokb eq '=>' ) {
# undo marginal flag if patterns match
$is_marginal = $pat0 ne $pat1 || $line_ending_fat_comma;
}
elsif ( $raw_tokb eq '=~' ) {
# undo marginal flag if both lines are semicolon terminated
# and leading patters match
if ( $sc_term1 && $sc_term0 ) {
$is_marginal = $pat0 ne $pat1;
}
}
######################################################
# Turn off the marginal flag if we saw an 'if' or 'or'
######################################################
# A trailing 'if' and 'or' often gives a good alignment
# For example, we can align these:
# return -1 if $_[0] =~ m/^CHAPT|APPENDIX/;
# return $1 + 0 if $_[0] =~ m/^SECT(\d*)$/;
# or
# $d_in_m[2] = 29 if ( &Date_LeapYear($y) );
# $d = $d_in_m[$m] if ( $d > $d_in_m[$m] );
if ($saw_if_or) {
# undo marginal flag if both lines are semicolon terminated
if ( $sc_term0 && $sc_term1 ) {
$is_marginal = 0;
}
}
# For a marginal match, only keep matches before the first 'bad' match
if ( $is_marginal
&& defined($jfirst_bad)
&& $imax_align > $jfirst_bad - 1 )
{
$imax_align = $jfirst_bad - 1;
}
###########################################################
# Allow sweep to match lines with leading '=' in some cases
###########################################################
if ( $imax_align < 0 && defined($j0_eq_pad) ) {
if (
# If there is a following line with leading equals, or
# preceding line with leading equals, then let the sweep align
# them without restriction. For example, the first two lines
# here are a marginal match, but they are followed by a line
# with leading equals, so the sweep-lr logic can align all of
# the lines:
# $date[1] = $month_to_num{ $date[1] }; # <--line_0
# @xdate = split( /[:\/\s]/, $log->field('t') ); # <--line_1
# $day = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
# $time = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
# Likewise, if we reverse the two pairs we want the same result
# $day = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
# $time = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
# $date[1] = $month_to_num{ $date[1] }; # <--line_0
# @xdate = split( /[:\/\s]/, $log->field('t') ); # <--line_1
(
$imax_next >= 0
|| $imax_prev >= 0
|| TEST_MARGINAL_EQ_ALIGNMENT
)
&& $j0_eq_pad >= -$j0_max_pad
&& $j0_eq_pad <= $j0_max_pad
)
{
# But do not do this if there is a comma before the '='.
# For example, the first two lines below have commas and
# therefore are not allowed to align with lines 3 & 4:
# my ( $x, $y ) = $self->Size(); #<--line_0
# my ( $left, $top, $right, $bottom ) = $self->Window(); #<--l_1
# my $vx = $right - $left;
# my $vy = $bottom - $top;
if ( $rpatterns_0->[0] !~ /,/ && $rpatterns_1->[0] !~ /,/ ) {
$imax_align = 0;
}
}
}
RETURN:
return ( $is_marginal, $imax_align );
}
}
sub get_extra_leading_spaces {
my ( $rlines, $rgroups ) = @_;
#----------------------------------------------------------
# Define any extra indentation space (for the -lp option).
# Here is why:
# If a list has side comments, sub scan_list must dump the
# list before it sees everything. When this happens, it sets
# the indentation to the standard scheme, but notes how
# many spaces it would have liked to use. We may be able
# to recover that space here in the event that all of the
# lines of a list are back together again.
#----------------------------------------------------------
return 0 unless ( @{$rlines} && @{$rgroups} );
my $object = $rlines->[0]->get_indentation();
return 0 unless ( ref($object) );
my $extra_leading_spaces = 0;
my $extra_indentation_spaces_wanted = get_recoverable_spaces($object);
return ($extra_leading_spaces) unless ($extra_indentation_spaces_wanted);
my $min_spaces = $extra_indentation_spaces_wanted;
if ( $min_spaces > 0 ) { $min_spaces = 0 }
# loop over all groups
my $ng = -1;
my $ngroups = @{$rgroups};
foreach my $item ( @{$rgroups} ) {
$ng++;
my ( $jbeg, $jend ) = @{$item};
foreach my $j ( $jbeg .. $jend ) {
next if ( $j == 0 );
# all indentation objects must be the same
if ( $object != $rlines->[$j]->get_indentation() ) {
return 0;
}
}
# find the maximum space without exceeding the line length for this group
my $avail = $rlines->[$jbeg]->get_available_space_on_right();
my $spaces =
( $avail > $extra_indentation_spaces_wanted )
? $extra_indentation_spaces_wanted
: $avail;
#########################################################
# Note: min spaces can be negative; for example with -gnu
# f(
# do { 1; !!(my $x = bless []); }
# );
#########################################################
# The following rule is needed to match older formatting:
# For multiple groups, we will keep spaces non-negative.
# For a single group, we will allow a negative space.
if ( $ngroups > 1 && $spaces < 0 ) { $spaces = 0 }
# update the minimum spacing
if ( $ng == 0 || $spaces < $extra_leading_spaces ) {
$extra_leading_spaces = $spaces;
}
}
# update the indentation object because with -icp the terminal
# ');' will use the same adjustment.
$object->permanently_decrease_available_spaces( -$extra_leading_spaces );
return $extra_leading_spaces;
}
sub forget_side_comment {
my ($self) = @_;
$self->[_last_side_comment_column_] = 0;
return;
}
sub is_good_side_comment_column {
my ( $self, $line, $line_number, $level, $num5 ) = @_;
# Upon encountering the first side comment of a group, decide if
# a previous side comment should be forgotten. This involves
# checking several rules.
# Return true to keep old comment location
# Return false to forget old comment location
my $rfields = $line->get_rfields();
my $is_hanging_side_comment = $line->get_is_hanging_side_comment();
# RULE1: Never forget comment before a hanging side comment
goto KEEP if ($is_hanging_side_comment);
# RULE2: Forget a side comment after a short line difference,
# where 'short line difference' is computed from a formula.
# Using a smooth formula helps minimize sudden large changes.
my $line_diff = $line_number - $self->[_last_side_comment_line_number_];
my $alev_diff = abs( $level - $self->[_last_side_comment_level_] );
# '$num5' is the number of comments in the first 5 lines after the first
# comment. It is needed to keep a compact group of side comments from
# being influenced by a more distant side comment.
$num5 = 1 unless ($num5);
# Some values:
# $adiff $num5 $short_diff
# 0 * 12
# 1 1 6
# 1 2 4
# 1 3 3
# 1 4 2
# 2 1 4
# 2 2 2
# 2 3 1
# 3 1 3
# 3 2 1
my $short_diff = SC_LONG_LINE_DIFF / ( 1 + $alev_diff * $num5 );
goto FORGET
if ( $line_diff > $short_diff );
# RULE3: Forget a side comment if this line is at lower level and
# ends a block
my $last_sc_level = $self->[_last_side_comment_level_];
goto FORGET
if ( $level < $last_sc_level
&& $is_closing_block_type{ substr( $rfields->[0], 0, 1 ) } );
# RULE 4: Forget the last side comment if this comment might join a cached
# line ...
if ( my $cached_line_type = get_cached_line_type() ) {
# ... otherwise side comment alignment will get messed up.
# For example, in the following test script
# with using 'perltidy -sct -act=2', the last comment would try to
# align with the previous and then be in the wrong column when
# the lines are combined:
# foreach $line (
# [0, 1, 2], [3, 4, 5], [6, 7, 8], # rows
# [0, 3, 6], [1, 4, 7], [2, 5, 8], # columns
# [0, 4, 8], [2, 4, 6]
# ) # diagonals
goto FORGET
if ( $cached_line_type == 2 || $cached_line_type == 4 );
}
# Otherwise, keep it alive
goto KEEP;
FORGET:
return 0;
KEEP:
return 1;
}
sub align_side_comments {
my ( $self, $rlines, $rgroups ) = @_;
# Align any side comments in this batch of lines
# Given:
# $rlines - the lines
# $rgroups - the partition of the lines into groups
#
# We will be working group-by-group because all side comments
# (real or fake) in each group are already aligned. So we just have
# to make alignments between groups wherever possible.
# An unusual aspect is that within each group we have aligned both real
# and fake side comments. This has the consequence that the lengths of
# long lines without real side comments can cause 'push' all side comments
# to the right. This seems unusual, but testing with and without this
# feature shows that it is usually better this way. Othewise, side
# comments can be hidden between long lines without side comments and
# thus be harder to read.
my $group_level = $self->[_group_level_];
my $continuing_sc_flow = $self->[_last_side_comment_length_] > 0
&& $group_level == $self->[_last_level_written_];
# Find groups with side comments, and remember the first nonblank comment
my $j_sc_beg;
my @todo;
my $ng = -1;
foreach my $item ( @{$rgroups} ) {
$ng++;
my ( $jbeg, $jend ) = @{$item};
foreach my $j ( $jbeg .. $jend ) {
my $line = $rlines->[$j];
my $jmax = $line->get_jmax();
if ( $line->get_rfield_lengths()->[$jmax] ) {
# this group has a line with a side comment
push @todo, $ng;
if ( !defined($j_sc_beg) ) {
$j_sc_beg = $j;
}
last;
}
}
}
# done if no groups with side comments
return unless @todo;
# Count $num5 = number of comments in the 5 lines after the first comment
# This is an important factor in a decision formula
my $num5 = 1;
for ( my $jj = $j_sc_beg + 1 ; $jj < @{$rlines} ; $jj++ ) {
my $ldiff = $jj - $j_sc_beg;
last if ( $ldiff > 5 );
my $line = $rlines->[$jj];
my $jmax = $line->get_jmax();
my $sc_len = $line->get_rfield_lengths()->[$jmax];
next unless ($sc_len);
$num5++;
}
# Forget the old side comment location if necessary
my $line = $rlines->[$j_sc_beg];
my $lnum =
$j_sc_beg + $self->[_file_writer_object_]->get_output_line_number();
my $keep_it =
$self->is_good_side_comment_column( $line, $lnum, $group_level, $num5 );
my $last_side_comment_column =
$keep_it ? $self->[_last_side_comment_column_] : 0;
# If there are multiple groups we will do two passes
# so that we can find a common alignment for all groups.
my $MAX_PASS = @todo > 1 ? 2 : 1;
# Loop over passes
my $max_comment_column = $last_side_comment_column;
for ( my $PASS = 1 ; $PASS <= $MAX_PASS ; $PASS++ ) {
# If there are two passes, then on the last pass make the old column
# equal to the largest of the group. This will result in the comments
# being aligned if possible.
if ( $PASS == $MAX_PASS ) {
$last_side_comment_column = $max_comment_column;
}
# Loop over the groups with side comments
my $column_limit;
foreach my $ng (@todo) {
my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
# Note that since all lines in a group have common alignments, we
# just have to work on one of the lines (the first line).
my $line = $rlines->[$jbeg];
my $jmax = $line->get_jmax();
my $is_hanging_side_comment = $line->get_is_hanging_side_comment();
last
if ( $PASS < $MAX_PASS && $is_hanging_side_comment );
# the maximum space without exceeding the line length:
my $avail = $line->get_available_space_on_right();
# try to use the previous comment column
my $side_comment_column = $line->get_column( $jmax - 1 );
my $move = $last_side_comment_column - $side_comment_column;
# Remember the maximum possible column of the first line with
# side comment
if ( !defined($column_limit) ) {
$column_limit = $side_comment_column + $avail;
}
next if ( $jmax <= 0 );
# but if this doesn't work, give up and use the minimum space
my $min_move = $self->[_rOpts_minimum_space_to_comment_] - 1;
if ( $move > $avail ) {
$move = $min_move;
}
# but we want some minimum space to the comment
if ( $move >= 0
&& $j_sc_beg == 0
&& $continuing_sc_flow )
{
$min_move = 0;
}
# remove constraints on hanging side comments
if ($is_hanging_side_comment) { $min_move = 0 }
if ( $move < $min_move ) {
$move = $min_move;
}
# don't exceed the available space
if ( $move > $avail ) { $move = $avail }
# We can only increase space, never decrease.
if ( $move < 0 ) { $move = 0 }
# Discover the largest column on the preliminary pass
if ( $PASS < $MAX_PASS ) {
my $col = $line->get_column( $jmax - 1 ) + $move;
# but ignore columns too large for the starting line
if ( $col > $max_comment_column && $col < $column_limit ) {
$max_comment_column = $col;
}
}
# Make the changes on the final pass
else {
$line->increase_field_width( $jmax - 1, $move );
# remember this column for the next group
$last_side_comment_column = $line->get_column( $jmax - 1 );
}
} ## end loop over groups
} ## end loop over passes
# Find the last side comment
my $j_sc_last;
my $ng_last = $todo[-1];
my ( $jbeg, $jend ) = @{ $rgroups->[$ng_last] };
for ( my $jj = $jend ; $jj >= $jbeg ; $jj-- ) {
my $line = $rlines->[$jj];
my $jmax = $line->get_jmax();
if ( $line->get_rfield_lengths()->[$jmax] ) {
$j_sc_last = $jj;
last;
}
}
# Save final side comment info for possible use by the next batch
if ( defined($j_sc_last) ) {
my $line_number =
$self->[_file_writer_object_]->get_output_line_number() + $j_sc_last;
$self->[_last_side_comment_column_] = $last_side_comment_column;
$self->[_last_side_comment_line_number_] = $line_number;
$self->[_last_side_comment_level_] = $group_level;
}
return;
}
###############################
# CODE SECTION 6: Output Step A
###############################
sub valign_output_step_A {
###############################################################
# This is Step A in writing vertically aligned lines.
# The line is prepared according to the alignments which have
# been found. Then it is shipped to the next step.
###############################################################
my ( $self, $rinput_hash ) = @_;
my $line = $rinput_hash->{line};
my $min_ci_gap = $rinput_hash->{min_ci_gap};
my $do_not_align = $rinput_hash->{do_not_align};
my $group_leader_length = $rinput_hash->{group_leader_length};
my $extra_leading_spaces = $rinput_hash->{extra_leading_spaces};
my $level = $rinput_hash->{level};
my $rfields = $line->get_rfields();
my $rfield_lengths = $line->get_rfield_lengths();
my $leading_space_count = $line->get_leading_space_count();
my $outdent_long_lines = $line->get_outdent_long_lines();
my $maximum_field_index = $line->get_jmax();
my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
my $Kend = $line->get_Kend();
# add any extra spaces
if ( $leading_space_count > $group_leader_length ) {
$leading_space_count += $min_ci_gap;
}
my $str = $rfields->[0];
my $str_len = $rfield_lengths->[0];
# loop to concatenate all fields of this line and needed padding
my $total_pad_count = 0;
for my $j ( 1 .. $maximum_field_index ) {
# skip zero-length side comments
last
if (
( $j == $maximum_field_index )
&& ( !defined( $rfields->[$j] )
|| ( $rfield_lengths->[$j] == 0 ) )
);
# compute spaces of padding before this field
my $col = $line->get_column( $j - 1 );
my $pad = $col - ( $str_len + $leading_space_count );
if ($do_not_align) {
$pad =
( $j < $maximum_field_index )
? 0
: $self->[_rOpts_minimum_space_to_comment_] - 1;
}
# if the -fpsc flag is set, move the side comment to the selected
# column if and only if it is possible, ignoring constraints on
# line length and minimum space to comment
if ( $self->[_rOpts_fixed_position_side_comment_]
&& $j == $maximum_field_index )
{
my $newpad =
$pad + $self->[_rOpts_fixed_position_side_comment_] - $col - 1;
if ( $newpad >= 0 ) { $pad = $newpad; }
}
# accumulate the padding
if ( $pad > 0 ) { $total_pad_count += $pad; }
# only add padding when we have a finite field;
# this avoids extra terminal spaces if we have empty fields
if ( $rfield_lengths->[$j] > 0 ) {
$str .= ' ' x $total_pad_count;
$str_len += $total_pad_count;
$total_pad_count = 0;
$str .= $rfields->[$j];
$str_len += $rfield_lengths->[$j];
}
else {
$total_pad_count = 0;
}
}
my $side_comment_length = $rfield_lengths->[$maximum_field_index];
# ship this line off
$self->valign_output_step_B(
{
leading_space_count => $leading_space_count + $extra_leading_spaces,
line => $str,
line_length => $str_len,
side_comment_length => $side_comment_length,
outdent_long_lines => $outdent_long_lines,
rvertical_tightness_flags => $rvertical_tightness_flags,
level => $level,
Kend => $Kend,
}
);
return;
}
sub combine_fields {
# We have a group of two lines for which we do not want to align tokens
# between index $imax_align and the side comment. So we will delete fields
# between $imax_align and the side comment. Alignments have already
# been set so we have to adjust them.
my ( $line_0, $line_1, $imax_align ) = @_;
if ( !defined($imax_align) ) { $imax_align = -1 }
# First delete the unwanted tokens
my $jmax_old = $line_0->get_jmax();
my @old_alignments = $line_0->get_alignments();
my @idel = ( $imax_align + 1 .. $jmax_old - 2 );
return unless (@idel);
foreach my $line ( $line_0, $line_1 ) {
delete_selected_tokens( $line, \@idel );
}
# Now adjust the alignments. Note that the side comment alignment
# is always at jmax-1, and there is an ending alignment at jmax.
my @new_alignments;
if ( $imax_align >= 0 ) {
@new_alignments[ 0 .. $imax_align ] =
@old_alignments[ 0 .. $imax_align ];
}
my $jmax_new = $line_0->get_jmax();
$new_alignments[ $jmax_new - 1 ] = $old_alignments[ $jmax_old - 1 ];
$new_alignments[$jmax_new] = $old_alignments[$jmax_old];
$line_0->set_alignments(@new_alignments);
$line_1->set_alignments(@new_alignments);
return;
}
sub get_output_line_number {
# The output line number reported to a caller =
# the number of items still in the buffer +
# the number of items written.
return $_[0]->group_line_count() +
$_[0]->[_file_writer_object_]->get_output_line_number();
}
###############################
# CODE SECTION 7: Output Step B
###############################
{ ## closure for sub valign_output_step_B
# These are values for a cache used by valign_output_step_B.
my $cached_line_text;
my $cached_line_text_length;
my $cached_line_type;
my $cached_line_flag;
my $cached_seqno;
my $cached_line_valid;
my $cached_line_leading_space_count;
my $cached_seqno_string;
my $cached_line_Kend;
my $seqno_string;
my $last_nonblank_seqno_string;
sub get_seqno_string {
return $seqno_string;
}
sub get_last_nonblank_seqno_string {
return $last_nonblank_seqno_string;
}
sub set_last_nonblank_seqno_string {
my ($val) = @_;
$last_nonblank_seqno_string = $val;
return;
}
sub get_cached_line_flag {
return $cached_line_flag;
}
sub get_cached_line_type {
return $cached_line_type;
}
sub set_cached_line_valid {
my ($val) = @_;
$cached_line_valid = $val;
return;
}
sub get_cached_seqno {
return $cached_seqno;
}
sub initialize_step_B_cache {
# valign_output_step_B cache:
$cached_line_text = "";
$cached_line_text_length = 0;
$cached_line_type = 0;
$cached_line_flag = 0;
$cached_seqno = 0;
$cached_line_valid = 0;
$cached_line_leading_space_count = 0;
$cached_seqno_string = "";
$cached_line_Kend = undef;
# These vars hold a string of sequence numbers joined together used by
# the cache
$seqno_string = "";
$last_nonblank_seqno_string = "";
return;
}
sub _flush_cache {
my ($self) = @_;
if ($cached_line_type) {
$seqno_string = $cached_seqno_string;
$self->valign_output_step_C(
$cached_line_text,
$cached_line_leading_space_count,
$self->[_last_level_written_],
$cached_line_Kend,
);
$cached_line_type = 0;
$cached_line_text = "";
$cached_line_text_length = 0;
$cached_seqno_string = "";
$cached_line_Kend = undef;
}
return;
}
sub valign_output_step_B {
###############################################################
# This is Step B in writing vertically aligned lines.
# Vertical tightness is applied according to preset flags.
# In particular this routine handles stacking of opening
# and closing tokens.
###############################################################
my ( $self, $rinput ) = @_;
my $leading_space_count = $rinput->{leading_space_count};
my $str = $rinput->{line};
my $str_length = $rinput->{line_length};
my $side_comment_length = $rinput->{side_comment_length};
my $outdent_long_lines = $rinput->{outdent_long_lines};
my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
my $level = $rinput->{level};
my $Kend = $rinput->{Kend};
my $last_level_written = $self->[_last_level_written_];
# Useful -gcs test cases for wide characters are
# perl527/(method.t.2, reg_mesg.t, mime-header.t)
# handle outdenting of long lines:
if ($outdent_long_lines) {
my $excess =
$str_length -
$side_comment_length +
$leading_space_count -
$self->maximum_line_length_for_level($level);
if ( $excess > 0 ) {
$leading_space_count = 0;
my $file_writer_object = $self->[_file_writer_object_];
my $last_outdented_line_at =
$file_writer_object->get_output_line_number();
$self->[_last_outdented_line_at_] = $last_outdented_line_at;
my $outdented_line_count = $self->[_outdented_line_count_];
unless ($outdented_line_count) {
$self->[_first_outdented_line_at_] =
$last_outdented_line_at;
}
$outdented_line_count++;
$self->[_outdented_line_count_] = $outdented_line_count;
}
}
# Make preliminary leading whitespace. It could get changed
# later by entabbing, so we have to keep track of any changes
# to the leading_space_count from here on.
my $leading_string =
$leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
my $leading_string_length = length($leading_string);
# Unpack any recombination data; it was packed by
# sub send_lines_to_vertical_aligner. Contents:
#
# [0] type: 1=opening non-block 2=closing non-block
# 3=opening block brace 4=closing block brace
# [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
# if closing: spaces of padding to use
# [2] sequence number of container
# [3] valid flag: do not append if this flag is false
#
my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
$seqno_end );
if ($rvertical_tightness_flags) {
(
$open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
$seqno_end
) = @{$rvertical_tightness_flags};
}
$seqno_string = $seqno_end;
# handle any cached line ..
# either append this line to it or write it out
# Note: the function length() is used in this next test out of caution.
# All testing has shown that the variable $cached_line_text_length is
# correct, but its calculation is complex and a loss of cached text would
# be a disaster.
if ( length($cached_line_text) ) {
# Dump an invalid cached line
if ( !$cached_line_valid ) {
$self->valign_output_step_C(
$cached_line_text, $cached_line_leading_space_count,
$last_level_written, $cached_line_Kend
);
}
# Handle cached line ending in OPENING tokens
elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
my $gap = $leading_space_count - $cached_line_text_length;
# handle option of just one tight opening per line:
if ( $cached_line_flag == 1 ) {
if ( defined($open_or_close) && $open_or_close == 1 ) {
$gap = -1;
}
}
if ( $gap >= 0 && defined($seqno_beg) ) {
$leading_string = $cached_line_text . ' ' x $gap;
$leading_string_length = $cached_line_text_length + $gap;
$leading_space_count = $cached_line_leading_space_count;
$seqno_string = $cached_seqno_string . ':' . $seqno_beg;
$level = $last_level_written;
}
else {
$self->valign_output_step_C(
$cached_line_text, $cached_line_leading_space_count,
$last_level_written, $cached_line_Kend
);
}
}
# Handle cached line ending in CLOSING tokens
else {
my $test_line =
$cached_line_text . ' ' x $cached_line_flag . $str;
my $test_line_length =
$cached_line_text_length + $cached_line_flag + $str_length;
if (
# The new line must start with container
$seqno_beg
# The container combination must be okay..
&& (
# okay to combine like types
( $open_or_close == $cached_line_type )
# closing block brace may append to non-block
|| ( $cached_line_type == 2 && $open_or_close == 4 )
# something like ');'
|| ( !$open_or_close && $cached_line_type == 2 )
)
# The combined line must fit
&& (
$test_line_length <=
$self->maximum_line_length_for_level(
$last_level_written)
)
)
{
$seqno_string = $cached_seqno_string . ':' . $seqno_beg;
# Patch to outdent closing tokens ending # in ');' If we
# are joining a line like ');' to a previous stacked set of
# closing tokens, then decide if we may outdent the
# combined stack to the indentation of the ');'. Since we
# should not normally outdent any of the other tokens more
# than the indentation of the lines that contained them, we
# will only do this if all of the corresponding opening
# tokens were on the same line. This can happen with -sot
# and -sct.
# For example, it is ok here:
# __PACKAGE__->load_components( qw(
# PK::Auto
# Core
# ));
#
# But, for example, we do not outdent in this example
# because that would put the closing sub brace out farther
# than the opening sub brace:
#
# perltidy -sot -sct
# $c->Tk::bind(
# '<Control-f>' => sub {
# my ($c) = @_;
# my $e = $c->XEvent;
# itemsUnderArea $c;
# } );
#
if ( $str =~ /^\);/
&& $cached_line_text =~ /^[\)\}\]\s]*$/ )
{
# The way to tell this is if the stacked sequence
# numbers of this output line are the reverse of the
# stacked sequence numbers of the previous non-blank
# line of sequence numbers. So we can join if the
# previous nonblank string of tokens is the mirror
# image. For example if stack )}] is 13:8:6 then we
# are looking for a leading stack like [{( which
# is 6:8:13. We only need to check the two ends,
# because the intermediate tokens must fall in order.
# Note on speed: having to split on colons and
# eliminate multiple colons might appear to be slow,
# but it's not an issue because we almost never come
# through here. In a typical file we don't.
$seqno_string =~ s/^:+//;
$last_nonblank_seqno_string =~ s/^:+//;
$seqno_string =~ s/:+/:/g;
$last_nonblank_seqno_string =~ s/:+/:/g;
# how many spaces can we outdent?
my $diff =
$cached_line_leading_space_count -
$leading_space_count;
if ( $diff > 0
&& length($seqno_string)
&& length($last_nonblank_seqno_string) ==
length($seqno_string) )
{
my @seqno_last =
( split /:/, $last_nonblank_seqno_string );
my @seqno_now = ( split /:/, $seqno_string );
if ( @seqno_now
&& @seqno_last
&& $seqno_now[-1] == $seqno_last[0]
&& $seqno_now[0] == $seqno_last[-1] )
{
# OK to outdent ..
# for absolute safety, be sure we only remove
# whitespace
my $ws = substr( $test_line, 0, $diff );
if ( ( length($ws) == $diff )
&& $ws =~ /^\s+$/ )
{
$test_line = substr( $test_line, $diff );
$cached_line_leading_space_count -= $diff;
$last_level_written =
$self->level_change(
$cached_line_leading_space_count,
$diff, $last_level_written );
$self->reduce_valign_buffer_indentation(
$diff);
}
# shouldn't happen, but not critical:
##else {
## ERROR transferring indentation here
##}
}
}
}
$str = $test_line;
$str_length = $test_line_length;
$leading_string = "";
$leading_string_length = 0;
$leading_space_count = $cached_line_leading_space_count;
$level = $last_level_written;
}
else {
$self->valign_output_step_C(
$cached_line_text, $cached_line_leading_space_count,
$last_level_written, $cached_line_Kend
);
}
}
}
$cached_line_type = 0;
$cached_line_text = "";
$cached_line_text_length = 0;
$cached_line_Kend = undef;
# make the line to be written
my $line = $leading_string . $str;
my $line_length = $leading_string_length + $str_length;
# Safety check: be sure that a line to be cached as a stacked block
# brace line ends in the appropriate opening or closing block brace.
# This should always be the case if the caller set flags correctly.
# Code '3' is for -sobb, code '4' is for -scbb.
if ($open_or_close) {
if ( $open_or_close == 3 && $line !~ /\{\s*$/
|| $open_or_close == 4 && $line !~ /\}\s*$/ )
{
$open_or_close = 0;
}
}
# write or cache this line
if ( !$open_or_close || $side_comment_length > 0 ) {
$self->valign_output_step_C( $line, $leading_space_count, $level,
$Kend );
}
else {
$cached_line_text = $line;
$cached_line_text_length = $line_length;
$cached_line_type = $open_or_close;
$cached_line_flag = $tightness_flag;
$cached_seqno = $seqno;
$cached_line_valid = $valid;
$cached_line_leading_space_count = $leading_space_count;
$cached_seqno_string = $seqno_string;
$cached_line_Kend = $Kend;
}
$self->[_last_level_written_] = $level;
$self->[_last_side_comment_length_] = $side_comment_length;
$self->[_extra_indent_ok_] = 0;
return;
}
}
###############################
# CODE SECTION 8: Output Step C
###############################
{ ## closure for sub valign_output_step_C
# Vertical alignment buffer used by valign_output_step_C
my $valign_buffer_filling;
my @valign_buffer;
sub initialize_valign_buffer {
@valign_buffer = ();
$valign_buffer_filling = "";
return;
}
sub dump_valign_buffer {
my ($self) = @_;
if (@valign_buffer) {
foreach (@valign_buffer) {
$self->valign_output_step_D( @{$_} );
}
@valign_buffer = ();
}
$valign_buffer_filling = "";
return;
}
sub reduce_valign_buffer_indentation {
my ( $self, $diff ) = @_;
if ( $valign_buffer_filling && $diff ) {
my $max_valign_buffer = @valign_buffer;
foreach my $i ( 0 .. $max_valign_buffer - 1 ) {
my ( $line, $leading_space_count, $level, $Kend ) =
@{ $valign_buffer[$i] };
my $ws = substr( $line, 0, $diff );
if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
$line = substr( $line, $diff );
}
if ( $leading_space_count >= $diff ) {
$leading_space_count -= $diff;
$level =
$self->level_change( $leading_space_count, $diff,
$level );
}
$valign_buffer[$i] =
[ $line, $leading_space_count, $level, $Kend ];
}
}
return;
}
sub valign_output_step_C {
###############################################################
# This is Step C in writing vertically aligned lines.
# Lines are either stored in a buffer or passed along to the next step.
# The reason for storing lines is that we may later want to reduce their
# indentation when -sot and -sct are both used.
###############################################################
my ( $self, @args ) = @_;
my $seqno_string = get_seqno_string();
my $last_nonblank_seqno_string = get_last_nonblank_seqno_string();
# Dump any saved lines if we see a line with an unbalanced opening or
# closing token.
$self->dump_valign_buffer()
if ( $seqno_string && $valign_buffer_filling );
# Either store or write this line
if ($valign_buffer_filling) {
push @valign_buffer, [@args];
}
else {
$self->valign_output_step_D(@args);
}
# For lines starting or ending with opening or closing tokens..
if ($seqno_string) {
$last_nonblank_seqno_string = $seqno_string;
set_last_nonblank_seqno_string($seqno_string);
# Start storing lines when we see a line with multiple stacked
# opening tokens.
# patch for RT #94354, requested by Colin Williams
if ( $seqno_string =~ /^\d+(\:+\d+)+$/
&& $args[0] !~ /^[\}\)\]\:\?]/ )
{
# This test is efficient but a little subtle: The first test
# says that we have multiple sequence numbers and hence
# multiple opening or closing tokens in this line. The second
# part of the test rejects stacked closing and ternary tokens.
# So if we get here then we should have stacked unbalanced
# opening tokens.
# Here is a complex example:
# Foo($Bar[0], { # (side comment)
# baz => 1,
# });
# The first line has sequence 6::4. It does not begin with
# a closing token or ternary, so it passes the test and must be
# stacked opening tokens.
# The last line has sequence 4:6 but is a stack of closing
# tokens, so it gets rejected.
# Note that the sequence number of an opening token for a qw
# quote is a negative number and will be rejected. For
# example, for the following line: skip_symbols([qw(
# $seqno_string='10:5:-1'. It would be okay to accept it but I
# decided not to do this after testing.
$valign_buffer_filling = $seqno_string;
}
}
return;
}
}
###############################
# CODE SECTION 9: Output Step D
###############################
sub valign_output_step_D {
###############################################################
# This is Step D in writing vertically aligned lines.
# It is the end of the vertical alignment pipeline.
# Write one vertically aligned line of code to the output object.
###############################################################
my ( $self, $line, $leading_space_count, $level, $Kend ) = @_;
# The line is currently correct if there is no tabbing (recommended!)
# We may have to lop off some leading spaces and replace with tabs.
if ( $leading_space_count > 0 ) {
my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
my $rOpts_tabs = $self->[_rOpts_tabs_];
my $rOpts_entab_leading_whitespace =
$self->[_rOpts_entab_leading_whitespace_];
# Nothing to do if no tabs
if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
|| $rOpts_indent_columns <= 0 )
{
# nothing to do
}
# Handle entab option
elsif ($rOpts_entab_leading_whitespace) {
# Patch 12-nov-2018 based on report from Glenn. Extra padding was
# not correctly entabbed, nor were side comments: Increase leading
# space count for a padded line to get correct tabbing
if ( $line =~ /^(\s+)(.*)$/ ) {
my $spaces = length($1);
if ( $spaces > $leading_space_count ) {
$leading_space_count = $spaces;
}
}
my $space_count =
$leading_space_count % $rOpts_entab_leading_whitespace;
my $tab_count =
int( $leading_space_count / $rOpts_entab_leading_whitespace );
my $leading_string = "\t" x $tab_count . ' ' x $space_count;
if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
substr( $line, 0, $leading_space_count ) = $leading_string;
}
else {
# shouldn't happen - program error counting whitespace
# - skip entabbing
DEBUG_TABS
&& $self->warning(
"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
);
}
}
# Handle option of one tab per level
else {
my $leading_string = ( "\t" x $level );
my $space_count =
$leading_space_count - $level * $rOpts_indent_columns;
# shouldn't happen:
if ( $space_count < 0 ) {
# But it could be an outdented comment
if ( $line !~ /^\s*#/ ) {
DEBUG_TABS
&& $self->warning(
"Error entabbing in valign_output_step_D: for level=$level count=$leading_space_count\n"
);
}
$leading_string = ( ' ' x $leading_space_count );
}
else {
$leading_string .= ( ' ' x $space_count );
}
if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
substr( $line, 0, $leading_space_count ) = $leading_string;
}
else {
# shouldn't happen - program error counting whitespace
# we'll skip entabbing
DEBUG_TABS
&& $self->warning(
"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
);
}
}
}
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->write_code_line( $line . "\n", $Kend );
return;
}
{ ## closure for sub get_leading_string
my @leading_string_cache;
sub initialize_leading_string_cache {
@leading_string_cache = ();
return;
}
sub get_leading_string {
# define the leading whitespace string for this line..
my ( $self, $leading_whitespace_count, $group_level ) = @_;
# Handle case of zero whitespace, which includes multi-line quotes
# (which may have a finite level; this prevents tab problems)
if ( $leading_whitespace_count <= 0 ) {
return "";
}
# look for previous result
elsif ( $leading_string_cache[$leading_whitespace_count] ) {
return $leading_string_cache[$leading_whitespace_count];
}
# must compute a string for this number of spaces
my $leading_string;
# Handle simple case of no tabs
my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
my $rOpts_tabs = $self->[_rOpts_tabs_];
my $rOpts_entab_leading_whitespace =
$self->[_rOpts_entab_leading_whitespace_];
if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
|| $rOpts_indent_columns <= 0 )
{
$leading_string = ( ' ' x $leading_whitespace_count );
}
# Handle entab option
elsif ($rOpts_entab_leading_whitespace) {
my $space_count =
$leading_whitespace_count % $rOpts_entab_leading_whitespace;
my $tab_count = int(
$leading_whitespace_count / $rOpts_entab_leading_whitespace );
$leading_string = "\t" x $tab_count . ' ' x $space_count;
}
# Handle option of one tab per level
else {
$leading_string = ( "\t" x $group_level );
my $space_count =
$leading_whitespace_count - $group_level * $rOpts_indent_columns;
# shouldn't happen:
if ( $space_count < 0 ) {
DEBUG_TABS
&& $self->warning(
"Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n"
);
# -- skip entabbing
$leading_string = ( ' ' x $leading_whitespace_count );
}
else {
$leading_string .= ( ' ' x $space_count );
}
}
$leading_string_cache[$leading_whitespace_count] = $leading_string;
return $leading_string;
}
} # end get_leading_string
##########################
# CODE SECTION 10: Summary
##########################
sub report_anything_unusual {
my $self = shift;
my $outdented_line_count = $self->[_outdented_line_count_];
if ( $outdented_line_count > 0 ) {
$self->write_logfile_entry(
"$outdented_line_count long lines were outdented:\n");
my $first_outdented_line_at = $self->[_first_outdented_line_at_];
$self->write_logfile_entry(
" First at output line $first_outdented_line_at\n");
if ( $outdented_line_count > 1 ) {
my $last_outdented_line_at = $self->[_last_outdented_line_at_];
$self->write_logfile_entry(
" Last at output line $last_outdented_line_at\n");
}
$self->write_logfile_entry(
" use -noll to prevent outdenting, -l=n to increase line length\n"
);
$self->write_logfile_entry("\n");
}
return;
}
1;
|