#####################################################################
#
# The Perl::Tidy::Formatter package adds indentation, whitespace, and
# line breaks to the token stream
#
#####################################################################
# Index...
# CODE SECTION 1: Preliminary code, global definitions and sub new
# sub new
# CODE SECTION 2: Some Basic Utilities
# CODE SECTION 3: Check and process options
# sub check_options
# CODE SECTION 4: Receive lines from the tokenizer
# sub write_line
# CODE SECTION 5: Pre-process the entire file
# sub finish_formatting
# CODE SECTION 6: Process line-by-line
# sub process_all_lines
# CODE SECTION 7: Process lines of code
# process_line_of_CODE
# CODE SECTION 8: Utilities for setting breakpoints
# sub set_forced_breakpoint
# CODE SECTION 9: Process batches of code
# sub grind_batch_of_CODE
# CODE SECTION 10: Code to break long statments
# sub set_continuation_breaks
# CODE SECTION 11: Code to break long lists
# sub scan_list
# CODE SECTION 12: Code for setting indentation
# CODE SECTION 13: Preparing batches for vertical alignment
# sub send_lines_to_vertical_aligner
# CODE SECTION 14: Code for creating closing side comments
# sub add_closing_side_comment
# CODE SECTION 15: Summarize
# sub wrapup
#######################################################################
# CODE SECTION 1: Preliminary code and global definitions up to sub new
#######################################################################
package Perl::Tidy::Formatter;
use strict;
use warnings;
# this can be turned on for extra checking during development
use constant DEVEL_MODE => 0;
{ #<<< A non-indenting brace to contain all lexical variables
use Carp;
our $VERSION = '20210111';
# The Tokenizer will be loaded with the Formatter
##use Perl::Tidy::Tokenizer; # for is_keyword()
sub AUTOLOAD {
# Catch any undefined sub calls so that we are sure to get
# some diagnostic information. This sub should never be called
# except for a programming error.
our $AUTOLOAD;
return if ( $AUTOLOAD =~ /\bDESTROY$/ );
my ( $pkg, $fname, $lno ) = caller();
my $my_package = __PACKAGE__;
print STDERR <<EOM;
======================================================================
Error detected in package '$my_package', version $VERSION
Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
Called from package: '$pkg'
Called from File '$fname' at line '$lno'
This error is probably due to a recent programming change
======================================================================
EOM
exit 1;
}
sub DESTROY {
my $self = shift;
$self->_decrement_count();
return;
}
sub Die {
my ($msg) = @_;
Perl::Tidy::Die($msg);
croak "unexpected return from Perl::Tidy::Die";
}
sub Warn {
my ($msg) = @_;
Perl::Tidy::Warn($msg);
return;
}
sub Fault {
my ($msg) = @_;
# This routine is called for errors that really should not occur
# except if there has been a bug introduced by a recent program change.
# Please add comments at calls to Fault to explain why the call
# should not occur, and where to look to fix it.
my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
my $input_stream_name = get_input_stream_name();
Die(<<EOM);
==============================================================================
While operating on input stream with name: '$input_stream_name'
A fault was detected at line $line0 of sub '$subroutine1'
in file '$filename1'
which was called from line $line1 of sub '$subroutine2'
Message: '$msg'
This is probably an error introduced by a recent programming change.
Perl::Tidy::Formatter.pm reports VERSION='$VERSION'.
==============================================================================
EOM
# We shouldn't get here, but this return is to keep Perl-Critic from
# complaining.
return;
}
sub Exit {
my ($msg) = @_;
Perl::Tidy::Exit($msg);
croak "unexpected return from Perl::Tidy::Exit";
}
# Global variables ...
my (
##################################################################
# Section 1: Global variables which are either always constant or
# are constant after being configured by user-supplied
# parameters. They remain constant as a file is being processed.
##################################################################
# user parameters and shortcuts
$rOpts,
$rOpts_closing_side_comment_maximum_text,
$rOpts_continuation_indentation,
$rOpts_indent_columns,
$rOpts_line_up_parentheses,
$rOpts_maximum_line_length,
$rOpts_variable_maximum_line_length,
$rOpts_block_brace_tightness,
$rOpts_block_brace_vertical_tightness,
$rOpts_stack_closing_block_brace,
$rOpts_maximum_consecutive_blank_lines,
$rOpts_recombine,
$rOpts_add_newlines,
$rOpts_break_at_old_comma_breakpoints,
$rOpts_ignore_old_breakpoints,
$rOpts_keep_interior_semicolons,
$rOpts_comma_arrow_breakpoints,
$rOpts_maximum_fields_per_table,
$rOpts_one_line_block_semicolons,
$rOpts_break_at_old_semicolon_breakpoints,
$rOpts_tee_side_comments,
$rOpts_tee_block_comments,
$rOpts_tee_pod,
$rOpts_delete_side_comments,
$rOpts_delete_closing_side_comments,
$rOpts_format_skipping,
$rOpts_indent_only,
$rOpts_static_block_comments,
# Static hashes initialized in a BEGIN block
%is_assignment,
%is_keyword_returning_list,
%is_if_unless_and_or_last_next_redo_return,
%is_if_elsif_else_unless_while_until_for_foreach,
%is_if_unless_while_until_for,
%is_last_next_redo_return,
%is_sort_map_grep,
%is_sort_map_grep_eval,
%is_if_unless,
%is_and_or,
%is_chain_operator,
%is_block_without_semicolon,
%ok_to_add_semicolon_for_block_type,
%is_opening_type,
%is_closing_type,
%is_opening_token,
%is_closing_token,
%is_equal_or_fat_comma,
%is_block_with_ci,
%is_counted_type,
%is_opening_sequence_token,
%is_closing_sequence_token,
%is_container_label_type,
# Initialized in check_options. These are constants and could
# just as well be initialized in a BEGIN block.
%is_do_follower,
%is_if_brace_follower,
%is_else_brace_follower,
%is_anon_sub_brace_follower,
%is_anon_sub_1_brace_follower,
%is_other_brace_follower,
# Initialized in sub initialize_whitespace_hashes;
# Some can be modified according to user parameters.
%binary_ws_rules,
%want_left_space,
%want_right_space,
# Configured in sub initialize_bond_strength_hashes
%right_bond_strength,
%left_bond_strength,
# Hashes for -kbb=s and -kba=s
%keep_break_before_type,
%keep_break_after_type,
# Initialized in check_options, modified by prepare_cuddled_block_types:
%want_one_line_block,
# Initialized in sub prepare_cuddled_block_types
$rcuddled_block_types,
# Initialized and configured in check_optioms
%outdent_keyword,
%keyword_paren_inner_tightness,
%want_break_before,
%break_before_container_types,
%container_indentation_options,
%space_after_keyword,
%tightness,
%matching_token,
%opening_vertical_tightness,
%closing_vertical_tightness,
%closing_token_indentation,
$some_closing_token_indentation,
%opening_token_right,
%stack_opening_token,
%stack_closing_token,
%weld_nested_exclusion_rules,
# regex patterns for text identification.
# Most are initialized in a sub make_**_pattern during configuration.
# Most can be configured by user parameters.
$SUB_PATTERN,
$ASUB_PATTERN,
$ANYSUB_PATTERN,
$static_block_comment_pattern,
$static_side_comment_pattern,
$format_skipping_pattern_begin,
$format_skipping_pattern_end,
$non_indenting_brace_pattern,
$bli_pattern,
$block_brace_vertical_tightness_pattern,
$blank_lines_after_opening_block_pattern,
$blank_lines_before_closing_block_pattern,
$keyword_group_list_pattern,
$keyword_group_list_comment_pattern,
$closing_side_comment_prefix_pattern,
$closing_side_comment_list_pattern,
# Table to efficiently find indentation and max line length
# from level. Initialized in sub 'find_nested_pairs'
@maximum_line_length,
# Total number of sequence items in a weld, for quick checks
$total_weld_count,
#########################################################
# Section 2: Work arrays for the current batch of tokens.
#########################################################
# These are re-initialized for each batch of code
# in sub initialize_batch_variables.
$max_index_to_go,
@block_type_to_go,
@type_sequence_to_go,
@container_environment_to_go,
@bond_strength_to_go,
@forced_breakpoint_to_go,
@token_lengths_to_go,
@summed_lengths_to_go,
@levels_to_go,
@leading_spaces_to_go,
@reduced_spaces_to_go,
@mate_index_to_go,
@ci_levels_to_go,
@nesting_depth_to_go,
@nobreak_to_go,
@old_breakpoint_to_go,
@tokens_to_go,
@K_to_go,
@types_to_go,
@inext_to_go,
@iprev_to_go,
);
BEGIN {
# Initialize constants...
# Array index names for token variables
my $i = 0;
use constant {
_BLOCK_TYPE_ => $i++,
_CI_LEVEL_ => $i++,
_CONTAINER_ENVIRONMENT_ => $i++,
_CUMULATIVE_LENGTH_ => $i++,
_LINE_INDEX_ => $i++,
_KNEXT_SEQ_ITEM_ => $i++,
_LEVEL_ => $i++,
_LEVEL_TRUE_ => $i++,
_SLEVEL_ => $i++,
_TOKEN_ => $i++,
_TOKEN_LENGTH_ => $i++,
_TYPE_ => $i++,
_TYPE_SEQUENCE_ => $i++,
# Number of token variables; must be last in list:
_NVARS => $i++,
};
# Array index names for $self (which is an array ref)
$i = 0;
use constant {
_rlines_ => $i++,
_rlines_new_ => $i++,
_rLL_ => $i++,
_Klimit_ => $i++,
_K_opening_container_ => $i++,
_K_closing_container_ => $i++,
_K_opening_ternary_ => $i++,
_K_closing_ternary_ => $i++,
_K_first_seq_item_ => $i++,
_rK_phantom_semicolons_ => $i++,
_rtype_count_by_seqno_ => $i++,
_ris_broken_container_ => $i++,
_rhas_broken_container_ => $i++,
_ris_bli_container_ => $i++,
_rparent_of_seqno_ => $i++,
_rchildren_of_seqno_ => $i++,
_ris_list_by_seqno_ => $i++,
_rbreak_container_ => $i++,
_rshort_nested_ => $i++,
_length_function_ => $i++,
_is_encoded_data_ => $i++,
_fh_tee_ => $i++,
_sink_object_ => $i++,
_file_writer_object_ => $i++,
_vertical_aligner_object_ => $i++,
_logger_object_ => $i++,
_radjusted_levels_ => $i++,
_this_batch_ => $i++,
_last_output_short_opening_token_ => $i++,
_last_line_leading_type_ => $i++,
_last_line_leading_level_ => $i++,
_last_last_line_leading_level_ => $i++,
_added_semicolon_count_ => $i++,
_first_added_semicolon_at_ => $i++,
_last_added_semicolon_at_ => $i++,
_deleted_semicolon_count_ => $i++,
_first_deleted_semicolon_at_ => $i++,
_last_deleted_semicolon_at_ => $i++,
_embedded_tab_count_ => $i++,
_first_embedded_tab_at_ => $i++,
_last_embedded_tab_at_ => $i++,
_first_tabbing_disagreement_ => $i++,
_last_tabbing_disagreement_ => $i++,
_tabbing_disagreement_count_ => $i++,
_in_tabbing_disagreement_ => $i++,
_first_brace_tabbing_disagreement_ => $i++,
_in_brace_tabbing_disagreement_ => $i++,
_saw_VERSION_in_this_file_ => $i++,
_saw_END_or_DATA_ => $i++,
_rweld_len_left_closing_ => $i++,
_rweld_len_right_closing_ => $i++,
_rweld_len_left_opening_ => $i++,
_rweld_len_right_opening_ => $i++,
_ris_welded_seqno_ => $i++,
_rspecial_side_comment_type_ => $i++,
_rseqno_controlling_my_ci_ => $i++,
_ris_seqno_controlling_ci_ => $i++,
_save_logfile_ => $i++,
_maximum_level_ => $i++,
_rKrange_code_without_comments_ => $i++,
_rbreak_before_Kfirst_ => $i++,
_rbreak_after_Klast_ => $i++,
_converged_ => $i++,
_rstarting_multiline_qw_seqno_by_K_ => $i++,
_rending_multiline_qw_seqno_by_K_ => $i++,
_rKrange_multiline_qw_by_seqno_ => $i++,
_rcontains_multiline_qw_by_seqno_ => $i++,
_rmultiline_qw_has_extra_level_ => $i++,
};
# Array index names for _this_batch_ (in above list)
# So _this_batch_ is a sub-array of $self for
# holding the batches of tokens being processed.
$i = 0;
use constant {
_starting_in_quote_ => $i++,
_ending_in_quote_ => $i++,
_is_static_block_comment_ => $i++,
_rlines_K_ => $i++,
_do_not_pad_ => $i++,
_ibeg0_ => $i++,
_peak_batch_size_ => $i++,
_max_index_to_go_ => $i++,
_rK_to_go_ => $i++,
_batch_count_ => $i++,
_rix_seqno_controlling_ci_ => $i++,
_batch_CODE_type_ => $i++,
};
# Sequence number assigned to the root of sequence tree.
# The minimum of the actual sequences numbers is 4, so we can use 1
use constant SEQ_ROOT => 1;
# Codes for insertion and deletion of blanks
use constant DELETE => 0;
use constant STABLE => 1;
use constant INSERT => 2;
# whitespace codes
use constant WS_YES => 1;
use constant WS_OPTIONAL => 0;
use constant WS_NO => -1;
# Token bond strengths.
use constant NO_BREAK => 10000;
use constant VERY_STRONG => 100;
use constant STRONG => 2.1;
use constant NOMINAL => 1.1;
use constant WEAK => 0.8;
use constant VERY_WEAK => 0.55;
# values for testing indexes in output array
use constant UNDEFINED_INDEX => -1;
# Maximum number of little messages; probably need not be changed.
use constant MAX_NAG_MESSAGES => 6;
# increment between sequence numbers for each type
# For example, ?: pairs might have numbers 7,11,15,...
use constant TYPE_SEQUENCE_INCREMENT => 4;
# Initialize constant hashes ...
my @q;
@q = qw(
= **= += *= &= <<= &&=
-= /= |= >>= ||= //=
.= %= ^=
x=
);
@is_assignment{@q} = (1) x scalar(@q);
@q = qw(
grep
keys
map
reverse
sort
split
);
@is_keyword_returning_list{@q} = (1) x scalar(@q);
@q = qw(is if unless and or err last next redo return);
@is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
# These block types may have text between the keyword and opening
# curly. Note: 'else' does not, but must be included to allow trailing
# if/elsif text to be appended.
# patch for SWITCH/CASE: added 'case' and 'when'
@q = qw(if elsif else unless while until for foreach case when catch);
@is_if_elsif_else_unless_while_until_for_foreach{@q} =
(1) x scalar(@q);
@q = qw(if unless while until for);
@is_if_unless_while_until_for{@q} =
(1) x scalar(@q);
@q = qw(last next redo return);
@is_last_next_redo_return{@q} = (1) x scalar(@q);
@q = qw(sort map grep);
@is_sort_map_grep{@q} = (1) x scalar(@q);
@q = qw(sort map grep eval);
@is_sort_map_grep_eval{@q} = (1) x scalar(@q);
@q = qw(if unless);
@is_if_unless{@q} = (1) x scalar(@q);
@q = qw(and or err);
@is_and_or{@q} = (1) x scalar(@q);
# Identify certain operators which often occur in chains.
# Note: the minus (-) causes a side effect of padding of the first line in
# something like this (by sub set_logical_padding):
# Checkbutton => 'Transmission checked',
# -variable => \$TRANS
# This usually improves appearance so it seems ok.
@q = qw(&& || and or : ? . + - * /);
@is_chain_operator{@q} = (1) x scalar(@q);
# We can remove semicolons after blocks preceded by these keywords
@q =
qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
unless while until for foreach given when default);
@is_block_without_semicolon{@q} = (1) x scalar(@q);
# We will allow semicolons to be added within these block types
# as well as sub and package blocks.
# NOTES:
# 1. Note that these keywords are omitted:
# switch case given when default sort map grep
# 2. It is also ok to add for sub and package blocks and a labeled block
# 3. But not okay for other perltidy types including:
# { } ; G t
# 4. Test files: blktype.t, blktype1.t, semicolon.t
@q =
qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
unless do while until eval for foreach );
@ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q);
# 'L' is token for opening { at hash key
@q = qw< L { ( [ >;
@is_opening_type{@q} = (1) x scalar(@q);
# 'R' is token for closing } at hash key
@q = qw< R } ) ] >;
@is_closing_type{@q} = (1) x scalar(@q);
@q = qw< { ( [ >;
@is_opening_token{@q} = (1) x scalar(@q);
@q = qw< } ) ] >;
@is_closing_token{@q} = (1) x scalar(@q);
@q = qw< { ( [ ? >;
@is_opening_sequence_token{@q} = (1) x scalar(@q);
@q = qw< } ) ] : >;
@is_closing_sequence_token{@q} = (1) x scalar(@q);
# a hash needed by sub scan_list for labeling containers
@q = qw( k => && || ? : . );
@is_container_label_type{@q} = (1) x scalar(@q);
# Braces -bbht etc must follow these. Note: experimentation with
# including a simple comma shows that it adds little and can lead
# to poor formatting in complex lists.
@q = qw( = => );
@is_equal_or_fat_comma{@q} = (1) x scalar(@q);
@q = qw( => ; );
push @q, ',';
@is_counted_type{@q} = (1) x scalar(@q);
# These block types can take ci. This is used by the -xci option.
# Note that the 'sub' in this list is an anonymous sub. To be more correct
# we could remove sub and use ASUB pattern to also handle a
# prototype/signature. But that would slow things down and would probably
# never be useful.
@q = qw( do sub eval sort map grep );
@is_block_with_ci{@q} = (1) x scalar(@q);
}
{ ## begin closure to count instanes
# methods to count instances
my $_count = 0;
sub get_count { return $_count; }
sub _increment_count { return ++$_count }
sub _decrement_count { return --$_count }
} ## end closure to count instanes
sub new {
my ( $class, @args ) = @_;
# we are given an object with a write_line() method to take lines
my %defaults = (
sink_object => undef,
diagnostics_object => undef,
logger_object => undef,
length_function => sub { return length( $_[0] ) },
is_encoded_data => "",
fh_tee => undef,
);
my %args = ( %defaults, @args );
my $length_function = $args{length_function};
my $is_encoded_data = $args{is_encoded_data};
my $fh_tee = $args{fh_tee};
my $logger_object = $args{logger_object};
my $diagnostics_object = $args{diagnostics_object};
# we create another object with a get_line() and peek_ahead() method
my $sink_object = $args{sink_object};
my $file_writer_object =
Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
# initialize closure variables...
set_logger_object($logger_object);
set_diagnostics_object($diagnostics_object);
initialize_gnu_vars();
initialize_csc_vars();
initialize_scan_list();
initialize_saved_opening_indentation();
initialize_undo_ci();
initialize_process_line_of_CODE();
initialize_grind_batch_of_CODE();
initialize_adjusted_indentation();
initialize_postponed_breakpoint();
initialize_batch_variables();
initialize_forced_breakpoint_vars();
initialize_gnu_batch_vars();
initialize_write_line();
my $vertical_aligner_object = Perl::Tidy::VerticalAligner->new(
rOpts => $rOpts,
file_writer_object => $file_writer_object,
logger_object => $logger_object,
diagnostics_object => $diagnostics_object,
length_function => $length_function
);
if ( $rOpts->{'entab-leading-whitespace'} ) {
write_logfile_entry(
"Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
);
}
elsif ( $rOpts->{'tabs'} ) {
write_logfile_entry("Indentation will be with a tab character\n");
}
else {
write_logfile_entry(
"Indentation will be with $rOpts->{'indent-columns'} spaces\n");
}
# Initialize the $self array reference.
# To add an item, first add a constant index in the BEGIN block above.
my $self = [];
# Basic data structures...
$self->[_rlines_] = []; # = ref to array of lines of the file
$self->[_rlines_new_] = []; # = ref to array of output lines
# (FOR FUTURE DEVELOPMENT)
$self->[_rLL_] = []; # = ref to array with all tokens
# in the file. LL originally meant
# 'Linked List'. Linked lists were a
# bad idea but LL is easy to type.
$self->[_Klimit_] = undef; # = maximum K index for rLL.
$self->[_K_opening_container_] = {}; # for quickly traversing structure
$self->[_K_closing_container_] = {}; # for quickly traversing structure
$self->[_K_opening_ternary_] = {}; # for quickly traversing structure
$self->[_K_closing_ternary_] = {}; # for quickly traversing structure
$self->[_K_first_seq_item_] = undef; # K of first token with a sequence #
$self->[_rK_phantom_semicolons_] =
undef; # for undoing phantom semicolons if iterating
$self->[_rtype_count_by_seqno_] = {};
$self->[_ris_broken_container_] = {};
$self->[_rhas_broken_container_] = {};
$self->[_ris_bli_container_] = {};
$self->[_rparent_of_seqno_] = {};
$self->[_rchildren_of_seqno_] = {};
$self->[_ris_list_by_seqno_] = {};
$self->[_rbreak_container_] = {}; # prevent one-line blocks
$self->[_rshort_nested_] = {}; # blocks not forced open
$self->[_length_function_] = $length_function;
$self->[_is_encoded_data_] = $is_encoded_data;
# Some objects...
$self->[_fh_tee_] = $fh_tee;
$self->[_sink_object_] = $sink_object;
$self->[_file_writer_object_] = $file_writer_object;
$self->[_vertical_aligner_object_] = $vertical_aligner_object;
$self->[_logger_object_] = $logger_object;
# Reference to the batch being processed
$self->[_this_batch_] = [];
# Memory of processed text...
$self->[_last_last_line_leading_level_] = 0;
$self->[_last_line_leading_level_] = 0;
$self->[_last_line_leading_type_] = '#';
$self->[_last_output_short_opening_token_] = 0;
$self->[_added_semicolon_count_] = 0;
$self->[_first_added_semicolon_at_] = 0;
$self->[_last_added_semicolon_at_] = 0;
$self->[_deleted_semicolon_count_] = 0;
$self->[_first_deleted_semicolon_at_] = 0;
$self->[_last_deleted_semicolon_at_] = 0;
$self->[_embedded_tab_count_] = 0;
$self->[_first_embedded_tab_at_] = 0;
$self->[_last_embedded_tab_at_] = 0;
$self->[_first_tabbing_disagreement_] = 0;
$self->[_last_tabbing_disagreement_] = 0;
$self->[_tabbing_disagreement_count_] = 0;
$self->[_in_tabbing_disagreement_] = 0;
$self->[_saw_VERSION_in_this_file_] = !$rOpts->{'pass-version-line'};
$self->[_saw_END_or_DATA_] = 0;
# Hashes related to container welding...
$self->[_radjusted_levels_] = [];
$self->[_rweld_len_left_closing_] = {};
$self->[_rweld_len_right_closing_] = {};
$self->[_rweld_len_left_opening_] = {};
$self->[_rweld_len_right_opening_] = {};
$self->[_ris_welded_seqno_] = {};
$self->[_rseqno_controlling_my_ci_] = {};
$self->[_ris_seqno_controlling_ci_] = {};
$self->[_rspecial_side_comment_type_] = {};
$self->[_maximum_level_] = 0;
$self->[_rKrange_code_without_comments_] = [];
$self->[_rbreak_before_Kfirst_] = {};
$self->[_rbreak_after_Klast_] = {};
$self->[_converged_] = 0;
$self->[_rstarting_multiline_qw_seqno_by_K_] = {};
$self->[_rending_multiline_qw_seqno_by_K_] = {};
$self->[_rKrange_multiline_qw_by_seqno_] = {};
$self->[_rcontains_multiline_qw_by_seqno_] = {};
$self->[_rmultiline_qw_has_extra_level_] = {};
# This flag will be updated later by a call to get_save_logfile()
$self->[_save_logfile_] = defined($logger_object);
bless $self, $class;
# Safety check..this is not a class yet
if ( _increment_count() > 1 ) {
confess
"Attempt to create more than 1 object in $class, which is not a true class yet\n";
}
return $self;
}
######################################
# CODE SECTION 2: Some Basic Utilities
######################################
{ ## begin closure for logger routines
my $logger_object;
# Called once per file to initialize the logger object
sub set_logger_object {
$logger_object = shift;
return;
}
sub get_logger_object {
return $logger_object;
}
sub get_input_stream_name {
my $input_stream_name = "";
if ($logger_object) {
$input_stream_name = $logger_object->get_input_stream_name();
}
return $input_stream_name;
}
# interface to Perl::Tidy::Logger routines
sub warning {
my ($msg) = @_;
if ($logger_object) { $logger_object->warning($msg); }
return;
}
sub complain {
my ($msg) = @_;
if ($logger_object) {
$logger_object->complain($msg);
}
return;
}
sub write_logfile_entry {
my @msg = @_;
if ($logger_object) {
$logger_object->write_logfile_entry(@msg);
}
return;
}
sub report_definite_bug {
if ($logger_object) {
$logger_object->report_definite_bug();
}
return;
}
sub get_saw_brace_error {
if ($logger_object) {
return $logger_object->get_saw_brace_error();
}
return;
}
sub we_are_at_the_last_line {
if ($logger_object) {
$logger_object->we_are_at_the_last_line();
}
return;
}
} ## end closure for logger routines
{ ## begin closure for diagnostics routines
my $diagnostics_object;
# Called once per file to initialize the diagnostics object
sub set_diagnostics_object {
$diagnostics_object = shift;
return;
}
sub write_diagnostics {
my ($msg) = @_;
if ($diagnostics_object) {
$diagnostics_object->write_diagnostics($msg);
}
return;
}
} ## end closure for diagnostics routines
sub get_convergence_check {
my ($self) = @_;
return $self->[_converged_];
}
sub get_added_semicolon_count {
my $self = shift;
return $self->[_added_semicolon_count_];
}
sub get_output_line_number {
my ($self) = @_;
my $vao = $self->[_vertical_aligner_object_];
return $vao->get_output_line_number();
}
sub check_token_array {
my $self = shift;
# Check for errors in the array of tokens. This is only called now
# when the DEVEL_MODE flag is set, so this Fault will only occur
# during code development.
my $rLL = $self->[_rLL_];
for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
my $nvars = @{ $rLL->[$KK] };
if ( $nvars != _NVARS ) {
my $NVARS = _NVARS;
my $type = $rLL->[$KK]->[_TYPE_];
$type = '*' unless defined($type);
# The number of variables per token node is _NVARS and was set when
# the array indexes were generated. So if the number of variables
# is different we have done something wrong, like not store all of
# them in sub 'write_line' when they were received from the
# tokenizer.
Fault(
"number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
);
}
foreach my $var ( _TOKEN_, _TYPE_ ) {
if ( !defined( $rLL->[$KK]->[$var] ) ) {
my $iline = $rLL->[$KK]->[_LINE_INDEX_];
# This is a simple check that each token has some basic
# variables. In other words, that there are no holes in the
# array of tokens. Sub 'write_line' pushes tokens into the
# $rLL array, so this should guarantee no gaps.
Fault("Undefined variable $var for K=$KK, line=$iline\n");
}
}
}
return;
}
sub want_blank_line {
my $self = shift;
$self->flush();
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->want_blank_line();
return;
}
sub write_unindented_line {
my ( $self, $line ) = @_;
$self->flush();
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->write_line($line);
return;
}
sub consecutive_nonblank_lines {
my ($self) = @_;
my $file_writer_object = $self->[_file_writer_object_];
my $vao = $self->[_vertical_aligner_object_];
return $file_writer_object->get_consecutive_nonblank_lines() +
$vao->get_cached_line_count();
}
sub trim {
# trim leading and trailing whitespace from a string
my $str = shift;
$str =~ s/\s+$//;
$str =~ s/^\s+//;
return $str;
}
sub max {
my (@vals) = @_;
my $max = shift @vals;
for (@vals) { $max = $_ > $max ? $_ : $max }
return $max;
}
sub min {
my (@vals) = @_;
my $min = shift @vals;
for (@vals) { $min = $_ < $min ? $_ : $min }
return $min;
}
sub split_words {
# given a string containing words separated by whitespace,
# return the list of words
my ($str) = @_;
return unless $str;
$str =~ s/\s+$//;
$str =~ s/^\s+//;
return split( /\s+/, $str );
}
###########################################
# CODE SECTION 3: Check and process options
###########################################
sub check_options {
# This routine is called to check the user-supplied run parameters
# and to configure the control hashes to them.
$rOpts = shift;
initialize_whitespace_hashes();
initialize_bond_strength_hashes();
# Make needed regex patterns for matching text.
# NOTE: sub_matching_patterns must be made first because later patterns use
# them; see RT #133130.
make_sub_matching_pattern();
make_static_block_comment_pattern();
make_static_side_comment_pattern();
make_closing_side_comment_prefix();
make_closing_side_comment_list_pattern();
$format_skipping_pattern_begin =
make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
$format_skipping_pattern_end =
make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
make_non_indenting_brace_pattern();
# If closing side comments ARE selected, then we can safely
# delete old closing side comments unless closing side comment
# warnings are requested. This is a good idea because it will
# eliminate any old csc's which fall below the line count threshold.
# We cannot do this if warnings are turned on, though, because we
# might delete some text which has been added. So that must
# be handled when comments are created. And we cannot do this
# with -io because -csc will be skipped altogether.
if ( $rOpts->{'closing-side-comments'} ) {
if ( !$rOpts->{'closing-side-comment-warnings'}
&& !$rOpts->{'indent-only'} )
{
$rOpts->{'delete-closing-side-comments'} = 1;
}
}
# If closing side comments ARE NOT selected, but warnings ARE
# selected and we ARE DELETING csc's, then we will pretend to be
# adding with a huge interval. This will force the comments to be
# generated for comparison with the old comments, but not added.
elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
if ( $rOpts->{'delete-closing-side-comments'} ) {
$rOpts->{'delete-closing-side-comments'} = 0;
$rOpts->{'closing-side-comments'} = 1;
$rOpts->{'closing-side-comment-interval'} = 100000000;
}
}
make_bli_pattern();
make_block_brace_vertical_tightness_pattern();
make_blank_line_pattern();
make_keyword_group_list_pattern();
# Make initial list of desired one line block types
# They will be modified by 'prepare_cuddled_block_types'
%want_one_line_block = %is_sort_map_grep_eval;
prepare_cuddled_block_types();
if ( $rOpts->{'dump-cuddled-block-list'} ) {
dump_cuddled_block_list(*STDOUT);
Exit(0);
}
if ( $rOpts->{'line-up-parentheses'} ) {
if ( $rOpts->{'indent-only'}
|| !$rOpts->{'add-newlines'}
|| !$rOpts->{'delete-old-newlines'} )
{
Warn(<<EOM);
-----------------------------------------------------------------------
Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
The -lp indentation logic requires that perltidy be able to coordinate
arbitrarily large numbers of line breakpoints. This isn't possible
with these flags.
-----------------------------------------------------------------------
EOM
$rOpts->{'line-up-parentheses'} = 0;
}
if ( $rOpts->{'whitespace-cycle'} ) {
Warn(<<EOM);
Conflict: -wc cannot currently be used with the -lp option; ignoring -wc
EOM
$rOpts->{'whitespace-cycle'} = 0;
}
}
# At present, tabs are not compatible with the line-up-parentheses style
# (it would be possible to entab the total leading whitespace
# just prior to writing the line, if desired).
if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
Warn(<<EOM);
Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
EOM
$rOpts->{'tabs'} = 0;
}
# Likewise, tabs are not compatible with outdenting..
if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
Warn(<<EOM);
Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
EOM
$rOpts->{'tabs'} = 0;
}
if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
Warn(<<EOM);
Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
EOM
$rOpts->{'tabs'} = 0;
}
if ( !$rOpts->{'space-for-semicolon'} ) {
$want_left_space{'f'} = -1;
}
if ( $rOpts->{'space-terminal-semicolon'} ) {
$want_left_space{';'} = 1;
}
# We should put an upper bound on any -sil=n value. Otherwise enormous
# files could be created by mistake.
for ( $rOpts->{'starting-indentation-level'} ) {
if ( $_ && $_ > 100 ) {
Warn(<<EOM);
The value --starting-indentation-level=$_ is very large; a mistake? resetting to 0;
EOM
$_ = 0;
}
}
# implement outdenting preferences for keywords
%outdent_keyword = ();
my @okw = split_words( $rOpts->{'outdent-keyword-list'} );
unless (@okw) {
@okw = qw(next last redo goto return); # defaults
}
# FUTURE: if not a keyword, assume that it is an identifier
foreach (@okw) {
if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
$outdent_keyword{$_} = 1;
}
else {
Warn("ignoring '$_' in -okwl list; not a perl keyword");
}
}
# setup hash for -kpit option
%keyword_paren_inner_tightness = ();
my $kpit_value = $rOpts->{'keyword-paren-inner-tightness'};
if ( defined($kpit_value) && $kpit_value != 1 ) {
my @kpit =
split_words( $rOpts->{'keyword-paren-inner-tightness-list'} );
unless (@kpit) {
@kpit = qw(if elsif unless while until for foreach); # defaults
}
# we will allow keywords and user-defined identifiers
foreach (@kpit) {
$keyword_paren_inner_tightness{$_} = $kpit_value;
}
}
# implement user whitespace preferences
if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) {
@want_left_space{@q} = (1) x scalar(@q);
}
if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) {
@want_right_space{@q} = (1) x scalar(@q);
}
if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) {
@want_left_space{@q} = (-1) x scalar(@q);
}
if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) {
@want_right_space{@q} = (-1) x scalar(@q);
}
if ( $rOpts->{'dump-want-left-space'} ) {
dump_want_left_space(*STDOUT);
Exit(0);
}
if ( $rOpts->{'dump-want-right-space'} ) {
dump_want_right_space(*STDOUT);
Exit(0);
}
# default keywords for which space is introduced before an opening paren
# (at present, including them messes up vertical alignment)
my @sak = qw(my local our and or xor err eq ne if else elsif until
unless while for foreach return switch case given when catch);
%space_after_keyword = map { $_ => 1 } @sak;
# first remove any or all of these if desired
if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
# -nsak='*' selects all the above keywords
if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
@space_after_keyword{@q} = (0) x scalar(@q);
}
# then allow user to add to these defaults
if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
@space_after_keyword{@q} = (1) x scalar(@q);
}
# implement user break preferences
my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
= **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
. : ? && || and or err xor
);
my $break_after = sub {
my @toks = @_;
foreach my $tok (@toks) {
if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
my $lbs = $left_bond_strength{$tok};
my $rbs = $right_bond_strength{$tok};
if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
( $lbs, $rbs );
}
}
};
my $break_before = sub {
my @toks = @_;
foreach my $tok (@toks) {
my $lbs = $left_bond_strength{$tok};
my $rbs = $right_bond_strength{$tok};
if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
( $lbs, $rbs );
}
}
};
$break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
$break_before->(@all_operators)
if ( $rOpts->{'break-before-all-operators'} );
$break_after->( split_words( $rOpts->{'want-break-after'} ) );
$break_before->( split_words( $rOpts->{'want-break-before'} ) );
# make note if breaks are before certain key types
%want_break_before = ();
foreach my $tok ( @all_operators, ',' ) {
$want_break_before{$tok} =
$left_bond_strength{$tok} < $right_bond_strength{$tok};
}
# Coordinate ?/: breaks, which must be similar
if ( !$want_break_before{':'} ) {
$want_break_before{'?'} = $want_break_before{':'};
$right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
$left_bond_strength{'?'} = NO_BREAK;
}
# Only make a hash entry for the next parameters if values are defined.
# That allows a quick check to be made later.
%break_before_container_types = ();
for ( $rOpts->{'break-before-hash-brace'} ) {
$break_before_container_types{'{'} = $_ if $_ && $_ > 0;
}
for ( $rOpts->{'break-before-square-bracket'} ) {
$break_before_container_types{'['} = $_ if $_ && $_ > 0;
}
for ( $rOpts->{'break-before-paren'} ) {
$break_before_container_types{'('} = $_ if $_ && $_ > 0;
}
%container_indentation_options = ();
for ( $rOpts->{'break-before-hash-brace-and-indent'} ) {
my $tok = '{';
if ( defined($_) && $_ > 0 && $break_before_container_types{$tok} ) {
$container_indentation_options{$tok} = $_;
}
}
for ( $rOpts->{'break-before-square-bracket-and-indent'} ) {
my $tok = '[';
if ( defined($_) && $_ > 0 && $break_before_container_types{$tok} ) {
$container_indentation_options{$tok} = $_;
}
}
for ( $rOpts->{'break-before-paren-and-indent'} ) {
my $tok = '(';
if ( defined($_) && $_ > 0 && $break_before_container_types{$tok} ) {
$container_indentation_options{$tok} = $_;
}
}
# Define here tokens which may follow the closing brace of a do statement
# on the same line, as in:
# } while ( $something);
my @dof = qw(until while unless if ; : );
push @dof, ',';
@is_do_follower{@dof} = (1) x scalar(@dof);
# What tokens may follow the closing brace of an if or elsif block?
# Not used. Previously used for cuddled else, but no longer needed.
%is_if_brace_follower = ();
# nothing can follow the closing curly of an else { } block:
%is_else_brace_follower = ();
# what can follow a multi-line anonymous sub definition closing curly:
my @asf = qw# ; : => or and && || ~~ !~~ ) #;
push @asf, ',';
@is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
# what can follow a one-line anonymous sub closing curly:
# one-line anonymous subs also have ']' here...
# see tk3.t and PP.pm
my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #;
push @asf1, ',';
@is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
# What can follow a closing curly of a block
# which is not an if/elsif/else/do/sort/map/grep/eval/sub
# Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
my @obf = qw# ; : => or and && || ) #;
push @obf, ',';
@is_other_brace_follower{@obf} = (1) x scalar(@obf);
$right_bond_strength{'{'} = WEAK;
$left_bond_strength{'{'} = VERY_STRONG;
# make -l=0 equal to -l=infinite
if ( !$rOpts->{'maximum-line-length'} ) {
$rOpts->{'maximum-line-length'} = 1000000;
}
# make -lbl=0 equal to -lbl=infinite
if ( !$rOpts->{'long-block-line-count'} ) {
$rOpts->{'long-block-line-count'} = 1000000;
}
my $ole = $rOpts->{'output-line-ending'};
if ($ole) {
my %endings = (
dos => "\015\012",
win => "\015\012",
mac => "\015",
unix => "\012",
);
# Patch for RT #99514, a memoization issue.
# Normally, the user enters one of 'dos', 'win', etc, and we change the
# value in the options parameter to be the corresponding line ending
# character. But, if we are using memoization, on later passes through
# here the option parameter will already have the desired ending
# character rather than the keyword 'dos', 'win', etc. So
# we must check to see if conversion has already been done and, if so,
# bypass the conversion step.
my %endings_inverted = (
"\015\012" => 'dos',
"\015\012" => 'win',
"\015" => 'mac',
"\012" => 'unix',
);
if ( defined( $endings_inverted{$ole} ) ) {
# we already have valid line ending, nothing more to do
}
else {
$ole = lc $ole;
unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
my $str = join " ", keys %endings;
Die(<<EOM);
Unrecognized line ending '$ole'; expecting one of: $str
EOM
}
if ( $rOpts->{'preserve-line-endings'} ) {
Warn("Ignoring -ple; conflicts with -ole\n");
$rOpts->{'preserve-line-endings'} = undef;
}
}
}
# hashes used to simplify setting whitespace
%tightness = (
'{' => $rOpts->{'brace-tightness'},
'}' => $rOpts->{'brace-tightness'},
'(' => $rOpts->{'paren-tightness'},
')' => $rOpts->{'paren-tightness'},
'[' => $rOpts->{'square-bracket-tightness'},
']' => $rOpts->{'square-bracket-tightness'},
);
%matching_token = (
'{' => '}',
'(' => ')',
'[' => ']',
'?' => ':',
);
# note any requested old line breaks to keep
%keep_break_before_type = ();
%keep_break_after_type = ();
if ( !$rOpts->{'ignore-old-breakpoints'} ) {
# FIXME: could check for valid types here.
# Invalid types are harmless but probably not intended.
my @types;
@types = ( split_words( $rOpts->{'keep-old-breakpoints-before'} ) );
@keep_break_before_type{@types} = (1) x scalar(@types);
@types = ( split_words( $rOpts->{'keep-old-breakpoints-after'} ) );
@keep_break_after_type{@types} = (1) x scalar(@types);
}
else {
if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
Warn("Conflicting parameters: -iob and -bom; -bom will be ignored\n"
);
}
if ( $rOpts->{'break-at-old-comma-breakpoints'} ) {
Warn("Conflicting parameters: -iob and -boc; -boc will be ignored\n"
);
}
if ( $rOpts->{'break-at-old-semicolon-breakpoints'} ) {
Warn("Conflicting parameters: -iob and -bos; -bos will be ignored\n"
);
}
if ( $rOpts->{'keep-old-breakpoints-before'} ) {
Warn("Conflicting parameters: -iob and -kbb; -kbb will be ignored\n"
);
}
if ( $rOpts->{'keep-old-breakpoints-after'} ) {
Warn("Conflicting parameters: -iob and -kba; -kba will be ignored\n"
);
}
# Note: there are additional parameters that can be made inactive by
# -iob, but they are on by default so we would generate excessive
# warnings if we noted them. They are:
# $rOpts->{'break-at-old-keyword-breakpoints'}
# $rOpts->{'break-at-old-logical-breakpoints'}
# $rOpts->{'break-at-old-ternary-breakpoints'}
# $rOpts->{'break-at-old-attribute-breakpoints'}
}
# very frequently used parameters made global for efficiency
$rOpts_closing_side_comment_maximum_text =
$rOpts->{'closing-side-comment-maximum-text'};
$rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
$rOpts_indent_columns = $rOpts->{'indent-columns'};
$rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
$rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
$rOpts_variable_maximum_line_length =
$rOpts->{'variable-maximum-line-length'};
$rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
$rOpts_block_brace_vertical_tightness =
$rOpts->{'block-brace-vertical-tightness'};
$rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
$rOpts_maximum_consecutive_blank_lines =
$rOpts->{'maximum-consecutive-blank-lines'};
$rOpts_recombine = $rOpts->{'recombine'};
$rOpts_add_newlines = $rOpts->{'add-newlines'};
$rOpts_break_at_old_comma_breakpoints =
$rOpts->{'break-at-old-comma-breakpoints'};
$rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
$rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
$rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
$rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
$rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
$rOpts_break_at_old_semicolon_breakpoints =
$rOpts->{'break-at-old-semicolon-breakpoints'};
$rOpts_tee_side_comments = $rOpts->{'tee-side-comments'};
$rOpts_tee_block_comments = $rOpts->{'tee-block-comments'};
$rOpts_tee_pod = $rOpts->{'tee-pod'};
$rOpts_delete_side_comments = $rOpts->{'delete-side-comments'};
$rOpts_delete_closing_side_comments =
$rOpts->{'delete-closing-side-comments'};
$rOpts_format_skipping = $rOpts->{'format-skipping'};
$rOpts_indent_only = $rOpts->{'indent-only'};
$rOpts_static_block_comments = $rOpts->{'static-block-comments'};
# Note that both opening and closing tokens can access the opening
# and closing flags of their container types.
%opening_vertical_tightness = (
'(' => $rOpts->{'paren-vertical-tightness'},
'{' => $rOpts->{'brace-vertical-tightness'},
'[' => $rOpts->{'square-bracket-vertical-tightness'},
')' => $rOpts->{'paren-vertical-tightness'},
'}' => $rOpts->{'brace-vertical-tightness'},
']' => $rOpts->{'square-bracket-vertical-tightness'},
);
%closing_vertical_tightness = (
'(' => $rOpts->{'paren-vertical-tightness-closing'},
'{' => $rOpts->{'brace-vertical-tightness-closing'},
'[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
')' => $rOpts->{'paren-vertical-tightness-closing'},
'}' => $rOpts->{'brace-vertical-tightness-closing'},
']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
);
# assume flag for '>' same as ')' for closing qw quotes
%closing_token_indentation = (
')' => $rOpts->{'closing-paren-indentation'},
'}' => $rOpts->{'closing-brace-indentation'},
']' => $rOpts->{'closing-square-bracket-indentation'},
'>' => $rOpts->{'closing-paren-indentation'},
);
# flag indicating if any closing tokens are indented
$some_closing_token_indentation =
$rOpts->{'closing-paren-indentation'}
|| $rOpts->{'closing-brace-indentation'}
|| $rOpts->{'closing-square-bracket-indentation'}
|| $rOpts->{'indent-closing-brace'};
%opening_token_right = (
'(' => $rOpts->{'opening-paren-right'},
'{' => $rOpts->{'opening-hash-brace-right'},
'[' => $rOpts->{'opening-square-bracket-right'},
);
%stack_opening_token = (
'(' => $rOpts->{'stack-opening-paren'},
'{' => $rOpts->{'stack-opening-hash-brace'},
'[' => $rOpts->{'stack-opening-square-bracket'},
);
%stack_closing_token = (
')' => $rOpts->{'stack-closing-paren'},
'}' => $rOpts->{'stack-closing-hash-brace'},
']' => $rOpts->{'stack-closing-square-bracket'},
);
# Create a table of maximum line length vs level for later efficient use.
# This avoids continually checking the -vmll flag. We will make the
# table very long to be sure it will not be exceeded. But we have to
# choose a fixed length. A check will be made at the start of sub
# 'finish_formatting' to be sure it is not exceeded. Note, some
# of my standard test problems have indentation levels of about 150,
# so this should be fairly large.
my $level_max = 1000;
foreach my $level ( 0 .. $level_max ) {
$maximum_line_length[$level] = $rOpts_maximum_line_length;
}
if ($rOpts_variable_maximum_line_length) {
foreach my $level ( 0 .. $level_max ) {
$maximum_line_length[$level] += $level * $rOpts_indent_columns;
}
}
initialize_weld_nested_exclusion_rules($rOpts);
return;
}
sub initialize_weld_nested_exclusion_rules {
my ($rOpts) = @_;
%weld_nested_exclusion_rules = ();
my $opt_name = 'weld-nested-exclusion-list';
my $str = $rOpts->{$opt_name};
return unless ($str);
$str =~ s/^\s+//;
$str =~ s/\s+$//;
return unless ($str);
# There are four container tokens.
my %token_keys = (
'(' => '(',
'[' => '[',
'{' => '{',
'q' => 'q',
);
# We are parsing an exclusion list for nested welds. The list is a string
# with spaces separating any number of items. Each item consists of three
# pieces of information:
# <optional position> <optional type> <type of container>
# < ^ or . > < k or K > < ( [ { >
# The last character is the required container type and must be one of:
# ( = paren
# [ = square bracket
# { = brace
# An optional leading position indicator:
# ^ means the leading token position in the weld
# . means a secondary token position in the weld
# no position indicator means all positions match
# An optional alphanumeric character between the position and container
# token selects to which the rule applies:
# k = any keyword
# K = any non-keyword
# f = function call
# F = not a function call
# w = function or keyword
# W = not a function or keyword
# no letter means any preceding type matches
# Examples:
# ^( - the weld must not start with a paren
# .( - the second and later tokens may not be parens
# ( - no parens in weld
# ^K( - exclude a leading paren not preceded by a keyword
# .k( - exclude a secondary paren preceded by a keyword
# [ { - exclude all brackets and braces
my @items = split /\s+/, $str;
my $msg1;
my $msg2;
foreach my $item (@items) {
my $item_save = $item;
my $tok = chop($item);
my $key = $token_keys{$tok};
if ( !defined($key) ) {
$msg1 .= " '$item_save'";
next;
}
if ( !defined( $weld_nested_exclusion_rules{$key} ) ) {
$weld_nested_exclusion_rules{$key} = [];
}
my $rflags = $weld_nested_exclusion_rules{$key};
# A 'q' means do not weld quotes
if ( $tok eq 'q' ) {
$rflags->[0] = '*';
$rflags->[1] = '*';
next;
}
my $pos = '*';
my $select = '*';
if ($item) {
if ( $item =~ /^([\^\.])?([kKfFwW])?$/ ) {
$pos = $1 if ($1);
$select = $2 if ($2);
}
else {
$msg1 .= " '$item_save'";
next;
}
}
if ( $pos eq '^' || $pos eq '*' ) {
if ( defined( $rflags->[0] ) && $rflags ne $select ) {
$msg1 .= " '$item_save'";
}
$rflags->[0] = $select;
}
if ( $pos eq '.' || $pos eq '*' ) {
if ( defined( $rflags->[1] ) && $rflags ne $select ) {
$msg1 .= " '$item_save'";
}
$rflags->[1] = $select;
}
}
if ($msg1) {
Warn(<<EOM);
Unexpecting symbol(s) encountered in --$opt_name will be ignored:
$msg1
EOM
}
if ($msg2) {
Warn(<<EOM);
Multiple specifications were encountered in the --weld-nested-exclusion-list for:
$msg2
Only the last will be used.
EOM
}
return;
}
sub initialize_whitespace_hashes {
# This is called once before formatting begins to initialize these global
# hashes, which control the use of whitespace around tokens:
#
# %binary_ws_rules
# %want_left_space
# %want_right_space
# %space_after_keyword
#
# Many token types are identical to the tokens themselves.
# See the tokenizer for a complete list. Here are some special types:
# k = perl keyword
# f = semicolon in for statement
# m = unary minus
# p = unary plus
# Note that :: is excluded since it should be contained in an identifier
# Note that '->' is excluded because it never gets space
# parentheses and brackets are excluded since they are handled specially
# curly braces are included but may be overridden by logic, such as
# newline logic.
# NEW_TOKENS: create a whitespace rule here. This can be as
# simple as adding your new letter to @spaces_both_sides, for
# example.
my @opening_type = qw< L { ( [ >;
@is_opening_type{@opening_type} = (1) x scalar(@opening_type);
my @closing_type = qw< R } ) ] >;
@is_closing_type{@closing_type} = (1) x scalar(@closing_type);
my @spaces_both_sides = qw#
+ - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
.= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
&&= ||= //= <=> A k f w F n C Y U G v
#;
my @spaces_left_side = qw<
t ! ~ m p { \ h pp mm Z j
>;
push( @spaces_left_side, '#' ); # avoids warning message
my @spaces_right_side = qw<
; } ) ] R J ++ -- **=
>;
push( @spaces_right_side, ',' ); # avoids warning message
%want_left_space = ();
%want_right_space = ();
%binary_ws_rules = ();
# Note that we setting defaults here. Later in processing
# the values of %want_left_space and %want_right_space
# may be overridden by any user settings specified by the
# -wls and -wrs parameters. However the binary_whitespace_rules
# are hardwired and have priority.
@want_left_space{@spaces_both_sides} =
(1) x scalar(@spaces_both_sides);
@want_right_space{@spaces_both_sides} =
(1) x scalar(@spaces_both_sides);
@want_left_space{@spaces_left_side} =
(1) x scalar(@spaces_left_side);
@want_right_space{@spaces_left_side} =
(-1) x scalar(@spaces_left_side);
@want_left_space{@spaces_right_side} =
(-1) x scalar(@spaces_right_side);
@want_right_space{@spaces_right_side} =
(1) x scalar(@spaces_right_side);
$want_left_space{'->'} = WS_NO;
$want_right_space{'->'} = WS_NO;
$want_left_space{'**'} = WS_NO;
$want_right_space{'**'} = WS_NO;
$want_right_space{'CORE::'} = WS_NO;
# These binary_ws_rules are hardwired and have priority over the above
# settings. It would be nice to allow adjustment by the user,
# but it would be complicated to specify.
#
# hash type information must stay tightly bound
# as in : ${xxxx}
$binary_ws_rules{'i'}{'L'} = WS_NO;
$binary_ws_rules{'i'}{'{'} = WS_YES;
$binary_ws_rules{'k'}{'{'} = WS_YES;
$binary_ws_rules{'U'}{'{'} = WS_YES;
$binary_ws_rules{'i'}{'['} = WS_NO;
$binary_ws_rules{'R'}{'L'} = WS_NO;
$binary_ws_rules{'R'}{'{'} = WS_NO;
$binary_ws_rules{'t'}{'L'} = WS_NO;
$binary_ws_rules{'t'}{'{'} = WS_NO;
$binary_ws_rules{'}'}{'L'} = WS_NO;
$binary_ws_rules{'}'}{'{'} = WS_OPTIONAL; # RT#129850; was WS_NO
$binary_ws_rules{'$'}{'L'} = WS_NO;
$binary_ws_rules{'$'}{'{'} = WS_NO;
$binary_ws_rules{'@'}{'L'} = WS_NO;
$binary_ws_rules{'@'}{'{'} = WS_NO;
$binary_ws_rules{'='}{'L'} = WS_YES;
$binary_ws_rules{'J'}{'J'} = WS_YES;
# the following includes ') {'
# as in : if ( xxx ) { yyy }
$binary_ws_rules{']'}{'L'} = WS_NO;
$binary_ws_rules{']'}{'{'} = WS_NO;
$binary_ws_rules{')'}{'{'} = WS_YES;
$binary_ws_rules{')'}{'['} = WS_NO;
$binary_ws_rules{']'}{'['} = WS_NO;
$binary_ws_rules{']'}{'{'} = WS_NO;
$binary_ws_rules{'}'}{'['} = WS_NO;
$binary_ws_rules{'R'}{'['} = WS_NO;
$binary_ws_rules{']'}{'++'} = WS_NO;
$binary_ws_rules{']'}{'--'} = WS_NO;
$binary_ws_rules{')'}{'++'} = WS_NO;
$binary_ws_rules{')'}{'--'} = WS_NO;
$binary_ws_rules{'R'}{'++'} = WS_NO;
$binary_ws_rules{'R'}{'--'} = WS_NO;
$binary_ws_rules{'i'}{'Q'} = WS_YES;
$binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
# FIXME: we could to split 'i' into variables and functions
# and have no space for functions but space for variables. For now,
# I have a special patch in the special rules below
$binary_ws_rules{'i'}{'('} = WS_NO;
$binary_ws_rules{'w'}{'('} = WS_NO;
$binary_ws_rules{'w'}{'{'} = WS_YES;
return;
} ## end initialize_whitespace_hashes
sub set_whitespace_flags {
# This routine is called once per file to set whitespace flags for that
# file. This routine examines each pair of nonblank tokens and sets a flag
# indicating if white space is needed.
#
# $rwhitespace_flags->[$j] is a flag indicating whether a white space
# BEFORE token $j is needed, with the following values:
#
# WS_NO = -1 do not want a space BEFORE token $j
# WS_OPTIONAL= 0 optional space or $j is a whitespace
# WS_YES = 1 want a space BEFORE token $j
#
my $self = shift;
my $rLL = $self->[_rLL_];
use constant DEBUG_WHITE => 0;
my $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
my $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
my $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
my $rwhitespace_flags = [];
my %is_for_foreach = ( 'for' => 1, 'foreach' => 1 );
my ( $token, $type, $block_type, $seqno, $input_line_no );
my (
$last_token, $last_type, $last_block_type,
$last_seqno, $last_input_line_no
);
my $j_tight_closing_paren = -1;
$token = ' ';
$type = 'b';
$block_type = '';
$seqno = '';
$input_line_no = 0;
$last_token = ' ';
$last_type = 'b';
$last_block_type = '';
$last_seqno = '';
$last_input_line_no = 0;
my $jmax = @{$rLL} - 1;
my ($ws);
# This is some logic moved to a sub to avoid deep nesting of if stmts
my $ws_in_container = sub {
my ($j) = @_;
my $ws = WS_YES;
if ( $j + 1 > $jmax ) { return (WS_NO) }
# Patch to count '-foo' as single token so that
# each of $a{-foo} and $a{foo} and $a{'foo'} do
# not get spaces with default formatting.
my $j_here = $j;
++$j_here
if ( $token eq '-'
&& $last_token eq '{'
&& $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
# Patch to count a sign separated from a number as a single token, as
# in the following line. Otherwise, it takes two steps to converge:
# deg2rad(- 0.5)
if ( ( $type eq 'm' || $type eq 'p' )
&& $j < $jmax + 1
&& $rLL->[ $j + 1 ]->[_TYPE_] eq 'b'
&& $rLL->[ $j + 2 ]->[_TYPE_] eq 'n'
&& $rLL->[ $j + 2 ]->[_TOKEN_] =~ /^\d/ )
{
$j_here = $j + 2;
}
# $j_next is where a closing token should be if
# the container has a single token
if ( $j_here + 1 > $jmax ) { return (WS_NO) }
my $j_next =
( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
? $j_here + 2
: $j_here + 1;
if ( $j_next > $jmax ) { return WS_NO }
my $tok_next = $rLL->[$j_next]->[_TOKEN_];
my $type_next = $rLL->[$j_next]->[_TYPE_];
# for tightness = 1, if there is just one token
# within the matching pair, we will keep it tight
if (
$tok_next eq $matching_token{$last_token}
# but watch out for this: [ [ ] (misc.t)
&& $last_token ne $token
# double diamond is usually spaced
&& $token ne '<<>>'
)
{
# remember where to put the space for the closing paren
$j_tight_closing_paren = $j_next;
return (WS_NO);
}
return (WS_YES);
};
# Local hashes to set spaces around container tokens according to their
# sequence numbers. These are set as keywords are examined.
# They are controlled by the -kpit and -kpitl flags.
my %opening_container_inside_ws;
my %closing_container_inside_ws;
my $set_container_ws_by_keyword = sub {
return unless (%keyword_paren_inner_tightness);
my ( $word, $sequence_number ) = @_;
# We just saw a keyword (or other function name) followed by an opening
# paren. Now check to see if the following paren should have special
# treatment for its inside space. If so we set a hash value using the
# sequence number as key.
if ( $word && $sequence_number ) {
my $tightness = $keyword_paren_inner_tightness{$word};
if ( defined($tightness) && $tightness != 1 ) {
my $ws_flag = $tightness == 0 ? WS_YES : WS_NO;
$opening_container_inside_ws{$sequence_number} = $ws_flag;
$closing_container_inside_ws{$sequence_number} = $ws_flag;
}
}
};
my $ws_opening_container_override = sub {
my ( $ws, $sequence_number ) = @_;
return $ws unless (%opening_container_inside_ws);
if ($sequence_number) {
my $ws_override = $opening_container_inside_ws{$sequence_number};
if ($ws_override) { $ws = $ws_override }
}
return $ws;
};
my $ws_closing_container_override = sub {
my ( $ws, $sequence_number ) = @_;
return $ws unless (%closing_container_inside_ws);
if ($sequence_number) {
my $ws_override = $closing_container_inside_ws{$sequence_number};
if ($ws_override) { $ws = $ws_override }
}
return $ws;
};
# main loop over all tokens to define the whitespace flags
for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
my $rtokh = $rLL->[$j];
# Set a default
$rwhitespace_flags->[$j] = WS_OPTIONAL;
if ( $rtokh->[_TYPE_] eq 'b' ) {
next;
}
# set a default value, to be changed as needed
$ws = undef;
$last_token = $token;
$last_type = $type;
$last_block_type = $block_type;
$last_seqno = $seqno;
$last_input_line_no = $input_line_no;
$token = $rtokh->[_TOKEN_];
$type = $rtokh->[_TYPE_];
$block_type = $rtokh->[_BLOCK_TYPE_];
$seqno = $rtokh->[_TYPE_SEQUENCE_];
$input_line_no = $rtokh->[_LINE_INDEX_];
#---------------------------------------------------------------
# Whitespace Rules Section 1:
# Handle space on the inside of opening braces.
#---------------------------------------------------------------
# /^[L\{\(\[]$/
if ( $is_opening_type{$last_type} ) {
$j_tight_closing_paren = -1;
# let us keep empty matched braces together: () {} []
# except for BLOCKS
if ( $token eq $matching_token{$last_token} ) {
if ($block_type) {
$ws = WS_YES;
}
else {
$ws = WS_NO;
}
}
else {
# we're considering the right of an opening brace
# tightness = 0 means always pad inside with space
# tightness = 1 means pad inside if "complex"
# tightness = 2 means never pad inside with space
my $tightness;
if ( $last_type eq '{'
&& $last_token eq '{'
&& $last_block_type )
{
$tightness = $rOpts_block_brace_tightness;
}
else { $tightness = $tightness{$last_token} }
#=============================================================
# Patch for test problem <<snippets/fabrice_bug.in>>
# We must always avoid spaces around a bare word beginning
# with ^ as in:
# my $before = ${^PREMATCH};
# Because all of the following cause an error in perl:
# my $before = ${ ^PREMATCH };
# my $before = ${ ^PREMATCH};
# my $before = ${^PREMATCH };
# So if brace tightness flag is -bt=0 we must temporarily reset
# to bt=1. Note that here we must set tightness=1 and not 2 so
# that the closing space
# is also avoided (via the $j_tight_closing_paren flag in coding)
if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
#=============================================================
if ( $tightness <= 0 ) {
$ws = WS_YES;
}
elsif ( $tightness > 1 ) {
$ws = WS_NO;
}
else {
$ws = $ws_in_container->($j);
}
}
# check for special cases which override the above rules
$ws = $ws_opening_container_override->( $ws, $last_seqno );
} # end setting space flag inside opening tokens
my $ws_1;
$ws_1 = $ws
if DEBUG_WHITE;
#---------------------------------------------------------------
# Whitespace Rules Section 2:
# Handle space on inside of closing brace pairs.
#---------------------------------------------------------------
# /[\}\)\]R]/
if ( $is_closing_type{$type} ) {
if ( $j == $j_tight_closing_paren ) {
$j_tight_closing_paren = -1;
$ws = WS_NO;
}
else {
if ( !defined($ws) ) {
my $tightness;
if ( $type eq '}' && $token eq '}' && $block_type ) {
$tightness = $rOpts_block_brace_tightness;
}
else { $tightness = $tightness{$token} }
$ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
}
}
# check for special cases which override the above rules
$ws = $ws_closing_container_override->( $ws, $seqno );
} # end setting space flag inside closing tokens
my $ws_2;
$ws_2 = $ws
if DEBUG_WHITE;
#---------------------------------------------------------------
# Whitespace Rules Section 3:
# Use the binary rule table.
#---------------------------------------------------------------
if ( !defined($ws) ) {
$ws = $binary_ws_rules{$last_type}{$type};
}
my $ws_3;
$ws_3 = $ws
if DEBUG_WHITE;
#---------------------------------------------------------------
# Whitespace Rules Section 4:
# Handle some special cases.
#---------------------------------------------------------------
if ( $token eq '(' ) {
# This will have to be tweaked as tokenization changes.
# We usually want a space at '} (', for example:
# <<snippets/space1.in>>
# map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
#
# But not others:
# &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
# At present, the above & block is marked as type L/R so this case
# won't go through here.
if ( $last_type eq '}' ) { $ws = WS_YES }
# NOTE: some older versions of Perl had occasional problems if
# spaces are introduced between keywords or functions and opening
# parens. So the default is not to do this except is certain
# cases. The current Perl seems to tolerate spaces.
# Space between keyword and '('
elsif ( $last_type eq 'k' ) {
$ws = WS_NO
unless ( $rOpts_space_keyword_paren
|| $space_after_keyword{$last_token} );
# Set inside space flag if requested
$set_container_ws_by_keyword->( $last_token, $seqno );
}
# Space between function and '('
# -----------------------------------------------------
# 'w' and 'i' checks for something like:
# myfun( &myfun( ->myfun(
# -----------------------------------------------------
elsif (( $last_type =~ /^[wUG]$/ )
|| ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
{
$ws = WS_NO unless ($rOpts_space_function_paren);
$set_container_ws_by_keyword->( $last_token, $seqno );
}
# space between something like $i and ( in <<snippets/space2.in>>
# for $i ( 0 .. 20 ) {
# FIXME: eventually, type 'i' needs to be split into multiple
# token types so this can be a hardwired rule.
elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
$ws = WS_YES;
}
# allow constant function followed by '()' to retain no space
elsif ($last_type eq 'C'
&& $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
{
$ws = WS_NO;
}
}
# patch for SWITCH/CASE: make space at ']{' optional
# since the '{' might begin a case or when block
elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
$ws = WS_OPTIONAL;
}
# keep space between 'sub' and '{' for anonymous sub definition
if ( $type eq '{' ) {
if ( $last_token eq 'sub' ) {
$ws = WS_YES;
}
# this is needed to avoid no space in '){'
if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
# avoid any space before the brace or bracket in something like
# @opts{'a','b',...}
if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
$ws = WS_NO;
}
}
elsif ( $type eq 'i' ) {
# never a space before ->
if ( substr( $token, 0, 2 ) eq '->' ) {
$ws = WS_NO;
}
}
# retain any space between '-' and bare word
elsif ( $type eq 'w' || $type eq 'C' ) {
$ws = WS_OPTIONAL if $last_type eq '-';
# never a space before ->
if ( substr( $token, 0, 2 ) eq '->' ) {
$ws = WS_NO;
}
}
# retain any space between '-' and bare word; for example
# avoid space between 'USER' and '-' here: <<snippets/space2.in>>
# $myhash{USER-NAME}='steve';
elsif ( $type eq 'm' || $type eq '-' ) {
$ws = WS_OPTIONAL if ( $last_type eq 'w' );
}
# always space before side comment
elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
# always preserver whatever space was used after a possible
# filehandle (except _) or here doc operator
if (
$type ne '#'
&& ( ( $last_type eq 'Z' && $last_token ne '_' )
|| $last_type eq 'h' )
)
{
$ws = WS_OPTIONAL;
}
# space_backslash_quote; RT #123774 <<snippets/rt123774.in>>
# allow a space between a backslash and single or double quote
# to avoid fooling html formatters
elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ ) {
if ($rOpts_space_backslash_quote) {
if ( $rOpts_space_backslash_quote == 1 ) {
$ws = WS_OPTIONAL;
}
elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
else { } # shouldnt happen
}
else {
$ws = WS_NO;
}
}
elsif ( $type eq 'k' ) {
# Keywords 'for', 'foreach' are special cases for -kpit since the
# opening paren does not always immediately follow the keyword. So
# we have to search forward for the paren in this case. I have
# limited the search to 10 tokens ahead, just in case somebody
# has a big file and no opening paren. This should be enough for
# all normal code.
if ( $is_for_foreach{$token}
&& %keyword_paren_inner_tightness
&& defined( $keyword_paren_inner_tightness{$token} )
&& $j < $jmax )
{
my $jp = $j;
for ( my $inc = 1 ; $inc < 10 ; $inc++ ) {
$jp++;
last if ( $jp > $jmax );
next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' );
my $seqno = $rLL->[$jp]->[_TYPE_SEQUENCE_];
$set_container_ws_by_keyword->( $token, $seqno );
last;
}
}
}
my $ws_4;
$ws_4 = $ws
if DEBUG_WHITE;
#---------------------------------------------------------------
# Whitespace Rules Section 5:
# Apply default rules not covered above.
#---------------------------------------------------------------
# If we fall through to here, look at the pre-defined hash tables for
# the two tokens, and:
# if (they are equal) use the common value
# if (either is zero or undef) use the other
# if (either is -1) use it
# That is,
# left vs right
# 1 vs 1 --> 1
# 0 vs 0 --> 0
# -1 vs -1 --> -1
#
# 0 vs -1 --> -1
# 0 vs 1 --> 1
# 1 vs 0 --> 1
# -1 vs 0 --> -1
#
# -1 vs 1 --> -1
# 1 vs -1 --> -1
if ( !defined($ws) ) {
my $wl = $want_left_space{$type};
my $wr = $want_right_space{$last_type};
if ( !defined($wl) ) { $wl = 0 }
if ( !defined($wr) ) { $wr = 0 }
$ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
}
if ( !defined($ws) ) {
$ws = 0;
write_diagnostics(
"WS flag is undefined for tokens $last_token $token\n");
}
# Treat newline as a whitespace. Otherwise, we might combine
# 'Send' and '-recipients' here according to the above rules:
# <<snippets/space3.in>>
# my $msg = new Fax::Send
# -recipients => $to,
# -data => $data;
if ( $ws == 0 && $input_line_no != $last_input_line_no ) { $ws = 1 }
$rwhitespace_flags->[$j] = $ws;
DEBUG_WHITE && do {
my $str = substr( $last_token, 0, 15 );
$str .= ' ' x ( 16 - length($str) );
if ( !defined($ws_1) ) { $ws_1 = "*" }
if ( !defined($ws_2) ) { $ws_2 = "*" }
if ( !defined($ws_3) ) { $ws_3 = "*" }
if ( !defined($ws_4) ) { $ws_4 = "*" }
print STDOUT
"NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
};
} ## end main loop
if ( $rOpts->{'tight-secret-operators'} ) {
new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
}
return $rwhitespace_flags;
} ## end sub set_whitespace_flags
sub dump_want_left_space {
my $fh = shift;
local $" = "\n";
$fh->print(<<EOM);
These values are the main control of whitespace to the left of a token type;
They may be altered with the -wls parameter.
For a list of token types, use perltidy --dump-token-types (-dtt)
1 means the token wants a space to its left
-1 means the token does not want a space to its left
------------------------------------------------------------------------
EOM
foreach my $key ( sort keys %want_left_space ) {
$fh->print("$key\t$want_left_space{$key}\n");
}
return;
}
sub dump_want_right_space {
my $fh = shift;
local $" = "\n";
$fh->print(<<EOM);
These values are the main control of whitespace to the right of a token type;
They may be altered with the -wrs parameter.
For a list of token types, use perltidy --dump-token-types (-dtt)
1 means the token wants a space to its right
-1 means the token does not want a space to its right
------------------------------------------------------------------------
EOM
foreach my $key ( sort keys %want_right_space ) {
$fh->print("$key\t$want_right_space{$key}\n");
}
return;
}
{ ## begin closure is_essential_whitespace
my %is_sort_grep_map;
my %is_for_foreach;
my %is_digraph;
my %is_trigraph;
my %essential_whitespace_filter_l1;
my %essential_whitespace_filter_r1;
my %essential_whitespace_filter_l2;
my %essential_whitespace_filter_r2;
BEGIN {
my @q;
@q = qw(sort grep map);
@is_sort_grep_map{@q} = (1) x scalar(@q);
@q = qw(for foreach);
@is_for_foreach{@q} = (1) x scalar(@q);
@q = qw(
.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
<= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
);
@is_digraph{@q} = (1) x scalar(@q);
@q = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~);
@is_trigraph{@q} = (1) x scalar(@q);
# These are used as a speedup filters for sub is_essential_whitespace.
# Filter 1:
# These left side token types USUALLY do not require a space:
@q = qw( ; { } [ ] L R );
push @q, ',';
push @q, ')';
push @q, '(';
@essential_whitespace_filter_l1{@q} = (1) x scalar(@q);
# BUT some might if followed by these right token types
@q = qw( pp mm << <<= h );
@essential_whitespace_filter_r1{@q} = (1) x scalar(@q);
# Filter 2:
# These right side filters usually do not require a space
@q = qw( ; ] R } );
push @q, ',';
push @q, ')';
@essential_whitespace_filter_r2{@q} = (1) x scalar(@q);
# BUT some might if followed by these left token types
@q = qw( h Z );
@essential_whitespace_filter_l2{@q} = (1) x scalar(@q);
}
sub is_essential_whitespace {
# Essential whitespace means whitespace which cannot be safely deleted
# without risking the introduction of a syntax error.
# We are given three tokens and their types:
# ($tokenl, $typel) is the token to the left of the space in question
# ($tokenr, $typer) is the token to the right of the space in question
# ($tokenll, $typell) is previous nonblank token to the left of $tokenl
#
# Note1: This routine should almost never need to be changed. It is
# for avoiding syntax problems rather than for formatting.
# Note2: The -mangle option causes large numbers of calls to this
# routine and therefore is a good test. So if a change is made, be sure
# to run a large number of files with the -mangle option and check for
# differences.
my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
# This is potentially a very slow routine but the following quick
# filters typically catch and handle over 90% of the calls.
# Filter 1: usually no space required after common types ; , [ ] { } ( )
return
if ( $essential_whitespace_filter_l1{$typel}
&& !$essential_whitespace_filter_r1{$typer} );
# Filter 2: usually no space before common types ; ,
return
if ( $essential_whitespace_filter_r2{$typer}
&& !$essential_whitespace_filter_l2{$typel} );
# Filter 3: Handle side comments: a space is only essential if the left
# token ends in '$' For example, we do not want to create $#foo below:
# sub t086
# ( #foo)))
# $ #foo)))
# a #foo)))
# ) #foo)))
# { ... }
# Also, I prefer not to put a ? and # together because ? used to be
# a pattern delmiter and spacing was used if guessing was needed.
if ( $typer eq '#' ) {
return 1
if ( $tokenl
&& ( $typel eq '?' || substr( $tokenl, -1 ) eq '$' ) );
return;
}
my $tokenr_is_bareword = $tokenr =~ /^\w/ && $tokenr !~ /^\d/;
my $tokenr_is_open_paren = $tokenr eq '(';
my $token_joined = $tokenl . $tokenr;
my $tokenl_is_dash = $tokenl eq '-';
my $result =
# never combine two bare words or numbers
# examples: and ::ok(1)
# return ::spw(...)
# for bla::bla:: abc
# example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
# $input eq"quit" to make $inputeq"quit"
# my $size=-s::SINK if $file; <==OK but we won't do it
# don't join something like: for bla::bla:: abc
# example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
&& ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
# do not combine a number with a concatenation dot
# example: pom.caputo:
# $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
|| $typel eq 'n' && $tokenr eq '.'
|| $typer eq 'n'
&& $tokenl eq '.'
# cases of a space before a bareword...
|| (
$tokenr_is_bareword && (
# do not join a minus with a bare word, because you might form
# a file test operator. Example from Complex.pm:
# if (CORE::abs($z - i) < $eps);
# "z-i" would be taken as a file test.
$tokenl_is_dash && length($tokenr) == 1
# and something like this could become ambiguous without space
# after the '-':
# use constant III=>1;
# $a = $b - III;
# and even this:
# $a = - III;
|| $tokenl_is_dash && $typer =~ /^[wC]$/
# keep a space between a quote and a bareword to prevent the
# bareword from becoming a quote modifier.
|| $typel eq 'Q'
# keep a space between a token ending in '$' and any word;
# this caused trouble: "die @$ if $@"
|| $typel eq 'i' && $tokenl =~ /\$$/
# do not remove space between an '&' and a bare word because
# it may turn into a function evaluation, like here
# between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
# $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
|| $typel eq '&'
# don't combine $$ or $# with any alphanumeric
# (testfile mangle.t with --mangle)
|| $tokenl =~ /^\$[\$\#]$/
)
) ## end $tokenr_is_bareword
# OLD, not used
# '= -' should not become =- or you will get a warning
# about reversed -=
# || ($tokenr eq '-')
# do not join a bare word with a minus, like between 'Send' and
# '-recipients' here <<snippets/space3.in>>
# my $msg = new Fax::Send
# -recipients => $to,
# -data => $data;
# This is the safest thing to do. If we had the token to the right of
# the minus we could do a better check.
#
# And do not combine a bareword and a quote, like this:
# oops "Your login, $Bad_Login, is not valid";
# It can cause a syntax error if oops is a sub
|| $typel eq 'w' && ( $tokenr eq '-' || $typer eq 'Q' )
# perl is very fussy about spaces before <<
|| $tokenr =~ /^\<\</
# avoid combining tokens to create new meanings. Example:
# $a+ +$b must not become $a++$b
|| ( $is_digraph{$token_joined} )
|| $is_trigraph{$token_joined}
# another example: do not combine these two &'s:
# allow_options & &OPT_EXECCGI
|| $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) }
# retain any space after possible filehandle
# (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
|| $typel eq 'Z'
# Perl is sensitive to whitespace after the + here:
# $b = xvals $a + 0.1 * yvals $a;
|| $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/
|| (
$tokenr_is_open_paren && (
# keep paren separate in 'use Foo::Bar ()'
( $typel eq 'w' && $typell eq 'k' && $tokenll eq 'use' )
# keep any space between filehandle and paren:
# file mangle.t with --mangle:
|| $typel eq 'Y'
# must have space between grep and left paren; "grep(" will fail
|| $is_sort_grep_map{$tokenl}
# don't stick numbers next to left parens, as in:
#use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
|| $typel eq 'n'
)
) ## end $tokenr_is_open_paren
# retain any space after here doc operator ( hereerr.t)
|| $typel eq 'h'
# be careful with a space around ++ and --, to avoid ambiguity as to
# which token it applies
|| $typer =~ /^(pp|mm)$/ && $tokenl !~ /^[\;\{\(\[]/
|| $typel =~ /^(\+\+|\-\-)$/
&& $tokenr !~ /^[\;\}\)\]]/
# need space after foreach my; for example, this will fail in
# older versions of Perl:
# foreach my$ft(@filetypes)...
|| (
$tokenl eq 'my'
# /^(for|foreach)$/
&& $is_for_foreach{$tokenll}
&& $tokenr =~ /^\$/
)
# We must be sure that a space between a ? and a quoted string
# remains if the space before the ? remains. [Loca.pm, lockarea]
# ie,
# $b=join $comma ? ',' : ':', @_; # ok
# $b=join $comma?',' : ':', @_; # ok!
# $b=join $comma ?',' : ':', @_; # error!
# Not really required:
## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
# space stacked labels (TODO: check if really necessary)
|| $typel eq 'J' && $typer eq 'J'
; # the value of this long logic sequence is the result we want
return $result;
}
} ## end closure is_essential_whitespace
{ ## begin closure new_secret_operator_whitespace
my %secret_operators;
my %is_leading_secret_token;
BEGIN {
# token lists for perl secret operators as compiled by Philippe Bruhat
# at: https://metacpan.org/module/perlsecret
%secret_operators = (
'Goatse' => [qw#= ( ) =#], #=( )=
'Venus1' => [qw#0 +#], # 0+
'Venus2' => [qw#+ 0#], # +0
'Enterprise' => [qw#) x ! !#], # ()x!!
'Kite1' => [qw#~ ~ <>#], # ~~<>
'Kite2' => [qw#~~ <>#], # ~~<>
'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
'Bang bang ' => [qw#! !#], # !!
);
# The following operators and constants are not included because they
# are normally kept tight by perltidy:
# ~~ <~>
#
# Make a lookup table indexed by the first token of each operator:
# first token => [list, list, ...]
foreach my $value ( values(%secret_operators) ) {
my $tok = $value->[0];
push @{ $is_leading_secret_token{$tok} }, $value;
}
}
sub new_secret_operator_whitespace {
my ( $rlong_array, $rwhitespace_flags ) = @_;
# Loop over all tokens in this line
my ( $token, $type );
my $jmax = @{$rlong_array} - 1;
foreach my $j ( 0 .. $jmax ) {
$token = $rlong_array->[$j]->[_TOKEN_];
$type = $rlong_array->[$j]->[_TYPE_];
# Skip unless this token might start a secret operator
next if ( $type eq 'b' );
next unless ( $is_leading_secret_token{$token} );
# Loop over all secret operators with this leading token
foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
my $jend = $j - 1;
foreach my $tok ( @{$rpattern} ) {
$jend++;
$jend++
if ( $jend <= $jmax
&& $rlong_array->[$jend]->[_TYPE_] eq 'b' );
if ( $jend > $jmax
|| $tok ne $rlong_array->[$jend]->[_TOKEN_] )
{
$jend = undef;
last;
}
}
if ($jend) {
# set flags to prevent spaces within this operator
foreach my $jj ( $j + 1 .. $jend ) {
$rwhitespace_flags->[$jj] = WS_NO;
}
$j = $jend;
last;
}
} ## End Loop over all operators
} ## End loop over all tokens
return;
} # End sub
} ## end closure new_secret_operator_whitespace
{ ## begin closure set_bond_strengths
# These routines and variables are involved in deciding where to break very
# long lines.
my %is_good_keyword_breakpoint;
my %is_lt_gt_le_ge;
my %is_container_token;
my %binary_bond_strength_nospace;
my %binary_bond_strength;
my %nobreak_lhs;
my %nobreak_rhs;
my @bias_tokens;
my %bias_hash;
my %bias;
my $delta_bias;
sub initialize_bond_strength_hashes {
my @q;
@q = qw(if unless while until for foreach);
@is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
@q = qw(lt gt le ge);
@is_lt_gt_le_ge{@q} = (1) x scalar(@q);
@q = qw/ ( [ { } ] ) /;
@is_container_token{@q} = (1) x scalar(@q);
# The decision about where to break a line depends upon a "bond
# strength" between tokens. The LOWER the bond strength, the MORE
# likely a break. A bond strength may be any value but to simplify
# things there are several pre-defined strength levels:
# NO_BREAK => 10000;
# VERY_STRONG => 100;
# STRONG => 2.1;
# NOMINAL => 1.1;
# WEAK => 0.8;
# VERY_WEAK => 0.55;
# The strength values are based on trial-and-error, and need to be
# tweaked occasionally to get desired results. Some comments:
#
# 1. Only relative strengths are important. small differences
# in strengths can make big formatting differences.
# 2. Each indentation level adds one unit of bond strength.
# 3. A value of NO_BREAK makes an unbreakable bond
# 4. A value of VERY_WEAK is the strength of a ','
# 5. Values below NOMINAL are considered ok break points.
# 6. Values above NOMINAL are considered poor break points.
#
# The bond strengths should roughly follow precedence order where
# possible. If you make changes, please check the results very
# carefully on a variety of scripts. Testing with the -extrude
# options is particularly helpful in exercising all of the rules.
# Wherever possible, bond strengths are defined in the following
# tables. There are two main stages to setting bond strengths and
# two types of tables:
#
# The first stage involves looking at each token individually and
# defining left and right bond strengths, according to if we want
# to break to the left or right side, and how good a break point it
# is. For example tokens like =, ||, && make good break points and
# will have low strengths, but one might want to break on either
# side to put them at the end of one line or beginning of the next.
#
# The second stage involves looking at certain pairs of tokens and
# defining a bond strength for that particular pair. This second
# stage has priority.
#---------------------------------------------------------------
# Bond Strength BEGIN Section 1.
# Set left and right bond strengths of individual tokens.
#---------------------------------------------------------------
# NOTE: NO_BREAK's set in this section first are HINTS which will
# probably not be honored. Essential NO_BREAKS's should be set in
# BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
# of this subroutine.
# Note that we are setting defaults in this section. The user
# cannot change bond strengths but can cause the left and right
# bond strengths of any token type to be swapped through the use of
# the -wba and -wbb flags. In this way the user can determine if a
# breakpoint token should appear at the end of one line or the
# beginning of the next line.
%right_bond_strength = ();
%left_bond_strength = ();
%binary_bond_strength_nospace = ();
%binary_bond_strength = ();
%nobreak_lhs = ();
%nobreak_rhs = ();
# The hash keys in this section are token types, plus the text of
# certain keywords like 'or', 'and'.
# no break around possible filehandle
$left_bond_strength{'Z'} = NO_BREAK;
$right_bond_strength{'Z'} = NO_BREAK;
# never put a bare word on a new line:
# example print (STDERR, "bla"); will fail with break after (
$left_bond_strength{'w'} = NO_BREAK;
# blanks always have infinite strength to force breaks after
# real tokens
$right_bond_strength{'b'} = NO_BREAK;
# try not to break on exponentation
@q = qw# ** .. ... <=> #;
@left_bond_strength{@q} = (STRONG) x scalar(@q);
@right_bond_strength{@q} = (STRONG) x scalar(@q);
# The comma-arrow has very low precedence but not a good break point
$left_bond_strength{'=>'} = NO_BREAK;
$right_bond_strength{'=>'} = NOMINAL;
# ok to break after label
$left_bond_strength{'J'} = NO_BREAK;
$right_bond_strength{'J'} = NOMINAL;
$left_bond_strength{'j'} = STRONG;
$right_bond_strength{'j'} = STRONG;
$left_bond_strength{'A'} = STRONG;
$right_bond_strength{'A'} = STRONG;
$left_bond_strength{'->'} = STRONG;
$right_bond_strength{'->'} = VERY_STRONG;
$left_bond_strength{'CORE::'} = NOMINAL;
$right_bond_strength{'CORE::'} = NO_BREAK;
# breaking AFTER modulus operator is ok:
@q = qw< % >;
@left_bond_strength{@q} = (STRONG) x scalar(@q);
@right_bond_strength{@q} =
( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
# Break AFTER math operators * and /
@q = qw< * / x >;
@left_bond_strength{@q} = (STRONG) x scalar(@q);
@right_bond_strength{@q} = (NOMINAL) x scalar(@q);
# Break AFTER weakest math operators + and -
# Make them weaker than * but a bit stronger than '.'
@q = qw< + - >;
@left_bond_strength{@q} = (STRONG) x scalar(@q);
@right_bond_strength{@q} =
( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
# breaking BEFORE these is just ok:
@q = qw# >> << #;
@right_bond_strength{@q} = (STRONG) x scalar(@q);
@left_bond_strength{@q} = (NOMINAL) x scalar(@q);
# breaking before the string concatenation operator seems best
# because it can be hard to see at the end of a line
$right_bond_strength{'.'} = STRONG;
$left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
@q = qw< } ] ) R >;
@left_bond_strength{@q} = (STRONG) x scalar(@q);
@right_bond_strength{@q} = (NOMINAL) x scalar(@q);
# make these a little weaker than nominal so that they get
# favored for end-of-line characters
@q = qw< != == =~ !~ ~~ !~~ >;
@left_bond_strength{@q} = (STRONG) x scalar(@q);
@right_bond_strength{@q} =
( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
# break AFTER these
@q = qw# < > | & >= <= #;
@left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
@right_bond_strength{@q} =
( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
# breaking either before or after a quote is ok
# but bias for breaking before a quote
$left_bond_strength{'Q'} = NOMINAL;
$right_bond_strength{'Q'} = NOMINAL + 0.02;
$left_bond_strength{'q'} = NOMINAL;
$right_bond_strength{'q'} = NOMINAL;
# starting a line with a keyword is usually ok
$left_bond_strength{'k'} = NOMINAL;
# we usually want to bond a keyword strongly to what immediately
# follows, rather than leaving it stranded at the end of a line
$right_bond_strength{'k'} = STRONG;
$left_bond_strength{'G'} = NOMINAL;
$right_bond_strength{'G'} = STRONG;
# assignment operators
@q = qw(
= **= += *= &= <<= &&=
-= /= |= >>= ||= //=
.= %= ^=
x=
);
# Default is to break AFTER various assignment operators
@left_bond_strength{@q} = (STRONG) x scalar(@q);
@right_bond_strength{@q} =
( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
# Default is to break BEFORE '&&' and '||' and '//'
# set strength of '||' to same as '=' so that chains like
# $a = $b || $c || $d will break before the first '||'
$right_bond_strength{'||'} = NOMINAL;
$left_bond_strength{'||'} = $right_bond_strength{'='};
# same thing for '//'
$right_bond_strength{'//'} = NOMINAL;
$left_bond_strength{'//'} = $right_bond_strength{'='};
# set strength of && a little higher than ||
$right_bond_strength{'&&'} = NOMINAL;
$left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
$left_bond_strength{';'} = VERY_STRONG;
$right_bond_strength{';'} = VERY_WEAK;
$left_bond_strength{'f'} = VERY_STRONG;
# make right strength of for ';' a little less than '='
# to make for contents break after the ';' to avoid this:
# for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
# $number_of_fields )
# and make it weaker than ',' and 'and' too
$right_bond_strength{'f'} = VERY_WEAK - 0.03;
# The strengths of ?/: should be somewhere between
# an '=' and a quote (NOMINAL),
# make strength of ':' slightly less than '?' to help
# break long chains of ? : after the colons
$left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
$right_bond_strength{':'} = NO_BREAK;
$left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
$right_bond_strength{'?'} = NO_BREAK;
$left_bond_strength{','} = VERY_STRONG;
$right_bond_strength{','} = VERY_WEAK;
# remaining digraphs and trigraphs not defined above
@q = qw( :: <> ++ --);
@left_bond_strength{@q} = (WEAK) x scalar(@q);
@right_bond_strength{@q} = (STRONG) x scalar(@q);
# Set bond strengths of certain keywords
# make 'or', 'err', 'and' slightly weaker than a ','
$left_bond_strength{'and'} = VERY_WEAK - 0.01;
$left_bond_strength{'or'} = VERY_WEAK - 0.02;
$left_bond_strength{'err'} = VERY_WEAK - 0.02;
$left_bond_strength{'xor'} = VERY_WEAK - 0.01;
$right_bond_strength{'and'} = NOMINAL;
$right_bond_strength{'or'} = NOMINAL;
$right_bond_strength{'err'} = NOMINAL;
$right_bond_strength{'xor'} = NOMINAL;
#---------------------------------------------------------------
# Bond Strength BEGIN Section 2.
# Set binary rules for bond strengths between certain token types.
#---------------------------------------------------------------
# We have a little problem making tables which apply to the
# container tokens. Here is a list of container tokens and
# their types:
#
# type tokens // meaning
# { {, [, ( // indent
# } }, ], ) // outdent
# [ [ // left non-structural [ (enclosing an array index)
# ] ] // right non-structural square bracket
# ( ( // left non-structural paren
# ) ) // right non-structural paren
# L { // left non-structural curly brace (enclosing a key)
# R } // right non-structural curly brace
#
# Some rules apply to token types and some to just the token
# itself. We solve the problem by combining type and token into a
# new hash key for the container types.
#
# If a rule applies to a token 'type' then we need to make rules
# for each of these 'type.token' combinations:
# Type Type.Token
# { {{, {[, {(
# [ [[
# ( ((
# L L{
# } }}, }], })
# ] ]]
# ) ))
# R R}
#
# If a rule applies to a token then we need to make rules for
# these 'type.token' combinations:
# Token Type.Token
# { {{, L{
# [ {[, [[
# ( {(, ((
# } }}, R}
# ] }], ]]
# ) }), ))
# allow long lines before final { in an if statement, as in:
# if (..........
# ..........)
# {
#
# Otherwise, the line before the { tends to be too short.
$binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
$binary_bond_strength{'(('}{'{{'} = NOMINAL;
# break on something like '} (', but keep this stronger than a ','
# example is in 'howe.pl'
$binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
$binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
# keep matrix and hash indices together
# but make them a little below STRONG to allow breaking open
# something like {'some-word'}{'some-very-long-word'} at the }{
# (bracebrk.t)
$binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
$binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
$binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
$binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
# increase strength to the point where a break in the following
# will be after the opening paren rather than at the arrow:
# $a->$b($c);
$binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
# Note that the following alternative strength would make the break at the
# '->' rather than opening the '('. Both have advantages and disadvantages.
# $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; #
$binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
$binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
$binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
$binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
$binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
$binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
$binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
$binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
$binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
$binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
#---------------------------------------------------------------
# Binary NO_BREAK rules
#---------------------------------------------------------------
# use strict requires that bare word and => not be separated
$binary_bond_strength{'C'}{'=>'} = NO_BREAK;
$binary_bond_strength{'U'}{'=>'} = NO_BREAK;
# Never break between a bareword and a following paren because
# perl may give an error. For example, if a break is placed
# between 'to_filehandle' and its '(' the following line will
# give a syntax error [Carp.pm]: my( $no) =fileno(
# to_filehandle( $in)) ;
$binary_bond_strength{'C'}{'(('} = NO_BREAK;
$binary_bond_strength{'C'}{'{('} = NO_BREAK;
$binary_bond_strength{'U'}{'(('} = NO_BREAK;
$binary_bond_strength{'U'}{'{('} = NO_BREAK;
# use strict requires that bare word within braces not start new
# line
$binary_bond_strength{'L{'}{'w'} = NO_BREAK;
$binary_bond_strength{'w'}{'R}'} = NO_BREAK;
# The following two rules prevent a syntax error caused by breaking up
# a construction like '{-y}'. The '-' quotes the 'y' and prevents
# it from being taken as a transliteration. We have to keep
# token types 'L m w' together to prevent this error.
$binary_bond_strength{'L{'}{'m'} = NO_BREAK;
$binary_bond_strength_nospace{'m'}{'w'} = NO_BREAK;
# keep 'bareword-' together, but only if there is no space between
# the word and dash. Do not keep together if there is a space.
# example 'use perl6-alpha'
$binary_bond_strength_nospace{'w'}{'m'} = NO_BREAK;
# use strict requires that bare word and => not be separated
$binary_bond_strength{'w'}{'=>'} = NO_BREAK;
# use strict does not allow separating type info from trailing { }
# testfile is readmail.pl
$binary_bond_strength{'t'}{'L{'} = NO_BREAK;
$binary_bond_strength{'i'}{'L{'} = NO_BREAK;
# As a defensive measure, do not break between a '(' and a
# filehandle. In some cases, this can cause an error. For
# example, the following program works:
# my $msg="hi!\n";
# print
# ( STDOUT
# $msg
# );
#
# But this program fails:
# my $msg="hi!\n";
# print
# (
# STDOUT
# $msg
# );
#
# This is normally only a problem with the 'extrude' option
$binary_bond_strength{'(('}{'Y'} = NO_BREAK;
$binary_bond_strength{'{('}{'Y'} = NO_BREAK;
# never break between sub name and opening paren
$binary_bond_strength{'w'}{'(('} = NO_BREAK;
$binary_bond_strength{'w'}{'{('} = NO_BREAK;
# keep '}' together with ';'
$binary_bond_strength{'}}'}{';'} = NO_BREAK;
# Breaking before a ++ can cause perl to guess wrong. For
# example the following line will cause a syntax error
# with -extrude if we break between '$i' and '++' [fixstyle2]
# print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
$nobreak_lhs{'++'} = NO_BREAK;
# Do not break before a possible file handle
$nobreak_lhs{'Z'} = NO_BREAK;
# use strict hates bare words on any new line. For
# example, a break before the underscore here provokes the
# wrath of use strict:
# if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
$nobreak_rhs{'F'} = NO_BREAK;
$nobreak_rhs{'CORE::'} = NO_BREAK;
#---------------------------------------------------------------
# Bond Strength BEGIN Section 3.
# Define tables and values for applying a small bias to the above
# values.
#---------------------------------------------------------------
# Adding a small 'bias' to strengths is a simple way to make a line
# break at the first of a sequence of identical terms. For
# example, to force long string of conditional operators to break
# with each line ending in a ':', we can add a small number to the
# bond strength of each ':' (colon.t)
@bias_tokens = qw( : && || f and or . ); # tokens which get bias
%bias_hash = map { $_ => 0 } @bias_tokens;
$delta_bias = 0.0001; # a very small strength level
return;
} ## end sub initialize_bond_strength_hashes
use constant DEBUG_BOND => 0;
sub set_bond_strengths {
my ($self) = @_;
# patch-its always ok to break at end of line
$nobreak_to_go[$max_index_to_go] = 0;
my $rOpts_short_concatenation_item_length =
$rOpts->{'short-concatenation-item-length'};
# we start a new set of bias values for each line
%bias = %bias_hash;
my $code_bias = -.01; # bias for closing block braces
my $type = 'b';
my $token = ' ';
my $token_length = 1;
my $last_type;
my $last_nonblank_type = $type;
my $last_nonblank_token = $token;
my $list_str = $left_bond_strength{'?'};
my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
$next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
);
# main loop to compute bond strengths between each pair of tokens
foreach my $i ( 0 .. $max_index_to_go ) {
$last_type = $type;
if ( $type ne 'b' ) {
$last_nonblank_type = $type;
$last_nonblank_token = $token;
}
$type = $types_to_go[$i];
# strength on both sides of a blank is the same
if ( $type eq 'b' && $last_type ne 'b' ) {
$bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
next;
}
$token = $tokens_to_go[$i];
$token_length = $token_lengths_to_go[$i];
$block_type = $block_type_to_go[$i];
$i_next = $i + 1;
$next_type = $types_to_go[$i_next];
$next_token = $tokens_to_go[$i_next];
$total_nesting_depth = $nesting_depth_to_go[$i_next];
$i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
$next_nonblank_type = $types_to_go[$i_next_nonblank];
$next_nonblank_token = $tokens_to_go[$i_next_nonblank];
my $seqno = $type_sequence_to_go[$i];
my $next_nonblank_seqno = $type_sequence_to_go[$i_next_nonblank];
# We are computing the strength of the bond between the current
# token and the NEXT token.
#---------------------------------------------------------------
# Bond Strength Section 1:
# First Approximation.
# Use minimum of individual left and right tabulated bond
# strengths.
#---------------------------------------------------------------
my $bsr = $right_bond_strength{$type};
my $bsl = $left_bond_strength{$next_nonblank_type};
# define right bond strengths of certain keywords
if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
$bsr = $right_bond_strength{$token};
}
elsif ( $token eq 'ne' or $token eq 'eq' ) {
$bsr = NOMINAL;
}
# set terminal bond strength to the nominal value
# this will cause good preceding breaks to be retained
if ( $i_next_nonblank > $max_index_to_go ) {
$bsl = NOMINAL;
}
# define right bond strengths of certain keywords
if ( $next_nonblank_type eq 'k'
&& defined( $left_bond_strength{$next_nonblank_token} ) )
{
$bsl = $left_bond_strength{$next_nonblank_token};
}
elsif ($next_nonblank_token eq 'ne'
or $next_nonblank_token eq 'eq' )
{
$bsl = NOMINAL;
}
elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
$bsl = 0.9 * NOMINAL + 0.1 * STRONG;
}
# Use the minimum of the left and right strengths. Note: it might
# seem that we would want to keep a NO_BREAK if either token has
# this value. This didn't work, for example because in an arrow
# list, it prevents the comma from separating from the following
# bare word (which is probably quoted by its arrow). So necessary
# NO_BREAK's have to be handled as special cases in the final
# section.
if ( !defined($bsr) ) { $bsr = VERY_STRONG }
if ( !defined($bsl) ) { $bsl = VERY_STRONG }
my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
my $bond_str_1 = $bond_str;
#---------------------------------------------------------------
# Bond Strength Section 2:
# Apply hardwired rules..
#---------------------------------------------------------------
# Patch to put terminal or clauses on a new line: Weaken the bond
# at an || followed by die or similar keyword to make the terminal
# or clause fall on a new line, like this:
#
# my $class = shift
# || die "Cannot add broadcast: No class identifier found";
#
# Otherwise the break will be at the previous '=' since the || and
# = have the same starting strength and the or is biased, like
# this:
#
# my $class =
# shift || die "Cannot add broadcast: No class identifier found";
#
# In any case if the user places a break at either the = or the ||
# it should remain there.
if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
if ( $want_break_before{$token} && $i > 0 ) {
$bond_strength_to_go[ $i - 1 ] -= $delta_bias;
# keep bond strength of a token and its following blank
# the same
if ( $types_to_go[ $i - 1 ] eq 'b' && $i > 2 ) {
$bond_strength_to_go[ $i - 2 ] -= $delta_bias;
}
}
else {
$bond_str -= $delta_bias;
}
}
}
# good to break after end of code blocks
if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
$bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
$code_bias += $delta_bias;
}
if ( $type eq 'k' ) {
# allow certain control keywords to stand out
if ( $next_nonblank_type eq 'k'
&& $is_last_next_redo_return{$token} )
{
$bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
}
# Don't break after keyword my. This is a quick fix for a
# rare problem with perl. An example is this line from file
# Container.pm:
# foreach my $question( Debian::DebConf::ConfigDb::gettree(
# $this->{'question'} ) )
if ( $token eq 'my' ) {
$bond_str = NO_BREAK;
}
}
# good to break before 'if', 'unless', etc
if ( $is_if_brace_follower{$next_nonblank_token} ) {
$bond_str = VERY_WEAK;
}
if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
# FIXME: needs more testing
if ( $is_keyword_returning_list{$next_nonblank_token} ) {
$bond_str = $list_str if ( $bond_str > $list_str );
}
# keywords like 'unless', 'if', etc, within statements
# make good breaks
if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
$bond_str = VERY_WEAK / 1.05;
}
}
# try not to break before a comma-arrow
elsif ( $next_nonblank_type eq '=>' ) {
if ( $bond_str < STRONG ) { $bond_str = STRONG }
}
#---------------------------------------------------------------
# Additional hardwired NOBREAK rules
#---------------------------------------------------------------
# map1.t -- correct for a quirk in perl
if ( $token eq '('
&& $next_nonblank_type eq 'i'
&& $last_nonblank_type eq 'k'
&& $is_sort_map_grep{$last_nonblank_token} )
# /^(sort|map|grep)$/ )
{
$bond_str = NO_BREAK;
}
# extrude.t: do not break before paren at:
# -l pid_filename(
if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
$bond_str = NO_BREAK;
}
# in older version of perl, use strict can cause problems with
# breaks before bare words following opening parens. For example,
# this will fail under older versions if a break is made between
# '(' and 'MAIL': use strict; open( MAIL, "a long filename or
# command"); close MAIL;
if ( $type eq '{' ) {
if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
# but it's fine to break if the word is followed by a '=>'
# or if it is obviously a sub call
my $i_next_next_nonblank = $i_next_nonblank + 1;
my $next_next_type = $types_to_go[$i_next_next_nonblank];
if ( $next_next_type eq 'b'
&& $i_next_nonblank < $max_index_to_go )
{
$i_next_next_nonblank++;
$next_next_type = $types_to_go[$i_next_next_nonblank];
}
# We'll check for an old breakpoint and keep a leading
# bareword if it was that way in the input file.
# Presumably it was ok that way. For example, the
# following would remain unchanged:
#
# @months = (
# January, February, March, April,
# May, June, July, August,
# September, October, November, December,
# );
#
# This should be sufficient:
if (
!$old_breakpoint_to_go[$i]
&& ( $next_next_type eq ','
|| $next_next_type eq '}' )
)
{
$bond_str = NO_BREAK;
}
}
}
# Do not break between a possible filehandle and a ? or / and do
# not introduce a break after it if there is no blank
# (extrude.t)
elsif ( $type eq 'Z' ) {
# don't break..
if (
# if there is no blank and we do not want one. Examples:
# print $x++ # do not break after $x
# print HTML"HELLO" # break ok after HTML
(
$next_type ne 'b'
&& defined( $want_left_space{$next_type} )
&& $want_left_space{$next_type} == WS_NO
)
# or we might be followed by the start of a quote
|| $next_nonblank_type =~ /^[\/\?]$/
)
{
$bond_str = NO_BREAK;
}
}
# Breaking before a ? before a quote can cause trouble if
# they are not separated by a blank.
# Example: a syntax error occurs if you break before the ? here
# my$logic=join$all?' && ':' || ',@regexps;
# From: Professional_Perl_Programming_Code/multifind.pl
if ( $next_nonblank_type eq '?' ) {
$bond_str = NO_BREAK
if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
}
# Breaking before a . followed by a number
# can cause trouble if there is no intervening space
# Example: a syntax error occurs if you break before the .2 here
# $str .= pack($endian.2, ensurrogate($ord));
# From: perl58/Unicode.pm
elsif ( $next_nonblank_type eq '.' ) {
$bond_str = NO_BREAK
if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
}
my $bond_str_2 = $bond_str;
#---------------------------------------------------------------
# End of hardwired rules
#---------------------------------------------------------------
#---------------------------------------------------------------
# Bond Strength Section 3:
# Apply table rules. These have priority over the above
# hardwired rules.
#---------------------------------------------------------------
my $tabulated_bond_str;
my $ltype = $type;
my $rtype = $next_nonblank_type;
if ( $seqno && $is_container_token{$token} ) {
$ltype = $type . $token;
}
if ( $next_nonblank_seqno
&& $is_container_token{$next_nonblank_token} )
{
$rtype = $next_nonblank_type . $next_nonblank_token;
}
# apply binary rules which apply regardless of space between tokens
if ( $binary_bond_strength{$ltype}{$rtype} ) {
$bond_str = $binary_bond_strength{$ltype}{$rtype};
$tabulated_bond_str = $bond_str;
}
# apply binary rules which apply only if no space between tokens
if ( $binary_bond_strength_nospace{$ltype}{$next_type} ) {
$bond_str = $binary_bond_strength{$ltype}{$next_type};
$tabulated_bond_str = $bond_str;
}
if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
$bond_str = NO_BREAK;
$tabulated_bond_str = $bond_str;
}
my $bond_str_3 = $bond_str;
# If the hardwired rules conflict with the tabulated bond
# strength then there is an inconsistency that should be fixed
DEBUG_BOND
&& $tabulated_bond_str
&& $bond_str_1
&& $bond_str_1 != $bond_str_2
&& $bond_str_2 != $tabulated_bond_str
&& do {
print STDERR
"BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
};
#-----------------------------------------------------------------
# Bond Strength Section 4:
# Modify strengths of certain tokens which often occur in sequence
# by adding a small bias to each one in turn so that the breaks
# occur from left to right.
#
# Note that we only changing strengths by small amounts here,
# and usually increasing, so we should not be altering any NO_BREAKs.
# Other routines which check for NO_BREAKs will use a tolerance
# of one to avoid any problem.
#-----------------------------------------------------------------
# The bias tables use special keys:
# $type - if not keyword
# $token - if keyword, but map some keywords together
my $left_key =
$type eq 'k' ? $token eq 'err' ? 'or' : $token : $type;
my $right_key =
$next_nonblank_type eq 'k'
? $next_nonblank_token eq 'err'
? 'or'
: $next_nonblank_token
: $next_nonblank_type;
# add any bias set by sub scan_list at old comma break points.
if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] }
# bias left token
elsif ( defined( $bias{$left_key} ) ) {
if ( !$want_break_before{$left_key} ) {
$bias{$left_key} += $delta_bias;
$bond_str += $bias{$left_key};
}
}
# bias right token
if ( defined( $bias{$right_key} ) ) {
if ( $want_break_before{$right_key} ) {
# for leading '.' align all but 'short' quotes; the idea
# is to not place something like "\n" on a single line.
if ( $right_key eq '.' ) {
unless (
$last_nonblank_type eq '.'
&& ( $token_length <=
$rOpts_short_concatenation_item_length )
&& ( !$is_closing_token{$token} )
)
{
$bias{$right_key} += $delta_bias;
}
}
else {
$bias{$right_key} += $delta_bias;
}
$bond_str += $bias{$right_key};
}
}
my $bond_str_4 = $bond_str;
#---------------------------------------------------------------
# Bond Strength Section 5:
# Fifth Approximation.
# Take nesting depth into account by adding the nesting depth
# to the bond strength.
#---------------------------------------------------------------
my $strength;
if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
if ( $total_nesting_depth > 0 ) {
$strength = $bond_str + $total_nesting_depth;
}
else {
$strength = $bond_str;
}
}
else {
$strength = NO_BREAK;
# For critical code such as lines with here targets we must
# be absolutely sure that we do not allow a break. So for
# these the nobreak flag exceeds 1 as a signal. Otherwise we
# can run into trouble when small tolerances are added.
$strength += 1 if ( $nobreak_to_go[$i] > 1 );
}
#---------------------------------------------------------------
# Bond Strength Section 6:
# Sixth Approximation. Welds.
#---------------------------------------------------------------
# Do not allow a break within welds,
if ( $seqno && $total_weld_count ) {
if ( $self->weld_len_right( $seqno, $type ) ) {
$strength = NO_BREAK;
}
# But encourage breaking after opening welded tokens
elsif ($is_opening_token{$token}
&& $self->weld_len_left( $seqno, $type ) )
{
$strength -= 1;
}
}
# always break after side comment
if ( $type eq '#' ) { $strength = 0 }
$bond_strength_to_go[$i] = $strength;
DEBUG_BOND && do {
my $str = substr( $token, 0, 15 );
$str .= ' ' x ( 16 - length($str) );
print STDOUT
"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
};
} ## end main loop
return;
} ## end sub set_bond_strengths
} ## end closure set_bond_strengths
sub bad_pattern {
# See if a pattern will compile. We have to use a string eval here,
# but it should be safe because the pattern has been constructed
# by this program.
my ($pattern) = @_;
eval "'##'=~/$pattern/";
return $@;
}
{ ## begin closure prepare_cuddled_block_types
my %no_cuddle;
# Add keywords here which really should not be cuddled
BEGIN {
my @q = qw(if unless for foreach while);
@no_cuddle{@q} = (1) x scalar(@q);
}
sub prepare_cuddled_block_types {
# the cuddled-else style, if used, is controlled by a hash that
# we construct here
# Include keywords here which should not be cuddled
my $cuddled_string = "";
if ( $rOpts->{'cuddled-else'} ) {
# set the default
$cuddled_string = 'elsif else continue catch finally'
unless ( $rOpts->{'cuddled-block-list-exclusive'} );
# This is the old equivalent but more complex version
# $cuddled_string = 'if-elsif-else unless-elsif-else -continue ';
# Add users other blocks to be cuddled
my $cuddled_block_list = $rOpts->{'cuddled-block-list'};
if ($cuddled_block_list) {
$cuddled_string .= " " . $cuddled_block_list;
}
}
# If we have a cuddled string of the form
# 'try-catch-finally'
# we want to prepare a hash of the form
# $rcuddled_block_types = {
# 'try' => {
# 'catch' => 1,
# 'finally' => 1
# },
# };
# use -dcbl to dump this hash
# Multiple such strings are input as a space or comma separated list
# If we get two lists with the same leading type, such as
# -cbl = "-try-catch-finally -try-catch-otherwise"
# then they will get merged as follows:
# $rcuddled_block_types = {
# 'try' => {
# 'catch' => 1,
# 'finally' => 2,
# 'otherwise' => 1,
# },
# };
# This will allow either type of chain to be followed.
$cuddled_string =~ s/,/ /g; # allow space or comma separated lists
my @cuddled_strings = split /\s+/, $cuddled_string;
$rcuddled_block_types = {};
# process each dash-separated string...
my $string_count = 0;
foreach my $string (@cuddled_strings) {
next unless $string;
my @words = split /-+/, $string; # allow multiple dashes
# we could look for and report possible errors here...
next unless ( @words > 0 );
# allow either '-continue' or *-continue' for arbitrary starting type
my $start = '*';
# a single word without dashes is a secondary block type
if ( @words > 1 ) {
$start = shift @words;
}
# always make an entry for the leading word. If none follow, this
# will still prevent a wildcard from matching this word.
if ( !defined( $rcuddled_block_types->{$start} ) ) {
$rcuddled_block_types->{$start} = {};
}
# The count gives the original word order in case we ever want it.
$string_count++;
my $word_count = 0;
foreach my $word (@words) {
next unless $word;
if ( $no_cuddle{$word} ) {
Warn(
"## Ignoring keyword '$word' in -cbl; does not seem right\n"
);
next;
}
$word_count++;
$rcuddled_block_types->{$start}->{$word} =
1; #"$string_count.$word_count";
# git#9: Remove this word from the list of desired one-line
# blocks
$want_one_line_block{$word} = 0;
}
}
return;
}
} ## begin closure prepare_cuddled_block_types
sub dump_cuddled_block_list {
my ($fh) = @_;
# ORIGINAL METHOD: Here is the format of the cuddled block type hash
# which controls this routine
# my $rcuddled_block_types = {
# 'if' => {
# 'else' => 1,
# 'elsif' => 1
# },
# 'try' => {
# 'catch' => 1,
# 'finally' => 1
# },
# };
# SIMPLFIED METHOD: the simplified method uses a wildcard for
# the starting block type and puts all cuddled blocks together:
# my $rcuddled_block_types = {
# '*' => {
# 'else' => 1,
# 'elsif' => 1
# 'catch' => 1,
# 'finally' => 1
# },
# };
# Both methods work, but the simplified method has proven to be adequate and
# easier to manage.
my $cuddled_string = $rOpts->{'cuddled-block-list'};
$cuddled_string = '' unless $cuddled_string;
my $flags = "";
$flags .= "-ce" if ( $rOpts->{'cuddled-else'} );
$flags .= " -cbl='$cuddled_string'";
unless ( $rOpts->{'cuddled-else'} ) {
$flags .= "\nNote: You must specify -ce to generate a cuddled hash";
}
$fh->print(<<EOM);
------------------------------------------------------------------------
Hash of cuddled block types prepared for a run with these parameters:
$flags
------------------------------------------------------------------------
EOM
use Data::Dumper;
$fh->print( Dumper($rcuddled_block_types) );
$fh->print(<<EOM);
------------------------------------------------------------------------
EOM
return;
}
sub make_static_block_comment_pattern {
# create the pattern used to identify static block comments
$static_block_comment_pattern = '^\s*##';
# allow the user to change it
if ( $rOpts->{'static-block-comment-prefix'} ) {
my $prefix = $rOpts->{'static-block-comment-prefix'};
$prefix =~ s/^\s*//;
my $pattern = $prefix;
# user may give leading caret to force matching left comments only
if ( $prefix !~ /^\^#/ ) {
if ( $prefix !~ /^#/ ) {
Die(
"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"
);
}
$pattern = '^\s*' . $prefix;
}
if ( bad_pattern($pattern) ) {
Die(
"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"
);
}
$static_block_comment_pattern = $pattern;
}
return;
}
sub make_format_skipping_pattern {
my ( $opt_name, $default ) = @_;
my $param = $rOpts->{$opt_name};
unless ($param) { $param = $default }
$param =~ s/^\s*//;
if ( $param !~ /^#/ ) {
Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
}
my $pattern = '^' . $param . '\s';
if ( bad_pattern($pattern) ) {
Die(
"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
);
}
return $pattern;
}
sub make_non_indenting_brace_pattern {
# Create the pattern used to identify static side comments.
# Note that we are ending the pattern in a \s. This will allow
# the pattern to be followed by a space and some text, or a newline.
# The pattern is used in sub 'non_indenting_braces'
$non_indenting_brace_pattern = '^#<<<\s';
# allow the user to change it
if ( $rOpts->{'non-indenting-brace-prefix'} ) {
my $prefix = $rOpts->{'non-indenting-brace-prefix'};
$prefix =~ s/^\s*//;
if ( $prefix !~ /^#/ ) {
Die("ERROR: the -nibp parameter '$prefix' must begin with '#'\n");
}
my $pattern = '^' . $prefix . '\s';
if ( bad_pattern($pattern) ) {
Die(
"ERROR: the -nibp prefix '$prefix' causes the invalid regex '$pattern'\n"
);
}
$non_indenting_brace_pattern = $pattern;
}
return;
}
sub make_closing_side_comment_list_pattern {
# turn any input list into a regex for recognizing selected block types
$closing_side_comment_list_pattern = '^\w+';
if ( defined( $rOpts->{'closing-side-comment-list'} )
&& $rOpts->{'closing-side-comment-list'} )
{
$closing_side_comment_list_pattern =
make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
}
return;
}
sub make_sub_matching_pattern {
# Patterns for standardizing matches to block types for regular subs and
# anonymous subs. Examples
# 'sub process' is a named sub
# 'sub ::m' is a named sub
# 'sub' is an anonymous sub
# 'sub:' is a label, not a sub
# 'substr' is a keyword
$SUB_PATTERN = '^sub\s+(::|\w)'; # match normal sub
$ASUB_PATTERN = '^sub$'; # match anonymous sub
$ANYSUB_PATTERN = '^sub\b'; # match either type of sub
# Note (see also RT #133130): These patterns are used by
# sub make_block_pattern, which is used for making most patterns.
# So this sub needs to be called before other pattern-making routines.
if ( $rOpts->{'sub-alias-list'} ) {
# Note that any 'sub-alias-list' has been preprocessed to
# be a trimmed, space-separated list which includes 'sub'
# for example, it might be 'sub method fun'
my $sub_alias_list = $rOpts->{'sub-alias-list'};
$sub_alias_list =~ s/\s+/\|/g;
$SUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
$ASUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
$ANYSUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
}
return;
}
sub make_bli_pattern {
# default list of block types for which -bli would apply
my $bli_list_string = 'if else elsif unless while for foreach do : sub';
if ( defined( $rOpts->{'brace-left-and-indent-list'} )
&& $rOpts->{'brace-left-and-indent-list'} )
{
$bli_list_string = $rOpts->{'brace-left-and-indent-list'};
}
$bli_pattern = make_block_pattern( '-blil', $bli_list_string );
return;
}
sub make_keyword_group_list_pattern {
# turn any input list into a regex for recognizing selected block types.
# Here are the defaults:
$keyword_group_list_pattern = '^(our|local|my|use|require|)$';
$keyword_group_list_comment_pattern = '';
if ( defined( $rOpts->{'keyword-group-blanks-list'} )
&& $rOpts->{'keyword-group-blanks-list'} )
{
my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'};
my @keyword_list;
my @comment_list;
foreach my $word (@words) {
if ( $word =~ /^(BC|SBC)$/ ) {
push @comment_list, $word;
if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' }
}
else {
push @keyword_list, $word;
}
}
$keyword_group_list_pattern =
make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} );
$keyword_group_list_comment_pattern =
make_block_pattern( '-kgbl', join( ' ', @comment_list ) );
}
return;
}
sub make_block_brace_vertical_tightness_pattern {
# turn any input list into a regex for recognizing selected block types
$block_brace_vertical_tightness_pattern =
'^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
&& $rOpts->{'block-brace-vertical-tightness-list'} )
{
$block_brace_vertical_tightness_pattern =
make_block_pattern( '-bbvtl',
$rOpts->{'block-brace-vertical-tightness-list'} );
}
return;
}
sub make_blank_line_pattern {
$blank_lines_before_closing_block_pattern = $SUB_PATTERN;
my $key = 'blank-lines-before-closing-block-list';
if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
$blank_lines_before_closing_block_pattern =
make_block_pattern( '-blbcl', $rOpts->{$key} );
}
$blank_lines_after_opening_block_pattern = $SUB_PATTERN;
$key = 'blank-lines-after-opening-block-list';
if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
$blank_lines_after_opening_block_pattern =
make_block_pattern( '-blaol', $rOpts->{$key} );
}
return;
}
sub make_block_pattern {
# given a string of block-type keywords, return a regex to match them
# The only tricky part is that labels are indicated with a single ':'
# and the 'sub' token text may have additional text after it (name of
# sub).
#
# Example:
#
# input string: "if else elsif unless while for foreach do : sub";
# pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
# Minor Update:
#
# To distinguish between anonymous subs and named subs, use 'sub' to
# indicate a named sub, and 'asub' to indicate an anonymous sub
my ( $abbrev, $string ) = @_;
my @list = split_words($string);
my @words = ();
my %seen;
for my $i (@list) {
if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
next if $seen{$i};
$seen{$i} = 1;
if ( $i eq 'sub' ) {
}
elsif ( $i eq 'asub' ) {
}
elsif ( $i eq ';' ) {
push @words, ';';
}
elsif ( $i eq '{' ) {
push @words, '\{';
}
elsif ( $i eq ':' ) {
push @words, '\w+:';
}
elsif ( $i =~ /^\w/ ) {
push @words, $i;
}
else {
Warn("unrecognized block type $i after $abbrev, ignoring\n");
}
}
my $pattern = '(' . join( '|', @words ) . ')$';
my $sub_patterns = "";
if ( $seen{'sub'} ) {
$sub_patterns .= '|' . $SUB_PATTERN;
}
if ( $seen{'asub'} ) {
$sub_patterns .= '|' . $ASUB_PATTERN;
}
if ($sub_patterns) {
$pattern = '(' . $pattern . $sub_patterns . ')';
}
$pattern = '^' . $pattern;
return $pattern;
}
sub make_static_side_comment_pattern {
# create the pattern used to identify static side comments
$static_side_comment_pattern = '^##';
# allow the user to change it
if ( $rOpts->{'static-side-comment-prefix'} ) {
my $prefix = $rOpts->{'static-side-comment-prefix'};
$prefix =~ s/^\s*//;
my $pattern = '^' . $prefix;
if ( bad_pattern($pattern) ) {
Die(
"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"
);
}
$static_side_comment_pattern = $pattern;
}
return;
}
sub make_closing_side_comment_prefix {
# Be sure we have a valid closing side comment prefix
my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
my $csc_prefix_pattern;
if ( !defined($csc_prefix) ) {
$csc_prefix = '## end';
$csc_prefix_pattern = '^##\s+end';
}
else {
my $test_csc_prefix = $csc_prefix;
if ( $test_csc_prefix !~ /^#/ ) {
$test_csc_prefix = '#' . $test_csc_prefix;
}
# make a regex to recognize the prefix
my $test_csc_prefix_pattern = $test_csc_prefix;
# escape any special characters
$test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
$test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
# allow exact number of intermediate spaces to vary
$test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
# make sure we have a good pattern
# if we fail this we probably have an error in escaping
# characters.
if ( bad_pattern($test_csc_prefix_pattern) ) {
# shouldn't happen..must have screwed up escaping, above
report_definite_bug();
Warn(
"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
);
# just warn and keep going with defaults
Warn("Please consider using a simpler -cscp prefix\n");
Warn("Using default -cscp instead; please check output\n");
}
else {
$csc_prefix = $test_csc_prefix;
$csc_prefix_pattern = $test_csc_prefix_pattern;
}
}
$rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
$closing_side_comment_prefix_pattern = $csc_prefix_pattern;
return;
}
##################################################
# CODE SECTION 4: receive lines from the tokenizer
##################################################
{ ## begin closure write_line
my $Last_line_had_side_comment;
my $In_format_skipping_section;
my $Saw_VERSION_in_this_file;
sub initialize_write_line {
$Last_line_had_side_comment = 0;
$In_format_skipping_section = 0;
$Saw_VERSION_in_this_file = 0;
return;
}
sub write_line {
# This routine originally received lines of code and immediately processed
# them. That was efficient when memory was limited, but now it just saves
# the lines it receives. They get processed all together after the last
# line is received.
# As tokenized lines are received they are converted to the format needed
# for the final formatting.
my ( $self, $line_of_tokens_old ) = @_;
my $rLL = $self->[_rLL_];
my $Klimit = $self->[_Klimit_];
my $rlines_new = $self->[_rlines_];
my $maximum_level = $self->[_maximum_level_];
my $Kfirst;
my $line_of_tokens = {};
foreach my $key (
qw(
_curly_brace_depth
_ending_in_quote
_guessed_indentation_level
_line_number
_line_text
_line_type
_paren_depth
_quote_character
_square_bracket_depth
_starting_in_quote
)
)
{
$line_of_tokens->{$key} = $line_of_tokens_old->{$key};
}
# Data needed by Logger
$line_of_tokens->{_level_0} = 0;
$line_of_tokens->{_ci_level_0} = 0;
$line_of_tokens->{_nesting_blocks_0} = "";
$line_of_tokens->{_nesting_tokens_0} = "";
# Needed to avoid trimming quotes
$line_of_tokens->{_ended_in_blank_token} = undef;
my $line_type = $line_of_tokens_old->{_line_type};
my $input_line_no = $line_of_tokens_old->{_line_number} - 1;
my $CODE_type = "";
my $tee_output;
# Handle line of non-code
if ( $line_type ne 'CODE' ) {
$tee_output ||= $rOpts_tee_pod
&& substr( $line_type, 0, 3 ) eq 'POD';
}
# Handle line of code
else {
my $rtokens = $line_of_tokens_old->{_rtokens};
my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
my $rblock_type = $line_of_tokens_old->{_rblock_type};
my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type};
my $rcontainer_environment =
$line_of_tokens_old->{_rcontainer_environment};
my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
my $rlevels = $line_of_tokens_old->{_rlevels};
my $rslevels = $line_of_tokens_old->{_rslevels};
my $rci_levels = $line_of_tokens_old->{_rci_levels};
my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks};
my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens};
my $jmax = @{$rtokens} - 1;
if ( $jmax >= 0 ) {
$Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
foreach my $j ( 0 .. $jmax ) {
# Clip negative nesting depths to zero to avoid problems.
# Negative values can occur in files with unbalanced containers
my $slevel = $rslevels->[$j];
if ( $slevel < 0 ) { $slevel = 0 }
if ( $rlevels->[$j] > $maximum_level ) {
$maximum_level = $rlevels->[$j];
}
# But do not clip the 'level' variable yet. We will do this
# later, in sub 'store_token_to_go'. The reason is that in
# files with level errors, the logic in 'weld_cuddled_else'
# uses a stack logic that will give bad welds if we clip
# levels here.
## if ( $rlevels->[$j] < 0 ) { $rlevels->[$j] = 0 }
my @tokary;
@tokary[
_TOKEN_, _TYPE_,
_BLOCK_TYPE_, _CONTAINER_ENVIRONMENT_,
_TYPE_SEQUENCE_, _LEVEL_,
_LEVEL_TRUE_, _SLEVEL_,
_CI_LEVEL_, _LINE_INDEX_,
]
= (
$rtokens->[$j], $rtoken_type->[$j],
$rblock_type->[$j], $rcontainer_environment->[$j],
$rtype_sequence->[$j], $rlevels->[$j],
$rlevels->[$j], $slevel,
$rci_levels->[$j], $input_line_no,
);
push @{$rLL}, \@tokary;
} ## end foreach my $j ( 0 .. $jmax )
$Klimit = @{$rLL} - 1;
# Need to remember if we can trim the input line
$line_of_tokens->{_ended_in_blank_token} =
$rtoken_type->[$jmax] eq 'b';
$line_of_tokens->{_level_0} = $rlevels->[0];
$line_of_tokens->{_ci_level_0} = $rci_levels->[0];
$line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0];
$line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0];
} ## end if ( $jmax >= 0 )
$CODE_type =
$self->get_CODE_type( $line_of_tokens, $Kfirst, $Klimit );
$tee_output ||=
$rOpts_tee_block_comments
&& $jmax == 0
&& $rLL->[$Kfirst]->[_TYPE_] eq '#';
$tee_output ||=
$rOpts_tee_side_comments
&& defined($Kfirst)
&& $Klimit > $Kfirst
&& $rLL->[$Klimit]->[_TYPE_] eq '#';
# Handle any requested side comment deletions. It is easier to get
# this done here rather than farther down the pipeline because IO
# lines take a different route, and because lines with deleted HSC
# become BL lines. An since we are deleting now, we have to also
# handle any tee- requests before the side comments vanish.
my $delete_side_comment =
$rOpts_delete_side_comments
&& defined($Kfirst)
&& $rLL->[$Klimit]->[_TYPE_] eq '#'
&& ( $Klimit > $Kfirst || $CODE_type eq 'HSC' )
&& ( !$CODE_type || $CODE_type eq 'HSC' || $CODE_type eq 'IO' );
if ( $rOpts_delete_closing_side_comments
&& !$delete_side_comment
&& defined($Kfirst)
&& $Klimit > $Kfirst
&& $rLL->[$Klimit]->[_TYPE_] eq '#'
&& ( !$CODE_type || $CODE_type eq 'HSC' || $CODE_type eq 'IO' )
)
{
my $token = $rLL->[$Klimit]->[_TOKEN_];
my $K_m = $Klimit - 1;
my $type_m = $rLL->[$K_m]->[_TYPE_];
if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- }
my $last_nonblank_block_type = $rLL->[$K_m]->[_BLOCK_TYPE_];
if ( $token =~ /$closing_side_comment_prefix_pattern/
&& $last_nonblank_block_type =~
/$closing_side_comment_list_pattern/ )
{
$delete_side_comment = 1;
}
} ## end if ( $rOpts_delete_closing_side_comments...)
if ($delete_side_comment) {
pop @{$rLL};
$Klimit -= 1;
if ( $Klimit > $Kfirst
&& $rLL->[$Klimit]->[_TYPE_] eq 'b' )
{
pop @{$rLL};
$Klimit -= 1;
}
# The -io option outputs the line text, so we have to update
# the line text so that the comment does not reappear.
if ( $CODE_type eq 'IO' ) {
my $line = "";
foreach my $KK ( $Kfirst .. $Klimit ) {
$line .= $rLL->[$KK]->[_TOKEN_];
}
$line_of_tokens->{_line_text} = $line . "\n";
}
# If we delete a hanging side comment the line becomes blank.
if ( $CODE_type eq 'HSC' ) { $CODE_type = 'BL' }
}
} ## end if ( $line_type eq 'CODE')
# Finish storing line variables
if ($tee_output) {
my $fh_tee = $self->[_fh_tee_];
my $line_text = $line_of_tokens_old->{_line_text};
$fh_tee->print($line_text) if ($fh_tee);
}
$line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
$line_of_tokens->{_code_type} = $CODE_type;
$self->[_Klimit_] = $Klimit;
$self->[_maximum_level_] = $maximum_level;
push @{$rlines_new}, $line_of_tokens;
return;
}
sub get_CODE_type {
my ( $self, $line_of_tokens, $Kfirst, $Klast ) = @_;
# We are looking at a line of code and setting a flag to
# describe any special processing that it requires
# Possible CODE_types
# 'VB' = Verbatim - line goes out verbatim (a quote)
# 'FS' = Format Skipping - line goes out verbatim
# 'BL' = Blank Line
# 'HSC' = Hanging Side Comment - fix this hanging side comment
# 'SBCX'= Static Block Comment Without Leading Space
# 'SBC' = Static Block Comment
# 'BC' = Block Comment - an ordinary full line comment
# 'IO' = Indent Only - line goes out unchanged except for indentation
# 'NIN' = No Internal Newlines - line does not get broken
# 'VER' = VERSION statement
# '' = ordinary line of code with no restructions
my $rLL = $self->[_rLL_];
my $CODE_type = "";
my $input_line = $line_of_tokens->{_line_text};
my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1;
my $is_block_comment = 0;
my $has_side_comment = 0;
if ( $jmax >= 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
if ( $jmax == 0 ) { $is_block_comment = 1; }
else { $has_side_comment = 1 }
}
# Write line verbatim if we are in a formatting skip section
if ($In_format_skipping_section) {
# Note: extra space appended to comment simplifies pattern matching
if ( $is_block_comment
&& ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
/$format_skipping_pattern_end/ )
{
$In_format_skipping_section = 0;
write_logfile_entry("Exiting formatting skip section\n");
}
$CODE_type = 'FS';
goto RETURN;
}
# Check for a continued quote..
if ( $line_of_tokens->{_starting_in_quote} ) {
# A line which is entirely a quote or pattern must go out
# verbatim. Note: the \n is contained in $input_line.
if ( $jmax <= 0 ) {
if ( ( $input_line =~ "\t" ) ) {
my $input_line_number = $line_of_tokens->{_line_number};
$self->note_embedded_tab($input_line_number);
}
$CODE_type = 'VB';
goto RETURN;
}
}
# See if we are entering a formatting skip section
if ( $rOpts_format_skipping
&& $is_block_comment
&& ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
/$format_skipping_pattern_begin/ )
{
$In_format_skipping_section = 1;
write_logfile_entry("Entering formatting skip section\n");
$CODE_type = 'FS';
goto RETURN;
}
# ignore trailing blank tokens (they will get deleted later)
if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
$jmax--;
}
# blank line..
if ( $jmax < 0 ) {
$CODE_type = 'BL';
goto RETURN;
}
# see if this is a static block comment (starts with ## by default)
my $is_static_block_comment = 0;
my $is_static_block_comment_without_leading_space = 0;
if ( $is_block_comment
&& $rOpts->{'static-block-comments'}
&& $input_line =~ /$static_block_comment_pattern/ )
{
$is_static_block_comment = 1;
$is_static_block_comment_without_leading_space =
substr( $input_line, 0, 1 ) eq '#';
}
# Check for comments which are line directives
# Treat exactly as static block comments without leading space
# reference: perlsyn, near end, section Plain Old Comments (Not!)
# example: '# line 42 "new_filename.plx"'
if (
$is_block_comment
&& $input_line =~ /^\# \s*
line \s+ (\d+) \s*
(?:\s("?)([^"]+)\2)? \s*
$/x
)
{
$is_static_block_comment = 1;
$is_static_block_comment_without_leading_space = 1;
}
# look for hanging side comment
if (
$is_block_comment
&& $Last_line_had_side_comment # last line had side comment
&& $input_line =~ /^\s/ # there is some leading space
&& !$is_static_block_comment # do not make static comment hanging
&& $rOpts->{'hanging-side-comments'} # user is allowing
# hanging side comments
# like this
)
{
$has_side_comment = 1;
$CODE_type = 'HSC';
goto RETURN;
}
# Handle a block (full-line) comment..
if ($is_block_comment) {
if ($is_static_block_comment_without_leading_space) {
$CODE_type = 'SBCX';
goto RETURN;
}
elsif ($is_static_block_comment) {
$CODE_type = 'SBC';
goto RETURN;
}
elsif ($Last_line_had_side_comment
&& !$rOpts_maximum_consecutive_blank_lines
&& $rLL->[$Kfirst]->[_LEVEL_] > 0 )
{
# Emergency fix to keep a block comment from becoming a hanging
# side comment. This fix is for the case that blank lines
# cannot be inserted. There is related code in sub
# 'process_line_of_CODE'
$CODE_type = 'SBCX';
goto RETURN;
}
else {
$CODE_type = 'BC';
goto RETURN;
}
}
# End of comments. Handle a line of normal code:
if ($rOpts_indent_only) {
$CODE_type = 'IO';
goto RETURN;
}
if ( !$rOpts_add_newlines ) {
$CODE_type = 'NIN';
goto RETURN;
}
# Patch needed for MakeMaker. Do not break a statement
# in which $VERSION may be calculated. See MakeMaker.pm;
# this is based on the coding in it.
# The first line of a file that matches this will be eval'd:
# /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
# Examples:
# *VERSION = \'1.01';
# ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
# We will pass such a line straight through without breaking
# it unless -npvl is used.
# Patch for problem reported in RT #81866, where files
# had been flattened into a single line and couldn't be
# tidied without -npvl. There are two parts to this patch:
# First, it is not done for a really long line (80 tokens for now).
# Second, we will only allow up to one semicolon
# before the VERSION. We need to allow at least one semicolon
# for statements like this:
# require Exporter; our $VERSION = $Exporter::VERSION;
# where both statements must be on a single line for MakeMaker
my $is_VERSION_statement = 0;
if ( !$Saw_VERSION_in_this_file
&& $jmax < 80
&& $input_line =~
/^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
{
$Saw_VERSION_in_this_file = 1;
write_logfile_entry("passing VERSION line; -npvl deactivates\n");
# This code type has lower priority than others
$CODE_type = 'VER';
goto RETURN;
}
RETURN:
$Last_line_had_side_comment = $has_side_comment;
return $CODE_type;
}
} ## end closure write_line
#############################################
# CODE SECTION 5: Pre-process the entire file
#############################################
sub finish_formatting {
my ( $self, $severe_error ) = @_;
# The file has been tokenized and is ready to be formatted.
# All of the relevant data is stored in $self, ready to go.
# Check the maximum level. If it is extremely large we will
# give up and output the file verbatim.
my $maximum_level = $self->[_maximum_level_];
my $maximum_table_index = $#maximum_line_length;
if ( !$severe_error && $maximum_level > $maximum_table_index ) {
$severe_error ||= 1;
Warn(<<EOM);
The maximum indentation level, $maximum_level, exceeds the builtin limit of $maximum_table_index.
Something may be wrong; formatting will be skipped.
EOM
}
# output file verbatim if severe error or no formatting requested
if ( $severe_error || $rOpts->{notidy} ) {
$self->dump_verbatim();
$self->wrapup();
return;
}
# Update the 'save_logfile' flag based to include any tokenization errors.
# We can save time by skipping logfile calls if it is not going to be saved.
my $logger_object = $self->[_logger_object_];
if ($logger_object) {
$self->[_save_logfile_] = $logger_object->get_save_logfile();
}
# Make a pass through all tokens, adding or deleting any whitespace as
# required. Also make any other changes, such as adding semicolons.
# All token changes must be made here so that the token data structure
# remains fixed for the rest of this iteration.
$self->respace_tokens();
$self->find_multiline_qw();
$self->keep_old_line_breaks();
# Implement any welding needed for the -wn or -cb options
$self->weld_containers();
# Locate small nested blocks which should not be broken
$self->mark_short_nested_blocks();
$self->adjust_indentation_levels();
# Finishes formatting and write the result to the line sink.
# Eventually this call should just change the 'rlines' data according to the
# new line breaks and then return so that we can do an internal iteration
# before continuing with the next stages of formatting.
$self->process_all_lines();
# A final routine to tie up any loose ends
$self->wrapup();
return;
}
sub dump_verbatim {
my $self = shift;
my $rlines = $self->[_rlines_];
foreach my $line ( @{$rlines} ) {
my $input_line = $line->{_line_text};
$self->write_unindented_line($input_line);
}
return;
}
sub respace_tokens {
my $self = shift;
return if $rOpts->{'indent-only'};
# This routine is called once per file to do as much formatting as possible
# before new line breaks are set.
# This routine makes all necessary and possible changes to the tokenization
# after the initial tokenization of the file. This is a tedious routine,
# but basically it consists of inserting and deleting whitespace between
# nonblank tokens according to the selected parameters. In a few cases
# non-space characters are added, deleted or modified.
# The goal of this routine is to create a new token array which only needs
# the definition of new line breaks and padding to complete formatting. In
# a few cases we have to cheat a little to achieve this goal. In
# particular, we may not know if a semicolon will be needed, because it
# depends on how the line breaks go. To handle this, we include the
# semicolon as a 'phantom' which can be displayed as normal or as an empty
# string.
# Method: The old tokens are copied one-by-one, with changes, from the old
# linear storage array $rLL to a new array $rLL_new.
my $rLL = $self->[_rLL_];
my $Klimit_old = $self->[_Klimit_];
my $rlines = $self->[_rlines_];
my $length_function = $self->[_length_function_];
my $is_encoded_data = $self->[_is_encoded_data_];
my $rLL_new = []; # This is the new array
my $KK = 0;
my $rtoken_vars;
my $Kmax = @{$rLL} - 1;
my $CODE_type = "";
my $line_type = "";
my $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
my $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
my $rOpts_ignore_side_comment_lengths =
$rOpts->{'ignore-side-comment-lengths'};
# Set the whitespace flags, which indicate the token spacing preference.
my $rwhitespace_flags = $self->set_whitespace_flags();
# we will be setting token lengths as we go
my $cumulative_length = 0;
# We also define these hash indexes giving container token array indexes
# as a function of the container sequence numbers. For example,
my $K_opening_container = {}; # opening [ { or (
my $K_closing_container = {}; # closing ] } or )
my $K_opening_ternary = {}; # opening ? of ternary
my $K_closing_ternary = {}; # closing : of ternary
# List of new K indexes of phantom semicolons
# This will be needed if we want to undo them for iterations
my $rK_phantom_semicolons = [];
my %seqno_stack;
my %KK_stack; # Note: old K index
my %K_opening_by_seqno = (); # Note: old K index
my $depth_next = 0;
my $depth_next_max = 0;
my $rtype_count_by_seqno = {};
my $ris_broken_container = {};
my $rhas_broken_container = {};
my $rparent_of_seqno = {};
my $rchildren_of_seqno = {};
my $last_nonblank_type = ';';
my $last_nonblank_token = ';';
my $last_nonblank_block_type = '';
my $nonblank_token_count = 0;
my $store_token = sub {
my ($item) = @_;
# This will be the index of this item in the new array
my $KK_new = @{$rLL_new};
my $type = $item->[_TYPE_];
my $is_blank = $type eq 'b';
# Do not output consecutive blanks. This should not happen, but
# is worth checking because later routines make this assumption.
if ( $is_blank && $KK_new && $rLL_new->[-1]->[_TYPE_] eq 'b' ) {
return;
}
# check for a sequenced item (i.e., container or ?/:)
my $type_sequence = $item->[_TYPE_SEQUENCE_];
if ($type_sequence) {
my $token = $item->[_TOKEN_];
if ( $is_opening_token{$token} ) {
$K_opening_container->{$type_sequence} = $KK_new;
}
elsif ( $is_closing_token{$token} ) {
$K_closing_container->{$type_sequence} = $KK_new;
}
# These are not yet used but could be useful
else {
if ( $token eq '?' ) {
$K_opening_ternary->{$type_sequence} = $KK_new;
}
elsif ( $token eq ':' ) {
$K_closing_ternary->{$type_sequence} = $KK_new;
}
else {
# We really shouldn't arrive here, just being cautious:
# The only sequenced types output by the tokenizer are the
# opening & closing containers and the ternary types. Each
# of those was checked above. So we would only get here
# if the tokenizer has been changed to mark some other
# tokens with sequence numbers.
my $type = $item->[_TYPE_];
Fault(
"Unexpected token type with sequence number: type='$type', seqno='$type_sequence'"
);
}
}
}
# Find the length of this token. Later it may be adjusted if phantom
# or ignoring side comment lengths.
my $token_length =
$is_encoded_data
? $length_function->( $item->[_TOKEN_] )
: length( $item->[_TOKEN_] );
# handle comments
my $is_comment = $type eq '#';
if ($is_comment) {
# trim comments if necessary
if ( $item->[_TOKEN_] =~ s/\s+$// ) {
$token_length = $length_function->( $item->[_TOKEN_] );
}
# Mark length of side comments as just 1 if their lengths are ignored
if ( $rOpts_ignore_side_comment_lengths
&& ( !$CODE_type || $CODE_type eq 'HSC' ) )
{
$token_length = 1;
}
}
$item->[_TOKEN_LENGTH_] = $token_length;
# and update the cumulative length
$cumulative_length += $token_length;
# Save the length sum to just AFTER this token
$item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
if ( !$is_blank && !$is_comment ) {
$last_nonblank_type = $type;
$last_nonblank_token = $item->[_TOKEN_];
$last_nonblank_block_type = $item->[_BLOCK_TYPE_];
$nonblank_token_count++;
# count selected types
if ( $is_counted_type{$type} ) {
my $seqno = $seqno_stack{ $depth_next - 1 };
if ( defined($seqno) ) {
$rtype_count_by_seqno->{$seqno}->{$type}++;
}
}
}
# For reference, here is how to get the parent sequence number.
# This is not used because it is slower than finding it on the fly
# in sub parent_seqno_by_K:
# my $seqno_parent =
# $type_sequence && $is_opening_token{$token}
# ? $seqno_stack{ $depth_next - 2 }
# : $seqno_stack{ $depth_next - 1 };
# my $KK = @{$rLL_new};
# $rseqno_of_parent_by_K->{$KK} = $seqno_parent;
# and finally, add this item to the new array
push @{$rLL_new}, $item;
};
my $store_token_and_space = sub {
my ( $item, $want_space ) = @_;
# store a token with preceding space if requested and needed
# First store the space
if ( $want_space
&& @{$rLL_new}
&& $rLL_new->[-1]->[_TYPE_] ne 'b'
&& $rOpts_add_whitespace )
{
my $rcopy = copy_token_as_type( $item, 'b', ' ' );
$rcopy->[_LINE_INDEX_] =
$rLL_new->[-1]->[_LINE_INDEX_];
$store_token->($rcopy);
}
# then the token
$store_token->($item);
};
my $K_end_q = sub {
my ($KK) = @_;
my $K_end = $KK;
my $Kn = $KK + 1;
if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
while ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'q' ) {
$K_end = $Kn;
$Kn += 1;
if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
}
return $K_end;
};
my $add_phantom_semicolon = sub {
my ($KK) = @_;
my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
return unless ( defined($Kp) );
# we are only adding semicolons for certain block types
my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
return
unless ( $ok_to_add_semicolon_for_block_type{$block_type}
|| $block_type =~ /^(sub|package)/
|| $block_type =~ /^\w+\:$/ );
my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
# Do not add a semicolon if...
return
if (
# it would follow a comment (and be isolated)
$previous_nonblank_type eq '#'
# it follows a code block ( because they are not always wanted
# there and may add clutter)
|| $rLL_new->[$Kp]->[_BLOCK_TYPE_]
# it would follow a label
|| $previous_nonblank_type eq 'J'
# it would be inside a 'format' statement (and cause syntax error)
|| ( $previous_nonblank_type eq 'k'
&& $previous_nonblank_token =~ /format/ )
);
# Do not add a semicolon if it would impede a weld with an immediately
# following closing token...like this
# { ( some code ) }
# ^--No semicolon can go here
# look at the previous token... note use of the _NEW rLL array here,
# but sequence numbers are invariant.
my $seqno_inner = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
# If it is also a CLOSING token we have to look closer...
if (
$seqno_inner
&& $is_closing_token{$previous_nonblank_token}
# we only need to look if there is just one inner container..
&& defined( $rchildren_of_seqno->{$type_sequence} )
&& @{ $rchildren_of_seqno->{$type_sequence} } == 1
)
{
# Go back and see if the corresponding two OPENING tokens are also
# together. Note that we are using the OLD K indexing here:
my $K_outer_opening = $K_opening_by_seqno{$type_sequence};
if ( defined($K_outer_opening) ) {
my $K_nxt = $self->K_next_nonblank($K_outer_opening);
if ( defined($K_nxt) ) {
my $seqno_nxt = $rLL->[$K_nxt]->[_TYPE_SEQUENCE_];
# Is the next token after the outer opening the same as
# our inner closing (i.e. same sequence number)?
# If so, do not insert a semicolon here.
return if ( $seqno_nxt && $seqno_nxt == $seqno_inner );
}
}
}
# We will insert an empty semicolon here as a placeholder. Later, if
# it becomes the last token on a line, we will bring it to life. The
# advantage of doing this is that (1) we just have to check line
# endings, and (2) the phantom semicolon has zero width and therefore
# won't cause needless breaks of one-line blocks.
my $Ktop = -1;
if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
&& $want_left_space{';'} == WS_NO )
{
# convert the blank into a semicolon..
# be careful: we are working on the new stack top
# on a token which has been stored.
my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', ' ' );
# Convert the existing blank to:
# a phantom semicolon for one_line_block option = 0 or 1
# a real semicolon for one_line_block option = 2
my $tok = '';
my $len_tok = 0;
if ( $rOpts_one_line_block_semicolons == 2 ) {
$tok = ';';
$len_tok = 1;
}
$rLL_new->[$Ktop]->[_TOKEN_] = $tok;
$rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok;
$rLL_new->[$Ktop]->[_TYPE_] = ';';
$rLL_new->[$Ktop]->[_SLEVEL_] =
$rLL->[$KK]->[_SLEVEL_];
push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
# Then store a new blank
$store_token->($rcopy);
}
else {
# insert a new token
my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' );
$rcopy->[_SLEVEL_] = $rLL->[$KK]->[_SLEVEL_];
$store_token->($rcopy);
push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
}
};
my $check_Q = sub {
# Check that a quote looks okay
# This sub works but needs to by sync'd with the log file output
# before it can be used.
my ( $KK, $Kfirst, $line_number ) = @_;
my $token = $rLL->[$KK]->[_TOKEN_];
$self->note_embedded_tab($line_number) if ( $token =~ "\t" );
my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
return unless ( defined($Kp) );
my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
my $previous_nonblank_type_2 = 'b';
my $previous_nonblank_token_2 = "";
my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
if ( defined($Kpp) ) {
$previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
$previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
}
my $next_nonblank_token = "";
my $Kn = $KK + 1;
if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
if ( $Kn <= $Kmax ) {
$next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
}
my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
# make note of something like '$var = s/xxx/yyy/;'
# in case it should have been '$var =~ s/xxx/yyy/;'
if (
$token =~ /^(s|tr|y|m|\/)/
&& $previous_nonblank_token =~ /^(=|==|!=)$/
# preceded by simple scalar
&& $previous_nonblank_type_2 eq 'i'
&& $previous_nonblank_token_2 =~ /^\$/
# followed by some kind of termination
# (but give complaint if we can not see far enough ahead)
&& $next_nonblank_token =~ /^[; \)\}]$/
# scalar is not declared
&& !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ )
)
{
my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
complain(
"Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
);
}
};
# Main loop over all lines of the file
my $last_K_out;
# Testing option to break qw. Do not use; it can make a mess.
my $ALLOW_BREAK_MULTILINE_QW = 0;
my $in_multiline_qw;
foreach my $line_of_tokens ( @{$rlines} ) {
my $input_line_number = $line_of_tokens->{_line_number};
my $last_line_type = $line_type;
$line_type = $line_of_tokens->{_line_type};
next unless ( $line_type eq 'CODE' );
my $last_CODE_type = $CODE_type;
$CODE_type = $line_of_tokens->{_code_type};
my $rK_range = $line_of_tokens->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
next unless defined($Kfirst);
# Check for correct sequence of token indexes...
# An error here means that sub write_line() did not correctly
# package the tokenized lines as it received them. If we
# get a fault here it has not output a continuous sequence
# of K values. Or a line of CODE may have been mismarked as
# something else.
if ( defined($last_K_out) ) {
if ( $Kfirst != $last_K_out + 1 ) {
Fault(
"Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
);
}
}
else {
# The first token should always have been given index 0 by sub
# write_line()
if ( $Kfirst != 0 ) {
Fault("Program Bug: first K is $Kfirst but should be 0");
}
}
$last_K_out = $Klast;
# Handle special lines of code
if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
# CODE_types are as follows.
# 'BL' = Blank Line
# 'VB' = Verbatim - line goes out verbatim
# 'FS' = Format Skipping - line goes out verbatim, no blanks
# 'IO' = Indent Only - only indentation may be changed
# 'NIN' = No Internal Newlines - line does not get broken
# 'HSC'=Hanging Side Comment - fix this hanging side comment
# 'BC'=Block Comment - an ordinary full line comment
# 'SBC'=Static Block Comment - a block comment which does not get
# indented
# 'SBCX'=Static Block Comment Without Leading Space
# 'VER'=VERSION statement
# '' or (undefined) - no restructions
# For a hanging side comment we insert an empty quote before
# the comment so that it becomes a normal side comment and
# will be aligned by the vertical aligner
if ( $CODE_type eq 'HSC' ) {
# Safety Check: This must be a line with one token (a comment)
my $rtoken_vars = $rLL->[$Kfirst];
if ( $Kfirst == $Klast && $rtoken_vars->[_TYPE_] eq '#' ) {
# Note that even if the flag 'noadd-whitespace' is set, we
# will make an exception here and allow a blank to be
# inserted to push the comment to the right. We can think
# of this as an adjustment of indentation rather than
# whitespace between tokens. This will also prevent the
# hanging side comment from getting converted to a block
# comment if whitespace gets deleted, as for example with
# the -extrude and -mangle options.
my $rcopy = copy_token_as_type( $rtoken_vars, 'q', '' );
$store_token->($rcopy);
$rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
$store_token->($rcopy);
$store_token->($rtoken_vars);
next;
}
else {
# This line was mis-marked by sub scan_comment
Fault(
"Program bug. A hanging side comment has been mismarked"
);
}
}
# Copy tokens unchanged
foreach my $KK ( $Kfirst .. $Klast ) {
$store_token->( $rLL->[$KK] );
}
next;
}
# Handle normal line..
# Insert any essential whitespace between lines
# if last line was normal CODE.
# Patch for rt #125012: use K_previous_code rather than '_nonblank'
# because comments may disappear.
my $type_next = $rLL->[$Kfirst]->[_TYPE_];
my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
my $Kp = $self->K_previous_code( undef, $rLL_new );
if ( $last_line_type eq 'CODE'
&& $type_next ne 'b'
&& defined($Kp) )
{
my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
my $type_p = $rLL_new->[$Kp]->[_TYPE_];
my ( $token_pp, $type_pp );
my $Kpp = $self->K_previous_code( $Kp, $rLL_new );
if ( defined($Kpp) ) {
$token_pp = $rLL_new->[$Kpp]->[_TOKEN_];
$type_pp = $rLL_new->[$Kpp]->[_TYPE_];
}
else {
$token_pp = ";";
$type_pp = ';';
}
if (
is_essential_whitespace(
$token_pp, $type_pp, $token_p,
$type_p, $token_next, $type_next,
)
)
{
# Copy this first token as blank, but use previous line number
my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', ' ' );
$rcopy->[_LINE_INDEX_] =
$rLL_new->[-1]->[_LINE_INDEX_];
$store_token->($rcopy);
}
}
# loop to copy all tokens on this line, with any changes
my $type_sequence;
for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) {
$rtoken_vars = $rLL->[$KK];
my $token = $rtoken_vars->[_TOKEN_];
my $type = $rtoken_vars->[_TYPE_];
my $last_type_sequence = $type_sequence;
$type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
# Handle a blank space ...
if ( $type eq 'b' ) {
# Delete it if not wanted by whitespace rules
# or we are deleting all whitespace
# Note that whitespace flag is a flag indicating whether a
# white space BEFORE the token is needed
next if ( $KK >= $Klast ); # skip terminal blank
my $Knext = $KK + 1;
my $ws = $rwhitespace_flags->[$Knext];
if ( $ws == -1
|| $rOpts_delete_old_whitespace )
{
# FIXME: maybe switch to using _new
my $Kp = $self->K_previous_nonblank($KK);
next unless defined($Kp);
my $token_p = $rLL->[$Kp]->[_TOKEN_];
my $type_p = $rLL->[$Kp]->[_TYPE_];
my ( $token_pp, $type_pp );
my $Kpp = $self->K_previous_nonblank($Kp);
if ( defined($Kpp) ) {
$token_pp = $rLL->[$Kpp]->[_TOKEN_];
$type_pp = $rLL->[$Kpp]->[_TYPE_];
}
else {
$token_pp = ";";
$type_pp = ';';
}
my $token_next = $rLL->[$Knext]->[_TOKEN_];
my $type_next = $rLL->[$Knext]->[_TYPE_];
my $do_not_delete = is_essential_whitespace(
$token_pp, $type_pp, $token_p,
$type_p, $token_next, $type_next,
);
next unless ($do_not_delete);
}
# make it just one character if allowed
if ($rOpts_add_whitespace) {
$rtoken_vars->[_TOKEN_] = ' ';
}
$store_token->($rtoken_vars);
next;
}
# Handle a nonblank token...
if ($type_sequence) {
if ( $is_opening_token{$token} ) {
my $seqno_parent = $seqno_stack{ $depth_next - 1 };
$seqno_parent = SEQ_ROOT unless defined($seqno_parent);
push @{ $rchildren_of_seqno->{$seqno_parent} },
$type_sequence;
$rparent_of_seqno->{$type_sequence} = $seqno_parent;
$seqno_stack{$depth_next} = $type_sequence;
$KK_stack{$depth_next} = $KK;
$K_opening_by_seqno{$type_sequence} = $KK;
$depth_next++;
if ( $depth_next > $depth_next_max ) {
$depth_next_max = $depth_next;
}
}
elsif ( $is_closing_token{$token} ) {
# Insert a tentative missing semicolon if the next token is
# a closing block brace
if (
$type eq '}'
&& $token eq '}'
# not preceded by a ';'
&& $last_nonblank_type ne ';'
# and this is not a VERSION stmt (is all one line, we
# are not inserting semicolons on one-line blocks)
&& $CODE_type ne 'VER'
# and we are allowed to add semicolons
&& $rOpts->{'add-semicolons'}
)
{
$add_phantom_semicolon->($KK);
}
# Update the stack... Note that we do this after adding
# any phantom semicolons so that they will be counted in
# the correct container.
$depth_next--;
# keep track of broken lists for later formatting
my $seqno_test = $seqno_stack{$depth_next};
my $KK_open = $KK_stack{$depth_next};
my $seqno_outer = $seqno_stack{ $depth_next - 1 };
if ( defined($seqno_test)
&& defined($KK_open)
&& $seqno_test == $type_sequence )
{
my $lx_open = $rLL->[$KK_open]->[_LINE_INDEX_];
my $lx_close = $rLL->[$KK]->[_LINE_INDEX_];
if ( $lx_open < $lx_close ) {
$ris_broken_container->{$type_sequence} =
$lx_close - $lx_open;
if ( defined($seqno_outer) ) {
$rhas_broken_container->{$seqno_outer} = 1;
}
}
}
}
}
# Modify certain tokens here for whitespace
# The following is not yet done, but could be:
# sub (x x x)
elsif ( $type =~ /^[wit]$/ ) {
# Examples: <<snippets/space1.in>>
# change '$ var' to '$var' etc
# change '@ ' to '@'
my ( $sigil, $word ) = split /\s+/, $token, 2;
if ( length($sigil) == 1
&& $sigil =~ /^[\$\&\%\*\@]$/ )
{
$token = $sigil;
$token .= $word if ($word);
$rtoken_vars->[_TOKEN_] = $token;
}
# Split identifiers with leading arrows, inserting blanks if
# necessary. It is easier and safer here than in the
# tokenizer. For example '->new' becomes two tokens, '->' and
# 'new' with a possible blank between.
#
# Note: there is a related patch in sub set_whitespace_flags
if ( substr( $token, 0, 1 ) eq '-'
&& $token =~ /^\-\>(.*)$/
&& $1 )
{
my $token_save = $1;
my $type_save = $type;
# Change '-> new' to '->new'
$token_save =~ s/^\s+//g;
# store a blank to left of arrow if necessary
my $Kprev = $self->K_previous_nonblank($KK);
if ( defined($Kprev)
&& $rLL->[$Kprev]->[_TYPE_] ne 'b'
&& $rOpts_add_whitespace
&& $want_left_space{'->'} == WS_YES )
{
my $rcopy =
copy_token_as_type( $rtoken_vars, 'b', ' ' );
$store_token->($rcopy);
}
# then store the arrow
my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' );
$store_token->($rcopy);
# store a blank after the arrow if requested
# added for issue git #33
if ( $want_right_space{'->'} == WS_YES ) {
my $rcopy =
copy_token_as_type( $rtoken_vars, 'b', ' ' );
$store_token->($rcopy);
}
# then reset the current token to be the remainder,
# and reset the whitespace flag according to the arrow
$token = $rtoken_vars->[_TOKEN_] = $token_save;
$type = $rtoken_vars->[_TYPE_] = $type_save;
$store_token->($rtoken_vars);
next;
}
if ( $token =~ /$ANYSUB_PATTERN/ ) {
# -spp = 0 : no space before opening prototype paren
# -spp = 1 : stable (follow input spacing)
# -spp = 2 : always space before opening prototype paren
my $spp = $rOpts->{'space-prototype-paren'};
if ( defined($spp) ) {
if ( $spp == 0 ) { $token =~ s/\s+\(/\(/; }
elsif ( $spp == 2 ) { $token =~ s/\(/ (/; }
}
# one space max, and no tabs
$token =~ s/\s+/ /g;
$rtoken_vars->[_TOKEN_] = $token;
}
# clean up spaces in package identifiers, like
# "package Bob::Dog;"
if ( $token =~ /^package\s/ ) {
$token =~ s/\s+/ /g;
$rtoken_vars->[_TOKEN_] = $token;
}
# trim identifiers of trailing blanks which can occur
# under some unusual circumstances, such as if the
# identifier 'witch' has trailing blanks on input here:
#
# sub
# witch
# () # prototype may be on new line ...
# ...
if ( $type eq 'i' ) {
$token =~ s/\s+$//g;
$rtoken_vars->[_TOKEN_] = $token;
}
}
# handle semicolons
elsif ( $type eq ';' ) {
# Remove unnecessary semicolons, but not after bare
# blocks, where it could be unsafe if the brace is
# mistokenized.
if (
$rOpts->{'delete-semicolons'}
&& (
(
$last_nonblank_type eq '}'
&& (
$is_block_without_semicolon{
$last_nonblank_block_type}
|| $last_nonblank_block_type =~ /$SUB_PATTERN/
|| $last_nonblank_block_type =~ /^\w+:$/ )
)
|| $last_nonblank_type eq ';'
)
)
{
# This looks like a deletable semicolon, but even if a
# semicolon can be deleted it is necessarily best to do so.
# We apply these additional rules for deletion:
# - Always ok to delete a ';' at the end of a line
# - Never delete a ';' before a '#' because it would
# promote it to a block comment.
# - If a semicolon is not at the end of line, then only
# delete if it is followed by another semicolon or closing
# token. This includes the comment rule. It may take
# two passes to get to a final state, but it is a little
# safer. For example, keep the first semicolon here:
# eval { sub bubba { ok(0) }; ok(0) } || ok(1);
# It is not required but adds some clarity.
my $ok_to_delete = 1;
if ( $KK < $Klast ) {
my $Kn = $self->K_next_nonblank($KK);
if ( defined($Kn) && $Kn <= $Klast ) {
my $next_nonblank_token_type =
$rLL->[$Kn]->[_TYPE_];
$ok_to_delete = $next_nonblank_token_type eq ';'
|| $next_nonblank_token_type eq '}';
}
}
# do not delete only nonblank token in a file
else {
my $Kn = $self->K_next_nonblank($KK);
$ok_to_delete = defined($Kn) || $nonblank_token_count;
}
if ($ok_to_delete) {
$self->note_deleted_semicolon($input_line_number);
next;
}
else {
write_logfile_entry("Extra ';'\n");
}
}
}
# patch to add space to something like "x10"
# This avoids having to split this token in the pre-tokenizer
elsif ( $type eq 'n' ) {
if ( $token =~ /^x\d+/ ) {
$token =~ s/x/x /;
$rtoken_vars->[_TOKEN_] = $token;
}
}
# check for a qw quote
elsif ( $type eq 'q' ) {
# trim blanks from right of qw quotes
# (To avoid trimming qw quotes use -ntqw; the tokenizer handles
# this)
$token =~ s/\s*$//;
$rtoken_vars->[_TOKEN_] = $token;
$self->note_embedded_tab($input_line_number)
if ( $token =~ "\t" );
if ($in_multiline_qw) {
# If we are at the end of a multiline qw ..
if ( $in_multiline_qw == $KK ) {
# Split off the closing delimiter character
# so that the formatter can put a line break there if necessary
my $part1 = $token;
my $part2 = substr( $part1, -1, 1, "" );
if ($part1) {
my $rcopy =
copy_token_as_type( $rtoken_vars, 'q', $part1 );
$store_token->($rcopy);
$token = $part2;
$rtoken_vars->[_TOKEN_] = $token;
}
$in_multiline_qw = undef;
# store without preceding blank
$store_token->($rtoken_vars);
next;
}
else {
# continuing a multiline qw
$store_token->($rtoken_vars);
next;
}
}
else {
# we are encountered new qw token...see if multiline
if ($ALLOW_BREAK_MULTILINE_QW) {
my $K_end = $K_end_q->($KK);
if ( $K_end != $KK ) {
# Starting multiline qw...
# set flag equal to the ending K
$in_multiline_qw = $K_end;
# Split off the leading part so that the formatter can
# put a line break there if necessary
if ( $token =~ /^(qw\s*.)(.*)$/ ) {
my $part1 = $1;
my $part2 = $2;
if ($part2) {
my $rcopy =
copy_token_as_type( $rtoken_vars, 'q',
$part1 );
$store_token_and_space->(
$rcopy,
$rwhitespace_flags->[$KK] == WS_YES
);
$token = $part2;
$rtoken_vars->[_TOKEN_] = $token;
# Second part goes without intermediate blank
$store_token->($rtoken_vars);
next;
}
}
}
}
else {
# this is a new single token qw -
# store with possible preceding blank
$store_token_and_space->(
$rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
);
next;
}
}
} ## end if ( $type eq 'q' )
# change 'LABEL :' to 'LABEL:'
elsif ( $type eq 'J' ) {
$token =~ s/\s+//g;
$rtoken_vars->[_TOKEN_] = $token;
}
# check a quote for problems
elsif ( $type eq 'Q' ) {
$check_Q->( $KK, $Kfirst, $input_line_number );
}
# Store this token with possible previous blank
$store_token_and_space->(
$rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
);
} # End token loop
} # End line loop
# Walk backwards through the tokens, making forward links to sequence items.
if ( @{$rLL_new} ) {
my $KNEXT;
for ( my $KK = @{$rLL_new} - 1 ; $KK >= 0 ; $KK-- ) {
$rLL_new->[$KK]->[_KNEXT_SEQ_ITEM_] = $KNEXT;
if ( $rLL_new->[$KK]->[_TYPE_SEQUENCE_] ) { $KNEXT = $KK }
}
$self->[_K_first_seq_item_] = $KNEXT;
}
# find and remember lists by sequence number
# TODO: eventually this should hold a name for the list
my $ris_list_by_seqno = {};
foreach my $seqno ( keys %{$K_opening_container} ) {
my $K_opening = $K_opening_container->{$seqno};
my $block_type = $rLL_new->[$K_opening]->[_BLOCK_TYPE_];
next if ($block_type);
my $rtype_count = $rtype_count_by_seqno->{$seqno};
next unless ($rtype_count);
my $fat_comma_count = $rtype_count->{'=>'};
my $comma_count = $rtype_count->{','};
my $semicolon_count = $rtype_count->{';'};
# This definition of a list is sufficient for our needs
if ( ( $fat_comma_count || $comma_count ) && !$semicolon_count ) {
$ris_list_by_seqno->{$seqno} = $seqno;
}
}
# Reset memory to be the new array
$self->[_rLL_] = $rLL_new;
my $Klimit;
if ( @{$rLL_new} ) { $Klimit = @{$rLL_new} - 1 }
$self->[_Klimit_] = $Klimit;
$self->[_K_opening_container_] = $K_opening_container;
$self->[_K_closing_container_] = $K_closing_container;
$self->[_K_opening_ternary_] = $K_opening_ternary;
$self->[_K_closing_ternary_] = $K_closing_ternary;
$self->[_rK_phantom_semicolons_] = $rK_phantom_semicolons;
$self->[_rtype_count_by_seqno_] = $rtype_count_by_seqno;
$self->[_ris_broken_container_] = $ris_broken_container;
$self->[_rhas_broken_container_] = $rhas_broken_container;
$self->[_rparent_of_seqno_] = $rparent_of_seqno;
$self->[_rchildren_of_seqno_] = $rchildren_of_seqno;
$self->[_ris_list_by_seqno_] = $ris_list_by_seqno;
# DEBUG OPTION: make sure the new array looks okay.
# This is no longer needed but should be retained for future development.
DEVEL_MODE && $self->check_token_array();
# reset the token limits of each line
$self->resync_lines_and_tokens();
return;
}
sub copy_token_as_type {
# This provides a quick way to create a new token by
# slightly modifying an existing token.
my ( $rold_token, $type, $token ) = @_;
if ( $type eq 'b' ) {
$token = " " unless defined($token);
}
elsif ( $type eq 'q' ) {
$token = '' unless defined($token);
}
elsif ( $type eq '->' ) {
$token = '->' unless defined($token);
}
elsif ( $type eq ';' ) {
$token = ';' unless defined($token);
}
else {
# This sub assumes it will be called with just two types, 'b' or 'q'
Fault(
"Programming error: copy_token_as has type $type but should be 'b' or 'q'"
);
}
my @rnew_token = @{$rold_token};
$rnew_token[_TYPE_] = $type;
$rnew_token[_TOKEN_] = $token;
$rnew_token[_BLOCK_TYPE_] = '';
$rnew_token[_CONTAINER_ENVIRONMENT_] = '';
$rnew_token[_TYPE_SEQUENCE_] = '';
return \@rnew_token;
}
sub Debug_dump_tokens {
# a debug routine, not normally used
my ( $self, $msg ) = @_;
my $rLL = $self->[_rLL_];
my $nvars = @{$rLL};
print STDERR "$msg\n";
print STDERR "ntokens=$nvars\n";
print STDERR "K\t_TOKEN_\t_TYPE_\n";
my $K = 0;
foreach my $item ( @{$rLL} ) {
print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n";
$K++;
}
return;
}
sub K_next_code {
my ( $self, $KK, $rLL ) = @_;
# return the index K of the next nonblank, non-comment token
return unless ( defined($KK) && $KK >= 0 );
# use the standard array unless given otherwise
$rLL = $self->[_rLL_] unless ( defined($rLL) );
my $Num = @{$rLL};
my $Knnb = $KK + 1;
while ( $Knnb < $Num ) {
if ( !defined( $rLL->[$Knnb] ) ) {
# We seem to have encountered a gap in our array.
# This shouldn't happen because sub write_line() pushed
# items into the $rLL array.
Fault("Undefined entry for k=$Knnb");
}
if ( $rLL->[$Knnb]->[_TYPE_] ne 'b'
&& $rLL->[$Knnb]->[_TYPE_] ne '#' )
{
return $Knnb;
}
$Knnb++;
}
return;
}
sub K_next_nonblank {
my ( $self, $KK, $rLL ) = @_;
# return the index K of the next nonblank token, or
# return undef if none
return unless ( defined($KK) && $KK >= 0 );
# The third arg allows this routine to be used on any array. This is
# useful in sub respace_tokens when we are copying tokens from an old $rLL
# to a new $rLL array. But usually the third arg will not be given and we
# will just use the $rLL array in $self.
$rLL = $self->[_rLL_] unless ( defined($rLL) );
my $Num = @{$rLL};
my $Knnb = $KK + 1;
return unless ( $Knnb < $Num );
return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
return unless ( ++$Knnb < $Num );
return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
# Backup loop. Very unlikely to get here; it means we have neighboring
# blanks in the token stream.
$Knnb++;
while ( $Knnb < $Num ) {
# Safety check, this fault shouldn't happen: The $rLL array is the
# main array of tokens, so all entries should be used. It is
# initialized in sub write_line, and then re-initialized by sub
# $store_token() within sub respace_tokens. Tokens are pushed on
# so there shouldn't be any gaps.
if ( !defined( $rLL->[$Knnb] ) ) {
Fault("Undefined entry for k=$Knnb");
}
if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
$Knnb++;
}
return;
}
sub K_previous_code {
# return the index K of the previous nonblank, non-comment token
# Call with $KK=undef to start search at the top of the array
my ( $self, $KK, $rLL ) = @_;
# use the standard array unless given otherwise
$rLL = $self->[_rLL_] unless ( defined($rLL) );
my $Num = @{$rLL};
if ( !defined($KK) ) { $KK = $Num }
elsif ( $KK > $Num ) {
# This fault can be caused by a programming error in which a bad $KK is
# given. The caller should make the first call with KK_new=undef to
# avoid this error.
Fault(
"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
);
}
my $Kpnb = $KK - 1;
while ( $Kpnb >= 0 ) {
if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b'
&& $rLL->[$Kpnb]->[_TYPE_] ne '#' )
{
return $Kpnb;
}
$Kpnb--;
}
return;
}
sub K_previous_nonblank {
# return index of previous nonblank token before item K;
# Call with $KK=undef to start search at the top of the array
my ( $self, $KK, $rLL ) = @_;
# use the standard array unless given otherwise
$rLL = $self->[_rLL_] unless ( defined($rLL) );
my $Num = @{$rLL};
if ( !defined($KK) ) { $KK = $Num }
elsif ( $KK > $Num ) {
# This fault can be caused by a programming error in which a bad $KK is
# given. The caller should make the first call with KK_new=undef to
# avoid this error.
Fault(
"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
);
}
my $Kpnb = $KK - 1;
return unless ( $Kpnb >= 0 );
return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
return unless ( --$Kpnb >= 0 );
return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
# Backup loop. We should not get here unless some routine
# slipped repeated blanks into the token stream.
return unless ( --$Kpnb >= 0 );
while ( $Kpnb >= 0 ) {
if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
$Kpnb--;
}
return;
}
sub get_old_line_index {
# return index of the original line that token K was on
my ( $self, $K ) = @_;
my $rLL = $self->[_rLL_];
return 0 unless defined($K);
return $rLL->[$K]->[_LINE_INDEX_];
}
sub get_old_line_count {
# return number of input lines separating two tokens
my ( $self, $Kbeg, $Kend ) = @_;
my $rLL = $self->[_rLL_];
return 0 unless defined($Kbeg);
return 0 unless defined($Kend);
return $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_] + 1;
}
sub parent_seqno_by_K {
# Return the sequence number of the parent container of token K, if any.
my ( $self, $KK ) = @_;
return unless defined($KK);
# Note: This routine is relatively slow. I tried replacing it with a hash
# which is easily created in sub respace_tokens. But the total time with a
# hash was greater because this routine is called once per line whereas a
# hash must be created token-by-token.
my $rLL = $self->[_rLL_];
my $KNEXT = $KK;
# For example, consider the following with seqno=5 of the '[' and ']'
# being called with index K of the first token of each line:
# # result
# push @tests, # -
# [ # -
# sub { 99 }, 'do {&{%s} for 1,2}', # 5
# '(&{})(&{})', undef, # 5
# [ 2, 2, 0 ], 0 # 5
# ]; # -
my $parent_seqno;
while ( defined($KNEXT) ) {
my $Kt = $KNEXT;
$KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
my $rtoken_vars = $rLL->[$Kt];
my $type = $rtoken_vars->[_TYPE_];
my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
# if next container token is closing, it is the parent seqno
if ( $is_closing_type{$type} ) {
if ( $Kt > $KK ) {
$parent_seqno = $type_sequence;
}
else {
$parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
}
last;
}
# if next container token is opening, we want its parent container
elsif ( $is_opening_type{$type} ) {
$parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
last;
}
# not a container - must be ternary - keep going
}
return $parent_seqno;
}
sub is_list_by_K {
# Return true if token K is in a list
my ( $self, $KK ) = @_;
my $parent_seqno = $self->parent_seqno_by_K($KK);
return unless defined($parent_seqno);
return $self->[_ris_list_by_seqno_]->{$parent_seqno};
}
sub is_list_by_seqno {
# Return true if the immediate contents of a container appears to be a
# list.
my ( $self, $seqno ) = @_;
return unless defined($seqno);
return $self->[_ris_list_by_seqno_]->{$seqno};
}
sub resync_lines_and_tokens {
my $self = shift;
my $rLL = $self->[_rLL_];
my $Klimit = $self->[_Klimit_];
my $rlines = $self->[_rlines_];
my @Krange_code_without_comments;
my @Klast_valign_code;
# Re-construct the arrays of tokens associated with the original input lines
# since they have probably changed due to inserting and deleting blanks
# and a few other tokens.
my $Kmax = -1;
# This is the next token and its line index:
my $Knext = 0;
my $inext;
if ( defined($rLL) && @{$rLL} ) {
$Kmax = @{$rLL} - 1;
$inext = $rLL->[$Knext]->[_LINE_INDEX_];
}
# Remember the most recently output token index
my $Klast_out;
my $iline = -1;
foreach my $line_of_tokens ( @{$rlines} ) {
$iline++;
my $line_type = $line_of_tokens->{_line_type};
my $CODE_type = $line_of_tokens->{_code_type};
if ( $line_type eq 'CODE' ) {
my @K_array;
my $rK_range;
if ( $Knext <= $Kmax ) {
$inext = $rLL->[$Knext]->[_LINE_INDEX_];
while ( $inext <= $iline ) {
push @K_array, $Knext;
$Knext += 1;
if ( $Knext > $Kmax ) {
$inext = undef;
last;
}
$inext = $rLL->[$Knext]->[_LINE_INDEX_];
}
}
# Delete any terminal blank token
if (@K_array) {
if ( $rLL->[ $K_array[-1] ]->[_TYPE_] eq 'b' ) {
pop @K_array;
}
}
# Define the range of K indexes for the line:
# $Kfirst = index of first token on line
# $Klast_out = index of last token on line
my ( $Kfirst, $Klast );
if (@K_array) {
$Kfirst = $K_array[0];
$Klast = $K_array[-1];
$Klast_out = $Klast;
if ( defined($Kfirst) ) {
# Save ranges of non-comment code. This will be used by
# sub keep_old_line_breaks.
if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) {
push @Krange_code_without_comments, [ $Kfirst, $Klast ];
}
# Only save ending K indexes of code types which are blank
# or 'VER'. These will be used for a convergence check.
# See related code in sub 'send_lines_to_vertical_aligner'.
if ( !$CODE_type
|| $CODE_type eq 'VER' )
{
push @Klast_valign_code, $Klast;
}
}
}
# It is only safe to trim the actual line text if the input
# line had a terminal blank token. Otherwise, we may be
# in a quote.
if ( $line_of_tokens->{_ended_in_blank_token} ) {
$line_of_tokens->{_line_text} =~ s/\s+$//;
}
$line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
# Deleting semicolons can create new empty code lines
# which should be marked as blank
if ( !defined($Kfirst) ) {
my $code_type = $line_of_tokens->{_code_type};
if ( !$code_type ) {
$line_of_tokens->{_code_type} = 'BL';
}
}
}
}
# There shouldn't be any nodes beyond the last one. This routine is
# relinking lines and tokens after the tokens have been respaced. A fault
# here indicates some kind of bug has been introduced into the above loops.
if ( defined($inext) ) {
Fault("unexpected tokens at end of file when reconstructing lines");
}
$self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments;
# Setup the convergence test in the FileWriter based on line-ending indexes
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->setup_convergence_test( \@Klast_valign_code );
return;
}
sub keep_old_line_breaks {
# Called once per file to find and mark any old line breaks which
# should be kept. We will be translating the input hashes into
# token indexes.
my ($self) = @_;
return unless ( %keep_break_before_type || %keep_break_after_type );
my $rLL = $self->[_rLL_];
my $rKrange_code_without_comments =
$self->[_rKrange_code_without_comments_];
my $rbreak_before_Kfirst = $self->[_rbreak_before_Kfirst_];
my $rbreak_after_Klast = $self->[_rbreak_after_Klast_];
foreach my $item ( @{$rKrange_code_without_comments} ) {
my ( $Kfirst, $Klast ) = @{$item};
my $type_first = $rLL->[$Kfirst]->[_TYPE_];
if ( $keep_break_before_type{$type_first} ) {
$rbreak_before_Kfirst->{$Kfirst} = 1;
}
my $type_last = $rLL->[$Klast]->[_TYPE_];
if ( $keep_break_after_type{$type_last} ) {
$rbreak_after_Klast->{$Klast} = 1;
}
}
return;
}
sub weld_containers {
# Called once per file to do any welding operations requested by --weld*
# flags.
my ($self) = @_;
return if ( $rOpts->{'indent-only'} );
return unless ($rOpts_add_newlines);
if ( $rOpts->{'weld-nested-containers'} ) {
# if called, weld_nested_containers must be called before other weld
# operations. This is because weld_nested_containers could overwrite
# hash values written by weld_cuddled_blocks and weld_nested_quotes.
$self->weld_nested_containers();
$self->weld_nested_quotes();
}
# Note that weld_nested_containers() changes the _LEVEL_ values, so
# weld_cuddled_blocks must use the _TRUE_LEVEL_ values instead.
# Here is a good test case to be sure that both cuddling and welding
# are working and not interfering with each other: <<snippets/ce_wn1.in>>
# perltidy -wn -ce
# if ($BOLD_MATH) { (
# $labels, $comment,
# join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
# ) } else { (
# &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
# $after
# ) }
$self->weld_cuddled_blocks();
# After all welding is complete, we make a note of which seqence numbers
# have welds for quick checks.
my @q;
my $ris_welded_seqno = $self->[_ris_welded_seqno_];
@q = keys %{ $self->[_rweld_len_left_closing_] };
@{$ris_welded_seqno}{@q} = (1) x scalar(@q);
@q = keys %{ $self->[_rweld_len_right_closing_] };
@{$ris_welded_seqno}{@q} = (1) x scalar(@q);
@q = keys %{ $self->[_rweld_len_left_opening_] };
@{$ris_welded_seqno}{@q} = (1) x scalar(@q);
@q = keys %{ $self->[_rweld_len_right_opening_] };
@{$ris_welded_seqno}{@q} = (1) x scalar(@q);
# total number of sequenced items involved in a weld, for
# quick checks for avoiding calls to weld_len_xxx
$total_weld_count = 0 + keys %{$ris_welded_seqno};
return;
}
sub cumulative_length_before_K {
my ( $self, $KK ) = @_;
my $rLL = $self->[_rLL_];
return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
}
sub cumulative_length_after_K {
# NOTE: This routine not currently called; could be deleted
my ( $self, $KK ) = @_;
my $rLL = $self->[_rLL_];
return $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
}
sub weld_cuddled_blocks {
my ($self) = @_;
# Called once per file to handle cuddled formatting
my $rweld_len_right_closing = $self->[_rweld_len_right_closing_];
# This routine implements the -cb flag by finding the appropriate
# closing and opening block braces and welding them together.
return unless ( %{$rcuddled_block_types} );
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $rbreak_container = $self->[_rbreak_container_];
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $length_to_opening_seqno = sub {
my ($seqno) = @_;
my $KK = $K_opening_container->{$seqno};
my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
return $lentot;
};
my $length_to_closing_seqno = sub {
my ($seqno) = @_;
my $KK = $K_closing_container->{$seqno};
my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
return $lentot;
};
my $is_broken_block = sub {
# a block is broken if the input line numbers of the braces differ
# we can only cuddle between broken blocks
my ($seqno) = @_;
my $K_opening = $K_opening_container->{$seqno};
return unless ( defined($K_opening) );
my $K_closing = $K_closing_container->{$seqno};
return unless ( defined($K_closing) );
return $rbreak_container->{$seqno}
|| $rLL->[$K_closing]->[_LINE_INDEX_] !=
$rLL->[$K_opening]->[_LINE_INDEX_];
};
# A stack to remember open chains at all levels: This is a hash rather than
# an array for safety because negative levels can occur in files with
# errors. This allows us to keep processing with negative levels.
# $in_chain{$level} = [$chain_type, $type_sequence];
my %in_chain;
my $CBO = $rOpts->{'cuddled-break-option'};
# loop over structure items to find cuddled pairs
my $level = 0;
my $KNEXT = $self->[_K_first_seq_item_];
while ( defined($KNEXT) ) {
my $KK = $KNEXT;
$KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
my $rtoken_vars = $rLL->[$KK];
my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
if ( !$type_sequence ) {
next if ( $KK == 0 ); # first token in file may not be container
# A fault here implies that an error was made in the little loop at
# the bottom of sub 'respace_tokens' which set the values of
# _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
# loop control lines above.
Fault("sequence = $type_sequence not defined at K=$KK");
}
# We use the original levels because they get changed by sub
# 'weld_nested_containers'. So if this were to be called before that
# routine, the levels would be wrong and things would go bad.
my $last_level = $level;
$level = $rtoken_vars->[_LEVEL_TRUE_];
if ( $level < $last_level ) { $in_chain{$last_level} = undef }
elsif ( $level > $last_level ) { $in_chain{$level} = undef }
# We are only looking at code blocks
my $token = $rtoken_vars->[_TOKEN_];
my $type = $rtoken_vars->[_TYPE_];
next unless ( $type eq $token );
if ( $token eq '{' ) {
my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
if ( !$block_type ) {
# patch for unrecognized block types which may not be labeled
my $Kp = $self->K_previous_nonblank($KK);
while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
$Kp = $self->K_previous_nonblank($Kp);
}
next unless $Kp;
$block_type = $rLL->[$Kp]->[_TOKEN_];
}
if ( $in_chain{$level} ) {
# we are in a chain and are at an opening block brace.
# See if we are welding this opening brace with the previous
# block brace. Get their identification numbers:
my $closing_seqno = $in_chain{$level}->[1];
my $opening_seqno = $type_sequence;
# The preceding block must be on multiple lines so that its
# closing brace will start a new line.
if ( !$is_broken_block->($closing_seqno) ) {
next unless ( $CBO == 2 );
$rbreak_container->{$closing_seqno} = 1;
}
# we will let the trailing block be either broken or intact
## && $is_broken_block->($opening_seqno);
# We can weld the closing brace to its following word ..
my $Ko = $K_closing_container->{$closing_seqno};
my $Kon;
if ( defined($Ko) ) {
$Kon = $self->K_next_nonblank($Ko);
}
# ..unless it is a comment
if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) {
my $dlen =
$rLL->[$Kon]->[_CUMULATIVE_LENGTH_] -
$rLL->[ $Ko - 1 ]->[_CUMULATIVE_LENGTH_];
$rweld_len_right_closing->{$closing_seqno} = $dlen;
# Set flag that we want to break the next container
# so that the cuddled line is balanced.
$rbreak_container->{$opening_seqno} = 1
if ($CBO);
}
}
else {
# We are not in a chain. Start a new chain if we see the
# starting block type.
if ( $rcuddled_block_types->{$block_type} ) {
$in_chain{$level} = [ $block_type, $type_sequence ];
}
else {
$block_type = '*';
$in_chain{$level} = [ $block_type, $type_sequence ];
}
}
}
elsif ( $token eq '}' ) {
if ( $in_chain{$level} ) {
# We are in a chain at a closing brace. See if this chain
# continues..
my $Knn = $self->K_next_code($KK);
next unless $Knn;
my $chain_type = $in_chain{$level}->[0];
my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
if (
$rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
)
{
# Note that we do not weld yet because we must wait until
# we we are sure that an opening brace for this follows.
$in_chain{$level}->[1] = $type_sequence;
}
else { $in_chain{$level} = undef }
}
}
}
return;
}
sub find_nested_pairs {
my $self = shift;
# This routine is called once per file to do preliminary work needed for
# the --weld-nested option. This information is also needed for adding
# semicolons.
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $Num = @{$rLL};
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
# We define an array of pairs of nested containers
my @nested_pairs;
# Names of calling routines can either be marked as 'i' or 'w',
# and they may invoke a sub call with an '->'. We will consider
# any consecutive string of such types as a single unit when making
# weld decisions. We also allow a leading !
my $is_name_type = {
'i' => 1,
'w' => 1,
'U' => 1,
'->' => 1,
'!' => 1,
};
# Loop over all closing container tokens
foreach my $inner_seqno ( keys %{$K_closing_container} ) {
my $K_inner_closing = $K_closing_container->{$inner_seqno};
# See if it is immediately followed by another, outer closing token
my $K_outer_closing = $K_inner_closing + 1;
$K_outer_closing += 1
if ( $K_outer_closing < $Num
&& $rLL->[$K_outer_closing]->[_TYPE_] eq 'b' );
next unless ( $K_outer_closing < $Num );
my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_];
next unless ($outer_seqno);
my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_];
next unless ( $is_closing_token{$token_outer_closing} );
# Now we have to check the opening tokens.
my $K_outer_opening = $K_opening_container->{$outer_seqno};
my $K_inner_opening = $K_opening_container->{$inner_seqno};
next unless defined($K_outer_opening) && defined($K_inner_opening);
# Verify that the inner opening token is the next container after the
# outer opening token.
my $K_io_check = $rLL->[$K_outer_opening]->[_KNEXT_SEQ_ITEM_];
next unless defined($K_io_check);
if ( $K_io_check != $K_inner_opening ) {
# The inner opening container does not immediately follow the outer
# opening container, but we may still allow a weld if they are
# separated by a sub signature. For example, we may have something
# like this, where $K_io_check may be at the first 'x' instead of
# 'io'. So we need to hop over the signature and see if we arrive
# at 'io'.
# oo io
# | x x |
# $obj->then( sub ( $code ) {
# ...
# return $c->render(text => '', status => $code);
# } );
# | |
# ic oc
next if $rLL->[$K_inner_opening]->[_BLOCK_TYPE_] ne 'sub';
next if $rLL->[$K_io_check]->[_TOKEN_] ne '(';
my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_];
next unless defined($seqno_signature);
my $K_signature_closing = $K_closing_container->{$seqno_signature};
next unless defined($K_signature_closing);
my $K_test = $rLL->[$K_signature_closing]->[_KNEXT_SEQ_ITEM_];
next
unless ( defined($K_test) && $K_test == $K_inner_opening );
# OK, we have arrived at 'io' in the above diagram. We should put
# a limit on the length or complexity of the signature here. There
# is no perfect way to do this, one way is to put a limit on token
# count. For consistency with older versions, we should allow a
# signature with a single variable to weld, but not with
# multiple variables. A single variable as in 'sub ($code) {' can
# have a $Kdiff of 2 to 4, depending on spacing.
# But two variables like 'sub ($v1,$v2) {' can have a diff of 4 to
# 7, depending on spacing. So to keep formatting consistent with
# previous versions, we will also avoid welding if there is a comma
# in the signature.
my $Kdiff = $K_signature_closing - $K_io_check;
next if ( $Kdiff > 4 );
my $saw_comma;
foreach my $KK ( $K_io_check + 1 .. $K_signature_closing - 1 ) {
if ( $rLL->[$KK]->[_TYPE_] eq ',' ) { $saw_comma = 1; last }
}
next if ($saw_comma);
}
# Yes .. this is a possible nesting pair.
# They can be separated by a small amount.
my $K_diff = $K_inner_opening - $K_outer_opening;
# Count nonblank characters separating them.
if ( $K_diff < 0 ) { next } # Shouldn't happen
my $Kn = $K_outer_opening;
my $nonblank_count = 0;
my $type;
my $is_name;
# Here is an example of a long identifier chain which counts as a
# single nonblank here (this spans about 10 K indexes):
# if ( !Boucherot::SetOfConnections->new->handler->execute(
# ^--K_o_o ^--K_i_o
# @array) )
my $Kn_first = $K_outer_opening;
for (
my $Kn = $K_outer_opening + 1 ;
$Kn <= $K_inner_opening ;
$Kn += 1
)
{
next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' );
if ( !$nonblank_count ) { $Kn_first = $Kn }
if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; }
# skip chain of identifier tokens
my $last_type = $type;
my $last_is_name = $is_name;
$type = $rLL->[$Kn]->[_TYPE_];
$is_name = $is_name_type->{$type};
next if ( $is_name && $last_is_name );
$nonblank_count++;
last if ( $nonblank_count > 2 );
}
if (
# adjacent opening containers, like: do {{
$nonblank_count == 1
# short item following opening paren, like: fun( yyy (
|| ( $nonblank_count == 2
&& $rLL->[$K_outer_opening]->[_TOKEN_] eq '(' )
# anonymous sub + prototype or sig: )->then( sub ($code) {
# ... but it seems best not to stack two structural blocks, like
# this
# sub make_anon_with_my_sub { sub {
# because it probably hides the structure a little too much.
|| ( $rLL->[$K_inner_opening]->[_BLOCK_TYPE_] eq 'sub'
&& $rLL->[$Kn_first]->[_TOKEN_] eq 'sub'
&& !$rLL->[$K_outer_opening]->[_BLOCK_TYPE_] )
)
{
push @nested_pairs,
[ $inner_seqno, $outer_seqno, $K_inner_closing ];
}
next;
}
# The weld routine expects the pairs in order in the form
# [$seqno_inner, $seqno_outer]
# And they must be in the same order as the inner closing tokens
# (otherwise, welds of three or more adjacent tokens will not work). The K
# value of this inner closing token has temporarily been stored for
# sorting.
@nested_pairs =
# Drop the K index after sorting (it would cause trouble downstream)
map { [ $_->[0], $_->[1] ] }
# Sort on the K values
sort { $a->[2] <=> $b->[2] } @nested_pairs;
return \@nested_pairs;
}
sub is_excluded_weld {
# decide if this weld is excluded by user request
my ( $self, $KK, $is_leading ) = @_;
my $rLL = $self->[_rLL_];
my $rtoken_vars = $rLL->[$KK];
my $token = $rtoken_vars->[_TOKEN_];
my $rflags = $weld_nested_exclusion_rules{$token};
return 0 unless ( defined($rflags) );
my $flag = $is_leading ? $rflags->[0] : $rflags->[1];
return 0 unless ( defined($flag) );
return 1 if $flag eq '*';
my ( $is_f, $is_k, $is_w );
my $Kp = $self->K_previous_nonblank($KK);
if ( defined($Kp) ) {
my $type_p = $rLL->[$Kp]->[_TYPE_];
my $token_p = $rLL->[$Kp]->[_TOKEN_];
# keyword?
$is_k = $type_p eq 'k';
# function call? Use the same definition as used for
# the parameter 'space-function-paren'
$is_f =
$type_p =~ /^[wUG]$/
|| $type_p eq '->'
|| $type_p =~ /^[wi]$/ && $token_p =~ /^(\&|->)/;
# either keyword or function call?
$is_w = $is_k || $is_f;
}
my $match;
if ( $flag eq 'k' ) { $match = $is_k }
elsif ( $flag eq 'K' ) { $match = !$is_k }
elsif ( $flag eq 'f' ) { $match = $is_f }
elsif ( $flag eq 'F' ) { $match = !$is_f }
elsif ( $flag eq 'w' ) { $match = $is_w }
elsif ( $flag eq 'W' ) { $match = !$is_w }
return $match;
}
sub weld_nested_containers {
my ($self) = @_;
# Called once per file for option '--weld-nested-containers'
my $rweld_len_left_closing = $self->[_rweld_len_left_closing_];
my $rweld_len_left_opening = $self->[_rweld_len_left_opening_];
my $rweld_len_right_closing = $self->[_rweld_len_right_closing_];
my $rweld_len_right_opening = $self->[_rweld_len_right_opening_];
# This routine implements the -wn flag by "welding together"
# the nested closing and opening tokens which were previously
# identified by sub 'find_nested_pairs'. "welding" simply
# involves setting certain hash values which will be checked
# later during formatting.
my $rLL = $self->[_rLL_];
my $rlines = $self->[_rlines_];
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
# Find nested pairs of container tokens for any welding.
my $rnested_pairs = $self->find_nested_pairs();
# Return unless there are nested pairs to weld
return unless defined($rnested_pairs) && @{$rnested_pairs};
# This array will hold the sequence numbers of the tokens to be welded.
my @welds;
# Variables needed for estimating line lengths
my $starting_indent;
my $starting_lentot;
# A tolerance to the length for length estimates. In some rare cases
# this can avoid problems where a final weld slightly exceeds the
# line length and gets broken in a bad spot.
my $length_tol = 1;
my $excess_length_to_K = sub {
my ($K) = @_;
# Estimate the length from the line start to a given token
my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
my $excess_length =
$starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
return ($excess_length);
};
my $length_to_opening_seqno = sub {
my ($seqno) = @_;
my $KK = $K_opening_container->{$seqno};
my $lentot = defined($KK)
&& $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
return $lentot;
};
my $length_to_closing_seqno = sub {
my ($seqno) = @_;
my $KK = $K_closing_container->{$seqno};
my $lentot = defined($KK)
&& $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
return $lentot;
};
# Abbreviations:
# _oo=outer opening, i.e. first of { {
# _io=inner opening, i.e. second of { {
# _oc=outer closing, i.e. second of } {
# _ic=inner closing, i.e. first of } }
my $previous_pair;
# Main loop over nested pairs...
# We are working from outermost to innermost pairs so that
# level changes will be complete when we arrive at the inner pairs.
while ( my $item = pop( @{$rnested_pairs} ) ) {
my ( $inner_seqno, $outer_seqno ) = @{$item};
my $Kouter_opening = $K_opening_container->{$outer_seqno};
my $Kinner_opening = $K_opening_container->{$inner_seqno};
my $Kouter_closing = $K_closing_container->{$outer_seqno};
my $Kinner_closing = $K_closing_container->{$inner_seqno};
my $outer_opening = $rLL->[$Kouter_opening];
my $inner_opening = $rLL->[$Kinner_opening];
my $outer_closing = $rLL->[$Kouter_closing];
my $inner_closing = $rLL->[$Kinner_closing];
my $iline_oo = $outer_opening->[_LINE_INDEX_];
my $iline_io = $inner_opening->[_LINE_INDEX_];
my $iline_ic = $inner_closing->[_LINE_INDEX_];
# Set flag saying if this pair starts a new weld
my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
# Set flag saying if this pair is adjacent to the previous nesting pair
# (even if previous pair was rejected as a weld)
my $touch_previous_pair =
defined($previous_pair) && $outer_seqno == $previous_pair->[0];
$previous_pair = $item;
# Set a flag if we should not weld. It sometimes looks best not to weld
# when the opening and closing tokens are very close. However, there
# is a danger that we will create a "blinker", which oscillates between
# two semi-stable states, if we do not weld. So the rules for
# not welding have to be carefully defined and tested.
my $do_not_weld;
if ( !$touch_previous_pair ) {
# If this pair is not adjacent to the previous pair (skipped or
# not), then measure lengths from the start of line of oo
my $rK_range = $rlines->[$iline_oo]->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
$starting_lentot =
$Kfirst <= 0 ? 0 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
$starting_indent = 0;
if ( !$rOpts_variable_maximum_line_length ) {
my $level = $rLL->[$Kfirst]->[_LEVEL_];
$starting_indent = $rOpts_indent_columns * $level;
}
# DO-NOT-WELD RULE 1:
# Do not weld something that looks like the start of a two-line
# function call, like this: <<snippets/wn6.in>>
# $trans->add_transformation(
# PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
# We will look for a semicolon after the closing paren.
# We want to weld something complex, like this though
# my $compass = uc( opposite_direction( line_to_canvas_direction(
# @{ $coords[0] }, @{ $coords[1] } ) ) );
# Otherwise we will get a 'blinker'. For example, the following
# would become a blinker without this rule:
# $Self->_Add( $SortOrderDisplay{ $Field
# ->GenerateFieldForSelectSQL() } );
# But it is okay to weld a two-line statement if it looks like
# it was already welded, meaning that the two opening containers are
# on a different line that the two closing containers. This is
# necessary to prevent blinking of something like this with
# perltidy -wn -pbp (starting indentation two levels deep):
# $top_label->set_text( gettext(
# "Unable to create personal directory - check permissions.") );
my $iline_oc = $outer_closing->[_LINE_INDEX_];
my $token_oo = $outer_opening->[_TOKEN_];
if ( $iline_oc == $iline_oo + 1
&& $iline_io == $iline_ic
&& $token_oo eq '(' )
{
# Look for following semicolon...
my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
my $next_nonblank_type =
defined($Knext_nonblank)
? $rLL->[$Knext_nonblank]->[_TYPE_]
: 'b';
if ( $next_nonblank_type eq ';' ) {
# Then do not weld if no other containers between inner
# opening and closing.
my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
if ( $Knext_seq_item == $Kinner_closing ) {
$do_not_weld ||= 1;
}
}
}
}
# DO-NOT-WELD RULE 2:
# Do not weld an opening paren to an inner one line brace block
# We will just use old line numbers for this test and require
# iterations if necessary for convergence
# For example, otherwise we could cause the opening paren
# in the following example to separate from the caller name
# as here:
# $_[0]->code_handler
# ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
# Here is another example where we do not want to weld:
# $wrapped->add_around_modifier(
# sub { push @tracelog => 'around 1'; $_[0]->(); } );
# If the one line sub block gets broken due to length or by the
# user, then we can weld. The result will then be:
# $wrapped->add_around_modifier( sub {
# push @tracelog => 'around 1';
# $_[0]->();
# } );
if ( $iline_ic == $iline_io ) {
my $token_oo = $outer_opening->[_TOKEN_];
$do_not_weld ||= $token_oo eq '(';
}
# DO-NOT-WELD RULE 3:
# Do not weld if this makes our line too long
$do_not_weld ||= $excess_length_to_K->($Kinner_opening) > 0;
# DO-NOT-WELD RULE 4; implemented for git#10:
# Do not weld an opening -ce brace if the next container is on a single
# line, different from the opening brace. (This is very rare). For
# example, given the following with -ce, we will avoid joining the {
# and [
# } else {
# [ $_, length($_) ]
# }
# because this would produce a terminal one-line block:
# } else { [ $_, length($_) ] }
# which may not be what is desired. But given this input:
# } else { [ $_, length($_) ] }
# then we will do the weld and retain the one-line block
if ( $rOpts->{'cuddled-else'} ) {
my $block_type = $rLL->[$Kouter_opening]->[_BLOCK_TYPE_];
if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
my $io_line = $inner_opening->[_LINE_INDEX_];
my $ic_line = $inner_closing->[_LINE_INDEX_];
my $oo_line = $outer_opening->[_LINE_INDEX_];
$do_not_weld ||=
( $oo_line < $io_line && $ic_line == $io_line );
}
}
# DO-NOT-WELD RULE 5: do not include welds excluded by user
if ( !$do_not_weld && %weld_nested_exclusion_rules ) {
$do_not_weld ||=
$self->is_excluded_weld( $Kouter_opening, $starting_new_weld );
$do_not_weld ||= $self->is_excluded_weld( $Kinner_opening, 0 );
}
if ($do_not_weld) {
# After neglecting a pair, we start measuring from start of point io
$starting_lentot =
$self->cumulative_length_before_K($Kinner_opening);
$starting_indent = 0;
if ( !$rOpts_variable_maximum_line_length ) {
my $level = $inner_opening->[_LEVEL_];
$starting_indent = $rOpts_indent_columns * $level;
}
# Normally, a broken pair should not decrease indentation of
# intermediate tokens:
## if ( $last_pair_broken ) { next }
# However, for long strings of welded tokens, such as '{{{{{{...'
# we will allow broken pairs to also remove indentation.
# This will keep very long strings of opening and closing
# braces from marching off to the right. We will do this if the
# number of tokens in a weld before the broken weld is 4 or more.
# This rule will mainly be needed for test scripts, since typical
# welds have fewer than about 4 welded tokens.
if ( !@welds || @{ $welds[-1] } < 4 ) { next }
}
# otherwise start new weld ...
elsif ($starting_new_weld) {
push @welds, $item;
}
# ... or extend current weld
else {
unshift @{ $welds[-1] }, $inner_seqno;
}
# After welding, reduce the indentation level if all intermediate tokens
my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
if ( $dlevel != 0 ) {
my $Kstart = $Kinner_opening;
my $Kstop = $Kinner_closing;
for ( my $KK = $Kstart ; $KK <= $Kstop ; $KK++ ) {
$rLL->[$KK]->[_LEVEL_] += $dlevel;
}
}
}
# Define weld lengths needed later to set line breaks
foreach my $item (@welds) {
# sweep from inner to outer
my $inner_seqno;
my $len_close = 0;
my $len_open = 0;
foreach my $outer_seqno ( @{$item} ) {
if ($inner_seqno) {
my $dlen_opening =
$length_to_opening_seqno->($inner_seqno) -
$length_to_opening_seqno->($outer_seqno);
my $dlen_closing =
$length_to_closing_seqno->($outer_seqno) -
$length_to_closing_seqno->($inner_seqno);
$len_open += $dlen_opening;
$len_close += $dlen_closing;
}
$rweld_len_left_closing->{$outer_seqno} = $len_close;
$rweld_len_right_opening->{$outer_seqno} = $len_open;
$inner_seqno = $outer_seqno;
}
# sweep from outer to inner
foreach my $seqno ( reverse @{$item} ) {
$rweld_len_right_closing->{$seqno} =
$len_close - $rweld_len_left_closing->{$seqno};
$rweld_len_left_opening->{$seqno} =
$len_open - $rweld_len_right_opening->{$seqno};
}
}
#####################################
# DEBUG
#####################################
if (0) {
my $count = 0;
local $" = ')(';
foreach my $weld (@welds) {
print "\nWeld number $count has seq: (@{$weld})\n";
foreach my $seq ( @{$weld} ) {
print <<EOM;
seq=$seq
left_opening=$rweld_len_left_opening->{$seq};
right_opening=$rweld_len_right_opening->{$seq};
left_closing=$rweld_len_left_closing->{$seq};
right_closing=$rweld_len_right_closing->{$seq};
EOM
}
$count++;
}
}
return;
}
sub weld_nested_quotes {
# Called once per file for option '--weld-nested-containers'. This
# does welding on qw quotes.
my $self = shift;
# See if quotes are excluded from welding
my $rflags = $weld_nested_exclusion_rules{'q'};
return if ( defined($rflags) && defined( $rflags->[1] ) );
my $rweld_len_left_closing = $self->[_rweld_len_left_closing_];
my $rweld_len_right_opening = $self->[_rweld_len_right_opening_];
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $Num = @{$rLL};
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $rlines = $self->[_rlines_];
my $is_single_quote = sub {
my ( $Kbeg, $Kend, $quote_type ) = @_;
foreach my $K ( $Kbeg .. $Kend ) {
my $test_type = $rLL->[$K]->[_TYPE_];
next if ( $test_type eq 'b' );
return if ( $test_type ne $quote_type );
}
return 1;
};
my $excess_line_length_K = sub {
my ( $KK, $Ktest ) = @_;
# what is the excess length if we add token $Ktest to the line with $KK?
my $iline = $rLL->[$KK]->[_LINE_INDEX_];
my $rK_range = $rlines->[$iline]->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
my $starting_lentot =
$Kfirst <= 0 ? 0 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
my $starting_indent = 0;
my $length_tol = 1;
if ( !$rOpts_variable_maximum_line_length ) {
my $level = $rLL->[$Kfirst]->[_LEVEL_];
$starting_indent = $rOpts_indent_columns * $level;
}
my $length = $rLL->[$Ktest]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
my $excess_length =
$starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
return $excess_length;
};
# look for single qw quotes nested in containers
my $KNEXT = $self->[_K_first_seq_item_];
while ( defined($KNEXT) ) {
my $KK = $KNEXT;
$KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
my $rtoken_vars = $rLL->[$KK];
my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
if ( !$outer_seqno ) {
next if ( $KK == 0 ); # first token in file may not be container
# A fault here implies that an error was made in the little loop at
# the bottom of sub 'respace_tokens' which set the values of
# _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
# loop control lines above.
Fault("sequence = $outer_seqno not defined at K=$KK");
}
my $token = $rtoken_vars->[_TOKEN_];
if ( $is_opening_token{$token} ) {
# see if the next token is a quote of some type
my $Kn = $KK + 1;
$Kn += 1
if ( $Kn < $Num && $rLL->[$Kn]->[_TYPE_] eq 'b' );
next unless ( $Kn < $Num );
my $next_token = $rLL->[$Kn]->[_TOKEN_];
my $next_type = $rLL->[$Kn]->[_TYPE_];
next
unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
&& $next_token =~ /^q/ );
# The token before the closing container must also be a quote
my $K_closing = $K_closing_container->{$outer_seqno};
my $Kt_end = $self->K_previous_nonblank($K_closing);
next unless $rLL->[$Kt_end]->[_TYPE_] eq $next_type;
# Do not weld to single-line quotes. Nothing is gained, and it may
# look bad.
next if ( $Kt_end == $Kn );
# Only weld to quotes delimited with container tokens. This is
# because welding to arbitrary quote delimiters can produce code
# which is less readable than without welding.
my $closing_delimiter = substr( $rLL->[$Kt_end]->[_TOKEN_], -1, 1 );
next
unless ( $is_closing_token{$closing_delimiter}
|| $closing_delimiter eq '>' );
# Now make sure that there is just a single quote in the container
next
unless ( $is_single_quote->( $Kn + 1, $Kt_end - 1, $next_type ) );
# If welded, the line must not exceed allowed line length
# Assume old line breaks for this estimate.
next if ( $excess_line_length_K->( $KK, $Kn ) > 0 );
# Check weld exclusion rules for outer container
my $is_leading = !$self->[_rweld_len_left_opening_]->{$outer_seqno};
next if ( $self->is_excluded_weld( $KK, $is_leading ) );
# OK to weld
# FIXME: Are these always correct?
$rweld_len_left_closing->{$outer_seqno} = 1;
$rweld_len_right_opening->{$outer_seqno} = 2;
# Undo one indentation level if an extra level was added to this
# multiline quote
my $qw_seqno = $self->[_rstarting_multiline_qw_seqno_by_K_]->{$Kn};
if ( $qw_seqno
&& $self->[_rmultiline_qw_has_extra_level_]->{$qw_seqno} )
{
foreach my $K ( $Kn + 1 .. $Kt_end - 1 ) {
$rLL->[$K]->[_LEVEL_] -= 1;
}
$rLL->[$Kn]->[_CI_LEVEL_] = 0;
$rLL->[$Kt_end]->[_CI_LEVEL_] = 0;
}
# undo CI for other welded quotes
else {
foreach my $K ( $Kn .. $Kt_end ) {
$rLL->[$K]->[_CI_LEVEL_] = 0;
}
}
# Change the level of a closing qw token to be that of the outer
# containing token. This will allow -lp indentation to function
# correctly in the vertical aligner.
$rLL->[$Kt_end]->[_LEVEL_] = $rLL->[$K_closing]->[_LEVEL_];
}
}
return;
}
sub weld_len_left {
my ( $self, $seqno, $type_or_tok ) = @_;
# Given the sequence number of a token, and the token or its type,
# return the length of any weld to its left
# quick check
return 0
unless ( $total_weld_count
&& $seqno
&& $self->[_ris_welded_seqno_]->{$seqno} );
my $weld_len;
if ( $is_closing_type{$type_or_tok} ) {
$weld_len = $self->[_rweld_len_left_closing_]->{$seqno};
}
elsif ( $is_opening_type{$type_or_tok} ) {
$weld_len = $self->[_rweld_len_left_opening_]->{$seqno};
}
$weld_len = 0 unless ( defined($weld_len) );
return $weld_len;
}
sub weld_len_right {
my ( $self, $seqno, $type_or_tok ) = @_;
# Given the sequence number of a token, and the token or its type,
# return the length of any weld to its right
# quick check
return 0
unless ( $total_weld_count
&& $seqno
&& $self->[_ris_welded_seqno_]->{$seqno} );
my $weld_len;
if ( $is_closing_type{$type_or_tok} ) {
$weld_len = $self->[_rweld_len_right_closing_]->{$seqno};
}
elsif ( $is_opening_type{$type_or_tok} ) {
$weld_len = $self->[_rweld_len_right_opening_]->{$seqno};
}
$weld_len = 0 unless ( defined($weld_len) );
return $weld_len;
}
sub weld_len_right_to_go {
my ( $self, $i ) = @_;
# Given the index of a token in the 'to_go' array return the length of any
# weld to its right.
# Back up at a blank.
return 0 unless ( $total_weld_count && $i >= 0 );
if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
my $seqno = $type_sequence_to_go[$i];
return 0 unless ( $seqno && $self->[_ris_welded_seqno_]->{$seqno} );
my $weld_len;
my $type_or_tok = $types_to_go[$i];
if ( $is_closing_type{$type_or_tok} ) {
$weld_len = $self->[_rweld_len_right_closing_]->{$seqno};
}
elsif ( $is_opening_type{$type_or_tok} ) {
$weld_len = $self->[_rweld_len_right_opening_]->{$seqno};
}
$weld_len = 0 unless ( defined($weld_len) );
return $weld_len;
}
sub mark_short_nested_blocks {
# This routine looks at the entire file and marks any short nested blocks
# which should not be broken. The results are stored in the hash
# $rshort_nested->{$type_sequence}
# which will be true if the container should remain intact.
#
# For example, consider the following line:
# sub cxt_two { sort { $a <=> $b } test_if_list() }
# The 'sort' block is short and nested within an outer sub block.
# Normally, the existance of the 'sort' block will force the sub block to
# break open, but this is not always desirable. Here we will set a flag for
# the sort block to prevent this. To give the user control, we will
# follow the input file formatting. If either of the blocks is broken in
# the input file then we will allow it to remain broken. Otherwise we will
# set a flag to keep it together in later formatting steps.
# The flag which is set here will be checked in two places:
# 'sub process_line_of_CODE' and 'sub starting_one_line_block'
my $self = shift;
return if $rOpts->{'indent-only'};
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
return unless ( $rOpts->{'one-line-block-nesting'} );
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $rbreak_container = $self->[_rbreak_container_];
my $rshort_nested = $self->[_rshort_nested_];
my $rlines = $self->[_rlines_];
# Variables needed for estimating line lengths
my $starting_indent;
my $starting_lentot;
my $length_tol = 1;
my $excess_length_to_K = sub {
my ($K) = @_;
# Estimate the length from the line start to a given token
my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
my $excess_length =
$starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
return ($excess_length);
};
my $is_broken_block = sub {
# a block is broken if the input line numbers of the braces differ
my ($seqno) = @_;
my $K_opening = $K_opening_container->{$seqno};
return unless ( defined($K_opening) );
my $K_closing = $K_closing_container->{$seqno};
return unless ( defined($K_closing) );
return $rbreak_container->{$seqno}
|| $rLL->[$K_closing]->[_LINE_INDEX_] !=
$rLL->[$K_opening]->[_LINE_INDEX_];
};
# loop over all containers
my @open_block_stack;
my $iline = -1;
my $KNEXT = $self->[_K_first_seq_item_];
while ( defined($KNEXT) ) {
my $KK = $KNEXT;
$KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
my $rtoken_vars = $rLL->[$KK];
my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
if ( !$type_sequence ) {
next if ( $KK == 0 ); # first token in file may not be container
# A fault here implies that an error was made in the little loop at
# the bottom of sub 'respace_tokens' which set the values of
# _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
# loop control lines above.
Fault("sequence = $type_sequence not defined at K=$KK");
}
# We are just looking at code blocks
my $token = $rtoken_vars->[_TOKEN_];
my $type = $rtoken_vars->[_TYPE_];
next unless ( $type eq $token );
my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
next unless ($block_type);
# Keep a stack of all acceptable block braces seen.
# Only consider blocks entirely on one line so dump the stack when line
# changes.
my $iline_last = $iline;
$iline = $rLL->[$KK]->[_LINE_INDEX_];
if ( $iline != $iline_last ) { @open_block_stack = () }
if ( $token eq '}' ) {
if (@open_block_stack) { pop @open_block_stack }
}
next unless ( $token eq '{' );
# block must be balanced (bad scripts may be unbalanced)
my $K_opening = $K_opening_container->{$type_sequence};
my $K_closing = $K_closing_container->{$type_sequence};
next unless ( defined($K_opening) && defined($K_closing) );
# require that this block be entirely on one line
next if ( $is_broken_block->($type_sequence) );
# See if this block fits on one line of allowed length (which may
# be different from the input script)
$starting_lentot =
$KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
$starting_indent = 0;
if ( !$rOpts_variable_maximum_line_length ) {
my $level = $rLL->[$KK]->[_LEVEL_];
$starting_indent = $rOpts_indent_columns * $level;
}
# Dump the stack if block is too long and skip this block
if ( $excess_length_to_K->($K_closing) > 0 ) {
@open_block_stack = ();
next;
}
# OK, Block passes tests, remember it
push @open_block_stack, $type_sequence;
# We are only marking nested code blocks,
# so check for a previous block on the stack
next unless ( @open_block_stack > 1 );
# Looks OK, mark this as a short nested block
$rshort_nested->{$type_sequence} = 1;
}
return;
}
sub adjust_indentation_levels {
my ($self) = @_;
# Called once per file to do special indentation adjustments.
# These routines adjust levels either by changing _CI_LEVEL_ directly or
# by setting modified levels in the array $self->[_radjusted_levels_].
# Initialize the adjusted levels. These will be the levels actually used
# for computing indentation.
# NOTE: This routine is called after the weld routines, which may have
# already adjusted _LEVEL_, so we are making adjustments on top of those
# levels. It would be much nicer to have the weld routines also use this
# adjustment, but that gets complicated when we combine -gnu -wn and have
# some welded quotes.
my $radjusted_levels = $self->[_radjusted_levels_];
my $rLL = $self->[_rLL_];
foreach my $KK ( 0 .. @{$rLL} - 1 ) {
$radjusted_levels->[$KK] = $rLL->[$KK]->[_LEVEL_];
}
# First set adjusted levels for any non-indenting braces.
$self->non_indenting_braces();
# Adjust indentation for list containers
$self->adjust_container_indentation();
# Set adjusted levels for the whitespace cycle option.
$self->whitespace_cycle_adjustment();
# Adjust continuation indentation if -bli is set
$self->bli_adjustment();
$self->extended_ci()
if ( $rOpts->{'extended-continuation-indentation'} );
# Now clip any adjusted levels to be non-negative
$self->clip_adjusted_levels();
return;
}
sub clip_adjusted_levels {
# Replace any negative adjusted levels with zero.
# Negative levels can occur in files with brace errors.
my ($self) = @_;
my $radjusted_levels = $self->[_radjusted_levels_];
return unless defined($radjusted_levels) && @{$radjusted_levels};
foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) }
return;
}
sub non_indenting_braces {
# Called once per file to handle the --non-indenting-braces parameter.
# Remove indentation within marked braces if requested
my ($self) = @_;
return unless ( $rOpts->{'non-indenting-braces'} );
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
my $radjusted_levels = $self->[_radjusted_levels_];
my $Kmax = @{$rLL} - 1;
my @seqno_stack;
my $is_non_indenting_brace = sub {
my ($KK) = @_;
# looking for an opening block brace
my $token = $rLL->[$KK]->[_TOKEN_];
my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
return unless ( $token eq '{' && $block_type );
# followed by a comment
my $K_sc = $KK + 1;
$K_sc += 1
if ( $K_sc <= $Kmax && $rLL->[$K_sc]->[_TYPE_] eq 'b' );
return unless ( $K_sc <= $Kmax );
my $type_sc = $rLL->[$K_sc]->[_TYPE_];
return unless ( $type_sc eq '#' );
# on the same line
my $line_index = $rLL->[$KK]->[_LINE_INDEX_];
my $line_index_sc = $rLL->[$K_sc]->[_LINE_INDEX_];
return unless ( $line_index_sc == $line_index );
# get the side comment text
my $token_sc = $rLL->[$K_sc]->[_TOKEN_];
# The pattern ends in \s but we have removed the newline, so
# we added it back for the match. That way we require an exact
# match to the special string and also allow additional text.
$token_sc .= "\n";
my $is_nib = ( $token_sc =~ /$non_indenting_brace_pattern/ );
if ($is_nib) { $rspecial_side_comment_type->{$K_sc} = 'NIB' }
return $is_nib;
};
foreach my $KK ( 0 .. $Kmax ) {
my $num = @seqno_stack;
my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
if ($seqno) {
my $token = $rLL->[$KK]->[_TOKEN_];
if ( $token eq '{' && $is_non_indenting_brace->($KK) ) {
push @seqno_stack, $seqno;
}
if ( $token eq '}' && @seqno_stack && $seqno_stack[-1] == $seqno ) {
pop @seqno_stack;
$num -= 1;
}
}
next unless $num;
$radjusted_levels->[$KK] -= $num;
}
return;
}
sub whitespace_cycle_adjustment {
my $self = shift;
# Called once per file to implement the --whitespace-cycle option
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $radjusted_levels = $self->[_radjusted_levels_];
my $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) {
my $Kmax = @{$rLL} - 1;
my $whitespace_last_level = -1;
my @whitespace_level_stack = ();
my $last_nonblank_type = 'b';
my $last_nonblank_token = '';
foreach my $KK ( 0 .. $Kmax ) {
my $level_abs = $radjusted_levels->[$KK];
my $level = $level_abs;
if ( $level_abs < $whitespace_last_level ) {
pop(@whitespace_level_stack);
}
if ( !@whitespace_level_stack ) {
push @whitespace_level_stack, $level_abs;
}
elsif ( $level_abs > $whitespace_last_level ) {
$level = $whitespace_level_stack[-1] +
( $level_abs - $whitespace_last_level );
if (
# 1 Try to break at a block brace
(
$level > $rOpts_whitespace_cycle
&& $last_nonblank_type eq '{'
&& $last_nonblank_token eq '{'
)
# 2 Then either a brace or bracket
|| ( $level > $rOpts_whitespace_cycle + 1
&& $last_nonblank_token =~ /^[\{\[]$/ )
# 3 Then a paren too
|| $level > $rOpts_whitespace_cycle + 2
)
{
$level = 1;
}
push @whitespace_level_stack, $level;
}
$level = $whitespace_level_stack[-1];
$radjusted_levels->[$KK] = $level;
$whitespace_last_level = $level_abs;
my $type = $rLL->[$KK]->[_TYPE_];
my $token = $rLL->[$KK]->[_TOKEN_];
if ( $type ne 'b' ) {
$last_nonblank_type = $type;
$last_nonblank_token = $token;
}
}
}
return;
}
sub adjust_container_indentation {
# Called once per file to implement the -bbhb* and related flags:
# -bbhbi=n
# -bbsbi=n
# -bbpi=n
# where:
# n=0 default indentation (usually one ci)
# n=1 outdent one ci
# n=2 indent one level (minus one ci)
# n=3 indent one extra ci [This may be dropped]
my ($self) = @_;
return unless %container_indentation_options;
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
# Option 2 needs the following array:
my $radjusted_levels = $self->[_radjusted_levels_];
# Loop over all opening container tokens
my $K_opening_container = $self->[_K_opening_container_];
my $ris_broken_container = $self->[_ris_broken_container_];
foreach my $seqno ( keys %{$K_opening_container} ) {
my $KK = $K_opening_container->{$seqno};
# this routine is not for code block braces
my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
next if ($block_type);
# These flags only apply if the corresponding -bb* flags
# have been set to non-default values
my $rtoken_vars = $rLL->[$KK];
my $token = $rtoken_vars->[_TOKEN_];
my $flag = $container_indentation_options{$token};
next unless ($flag);
# Require previous nonblank to be certain types (= and =>)
# Note similar coding in sub insert_breaks_before...
my $Kprev = $KK - 1;
next if ( $Kprev < 0 );
my $prev_type = $rLL->[$Kprev]->[_TYPE_];
if ( $prev_type eq 'b' ) {
$Kprev--;
next if ( $Kprev < 0 );
$prev_type = $rLL->[$Kprev]->[_TYPE_];
}
next unless ( $is_equal_or_fat_comma{$prev_type} );
# This is only for list containers
next unless $self->is_list_by_seqno($seqno);
# and only for broken lists
next unless $ris_broken_container->{$seqno};
# NOTE: We are adjusting indentation of the opening container. The
# closing container will normally follow the indentation of the opening
# container automatically, so this is not currently done.
my $ci = $rLL->[$KK]->[_CI_LEVEL_];
next unless ($ci);
# option 1: outdent
if ( $flag == 1 ) {
$ci -= 1;
}
# option 2: indent one level
elsif ( $flag == 2 ) {
$ci -= 1;
$radjusted_levels->[$KK] += 1;
}
# option 3: for testing only, probably will be deleted
elsif ( $flag == 3 ) {
$ci += 1;
}
$rLL->[$KK]->[_CI_LEVEL_] = $ci if ( $ci >= 0 );
}
return;
}
sub extended_ci {
# This routine implements the -xci (--extended-continuation-indentation)
# flag. We add CI to interior tokens of a container which itself has CI but
# only if a token does not already have CI.
# To do this, we will locate opening tokens which themselves have
# continuation indentation (CI). We track them with their sequence
# numbers. These sequence numbers are called 'controlling sequence
# numbers'. They apply continuation indentation to the tokens that they
# contain. These inner tokens remember their controlling sequence numbers.
# Later, when these inner tokens are output, they have to see if the output
# lines with their controlling tokens were output with CI or not. If not,
# then they must remove their CI too.
# The controlling CI concept works hierarchically. But CI itself is not
# hierarchical; it is either on or off. There are some rare instances where
# it would be best to have hierarchical CI too, but not enough to be worth
# the programming effort.
# The operations to remove unwanted CI are done in sub 'undo_ci'.
my ($self) = @_;
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
# Loop over all opening container tokens
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $ris_broken_container = $self->[_ris_broken_container_];
my @seqno_stack;
my $seqno_top;
my $KLAST;
my $KNEXT = $self->[_K_first_seq_item_];
while ( defined($KNEXT) ) {
# Fix all tokens up to the next sequence item if we are changing CI
if ($seqno_top) {
my $count = 0;
for ( my $Kt = $KLAST + 1 ; $Kt < $KNEXT ; $Kt++ ) {
if ( !$rLL->[$Kt]->[_CI_LEVEL_] ) {
$rLL->[$Kt]->[_CI_LEVEL_] = 1;
$rseqno_controlling_my_ci->{$Kt} = $seqno_top;
$count++;
}
}
$ris_seqno_controlling_ci->{$seqno_top} += $count;
}
$KLAST = $KNEXT;
my $KK = $KNEXT;
$KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
my $K_opening = $K_opening_container->{$seqno};
# see if we have reached the end of the current controlling container
if ( $seqno_top && $seqno == $seqno_top ) {
$seqno_top = pop @seqno_stack;
}
# Patch to fix some block types...
# Certain block types arrive from the tokenizer without CI but should
# have it for this option. These include anonymous subs and
# do sort map grep eval
my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
if ( $block_type && $is_block_with_ci{$block_type} ) {
$rLL->[$KK]->[_CI_LEVEL_] = 1;
if ($seqno_top) {
$rseqno_controlling_my_ci->{$KK} = $seqno_top;
$ris_seqno_controlling_ci->{$seqno_top}++;
}
}
# If this does not have ci, update ci if necessary and continue looking
if ( !$rLL->[$KK]->[_CI_LEVEL_] ) {
if ($seqno_top) {
$rLL->[$KK]->[_CI_LEVEL_] = 1;
$rseqno_controlling_my_ci->{$KK} = $seqno_top;
$ris_seqno_controlling_ci->{$seqno_top}++;
}
next;
}
# We are looking for opening container tokens with ci
next unless ( defined($K_opening) && $KK == $K_opening );
# Make sure there is a corresponding closing container
# (could be missing if the script has a brace error)
my $K_closing = $K_closing_container->{$seqno};
next unless defined($K_closing);
# Require different input lines. This will filter out a large number
# of small hash braces and array brackets. If we accidentally filter
# out an important container, it will get fixed on the next pass.
next
if (
$rLL->[$K_opening]->[_LINE_INDEX_] ==
$rLL->[$K_closing]->[_LINE_INDEX_]
&& ( $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
$rLL->[$K_opening]->[_CUMULATIVE_LENGTH_] >
$rOpts_maximum_line_length )
);
# This becomes the next controlling container
push @seqno_stack, $seqno_top if ($seqno_top);
$seqno_top = $seqno;
}
return;
}
sub bli_adjustment {
# Called once per file to implement the --brace-left-and-indent option.
# If -bli is set, adds one continuation indentation for certain braces
my $self = shift;
return unless ( $rOpts->{'brace-left-and-indent'} );
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $ris_bli_container = $self->[_ris_bli_container_];
my $K_opening_container = $self->[_K_opening_container_];
my $KNEXT = $self->[_K_first_seq_item_];
while ( defined($KNEXT) ) {
my $KK = $KNEXT;
$KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
if ( $block_type && $block_type =~ /$bli_pattern/ ) {
my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
my $K_opening = $K_opening_container->{$seqno};
if ( defined($K_opening) ) {
if ( $KK eq $K_opening ) {
$rLL->[$KK]->[_CI_LEVEL_]++;
$ris_bli_container->{$seqno} = 1;
}
else {
$rLL->[$KK]->[_CI_LEVEL_] =
$rLL->[$K_opening]->[_CI_LEVEL_];
}
}
}
}
return;
}
sub find_multiline_qw {
my $self = shift;
# Multiline qw quotes are not sequenced items like containers { [ (
# but behave in some respects in a similar way. So this routine finds them
# and creates a separate sequence number system for later use.
# This is straightforward because they always begin at the end of one line
# and and at the beginning of a later line. This is true no matter how we
# finally make our line breaks, so we can find them before deciding on new
# line breaks.
my $rstarting_multiline_qw_seqno_by_K = {};
my $rending_multiline_qw_seqno_by_K = {};
my $rKrange_multiline_qw_by_seqno = {};
my $rcontains_multiline_qw_by_seqno = {};
my $rmultiline_qw_has_extra_level = {};
my $rlines = $self->[_rlines_];
my $rLL = $self->[_rLL_];
my $qw_seqno;
my $num_qw_seqno = 0;
my $K_start_multiline_qw;
foreach my $line_of_tokens ( @{$rlines} ) {
my $line_type = $line_of_tokens->{_line_type};
next unless ( $line_type eq 'CODE' );
my $rK_range = $line_of_tokens->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
next unless ( defined($Kfirst) && defined($Klast) ); # skip blank line
if ( defined($K_start_multiline_qw) ) {
my $type = $rLL->[$Kfirst]->[_TYPE_];
# shouldn't happen
if ( $type ne 'q' ) {
DEVEL_MODE && print STDERR <<EOM;
STRANGE: started multiline qw at K=$K_start_multiline_qw but didn't see q qw at K=$Kfirst\n";
EOM
$K_start_multiline_qw = undef;
next;
}
my $Kprev = $self->K_previous_nonblank($Kfirst);
my $Knext = $self->K_next_nonblank($Kfirst);
my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
if ( $type_m eq 'q' && $type_p ne 'q' ) {
$rending_multiline_qw_seqno_by_K->{$Kfirst} = $qw_seqno;
$rKrange_multiline_qw_by_seqno->{$qw_seqno} =
[ $K_start_multiline_qw, $Kfirst ];
$K_start_multiline_qw = undef;
$qw_seqno = undef;
}
}
if ( !defined($K_start_multiline_qw)
&& $rLL->[$Klast]->[_TYPE_] eq 'q' )
{
my $Kprev = $self->K_previous_nonblank($Klast);
my $Knext = $self->K_next_nonblank($Klast);
my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
if ( $type_m ne 'q' && $type_p eq 'q' ) {
$num_qw_seqno++;
$qw_seqno = 'q' . $num_qw_seqno;
$K_start_multiline_qw = $Klast;
$rstarting_multiline_qw_seqno_by_K->{$Klast} = $qw_seqno;
}
}
}
# Give multiline qw lists extra indentation instead of CI. This option
# works well but is currently only activated when the -xci flag is set.
# The reason is to avoid unexpected changes in formatting.
if ( $rOpts->{'extended-continuation-indentation'} ) {
while ( my ( $qw_seqno, $rKrange ) =
each %{$rKrange_multiline_qw_by_seqno} )
{
my ( $Kbeg, $Kend ) = @{$rKrange};
# require isolated closing token
my $token_end = $rLL->[$Kend]->[_TOKEN_];
next
unless ( length($token_end) == 1
&& ( $is_closing_token{$token_end} || $token_end eq '>' ) );
# require isolated opening token
my $token_beg = $rLL->[$Kbeg]->[_TOKEN_];
# allow space(s) after the qw
if ( length($token_beg) > 3 && substr( $token_beg, 2, 1 ) eq ' ' ) {
$token_beg =~ s/\s+//;
}
next unless ( length($token_beg) == 3 );
foreach my $KK ( $Kbeg + 1 .. $Kend - 1 ) {
$rLL->[$KK]->[_LEVEL_]++;
$rLL->[$KK]->[_CI_LEVEL_] = 0;
}
# set flag for -wn option, which will remove the level
$rmultiline_qw_has_extra_level->{$qw_seqno} = 1;
}
}
# For the -lp option we need to mark all parent containers of
# multiline quotes
if ($rOpts_line_up_parentheses) {
while ( my ( $qw_seqno, $rKrange ) =
each %{$rKrange_multiline_qw_by_seqno} )
{
my ( $Kbeg, $Kend ) = @{$rKrange};
my $parent_seqno = $self->parent_seqno_by_K($Kend);
next unless ($parent_seqno);
# If the parent container exactly surrounds this qw, then -lp
# formatting seems to work so we will not mark it.
my $is_tightly_contained;
my $Kn = $self->K_next_nonblank($Kend);
my $seqno_n = defined($Kn) ? $rLL->[$Kn]->[_TYPE_SEQUENCE_] : undef;
if ( defined($seqno_n) && $seqno_n eq $parent_seqno ) {
my $Kp = $self->K_previous_nonblank($Kbeg);
my $seqno_p =
defined($Kp) ? $rLL->[$Kp]->[_TYPE_SEQUENCE_] : undef;
if ( defined($seqno_p) && $seqno_p eq $parent_seqno ) {
$is_tightly_contained = 1;
}
}
$rcontains_multiline_qw_by_seqno->{$parent_seqno} = 1
unless ($is_tightly_contained);
# continue up the tree marking parent containers
while (1) {
$parent_seqno = $self->[_rparent_of_seqno_]->{$parent_seqno};
last
unless ( defined($parent_seqno)
&& $parent_seqno ne SEQ_ROOT );
$rcontains_multiline_qw_by_seqno->{$parent_seqno} = 1;
}
}
}
$self->[_rstarting_multiline_qw_seqno_by_K_] =
$rstarting_multiline_qw_seqno_by_K;
$self->[_rending_multiline_qw_seqno_by_K_] =
$rending_multiline_qw_seqno_by_K;
$self->[_rKrange_multiline_qw_by_seqno_] = $rKrange_multiline_qw_by_seqno;
$self->[_rcontains_multiline_qw_by_seqno_] =
$rcontains_multiline_qw_by_seqno;
$self->[_rmultiline_qw_has_extra_level_] = $rmultiline_qw_has_extra_level;
return;
}
######################################
# CODE SECTION 6: Process line-by-line
######################################
sub process_all_lines {
# Main loop over all lines of a file.
# Lines are processed according to type.
my $self = shift;
my $rlines = $self->[_rlines_];
my $sink_object = $self->[_sink_object_];
my $fh_tee = $self->[_fh_tee_];
my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
my $file_writer_object = $self->[_file_writer_object_];
my $logger_object = $self->[_logger_object_];
my $vertical_aligner_object = $self->[_vertical_aligner_object_];
my $save_logfile = $self->[_save_logfile_];
# Note for RT#118553, leave only one newline at the end of a file.
# Example code to do this is in comments below:
# my $Opt_trim_ending_blank_lines = 0;
# if ($Opt_trim_ending_blank_lines) {
# while ( my $line_of_tokens = pop @{$rlines} ) {
# my $line_type = $line_of_tokens->{_line_type};
# if ( $line_type eq 'CODE' ) {
# my $CODE_type = $line_of_tokens->{_code_type};
# next if ( $CODE_type eq 'BL' );
# }
# push @{$rlines}, $line_of_tokens;
# last;
# }
# }
# But while this would be a trivial update, it would have very undesirable
# side effects when perltidy is run from within an editor on a small snippet.
# So this is best done with a separate filter, such
# as 'delete_ending_blank_lines.pl' in the examples folder.
# Flag to prevent blank lines when POD occurs in a format skipping sect.
my $in_format_skipping_section;
# set locations for blanks around long runs of keywords
my $rwant_blank_line_after = $self->keyword_group_scan();
my $line_type = "";
my $i = -1;
foreach my $line_of_tokens ( @{$rlines} ) {
$i++;
# insert blank lines requested for keyword sequences
if ( $i > 0
&& defined( $rwant_blank_line_after->{ $i - 1 } )
&& $rwant_blank_line_after->{ $i - 1 } == 1 )
{
$self->want_blank_line();
}
my $last_line_type = $line_type;
$line_type = $line_of_tokens->{_line_type};
my $input_line = $line_of_tokens->{_line_text};
# _line_type codes are:
# SYSTEM - system-specific code before hash-bang line
# CODE - line of perl code (including comments)
# POD_START - line starting pod, such as '=head'
# POD - pod documentation text
# POD_END - last line of pod section, '=cut'
# HERE - text of here-document
# HERE_END - last line of here-doc (target word)
# FORMAT - format section
# FORMAT_END - last line of format section, '.'
# DATA_START - __DATA__ line
# DATA - unidentified text following __DATA__
# END_START - __END__ line
# END - unidentified text following __END__
# ERROR - we are in big trouble, probably not a perl script
# put a blank line after an =cut which comes before __END__ and __DATA__
# (required by podchecker)
if ( $last_line_type eq 'POD_END' && !$self->[_saw_END_or_DATA_] ) {
$file_writer_object->reset_consecutive_blank_lines();
if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
$self->want_blank_line();
}
}
# handle line of code..
if ( $line_type eq 'CODE' ) {
my $CODE_type = $line_of_tokens->{_code_type};
$in_format_skipping_section = $CODE_type eq 'FS';
# Handle blank lines
if ( $CODE_type eq 'BL' ) {
# If keep-old-blank-lines is zero, we delete all
# old blank lines and let the blank line rules generate any
# needed blanks.
# and delete lines requested by the keyword-group logic
my $kgb_keep = !( defined( $rwant_blank_line_after->{$i} )
&& $rwant_blank_line_after->{$i} == 2 );
# But: the keep-old-blank-lines flag has priority over kgb flags
$kgb_keep = 1 if ( $rOpts_keep_old_blank_lines == 2 );
if ( $rOpts_keep_old_blank_lines && $kgb_keep ) {
$self->flush($CODE_type);
$file_writer_object->write_blank_code_line(
$rOpts_keep_old_blank_lines == 2 );
$self->[_last_line_leading_type_] = 'b';
}
next;
}
else {
# Let logger see all non-blank lines of code. This is a slow operation
# so we avoid it if it is not going to be saved.
if ( $save_logfile && $logger_object ) {
$logger_object->black_box( $line_of_tokens,
$vertical_aligner_object->get_output_line_number );
}
}
# Handle Format Skipping (FS) and Verbatim (VB) Lines
if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
$self->write_unindented_line("$input_line");
$file_writer_object->reset_consecutive_blank_lines();
next;
}
# Handle all other lines of code
$self->process_line_of_CODE($line_of_tokens);
}
# handle line of non-code..
else {
# set special flags
my $skip_line = 0;
if ( substr( $line_type, 0, 3 ) eq 'POD' ) {
# Pod docs should have a preceding blank line. But stay
# out of __END__ and __DATA__ sections, because
# the user may be using this section for any purpose whatsoever
if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
if ( !$skip_line
&& !$in_format_skipping_section
&& $line_type eq 'POD_START'
&& !$self->[_saw_END_or_DATA_] )
{
$self->want_blank_line();
}
}
# leave the blank counters in a predictable state
# after __END__ or __DATA__
elsif ( $line_type eq 'END_START' || $line_type eq 'DATA_START' ) {
$file_writer_object->reset_consecutive_blank_lines();
$self->[_saw_END_or_DATA_] = 1;
}
# write unindented non-code line
if ( !$skip_line ) {
$self->write_unindented_line($input_line);
}
}
}
return;
} ## end sub process_all_lines
sub keyword_group_scan {
my $self = shift;
# Called once per file to process the --keyword-group-blanks-* parameters.
# Manipulate blank lines around keyword groups (kgb* flags)
# Scan all lines looking for runs of consecutive lines beginning with
# selected keywords. Example keywords are 'my', 'our', 'local', ... but
# they may be anything. We will set flags requesting that blanks be
# inserted around and within them according to input parameters. Note
# that we are scanning the lines as they came in in the input stream, so
# they are not necessarily well formatted.
# The output of this sub is a return hash ref whose keys are the indexes of
# lines after which we desire a blank line. For line index i:
# $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
# $rhash_of_desires->{$i} = 2 means we want blank line $i removed
my $rhash_of_desires = {};
my $Opt_blanks_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb'
my $Opt_blanks_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba'
my $Opt_blanks_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi'
my $Opt_blanks_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd'
my $Opt_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs'
# A range of sizes can be input with decimal notation like 'min.max' with
# any number of dots between the two numbers. Examples:
# string => min max matches
# 1.1 1 1 exactly 1
# 1.3 1 3 1,2, or 3
# 1..3 1 3 1,2, or 3
# 5 5 - 5 or more
# 6. 6 - 6 or more
# .2 - 2 up to 2
# 1.0 1 0 nothing
my ( $Opt_size_min, $Opt_size_max ) = split /\.+/, $Opt_size;
if ( $Opt_size_min && $Opt_size_min !~ /^\d+$/
|| $Opt_size_max && $Opt_size_max !~ /^\d+$/ )
{
Warn(<<EOM);
Unexpected value for -kgbs: '$Opt_size'; expecting 'min' or 'min.max';
ignoring all -kgb flags
EOM
# Turn this option off so that this message does not keep repeating
# during iterations and other files.
$rOpts->{'keyword-group-blanks-size'} = "";
return $rhash_of_desires;
}
$Opt_size_min = 1 unless ($Opt_size_min);
if ( $Opt_size_max && $Opt_size_max < $Opt_size_min ) {
return $rhash_of_desires;
}
# codes for $Opt_blanks_before and $Opt_blanks_after:
# 0 = never (delete if exist)
# 1 = stable (keep unchanged)
# 2 = always (insert if missing)
return $rhash_of_desires
unless $Opt_size_min > 0
&& ( $Opt_blanks_before != 1
|| $Opt_blanks_after != 1
|| $Opt_blanks_inside
|| $Opt_blanks_delete );
my $Opt_pattern = $keyword_group_list_pattern;
my $Opt_comment_pattern = $keyword_group_list_comment_pattern;
my $Opt_repeat_count =
$rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr'
my $rlines = $self->[_rlines_];
my $rLL = $self->[_rLL_];
my $K_closing_container = $self->[_K_closing_container_];
# variables for the current group and subgroups:
my ( $ibeg, $iend, $count, $level_beg, $K_closing, @iblanks, @group,
@subgroup );
# Definitions:
# ($ibeg, $iend) = starting and ending line indexes of this entire group
# $count = total number of keywords seen in this entire group
# $level_beg = indententation level of this group
# @group = [ $i, $token, $count ] =list of all keywords & blanks
# @subgroup = $j, index of group where token changes
# @iblanks = line indexes of blank lines in input stream in this group
# where i=starting line index
# token (the keyword)
# count = number of this token in this subgroup
# j = index in group where token changes
#
# These vars will contain values for the most recently seen line:
my ( $line_type, $CODE_type, $K_first, $K_last );
my $number_of_groups_seen = 0;
####################
# helper subroutines
####################
my $insert_blank_after = sub {
my ($i) = @_;
$rhash_of_desires->{$i} = 1;
my $ip = $i + 1;
if ( defined( $rhash_of_desires->{$ip} )
&& $rhash_of_desires->{$ip} == 2 )
{
$rhash_of_desires->{$ip} = 0;
}
return;
};
my $split_into_sub_groups = sub {
# place blanks around long sub-groups of keywords
# ...if requested
return unless ($Opt_blanks_inside);
# loop over sub-groups, index k
push @subgroup, scalar @group;
my $kbeg = 1;
my $kend = @subgroup - 1;
for ( my $k = $kbeg ; $k <= $kend ; $k++ ) {
# index j runs through all keywords found
my $j_b = $subgroup[ $k - 1 ];
my $j_e = $subgroup[$k] - 1;
# index i is the actual line number of a keyword
my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] };
my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] };
my $num = $count_e - $count_b + 1;
# This subgroup runs from line $ib to line $ie-1, but may contain
# blank lines
if ( $num >= $Opt_size_min ) {
# if there are blank lines, we require that at least $num lines
# be non-blank up to the boundary with the next subgroup.
my $nog_b = my $nog_e = 1;
if ( @iblanks && !$Opt_blanks_delete ) {
my $j_bb = $j_b + $num - 1;
my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] };
$nog_b = $count_bb - $count_b + 1 == $num;
my $j_ee = $j_e - ( $num - 1 );
my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] };
$nog_e = $count_e - $count_ee + 1 == $num;
}
if ( $nog_b && $k > $kbeg ) {
$insert_blank_after->( $i_b - 1 );
}
if ( $nog_e && $k < $kend ) {
my ( $i_ep, $tok_ep, $count_ep ) = @{ $group[ $j_e + 1 ] };
$insert_blank_after->( $i_ep - 1 );
}
}
}
};
my $delete_if_blank = sub {
my ($i) = @_;
# delete line $i if it is blank
return unless ( $i >= 0 && $i < @{$rlines} );
my $line_type = $rlines->[$i]->{_line_type};
return if ( $line_type ne 'CODE' );
my $code_type = $rlines->[$i]->{_code_type};
if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
return;
};
my $delete_inner_blank_lines = sub {
# always remove unwanted trailing blank lines from our list
return unless (@iblanks);
while ( my $ibl = pop(@iblanks) ) {
if ( $ibl < $iend ) { push @iblanks, $ibl; last }
$iend = $ibl;
}
# now mark mark interior blank lines for deletion if requested
return unless ($Opt_blanks_delete);
while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
};
my $end_group = sub {
# end a group of keywords
my ($bad_ending) = @_;
if ( defined($ibeg) && $ibeg >= 0 ) {
# then handle sufficiently large groups
if ( $count >= $Opt_size_min ) {
$number_of_groups_seen++;
# do any blank deletions regardless of the count
$delete_inner_blank_lines->();
if ( $ibeg > 0 ) {
my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
# patch for hash bang line which is not currently marked as
# a comment; mark it as a comment
if ( $ibeg == 1 && !$code_type ) {
my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
$code_type = 'BC'
if ( $line_text && $line_text =~ /^#/ );
}
# Do not insert a blank after a comment
# (this could be subject to a flag in the future)
if ( $code_type !~ /(BC|SBC|SBCX)/ ) {
if ( $Opt_blanks_before == INSERT ) {
$insert_blank_after->( $ibeg - 1 );
}
elsif ( $Opt_blanks_before == DELETE ) {
$delete_if_blank->( $ibeg - 1 );
}
}
}
# We will only put blanks before code lines. We could loosen
# this rule a little, but we have to be very careful because
# for example we certainly don't want to drop a blank line
# after a line like this:
# my $var = <<EOM;
if ( $line_type eq 'CODE' && defined($K_first) ) {
# - Do not put a blank before a line of different level
# - Do not put a blank line if we ended the search badly
# - Do not put a blank at the end of the file
# - Do not put a blank line before a hanging side comment
my $level = $rLL->[$K_first]->[_LEVEL_];
my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
if ( $level == $level_beg
&& $ci_level == 0
&& !$bad_ending
&& $iend < @{$rlines}
&& $CODE_type ne 'HSC' )
{
if ( $Opt_blanks_after == INSERT ) {
$insert_blank_after->($iend);
}
elsif ( $Opt_blanks_after == DELETE ) {
$delete_if_blank->( $iend + 1 );
}
}
}
}
$split_into_sub_groups->();
}
# reset for another group
$ibeg = -1;
$iend = undef;
$level_beg = -1;
$K_closing = undef;
@group = ();
@subgroup = ();
@iblanks = ();
};
my $find_container_end = sub {
# If the keyword lines ends with an open token, find the closing token
# '$K_closing' so that we can easily skip past the contents of the
# container.
return if ( $K_last <= $K_first );
my $KK = $K_last;
my $type_last = $rLL->[$KK]->[_TYPE_];
my $tok_last = $rLL->[$KK]->[_TOKEN_];
if ( $type_last eq '#' ) {
$KK = $self->K_previous_nonblank($KK);
$tok_last = $rLL->[$KK]->[_TOKEN_];
}
if ( $KK > $K_first && $tok_last =~ /^[\(\{\[]$/ ) {
my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
my $lev = $rLL->[$KK]->[_LEVEL_];
if ( $lev == $level_beg ) {
$K_closing = $K_closing_container->{$type_sequence};
}
}
};
my $add_to_group = sub {
my ( $i, $token, $level ) = @_;
# End the previous group if we have reached the maximum
# group size
if ( $Opt_size_max && @group >= $Opt_size_max ) {
$end_group->();
}
if ( @group == 0 ) {
$ibeg = $i;
$level_beg = $level;
$count = 0;
}
$count++;
$iend = $i;
# New sub-group?
if ( !@group || $token ne $group[-1]->[1] ) {
push @subgroup, scalar(@group);
}
push @group, [ $i, $token, $count ];
# remember if this line ends in an open container
$find_container_end->();
return;
};
###################################
# loop over all lines of the source
###################################
$end_group->();
my $i = -1;
foreach my $line_of_tokens ( @{$rlines} ) {
$i++;
last
if ( $Opt_repeat_count > 0
&& $number_of_groups_seen >= $Opt_repeat_count );
$CODE_type = "";
$K_first = undef;
$K_last = undef;
$line_type = $line_of_tokens->{_line_type};
# always end a group at non-CODE
if ( $line_type ne 'CODE' ) { $end_group->(); next }
$CODE_type = $line_of_tokens->{_code_type};
# end any group at a format skipping line
if ( $CODE_type && $CODE_type eq 'FS' ) {
$end_group->();
next;
}
# continue in a verbatim (VB) type; it may be quoted text
if ( $CODE_type eq 'VB' ) {
if ( $ibeg >= 0 ) { $iend = $i; }
next;
}
# and continue in blank (BL) types
if ( $CODE_type eq 'BL' ) {
if ( $ibeg >= 0 ) {
$iend = $i;
push @{iblanks}, $i;
# propagate current subgroup token
my $tok = $group[-1]->[1];
push @group, [ $i, $tok, $count ];
}
next;
}
# examine the first token of this line
my $rK_range = $line_of_tokens->{_rK_range};
( $K_first, $K_last ) = @{$rK_range};
if ( !defined($K_first) ) {
# Somewhat unexpected blank line..
# $rK_range is normally defined for line type CODE, but this can
# happen for example if the input line was a single semicolon which
# is being deleted. In that case there was code in the input
# file but it is not being retained. So we can silently return.
return $rhash_of_desires;
}
my $level = $rLL->[$K_first]->[_LEVEL_];
my $type = $rLL->[$K_first]->[_TYPE_];
my $token = $rLL->[$K_first]->[_TOKEN_];
my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
# see if this is a code type we seek (i.e. comment)
if ( $CODE_type
&& $Opt_comment_pattern
&& $CODE_type =~ /$Opt_comment_pattern/ )
{
my $tok = $CODE_type;
# Continuing a group
if ( $ibeg >= 0 && $level == $level_beg ) {
$add_to_group->( $i, $tok, $level );
}
# Start new group
else {
# first end old group if any; we might be starting new
# keywords at different level
if ( $ibeg > 0 ) { $end_group->(); }
$add_to_group->( $i, $tok, $level );
}
next;
}
# See if it is a keyword we seek, but never start a group in a
# continuation line; the code may be badly formatted.
if ( $ci_level == 0
&& $type eq 'k'
&& $token =~ /$Opt_pattern/ )
{
# Continuing a keyword group
if ( $ibeg >= 0 && $level == $level_beg ) {
$add_to_group->( $i, $token, $level );
}
# Start new keyword group
else {
# first end old group if any; we might be starting new
# keywords at different level
if ( $ibeg > 0 ) { $end_group->(); }
$add_to_group->( $i, $token, $level );
}
next;
}
# This is not one of our keywords, but we are in a keyword group
# so see if we should continue or quit
elsif ( $ibeg >= 0 ) {
# - bail out on a large level change; we may have walked into a
# data structure or anoymous sub code.
if ( $level > $level_beg + 1 || $level < $level_beg ) {
$end_group->();
next;
}
# - keep going on a continuation line of the same level, since
# it is probably a continuation of our previous keyword,
# - and keep going past hanging side comments because we never
# want to interrupt them.
if ( ( ( $level == $level_beg ) && $ci_level > 0 )
|| $CODE_type eq 'HSC' )
{
$iend = $i;
next;
}
# - continue if if we are within in a container which started with
# the line of the previous keyword.
if ( defined($K_closing) && $K_first <= $K_closing ) {
# continue if entire line is within container
if ( $K_last <= $K_closing ) { $iend = $i; next }
# continue at ); or }; or ];
my $KK = $K_closing + 1;
if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
if ( $KK < $K_last ) {
if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' ) {
$end_group->(1);
next;
}
}
$iend = $i;
next;
}
$end_group->(1);
next;
}
# - end the group if none of the above
$end_group->();
next;
}
# not in a keyword group; continue
else { next }
}
# end of loop over all lines
$end_group->();
return $rhash_of_desires;
} ## end sub keyword_group_scan
#######################################
# CODE SECTION 7: Process lines of code
#######################################
{ ## begin closure process_line_of_CODE
# The routines in this closure receive lines of code and combine them into
# 'batches' and send them along. A 'batch' is the unit of code which can be
# processed further as a unit. It has the property that it is the largest
# amount of code into which which perltidy is free to place one or more
# line breaks within it without violating any constraints.
# When a new batch is formed it is sent to sub 'grind_batch_of_code'.
# flags needed by the store routine
my $line_of_tokens;
my $no_internal_newlines;
my $side_comment_follows;
my $CODE_type;
# range of K of tokens for the current line
my ( $K_first, $K_last );
my ( $rLL, $radjusted_levels );
# past stored nonblank tokens
my (
$last_last_nonblank_token, $last_last_nonblank_type,
$last_nonblank_token, $last_nonblank_type,
$last_nonblank_block_type, $K_last_nonblank_code,
$K_last_last_nonblank_code, $looking_for_else,
$is_static_block_comment, $batch_CODE_type,
$last_line_had_side_comment,
);
# Called once at the start of a new file
sub initialize_process_line_of_CODE {
$last_nonblank_token = ';';
$last_nonblank_type = ';';
$last_last_nonblank_token = ';';
$last_last_nonblank_type = ';';
$last_nonblank_block_type = "";
$K_last_nonblank_code = undef;
$K_last_last_nonblank_code = undef;
$looking_for_else = 0;
$is_static_block_comment = 0;
$batch_CODE_type = "";
$last_line_had_side_comment = 0;
return;
}
# Batch variables: these describe the current batch of code being formed
# and sent down the pipeline. They are initialized in the next
# sub.
my ( $rbrace_follower, $index_start_one_line_block,
$semicolons_before_block_self_destruct,
$starting_in_quote, $ending_in_quote, );
# Called before the start of each new batch
sub initialize_batch_variables {
$max_index_to_go = UNDEFINED_INDEX;
@summed_lengths_to_go = @nesting_depth_to_go = (0);
# The initialization code for the remaining batch arrays is as follows
# and can be activated for testing. But profiling shows that it is
# time-consuming to re-initialize the batch arrays and is not necessary
# because the maximum valid token, $max_index_to_go, is carefully
# controlled. This means however that it is not possible to do any
# type of filter or map operation directly on these arrays. And it is
# not possible to use negative indexes. As a precaution against program
# changes which might do this, sub pad_array_to_go adds some undefs at
# the end of the current batch of data.
# So 'long story short': this is a waste of time
0 && do { #<<<
@block_type_to_go = ();
@type_sequence_to_go = ();
@container_environment_to_go = ();
@bond_strength_to_go = ();
@forced_breakpoint_to_go = ();
@token_lengths_to_go = ();
@levels_to_go = ();
@mate_index_to_go = ();
@ci_levels_to_go = ();
@nobreak_to_go = ();
@old_breakpoint_to_go = ();
@tokens_to_go = ();
@K_to_go = ();
@types_to_go = ();
@leading_spaces_to_go = ();
@reduced_spaces_to_go = ();
@inext_to_go = ();
@iprev_to_go = ();
};
$rbrace_follower = undef;
$ending_in_quote = 0;
destroy_one_line_block();
return;
}
sub leading_spaces_to_go {
# return the number of indentation spaces for a token in the output
# stream; these were previously stored by 'set_leading_whitespace'.
my ($ii) = @_;
return 0 if ( $ii < 0 );
my $indentation = $leading_spaces_to_go[$ii];
return ref($indentation) ? $indentation->get_spaces() : $indentation;
}
sub create_one_line_block {
( $index_start_one_line_block, $semicolons_before_block_self_destruct )
= @_;
return;
}
sub destroy_one_line_block {
$index_start_one_line_block = UNDEFINED_INDEX;
$semicolons_before_block_self_destruct = 0;
return;
}
# Routine to place the current token into the output stream.
# Called once per output token.
use constant DEBUG_STORE => 0;
sub store_token_to_go {
my ( $self, $Ktoken_vars, $rtoken_vars ) = @_;
# Add one token to the next batch.
# $Ktoken_vars = the index K in the global token array
# $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values
# unless they are temporarily being overriden
# NOTE: This routine needs to be coded efficiently because it is called
# once per token. I have gotten it down from the second slowest to the
# eighth slowest, but that still seems rather slow for what it does.
# This closure variable has already been defined, for efficiency:
# my $radjusted_levels = $self->[_radjusted_levels_];
my $type = $rtoken_vars->[_TYPE_];
# Check for emergency flush...
# The K indexes in the batch must always be a continuous sequence of
# the global token array. The batch process programming assumes this.
# If storing this token would cause this relation to fail we must dump
# the current batch before storing the new token. It is extremely rare
# for this to happen. One known example is the following two-line
# snippet when run with parameters
# --noadd-newlines --space-terminal-semicolon:
# if ( $_ =~ /PENCIL/ ) { $pencil_flag= 1 } ; ;
# $yy=1;
if ( $max_index_to_go >= 0 ) {
my $Klast = $K_to_go[$max_index_to_go];
if ( $Ktoken_vars != $Klast + 1 ) {
$self->flush_batch_of_CODE();
}
# Do not output consecutive blank tokens ... this should not
# happen, but it is worth checking. Later code can then make the
# simplifying assumption that blank tokens are not consecutive.
elsif ( $type eq 'b' && $types_to_go[$max_index_to_go] eq 'b' ) {
return;
}
}
++$max_index_to_go;
$batch_CODE_type = $CODE_type;
$K_to_go[$max_index_to_go] = $Ktoken_vars;
$types_to_go[$max_index_to_go] = $type;
$old_breakpoint_to_go[$max_index_to_go] = 0;
$forced_breakpoint_to_go[$max_index_to_go] = 0;
$mate_index_to_go[$max_index_to_go] = -1;
my $token = $tokens_to_go[$max_index_to_go] = $rtoken_vars->[_TOKEN_];
my $ci_level = $ci_levels_to_go[$max_index_to_go] =
$rtoken_vars->[_CI_LEVEL_];
# Clip levels to zero if there are level errors in the file.
# We had to wait until now for reasons explained in sub 'write_line'.
my $level = $rtoken_vars->[_LEVEL_];
if ( $level < 0 ) { $level = 0 }
$levels_to_go[$max_index_to_go] = $level;
$nesting_depth_to_go[$max_index_to_go] = $rtoken_vars->[_SLEVEL_];
$block_type_to_go[$max_index_to_go] = $rtoken_vars->[_BLOCK_TYPE_];
$container_environment_to_go[$max_index_to_go] =
$rtoken_vars->[_CONTAINER_ENVIRONMENT_];
$type_sequence_to_go[$max_index_to_go] =
$rtoken_vars->[_TYPE_SEQUENCE_];
$nobreak_to_go[$max_index_to_go] =
$side_comment_follows ? 2 : $no_internal_newlines;
my $length = $rtoken_vars->[_TOKEN_LENGTH_];
# Safety check that length is defined. Should not be needed now.
# Former patch for indent-only, in which the entire set of tokens is
# turned into type 'q'. Lengths may have not been defined because sub
# 'respace_tokens' is bypassed. We do not need lengths in this case,
# but we will use the character count to have a defined value. In the
# future, it would be nicer to have 'respace_tokens' convert the lines
# to quotes and get correct lengths.
if ( !defined($length) ) { $length = length($token) }
$token_lengths_to_go[$max_index_to_go] = $length;
# We keep a running sum of token lengths from the start of this batch:
# summed_lengths_to_go[$i] = total length to just before token $i
# summed_lengths_to_go[$i+1] = total length to just after token $i
$summed_lengths_to_go[ $max_index_to_go + 1 ] =
$summed_lengths_to_go[$max_index_to_go] + $length;
my $in_continued_quote =
( $Ktoken_vars == $K_first ) && $line_of_tokens->{_starting_in_quote};
if ( $max_index_to_go == 0 ) {
$starting_in_quote = $in_continued_quote;
}
# Define the indentation that this token will have in two cases:
# Without CI = reduced_spaces_to_go
# With CI = leading_spaces_to_go
if ($in_continued_quote) {
$leading_spaces_to_go[$max_index_to_go] = 0;
$reduced_spaces_to_go[$max_index_to_go] = 0;
}
else {
$reduced_spaces_to_go[$max_index_to_go] = my $reduced_spaces =
$rOpts_indent_columns * $radjusted_levels->[$Ktoken_vars];
$leading_spaces_to_go[$max_index_to_go] =
$reduced_spaces + $rOpts_continuation_indentation * $ci_level;
}
# Correct these values if -lp is used
if ($rOpts_line_up_parentheses) {
$self->set_leading_whitespace( $Ktoken_vars, $K_last_nonblank_code,
$K_last_last_nonblank_code, $level, $ci_level,
$in_continued_quote );
}
DEBUG_STORE && do {
my ( $a, $b, $c ) = caller();
print STDOUT
"STORE: from $a $c: storing token $token type $type lev=$level at $max_index_to_go\n";
};
return;
}
sub flush_batch_of_CODE {
# Finish any batch packaging and call the process routine.
# This must be the only call to grind_batch_of_CODE()
my ($self) = @_;
return unless ( $max_index_to_go >= 0 );
# Create an array to hold variables for this batch
my $this_batch = [];
$this_batch->[_starting_in_quote_] = $starting_in_quote;
$this_batch->[_ending_in_quote_] = $ending_in_quote;
$this_batch->[_max_index_to_go_] = $max_index_to_go;
$this_batch->[_rK_to_go_] = \@K_to_go;
$this_batch->[_batch_CODE_type_] = $batch_CODE_type;
# The flag $is_static_block_comment applies to the line which just
# arrived. So it only applies if we are outputting that line.
$this_batch->[_is_static_block_comment_] =
defined($K_first)
&& $max_index_to_go == 0
&& $K_to_go[0] == $K_first ? $is_static_block_comment : 0;
$self->[_this_batch_] = $this_batch;
$last_line_had_side_comment =
$max_index_to_go > 0 && $types_to_go[$max_index_to_go] eq '#';
$self->grind_batch_of_CODE();
# Done .. this batch is history
$self->[_this_batch_] = [];
initialize_batch_variables();
initialize_forced_breakpoint_vars();
initialize_gnu_batch_vars()
if $rOpts_line_up_parentheses;
return;
}
sub end_batch {
# end the current batch, EXCEPT for a few special cases
my ($self) = @_;
# Exception 1: Do not end line in a weld
return
if ( $total_weld_count
&& $self->weld_len_right_to_go($max_index_to_go) );
# Exception 2: just set a tentative breakpoint if we might be in a
# one-line block
if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
$self->set_forced_breakpoint($max_index_to_go);
return;
}
$self->flush_batch_of_CODE();
return;
}
sub flush_vertical_aligner {
my ($self) = @_;
my $vao = $self->[_vertical_aligner_object_];
$vao->flush();
return;
}
# flush is called to output any tokens in the pipeline, so that
# an alternate source of lines can be written in the correct order
sub flush {
my ( $self, $CODE_type ) = @_;
# end the current batch with 1 exception
destroy_one_line_block();
# Exception: if we are flushing within the code stream only to insert
# blank line(s), then we can keep the batch intact at a weld. This
# improves formatting of -ce. See test 'ce1.ce'
if ( $CODE_type && $CODE_type eq 'BL' ) { $self->end_batch() }
# otherwise, we have to shut things down completely.
else { $self->flush_batch_of_CODE() }
$self->flush_vertical_aligner();
return;
}
sub process_line_of_CODE {
my ( $self, $my_line_of_tokens ) = @_;
# This routine is called once per INPUT line to process all of the
# tokens on that line.
# It outputs full-line comments and blank lines immediately.
# The tokens are copied one-by-one from the global token array $rLL to
# a set of '_to_go' arrays which collect batches of tokens for a
# further processing via calls to 'sub store_token_to_go', until a well
# defined 'structural' break point* or 'forced' breakpoint* is reached.
# Then, the batch of collected '_to_go' tokens is passed along to 'sub
# grind_batch_of_CODE' for further processing.
# * 'structural' break points are basically line breaks corresponding
# to code blocks. An example is a chain of if-elsif-else statements,
# which should typically be broken at the opening and closing braces.
# * 'forced' break points are breaks required by side comments or by
# special user controls.
# So this routine is just making an initial set of required line
# breaks, basically regardless of the maximum requested line length.
# The subsequent stage of formating make additional line breaks
# appropriate for lists and logical structures, and to keep line
# lengths below the requested maximum line length.
$line_of_tokens = $my_line_of_tokens;
$CODE_type = $line_of_tokens->{_code_type};
my $input_line_number = $line_of_tokens->{_line_number};
my $input_line = $line_of_tokens->{_line_text};
# initialize closure variables
my $rK_range = $line_of_tokens->{_rK_range};
( $K_first, $K_last ) = @{$rK_range};
# remember original starting index in case it changes
my $K_first_true = $K_first;
$rLL = $self->[_rLL_];
$radjusted_levels = $self->[_radjusted_levels_];
my $file_writer_object = $self->[_file_writer_object_];
my $rbreak_container = $self->[_rbreak_container_];
my $rshort_nested = $self->[_rshort_nested_];
my $sink_object = $self->[_sink_object_];
my $fh_tee = $self->[_fh_tee_];
my $ris_bli_container = $self->[_ris_bli_container_];
if ( !defined($K_first) ) {
# Empty line: This can happen if tokens are deleted, for example
# with the -mangle parameter
return;
}
$no_internal_newlines = 0;
if ( !$rOpts_add_newlines || $CODE_type eq 'NIN' ) {
$no_internal_newlines = 2;
}
$side_comment_follows = 0;
my $is_comment =
( $K_first == $K_last && $rLL->[$K_first]->[_TYPE_] eq '#' );
my $is_static_block_comment_without_leading_space =
$CODE_type eq 'SBCX';
$is_static_block_comment =
$CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
my $is_hanging_side_comment = $CODE_type eq 'HSC';
my $is_VERSION_statement = $CODE_type eq 'VER';
if ($is_VERSION_statement) {
$self->[_saw_VERSION_in_this_file_] = 1;
$no_internal_newlines = 2;
}
# Add interline blank if any
my $last_old_nonblank_type = "b";
my $first_new_nonblank_token = "";
if ( $max_index_to_go >= 0 ) {
$last_old_nonblank_type = $types_to_go[$max_index_to_go];
$first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
if ( !$is_comment
&& $types_to_go[$max_index_to_go] ne 'b'
&& $K_first > 0
&& $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
{
$K_first -= 1;
}
}
my $rtok_first = $rLL->[$K_first];
my $in_quote = $line_of_tokens->{_ending_in_quote};
$ending_in_quote = $in_quote;
my $guessed_indentation_level =
$line_of_tokens->{_guessed_indentation_level};
######################################
# Handle a block (full-line) comment..
######################################
if ($is_comment) {
if ( $rOpts->{'delete-block-comments'} ) {
$self->flush();
return;
}
destroy_one_line_block();
$self->end_batch();
# output a blank line before block comments
if (
# unless we follow a blank or comment line
$self->[_last_line_leading_type_] ne '#'
&& $self->[_last_line_leading_type_] ne 'b'
# only if allowed
&& $rOpts->{'blanks-before-comments'}
# if this is NOT an empty comment, unless it follows a side
# comment and could become a hanging side comment.
&& (
$rtok_first->[_TOKEN_] ne '#'
|| ( $last_line_had_side_comment
&& $rLL->[$K_first]->[_LEVEL_] > 0 )
)
# not after a short line ending in an opening token
# because we already have space above this comment.
# Note that the first comment in this if block, after
# the 'if (', does not get a blank line because of this.
&& !$self->[_last_output_short_opening_token_]
# never before static block comments
&& !$is_static_block_comment
)
{
$self->flush(); # switching to new output stream
$file_writer_object->write_blank_code_line();
$self->[_last_line_leading_type_] = 'b';
}
if (
$rOpts->{'indent-block-comments'}
&& ( !$rOpts->{'indent-spaced-block-comments'}
|| $input_line =~ /^\s+/ )
&& !$is_static_block_comment_without_leading_space
)
{
my $Ktoken_vars = $K_first;
my $rtoken_vars = $rLL->[$Ktoken_vars];
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
$self->end_batch();
}
else {
# switching to new output stream
$self->flush();
# Note that last arg in call here is 'undef' for comments
$file_writer_object->write_code_line(
$rtok_first->[_TOKEN_] . "\n", undef );
$self->[_last_line_leading_type_] = '#';
}
return;
}
# compare input/output indentation except for continuation lines
# (because they have an unknown amount of initial blank space)
# and lines which are quotes (because they may have been outdented)
$self->compare_indentation_levels( $K_first, $guessed_indentation_level,
$input_line_number )
unless ( $is_hanging_side_comment
|| $rtok_first->[_CI_LEVEL_] > 0
|| $guessed_indentation_level == 0
&& $rtok_first->[_TYPE_] eq 'Q' );
##########################
# Handle indentation-only
##########################
# NOTE: In previous versions we sent all qw lines out immediately here.
# No longer doing this: also write a line which is entirely a 'qw' list
# to allow stacking of opening and closing tokens. Note that interior
# qw lines will still go out at the end of this routine.
if ( $CODE_type eq 'IO' ) {
$self->flush();
my $line = $input_line;
# Fix for rt #125506 Unexpected string formating
# in which leading space of a terminal quote was removed
$line =~ s/\s+$//;
$line =~ s/^\s+// unless ( $line_of_tokens->{_starting_in_quote} );
my $Ktoken_vars = $K_first;
# We work with a copy of the token variables and change the
# first token to be the entire line as a quote variable
my $rtoken_vars = $rLL->[$Ktoken_vars];
$rtoken_vars = copy_token_as_type( $rtoken_vars, 'q', $line );
# Patch: length is not really important here
$rtoken_vars->[_TOKEN_LENGTH_] = length($line);
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
$self->end_batch();
return;
}
############################
# Handle all other lines ...
############################
# If we just saw the end of an elsif block, write nag message
# if we do not see another elseif or an else.
if ($looking_for_else) {
unless ( $rLL->[$K_first]->[_TOKEN_] =~ /^(elsif|else)$/ ) {
write_logfile_entry("(No else block)\n");
}
$looking_for_else = 0;
}
# This is a good place to kill incomplete one-line blocks
if (
(
( $semicolons_before_block_self_destruct == 0 )
&& ( $max_index_to_go >= 0 )
&& ( $last_old_nonblank_type eq ';' )
&& ( $first_new_nonblank_token ne '}' )
)
# Patch for RT #98902. Honor request to break at old commas.
|| ( $rOpts_break_at_old_comma_breakpoints
&& $max_index_to_go >= 0
&& $last_old_nonblank_type eq ',' )
)
{
$forced_breakpoint_to_go[$max_index_to_go] = 1
if ($rOpts_break_at_old_comma_breakpoints);
destroy_one_line_block();
$self->end_batch();
}
# Keep any requested breaks before this line. Note that we have to
# use the original K_first because it may have been reduced above
# to add a blank.
if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} ) {
destroy_one_line_block();
$self->end_batch();
}
# loop to process the tokens one-by-one
# We do not want a leading blank if the previous batch just got output
if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
$K_first++;
}
foreach my $Ktoken_vars ( $K_first .. $K_last ) {
my $rtoken_vars = $rLL->[$Ktoken_vars];
my $token = $rtoken_vars->[_TOKEN_];
my $type = $rtoken_vars->[_TYPE_];
my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
# If we are continuing after seeing a right curly brace, flush
# buffer unless we see what we are looking for, as in
# } else ...
if ( $rbrace_follower && $type ne 'b' ) {
unless ( $rbrace_follower->{$token} ) {
$self->end_batch();
}
$rbrace_follower = undef;
}
# Get next nonblank on this line
my $next_nonblank_token = '';
my $next_nonblank_token_type = 'b';
if ( $Ktoken_vars < $K_last ) {
my $Knnb = $Ktoken_vars + 1;
if ( $rLL->[$Knnb]->[_TYPE_] eq 'b'
&& $Knnb < $K_last )
{
$Knnb++;
}
$next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
$next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
}
# Do not allow breaks which would promote a side comment to a
# block comment. In order to allow a break before an opening
# or closing BLOCK, followed by a side comment, those sections
# of code will handle this flag separately.
$side_comment_follows = ( $next_nonblank_token_type eq '#' );
my $is_opening_BLOCK =
( $type eq '{'
&& $token eq '{'
&& $block_type
&& !$rshort_nested->{$type_sequence}
&& $block_type ne 't' );
my $is_closing_BLOCK =
( $type eq '}'
&& $token eq '}'
&& $block_type
&& !$rshort_nested->{$type_sequence}
&& $block_type ne 't' );
if ( $side_comment_follows
&& !$is_opening_BLOCK
&& !$is_closing_BLOCK )
{
$no_internal_newlines = 1;
}
# We're only going to handle breaking for code BLOCKS at this
# (top) level. Other indentation breaks will be handled by
# sub scan_list, which is better suited to dealing with them.
if ($is_opening_BLOCK) {
# Tentatively output this token. This is required before
# calling starting_one_line_block. We may have to unstore
# it, though, if we have to break before it.
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
# Look ahead to see if we might form a one-line block..
my $too_long =
$self->starting_one_line_block( $Ktoken_vars,
$K_last_nonblank_code, $K_last );
$self->clear_breakpoint_undo_stack();
# to simplify the logic below, set a flag to indicate if
# this opening brace is far from the keyword which introduces it
my $keyword_on_same_line = 1;
if (
$max_index_to_go >= 0
&& $last_nonblank_type eq ')'
&& ( ( $rtoken_vars->[_SLEVEL_] < $nesting_depth_to_go[0] )
|| $too_long )
)
{
$keyword_on_same_line = 0;
}
# decide if user requested break before '{'
my $want_break =
# use -bl flag if not a sub block of any type
$block_type !~ /$ANYSUB_PATTERN/
? $rOpts->{'opening-brace-on-new-line'}
# use -sbl flag for a named sub block
: $block_type !~ /$ASUB_PATTERN/
? $rOpts->{'opening-sub-brace-on-new-line'}
# use -asbl flag for an anonymous sub block
: $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
# Break if requested with -bli flag
$want_break ||= $ris_bli_container->{$type_sequence};
# Do not break if this token is welded to the left
if ( $self->weld_len_left( $type_sequence, $token ) ) {
$want_break = 0;
}
# Break before an opening '{' ...
if (
# if requested
$want_break
# and we were unable to start looking for a block,
&& $index_start_one_line_block == UNDEFINED_INDEX
# or if it will not be on same line as its keyword, so that
# it will be outdented (eval.t, overload.t), and the user
# has not insisted on keeping it on the right
|| ( !$keyword_on_same_line
&& !$rOpts->{'opening-brace-always-on-right'} )
)
{
# but only if allowed
unless ($no_internal_newlines) {
# since we already stored this token, we must unstore it
$self->unstore_token_to_go();
# then output the line
$self->end_batch();
# and now store this token at the start of a new line
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
}
}
# Now update for side comment
if ($side_comment_follows) { $no_internal_newlines = 1 }
# now output this line
unless ($no_internal_newlines) {
$self->end_batch();
}
}
elsif ($is_closing_BLOCK) {
# If there is a pending one-line block ..
if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
# we have to terminate it if..
if (
# it is too long (final length may be different from
# initial estimate). note: must allow 1 space for this
# token
$self->excess_line_length( $index_start_one_line_block,
$max_index_to_go ) >= 0
# or if it has too many semicolons
|| ( $semicolons_before_block_self_destruct == 0
&& $last_nonblank_type ne ';' )
)
{
destroy_one_line_block();
}
}
# put a break before this closing curly brace if appropriate
unless ( $no_internal_newlines
|| $index_start_one_line_block != UNDEFINED_INDEX )
{
# write out everything before this closing curly brace
$self->end_batch();
}
# Now update for side comment
if ($side_comment_follows) { $no_internal_newlines = 1 }
# store the closing curly brace
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
# ok, we just stored a closing curly brace. Often, but
# not always, we want to end the line immediately.
# So now we have to check for special cases.
# if this '}' successfully ends a one-line block..
my $is_one_line_block = 0;
my $keep_going = 0;
if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
# Remember the type of token just before the
# opening brace. It would be more general to use
# a stack, but this will work for one-line blocks.
$is_one_line_block =
$types_to_go[$index_start_one_line_block];
# we have to actually make it by removing tentative
# breaks that were set within it
$self->undo_forced_breakpoint_stack(0);
$self->set_nobreaks( $index_start_one_line_block,
$max_index_to_go - 1 );
# then re-initialize for the next one-line block
destroy_one_line_block();
# then decide if we want to break after the '}' ..
# We will keep going to allow certain brace followers as in:
# do { $ifclosed = 1; last } unless $losing;
#
# But make a line break if the curly ends a
# significant block:
if (
(
$is_block_without_semicolon{$block_type}
# Follow users break point for
# one line block types U & G, such as a 'try' block
|| $is_one_line_block =~ /^[UG]$/
&& $Ktoken_vars == $K_last
)
# if needless semicolon follows we handle it later
&& $next_nonblank_token ne ';'
)
{
$self->end_batch()
unless ($no_internal_newlines);
}
}
# set string indicating what we need to look for brace follower
# tokens
if ( $block_type eq 'do' ) {
$rbrace_follower = \%is_do_follower;
if ( $self->tight_paren_follows( $K_to_go[0], $Ktoken_vars )
)
{
$rbrace_follower = { ')' => 1 };
}
}
elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
$rbrace_follower = \%is_if_brace_follower;
}
elsif ( $block_type eq 'else' ) {
$rbrace_follower = \%is_else_brace_follower;
}
# added eval for borris.t
elsif ($is_sort_map_grep_eval{$block_type}
|| $is_one_line_block eq 'G' )
{
$rbrace_follower = undef;
$keep_going = 1;
}
# anonymous sub
elsif ( $block_type =~ /$ASUB_PATTERN/ ) {
if ($is_one_line_block) {
$rbrace_follower = \%is_anon_sub_1_brace_follower;
}
else {
$rbrace_follower = \%is_anon_sub_brace_follower;
}
}
# None of the above: specify what can follow a closing
# brace of a block which is not an
# if/elsif/else/do/sort/map/grep/eval
# Testfiles:
# 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
else {
$rbrace_follower = \%is_other_brace_follower;
}
# See if an elsif block is followed by another elsif or else;
# complain if not.
if ( $block_type eq 'elsif' ) {
if ( $next_nonblank_token_type eq 'b' ) { # end of line?
$looking_for_else = 1; # ok, check on next line
}
else {
unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
write_logfile_entry("No else block :(\n");
}
}
}
# keep going after certain block types (map,sort,grep,eval)
# added eval for borris.t
if ($keep_going) {
# keep going
}
# if no more tokens, postpone decision until re-entring
elsif ( ( $next_nonblank_token_type eq 'b' )
&& $rOpts_add_newlines )
{
unless ($rbrace_follower) {
$self->end_batch()
unless ($no_internal_newlines);
}
}
elsif ($rbrace_follower) {
unless ( $rbrace_follower->{$next_nonblank_token} ) {
$self->end_batch()
unless ($no_internal_newlines);
}
$rbrace_follower = undef;
}
else {
$self->end_batch()
unless ($no_internal_newlines);
}
} # end treatment of closing block token
# handle semicolon
elsif ( $type eq ';' ) {
my $break_before_semicolon = ( $Ktoken_vars == $K_first )
&& $rOpts_break_at_old_semicolon_breakpoints;
# kill one-line blocks with too many semicolons
$semicolons_before_block_self_destruct--;
if (
$break_before_semicolon
|| ( $semicolons_before_block_self_destruct < 0 )
|| ( $semicolons_before_block_self_destruct == 0
&& $next_nonblank_token_type !~ /^[b\}]$/ )
)
{
destroy_one_line_block();
$self->end_batch() if ($break_before_semicolon);
}
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
$self->end_batch()
unless (
$no_internal_newlines
|| ( $rOpts_keep_interior_semicolons
&& $Ktoken_vars < $K_last )
|| ( $next_nonblank_token eq '}' )
);
}
# handle here_doc target string
elsif ( $type eq 'h' ) {
# no newlines after seeing here-target
$no_internal_newlines = 2;
destroy_one_line_block();
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
}
# handle all other token types
else {
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
}
# remember two previous nonblank OUTPUT tokens
if ( $type ne '#' && $type ne 'b' ) {
$last_last_nonblank_token = $last_nonblank_token;
$last_last_nonblank_type = $last_nonblank_type;
$last_nonblank_token = $token;
$last_nonblank_type = $type;
$last_nonblank_block_type = $block_type;
$K_last_last_nonblank_code = $K_last_nonblank_code;
$K_last_nonblank_code = $Ktoken_vars;
}
} # end of loop over all tokens in this 'line_of_tokens'
my $type = $rLL->[$K_last]->[_TYPE_];
# we have to flush ..
if (
# if there is a side comment...
$type eq '#'
# if this line ends in a quote
# NOTE: This is critically important for insuring that quoted lines
# do not get processed by things like -sot and -sct
|| $in_quote
# if this is a VERSION statement
|| $is_VERSION_statement
# to keep a label at the end of a line
|| $type eq 'J'
# if we are instructed to keep all old line breaks
|| !$rOpts->{'delete-old-newlines'}
# we have a request to keep a break after this line
|| $self->[_rbreak_after_Klast_]->{$K_last}
# if this is a line of the form 'use overload'. A break here
# in the input file is a good break because it will allow
# the operators which follow to be formatted well. Without
# this break the formatting with -ci=4 -xci is poor, for example.
# use overload
# '+' => sub {
# print length $_[2], "\n";
# my ( $x, $y ) = _order(@_);
# Number::Roman->new( int $x + $y );
# },
# '-' => sub {
# my ( $x, $y ) = _order(@_);
# Number::Roman->new( int $x - $y );
# };
|| ( $max_index_to_go == 2
&& $types_to_go[0] eq 'k'
&& $tokens_to_go[0] eq 'use'
&& $tokens_to_go[$max_index_to_go] eq 'overload' )
)
{
destroy_one_line_block();
$self->end_batch();
}
# mark old line breakpoints in current output stream
if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
my $jobp = $max_index_to_go;
if ( $types_to_go[$max_index_to_go] eq 'b' && $max_index_to_go > 0 )
{
$jobp--;
}
$old_breakpoint_to_go[$jobp] = 1;
}
return;
} ## end sub process_line_of_CODE
} ## end closure process_line_of_CODE
sub tight_paren_follows {
my ( $self, $K_to_go_0, $K_ic ) = @_;
# Input parameters:
# $K_to_go_0 = first token index K of this output batch (=K_to_go[0])
# $K_ic = index of the closing do brace (=K_to_go[$max_index_to_go])
# Return parameter:
# false if we want a break after the closing do brace
# true if we do not want a break after the closing do brace
# We are at the closing brace of a 'do' block. See if this brace is
# followed by a closing paren, and if so, set a flag which indicates
# that we do not want a line break between the '}' and ')'.
# xxxxx ( ...... do { ... } ) {
# ^-------looking at this brace, K_ic
# Subscript notation:
# _i = inner container (braces in this case)
# _o = outer container (parens in this case)
# _io = inner opening = '{'
# _ic = inner closing = '}'
# _oo = outer opening = '('
# _oc = outer closing = ')'
# |--K_oo |--K_oc = outer container
# xxxxx ( ...... do { ...... } ) {
# |--K_io |--K_ic = inner container
# In general, the safe thing to do is return a 'false' value
# if the statement appears to be complex. This will have
# the downstream side-effect of opening up outer containers
# to help make complex code readable. But for simpler
# do blocks it can be preferable to keep the code compact
# by returning a 'true' value.
return unless defined($K_ic);
my $rLL = $self->[_rLL_];
# we should only be called at a closing block
my $seqno_i = $rLL->[$K_ic]->[_TYPE_SEQUENCE_];
return unless ($seqno_i); # shouldn't happen;
# This only applies if the next nonblank is a ')'
my $K_oc = $self->K_next_nonblank($K_ic);
return unless defined($K_oc);
my $token_next = $rLL->[$K_oc]->[_TOKEN_];
return unless ( $token_next eq ')' );
my $seqno_o = $rLL->[$K_oc]->[_TYPE_SEQUENCE_];
my $K_io = $self->[_K_opening_container_]->{$seqno_i};
my $K_oo = $self->[_K_opening_container_]->{$seqno_o};
return unless ( defined($K_io) && defined($K_oo) );
# RULE 1: Do not break before a closing signature paren
# (regardless of complexity). This is a fix for issue git#22.
# Looking for something like:
# sub xxx ( ... do { ... } ) {
# ^----- next block_type
my $K_test = $self->K_next_nonblank($K_oc);
if ( defined($K_test) ) {
my $block_type = $rLL->[$K_test]->[_BLOCK_TYPE_];
if ( $block_type
&& $rLL->[$K_test]->[_TYPE_] eq '{'
&& $block_type =~ /$ANYSUB_PATTERN/ )
{
return 1;
}
}
# RULE 2: Break if the contents within braces appears to be 'complex'. We
# base this decision on the number of tokens between braces.
# xxxxx ( ... do { ... } ) {
# ^^^^^^
# Although very simple, it has the advantages of (1) being insensitive to
# changes in lengths of identifier names, (2) easy to understand, implement
# and test. A test case for this is 't/snippets/long_line.in'.
# Example: $K_ic - $K_oo = 9 [Pass Rule 2]
# if ( do { $2 !~ /&/ } ) { ... }
# Example: $K_ic - $K_oo = 10 [Pass Rule 2]
# for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
# Example: $K_ic - $K_oo = 20 [Fail Rule 2]
# test_zero_args( "do-returned list slice", do { ( 10, 11 )[ 2, 3 ]; });
return if ( $K_ic - $K_io > 16 );
# RULE 3: break if the code between the opening '(' and the '{' is 'complex'
# As with the previous rule, we decide based on the token count
# xxxxx ( ... do { ... } ) {
# ^^^^^^^^
# Example: $K_ic - $K_oo = 9 [Pass Rule 2]
# $K_io - $K_oo = 4 [Pass Rule 3]
# if ( do { $2 !~ /&/ } ) { ... }
# Example: $K_ic - $K_oo = 10 [Pass rule 2]
# $K_io - $K_oo = 9 [Pass rule 3]
# for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
return if ( $K_io - $K_oo > 9 );
# RULE 4: Break if we have already broken this batch of output tokens
return if ( $K_oo < $K_to_go_0 );
# RULE 5: Break if input is not on one line
# For example, we will set the flag for the following expression
# written in one line:
# This has: $K_ic - $K_oo = 10 [Pass rule 2]
# $K_io - $K_oo = 8 [Pass rule 3]
# $self->debug( 'Error: ' . do { local $/; <$err> } );
# but we break after the brace if it is on multiple lines on input, since
# the user may prefer it on multiple lines:
# [Fail rule 5]
# $self->debug(
# 'Error: ' . do { local $/; <$err> }
# );
if ( !$rOpts_ignore_old_breakpoints ) {
my $iline_oo = $rLL->[$K_oo]->[_LINE_INDEX_];
my $iline_oc = $rLL->[$K_oc]->[_LINE_INDEX_];
return if ( $iline_oo != $iline_oc );
}
# OK to keep the paren tight
return 1;
}
sub starting_one_line_block {
# after seeing an opening curly brace, look for the closing brace and see
# if the entire block will fit on a line. This routine is not always right
# so a check is made later (at the closing brace) to make sure we really
# have a one-line block. We have to do this preliminary check, though,
# because otherwise we would always break at a semicolon within a one-line
# block if the block contains multiple statements.
my ( $self, $Kj, $K_last_nonblank, $K_last ) = @_;
my $rbreak_container = $self->[_rbreak_container_];
my $rshort_nested = $self->[_rshort_nested_];
my $rLL = $self->[_rLL_];
my $K_opening_container = $self->[_K_opening_container_];
# kill any current block - we can only go 1 deep
destroy_one_line_block();
# return value:
# 1=distance from start of block to opening brace exceeds line length
# 0=otherwise
my $i_start = 0;
# This routine should not have been called if there are no tokens in the
# 'to_go' arrays of previously stored tokens. A previous call to
# 'store_token_to_go' should have stored an opening brace. An error here
# indicates that a programming change may have caused a flush operation to
# clean out the previously stored tokens.
if ( !defined($max_index_to_go) || $max_index_to_go < 0 ) {
Fault("program bug: store_token_to_go called incorrectly\n");
}
# return if block should be broken
my $type_sequence = $rLL->[$Kj]->[_TYPE_SEQUENCE_];
if ( $rbreak_container->{$type_sequence} ) {
return 0;
}
my $ris_bli_container = $self->[_ris_bli_container_];
my $is_bli = $ris_bli_container->{$type_sequence};
my $block_type = $rLL->[$Kj]->[_BLOCK_TYPE_];
my $index_max_forced_break = get_index_max_forced_break();
my $previous_nonblank_token = '';
my $i_last_nonblank = -1;
if ( defined($K_last_nonblank) ) {
$i_last_nonblank = $K_last_nonblank - $K_to_go[0];
if ( $i_last_nonblank >= 0 ) {
$previous_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
}
}
# find the starting keyword for this block (such as 'if', 'else', ...)
if ( $max_index_to_go == 0
|| $block_type =~ /^[\{\}\;\:]$/
|| $block_type =~ /^package/ )
{
$i_start = $max_index_to_go;
}
# the previous nonblank token should start these block types
elsif (
$i_last_nonblank >= 0
&& ( $previous_nonblank_token eq $block_type
|| $block_type =~ /$ANYSUB_PATTERN/
|| $block_type =~ /\(\)/ )
)
{
$i_start = $i_last_nonblank;
# For signatures and extended syntax ...
# If this brace follows a parenthesized list, we should look back to
# find the keyword before the opening paren because otherwise we might
# form a one line block which stays intack, and cause the parenthesized
# expression to break open. That looks bad.
if ( $tokens_to_go[$i_start] eq ')' ) {
# Find the opening paren
my $K_start = $K_to_go[$i_start];
return 0 unless defined($K_start);
my $seqno = $type_sequence_to_go[$i_start];
return 0 unless ($seqno);
my $K_opening = $K_opening_container->{$seqno};
return 0 unless defined($K_opening);
my $i_opening = $i_start + ( $K_opening - $K_start );
# give up if not on this line
return 0 unless ( $i_opening >= 0 );
$i_start = $i_opening; ##$index_max_forced_break + 1;
# go back one token before the opening paren
if ( $i_start > 0 ) { $i_start-- }
if ( $types_to_go[$i_start] eq 'b' && $i_start > 0 ) { $i_start--; }
my $lev = $levels_to_go[$i_start];
if ( $lev > $rLL->[$Kj]->[_LEVEL_] ) { return 0 }
}
}
elsif ( $previous_nonblank_token eq ')' ) {
# For something like "if (xxx) {", the keyword "if" will be
# just after the most recent break. This will be 0 unless
# we have just killed a one-line block and are starting another.
# (doif.t)
# Note: cannot use inext_index_to_go[] here because that array
# is still being constructed.
$i_start = $index_max_forced_break + 1;
if ( $types_to_go[$i_start] eq 'b' ) {
$i_start++;
}
# Patch to avoid breaking short blocks defined with extended_syntax:
# Strip off any trailing () which was added in the parser to mark
# the opening keyword. For example, in the following
# create( TypeFoo $e) {$bubba}
# the blocktype would be marked as create()
my $stripped_block_type = $block_type;
$stripped_block_type =~ s/\(\)$//;
unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
return 0;
}
}
# patch for SWITCH/CASE to retain one-line case/when blocks
elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
# Note: cannot use inext_index_to_go[] here because that array
# is still being constructed.
$i_start = $index_max_forced_break + 1;
if ( $types_to_go[$i_start] eq 'b' ) {
$i_start++;
}
unless ( $tokens_to_go[$i_start] eq $block_type ) {
return 0;
}
}
else {
return 1;
}
my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
# see if length is too long to even start
if ( $pos > $maximum_line_length[ $levels_to_go[$i_start] ] ) {
return 1;
}
foreach my $Ki ( $Kj + 1 .. $K_last ) {
# old whitespace could be arbitrarily large, so don't use it
if ( $rLL->[$Ki]->[_TYPE_] eq 'b' ) { $pos += 1 }
else { $pos += $rLL->[$Ki]->[_TOKEN_LENGTH_] }
# ignore some small blocks
my $type_sequence = $rLL->[$Ki]->[_TYPE_SEQUENCE_];
my $nobreak = $rshort_nested->{$type_sequence};
# Return false result if we exceed the maximum line length,
if ( $pos > $maximum_line_length[ $levels_to_go[$i_start] ] ) {
return 0;
}
# keep going for non-containers
elsif ( !$type_sequence ) {
}
# return if we encounter another opening brace before finding the
# closing brace.
elsif ($rLL->[$Ki]->[_TOKEN_] eq '{'
&& $rLL->[$Ki]->[_TYPE_] eq '{'
&& $rLL->[$Ki]->[_BLOCK_TYPE_]
&& !$nobreak )
{
return 0;
}
# if we find our closing brace..
elsif ($rLL->[$Ki]->[_TOKEN_] eq '}'
&& $rLL->[$Ki]->[_TYPE_] eq '}'
&& $rLL->[$Ki]->[_BLOCK_TYPE_]
&& !$nobreak )
{
# be sure any trailing comment also fits on the line
my $Ki_nonblank = $Ki;
if ( $Ki_nonblank < $K_last ) {
$Ki_nonblank++;
if ( $rLL->[$Ki_nonblank]->[_TYPE_] eq 'b'
&& $Ki_nonblank < $K_last )
{
$Ki_nonblank++;
}
}
# Patch for one-line sort/map/grep/eval blocks with side comments:
# We will ignore the side comment length for sort/map/grep/eval
# because this can lead to statements which change every time
# perltidy is run. Here is an example from Denis Moskowitz which
# oscillates between these two states without this patch:
## --------
## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
## @baz;
##
## grep {
## $_->foo ne 'bar'
## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
## @baz;
## --------
# When the first line is input it gets broken apart by the main
# line break logic in sub process_line_of_CODE.
# When the second line is input it gets recombined by
# process_line_of_CODE and passed to the output routines. The
# output routines (set_continuation_breaks) do not break it apart
# because the bond strengths are set to the highest possible value
# for grep/map/eval/sort blocks, so the first version gets output.
# It would be possible to fix this by changing bond strengths,
# but they are high to prevent errors in older versions of perl.
if ( $Ki < $K_last
&& $rLL->[$Ki_nonblank]->[_TYPE_] eq '#'
&& !$is_sort_map_grep{$block_type} )
{
$pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_];
if ( $Ki_nonblank > $Ki + 1 ) {
# source whitespace could be anything, assume
# at least one space before the hash on output
if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) {
$pos += 1;
}
else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] }
}
if ( $pos >= $maximum_line_length[ $levels_to_go[$i_start] ] ) {
return 0;
}
}
# ok, it's a one-line block
create_one_line_block( $i_start, 20 );
return 0;
}
# just keep going for other characters
else {
}
}
# We haven't hit the closing brace, but there is still space. So the
# question here is, should we keep going to look at more lines in hopes of
# forming a new one-line block, or should we stop right now. The problem
# with continuing is that we will not be able to honor breaks before the
# opening brace if we continue.
# Typically we will want to keep trying to make one-line blocks for things
# like sort/map/grep/eval. But it is not always a good idea to make as
# many one-line blocks as possible, so other types are not done. The user
# can always use -mangle.
# If we want to keep going, we will create a new one-line block.
# The blocks which we can keep going are in a hash, but we never want
# to continue if we are at a '-bli' block.
if ( $want_one_line_block{$block_type} && !$is_bli ) {
create_one_line_block( $i_start, 1 );
}
return 0;
}
sub unstore_token_to_go {
# remove most recent token from output stream
my $self = shift;
if ( $max_index_to_go > 0 ) {
$max_index_to_go--;
}
else {
$max_index_to_go = UNDEFINED_INDEX;
}
return;
}
sub compare_indentation_levels {
# Check to see if output line tabbing agrees with input line
# this can be very useful for debugging a script which has an extra
# or missing brace.
my ( $self, $K_first, $guessed_indentation_level, $line_number ) = @_;
return unless ( defined($K_first) );
my $rLL = $self->[_rLL_];
my $structural_indentation_level = $rLL->[$K_first]->[_LEVEL_];
my $radjusted_levels = $self->[_radjusted_levels_];
if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
$structural_indentation_level = $radjusted_levels->[$K_first];
}
my $is_closing_block = $rLL->[$K_first]->[_TYPE_] eq '}'
&& $rLL->[$K_first]->[_BLOCK_TYPE_];
if ( $guessed_indentation_level ne $structural_indentation_level ) {
$self->[_last_tabbing_disagreement_] = $line_number;
if ($is_closing_block) {
if ( !$self->[_in_brace_tabbing_disagreement_] ) {
$self->[_in_brace_tabbing_disagreement_] = $line_number;
}
if ( !$self->[_first_brace_tabbing_disagreement_] ) {
$self->[_first_brace_tabbing_disagreement_] = $line_number;
}
}
if ( !$self->[_in_tabbing_disagreement_] ) {
$self->[_tabbing_disagreement_count_]++;
if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
write_logfile_entry(
"Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
);
}
$self->[_in_tabbing_disagreement_] = $line_number;
$self->[_first_tabbing_disagreement_] = $line_number
unless ( $self->[_first_tabbing_disagreement_] );
}
}
else {
$self->[_in_brace_tabbing_disagreement_] = 0 if ($is_closing_block);
my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
if ($in_tabbing_disagreement) {
if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
write_logfile_entry(
"End indentation disagreement from input line $in_tabbing_disagreement\n"
);
if ( $self->[_tabbing_disagreement_count_] == MAX_NAG_MESSAGES )
{
write_logfile_entry(
"No further tabbing disagreements will be noted\n");
}
}
$self->[_in_tabbing_disagreement_] = 0;
}
}
return;
}
###################################################
# CODE SECTION 8: Utilities for setting breakpoints
###################################################
{ ## begin closure set_forced_breakpoint
my $forced_breakpoint_count;
my $forced_breakpoint_undo_count;
my @forced_breakpoint_undo_stack;
my $index_max_forced_break;
# Break before or after certain tokens based on user settings
my %break_before_or_after_token;
BEGIN {
my @q = qw( = . : ? and or xor && || );
push @q, ',';
@break_before_or_after_token{@q} = (1) x scalar(@q);
}
sub initialize_forced_breakpoint_vars {
$forced_breakpoint_count = 0;
$index_max_forced_break = UNDEFINED_INDEX;
$forced_breakpoint_undo_count = 0;
@forced_breakpoint_undo_stack = ();
return;
}
sub get_forced_breakpoint_count {
return $forced_breakpoint_count;
}
sub get_forced_breakpoint_undo_count {
return $forced_breakpoint_undo_count;
}
sub get_index_max_forced_break {
return $index_max_forced_break;
}
sub set_fake_breakpoint {
# Just bump up the breakpoint count as a signal that there are breaks.
# This is useful if we have breaks but may want to postpone deciding
# where to make them.
$forced_breakpoint_count++;
return;
}
use constant DEBUG_FORCE => 0;
sub set_forced_breakpoint {
my ( $self, $i ) = @_;
return unless defined $i && $i >= 0;
# no breaks between welded tokens
return if ( $self->weld_len_right_to_go($i) );
my $token = $tokens_to_go[$i];
# For certain tokens, use user settings to decide if we break before or
# after it
# qw( = . : ? and or xor && || )
if ( $break_before_or_after_token{$token} ) {
if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
}
# breaks are forced before 'if' and 'unless'
elsif ( $is_if_unless{$token} ) { $i-- }
if ( $i >= 0 && $i <= $max_index_to_go ) {
my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
DEBUG_FORCE && do {
my ( $a, $b, $c ) = caller();
print STDOUT
"FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
};
######################################################################
# NOTE: if we call set_closing_breakpoint below it will then call
# this routing back. So there is the possibility of an infinite
# loop if a programming error is made. As a precaution, I have
# added a check on the forced_breakpoint flag, so that we won't
# keep trying to set it. That will give additional protection
# against a loop.
######################################################################
if ( $i_nonblank >= 0
&& $nobreak_to_go[$i_nonblank] == 0
&& !$forced_breakpoint_to_go[$i_nonblank] )
{
$forced_breakpoint_to_go[$i_nonblank] = 1;
if ( $i_nonblank > $index_max_forced_break ) {
$index_max_forced_break = $i_nonblank;
}
$forced_breakpoint_count++;
$forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ]
= $i_nonblank;
# if we break at an opening container..break at the closing
if ( $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } )
{
$self->set_closing_breakpoint($i_nonblank);
}
}
}
return;
}
sub clear_breakpoint_undo_stack {
my ($self) = @_;
$forced_breakpoint_undo_count = 0;
return;
}
use constant DEBUG_UNDOBP => 0;
sub undo_forced_breakpoint_stack {
my ( $self, $i_start ) = @_;
# Given $i_start, a non-negative index the 'undo stack' of breakpoints,
# remove all breakpoints from the top of the 'undo stack' down to and
# including index $i_start.
# The 'undo stack' is a stack of all breakpoints made for a batch of
# code.
if ( $i_start < 0 ) {
$i_start = 0;
my ( $a, $b, $c ) = caller();
# Bad call, can only be due to a recent programming change.
# Better stop here.
Fault(
"Program Bug: undo_forced_breakpoint_stack from $a $c has bad i=$i_start "
);
}
while ( $forced_breakpoint_undo_count > $i_start ) {
my $i =
$forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
if ( $i >= 0 && $i <= $max_index_to_go ) {
$forced_breakpoint_to_go[$i] = 0;
$forced_breakpoint_count--;
DEBUG_UNDOBP && do {
my ( $a, $b, $c ) = caller();
print STDOUT
"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
};
}
# shouldn't happen, but not a critical error
else {
DEBUG_UNDOBP && do {
my ( $a, $b, $c ) = caller();
print STDOUT
"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
};
}
}
return;
}
} ## end closure set_forced_breakpoint
{ ## begin closure set_closing_breakpoint
my %postponed_breakpoint;
sub initialize_postponed_breakpoint {
%postponed_breakpoint = ();
return;
}
sub has_postponed_breakpoint {
my ($seqno) = @_;
return $postponed_breakpoint{$seqno};
}
sub set_closing_breakpoint {
# set a breakpoint at a matching closing token
my ( $self, $i_break ) = @_;
if ( $mate_index_to_go[$i_break] >= 0 ) {
# CAUTION: infinite recursion possible here:
# set_closing_breakpoint calls set_forced_breakpoint, and
# set_forced_breakpoint call set_closing_breakpoint
# ( test files attrib.t, BasicLyx.pm.html).
# Don't reduce the '2' in the statement below
if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
# break before } ] and ), but sub set_forced_breakpoint will decide
# to break before or after a ? and :
my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
$self->set_forced_breakpoint(
$mate_index_to_go[$i_break] - $inc );
}
}
else {
my $type_sequence = $type_sequence_to_go[$i_break];
if ($type_sequence) {
my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
$postponed_breakpoint{$type_sequence} = 1;
}
}
return;
}
} ## end closure set_closing_breakpoint
#########################################
# CODE SECTION 9: Process batches of code
#########################################
{ ## begin closure grind_batch_of_CODE
# The routines in this closure begin the processing of a 'batch' of code.
# A variable to keep track of consecutive nonblank lines so that we can
# insert occasional blanks
my @nonblank_lines_at_depth;
# A variable to remember maximum size of previous batches; this is needed
# by the logical padding routine
my $peak_batch_size;
my $batch_count;
sub initialize_grind_batch_of_CODE {
@nonblank_lines_at_depth = ();
$peak_batch_size = 0;
$batch_count = 0;
return;
}
# sub grind_batch_of_CODE receives sections of code which are the longest
# possible lines without a break. In other words, it receives what is left
# after applying all breaks forced by blank lines, block comments, side
# comments, pod text, and structural braces. Its job is to break this code
# down into smaller pieces, if necessary, which fit within the maximum
# allowed line length. Then it sends the resulting lines of code on down
# the pipeline to the VerticalAligner package, breaking the code into
# continuation lines as necessary. The batch of tokens are in the "to_go"
# arrays. The name 'grind' is slightly suggestive of a machine continually
# breaking down long lines of code, but mainly it is unique and easy to
# remember and find with an editor search.
# The two routines 'process_line_of_CODE' and 'grind_batch_of_CODE' work
# together in the following way:
# - 'process_line_of_CODE' receives the original INPUT lines one-by-one and
# combines them into the largest sequences of tokens which might form a new
# line.
# - 'grind_batch_of_CODE' determines which tokens will form the OUTPUT
# lines.
# So sub 'process_line_of_CODE' builds up the longest possible continouus
# sequences of tokens, regardless of line length, and then
# grind_batch_of_CODE breaks these sequences back down into the new output
# lines.
# Sub 'grind_batch_of_CODE' ships its output lines to the vertical aligner.
use constant DEBUG_GRIND => 0;
sub grind_batch_of_CODE {
my ($self) = @_;
my $file_writer_object = $self->[_file_writer_object_];
my $this_batch = $self->[_this_batch_];
$batch_count++;
my $starting_in_quote = $this_batch->[_starting_in_quote_];
my $ending_in_quote = $this_batch->[_ending_in_quote_];
my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
my $rK_to_go = $this_batch->[_rK_to_go_];
my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
my $rLL = $self->[_rLL_];
# This routine is only called from sub flush_batch_of_code, so that
# routine is a better spot for debugging.
DEBUG_GRIND && do {
my $token = my $type = "";
if ( $max_index_to_go >= 0 ) {
$token = $tokens_to_go[$max_index_to_go];
$type = $types_to_go[$max_index_to_go];
}
my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
print STDERR <<EOM;
grind got batch number $batch_count with $max_index_to_go tokens, last type '$type' tok='$token', text:
$output_str
EOM
};
# Safety check - shouldn't happen. The calling routine must not call
# here unless there are tokens in the batch to be processed. This
# fault can only be triggered by a recent programming change.
if ( $max_index_to_go < 0 ) {
Fault(
"sub grind incorrectly called with max_index_to_go=$max_index_to_go"
);
}
# Initialize some batch variables
my $comma_count_in_batch = 0;
my $ilast_nonblank = -1;
my @colon_list;
my @ix_seqno_controlling_ci;
for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
$bond_strength_to_go[$i] = 0;
$iprev_to_go[$i] = $ilast_nonblank;
$inext_to_go[$i] = $i + 1;
my $type = $types_to_go[$i];
if ( $type ne 'b' ) {
if ( $ilast_nonblank >= 0 ) {
$inext_to_go[$ilast_nonblank] = $i;
# just in case there are two blanks in a row (shouldn't
# happen)
if ( ++$ilast_nonblank < $i ) {
$inext_to_go[$ilast_nonblank] = $i;
}
}
$ilast_nonblank = $i;
# This is a good spot to efficiently collect information needed
# for breaking lines...
if ( $type eq ',' ) { $comma_count_in_batch++; }
# gather info needed by sub set_continuation_breaks
my $seqno = $type_sequence_to_go[$i];
if ($seqno) {
# remember indexes of any tokens controlling xci
# in this batch. This list is needed by sub undo_ci.
if ( $ris_seqno_controlling_ci->{$seqno} ) {
push @ix_seqno_controlling_ci, $i;
}
if ( $type eq '?' ) {
push @colon_list, $type;
}
elsif ( $type eq ':' ) {
push @colon_list, $type;
}
}
}
}
my $comma_arrow_count_contained =
$self->match_opening_and_closing_tokens();
# tell the -lp option we are outputting a batch so it can close
# any unfinished items in its stack
finish_lp_batch();
# If this line ends in a code block brace, set breaks at any
# previous closing code block braces to breakup a chain of code
# blocks on one line. This is very rare but can happen for
# user-defined subs. For example we might be looking at this:
# BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
my $saw_good_break = 0; # flag to force breaks even if short line
if (
# looking for opening or closing block brace
$block_type_to_go[$max_index_to_go]
# never any good breaks if just one token
&& $max_index_to_go > 0
# but not one of these which are never duplicated on a line:
# until|while|for|if|elsif|else
&& !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go]
}
)
{
my $lev = $nesting_depth_to_go[$max_index_to_go];
# Walk backwards from the end and
# set break at any closing block braces at the same level.
# But quit if we are not in a chain of blocks.
for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
next if ( $levels_to_go[$i] > $lev ); # skip past higher level
if ( $block_type_to_go[$i] ) {
if ( $tokens_to_go[$i] eq '}' ) {
$self->set_forced_breakpoint($i);
$saw_good_break = 1;
}
}
# quit if we see anything besides words, function, blanks
# at this level
elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
}
}
my $imin = 0;
my $imax = $max_index_to_go;
# trim any blank tokens
if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
# anything left to write?
if ( $imin <= $imax ) {
my $last_line_leading_type = $self->[_last_line_leading_type_];
my $last_line_leading_level = $self->[_last_line_leading_level_];
my $last_last_line_leading_level =
$self->[_last_last_line_leading_level_];
# add a blank line before certain key types but not after a comment
if ( $last_line_leading_type ne '#' ) {
my $want_blank = 0;
my $leading_token = $tokens_to_go[$imin];
my $leading_type = $types_to_go[$imin];
# blank lines before subs except declarations and one-liners
if ( $leading_type eq 'i' ) {
if ( $leading_token =~ /$SUB_PATTERN/ ) {
$want_blank = $rOpts->{'blank-lines-before-subs'}
if ( terminal_type_i( $imin, $imax ) !~ /^[\;\}]$/ );
}
# break before all package declarations
elsif ( substr( $leading_token, 0, 8 ) eq 'package ' ) {
$want_blank = $rOpts->{'blank-lines-before-packages'};
}
}
# break before certain key blocks except one-liners
if ( $leading_type eq 'k' ) {
if ( $leading_token eq 'BEGIN' || $leading_token eq 'END' )
{
$want_blank = $rOpts->{'blank-lines-before-subs'}
if ( terminal_type_i( $imin, $imax ) ne '}' );
}
# Break before certain block types if we haven't had a
# break at this level for a while. This is the
# difficult decision..
elsif ($last_line_leading_type ne 'b'
&& $leading_token =~
/^(unless|if|while|until|for|foreach)$/ )
{
my $lc =
$nonblank_lines_at_depth[$last_line_leading_level];
if ( !defined($lc) ) { $lc = 0 }
# patch for RT #128216: no blank line inserted at a level
# change
if ( $levels_to_go[$imin] != $last_line_leading_level )
{
$lc = 0;
}
$want_blank =
$rOpts->{'blanks-before-blocks'}
&& $lc >= $rOpts->{'long-block-line-count'}
&& $self->consecutive_nonblank_lines() >=
$rOpts->{'long-block-line-count'}
&& terminal_type_i( $imin, $imax ) ne '}';
}
}
# Check for blank lines wanted before a closing brace
if ( $leading_token eq '}' ) {
if ( $rOpts->{'blank-lines-before-closing-block'}
&& $block_type_to_go[$imin]
&& $block_type_to_go[$imin] =~
/$blank_lines_before_closing_block_pattern/ )
{
my $nblanks =
$rOpts->{'blank-lines-before-closing-block'};
if ( $nblanks > $want_blank ) {
$want_blank = $nblanks;
}
}
}
if ($want_blank) {
# future: send blank line down normal path to VerticalAligner
$self->flush_vertical_aligner();
$file_writer_object->require_blank_code_lines($want_blank);
}
}
# update blank line variables and count number of consecutive
# non-blank, non-comment lines at this level
$last_last_line_leading_level = $last_line_leading_level;
$last_line_leading_level = $levels_to_go[$imin];
if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
$last_line_leading_type = $types_to_go[$imin];
if ( $last_line_leading_level == $last_last_line_leading_level
&& $last_line_leading_type ne 'b'
&& $last_line_leading_type ne '#'
&& defined( $nonblank_lines_at_depth[$last_line_leading_level] )
)
{
$nonblank_lines_at_depth[$last_line_leading_level]++;
}
else {
$nonblank_lines_at_depth[$last_line_leading_level] = 1;
}
$self->[_last_line_leading_type_] = $last_line_leading_type;
$self->[_last_line_leading_level_] = $last_line_leading_level;
$self->[_last_last_line_leading_level_] =
$last_last_line_leading_level;
# Flag to remember if we called sub 'pad_array_to_go'.
# Some routines (scan_list(), set_continuation_breaks() ) need some
# extra tokens added at the end of the batch. Most batches do not
# use these routines, so we will avoid calling 'pad_array_to_go'
# unless it is needed.
my $called_pad_array_to_go;
# set all forced breakpoints for good list formatting
my $is_long_line = $max_index_to_go > 0
&& $self->excess_line_length( $imin, $max_index_to_go ) > 0;
my $old_line_count_in_batch =
$max_index_to_go == 0
? 1
: $self->get_old_line_count( $K_to_go[0],
$K_to_go[$max_index_to_go] );
if (
$is_long_line
|| $old_line_count_in_batch > 1
# must always call scan_list() with unbalanced batches because
# it is maintaining some stacks
|| is_unbalanced_batch()
# call scan_list if we might want to break at commas
|| (
$comma_count_in_batch
&& ( $rOpts_maximum_fields_per_table > 0
|| $rOpts_comma_arrow_breakpoints == 0 )
)
# call scan_list if user may want to break open some one-line
# hash references
|| ( $comma_arrow_count_contained
&& $rOpts_comma_arrow_breakpoints != 3 )
)
{
# add a couple of extra terminal blank tokens
$self->pad_array_to_go();
$called_pad_array_to_go = 1;
## This caused problems in one version of perl for unknown reasons:
## $saw_good_break ||= scan_list();
my $sgb = $self->scan_list($is_long_line);
$saw_good_break ||= $sgb;
}
# let $ri_first and $ri_last be references to lists of
# first and last tokens of line fragments to output..
my ( $ri_first, $ri_last );
# write a single line if..
if (
# we aren't allowed to add any newlines
!$rOpts_add_newlines
# or,
|| (
# this line is 'short'
!$is_long_line
# and we didn't see a good breakpoint
&& !$saw_good_break
# and we don't already have an interior breakpoint
&& !get_forced_breakpoint_count()
)
)
{
@{$ri_first} = ($imin);
@{$ri_last} = ($imax);
}
# otherwise use multiple lines
else {
# add a couple of extra terminal blank tokens if we haven't
# already done so
$self->pad_array_to_go() unless ($called_pad_array_to_go);
( $ri_first, $ri_last ) =
$self->set_continuation_breaks( $saw_good_break,
\@colon_list );
$self->break_all_chain_tokens( $ri_first, $ri_last );
$self->break_equals( $ri_first, $ri_last );
# now we do a correction step to clean this up a bit
# (The only time we would not do this is for debugging)
if ($rOpts_recombine) {
( $ri_first, $ri_last ) =
$self->recombine_breakpoints( $ri_first, $ri_last );
}
$self->insert_final_ternary_breaks( $ri_first, $ri_last )
if (@colon_list);
}
$self->insert_breaks_before_list_opening_containers( $ri_first,
$ri_last )
if ( %break_before_container_types && $max_index_to_go > 0 );
# do corrector step if -lp option is used
my $do_not_pad = 0;
if ($rOpts_line_up_parentheses) {
$do_not_pad =
$self->correct_lp_indentation( $ri_first, $ri_last );
}
# unmask any invisible line-ending semicolon. They were placed by
# sub respace_tokens but we only now know if we actually need them.
if ( !$tokens_to_go[$imax] && $types_to_go[$imax] eq ';' ) {
my $i = $imax;
my $tok = ';';
my $tok_len = 1;
if ( $want_left_space{';'} != WS_NO ) {
$tok = ' ;';
$tok_len = 2;
}
$tokens_to_go[$i] = $tok;
$token_lengths_to_go[$i] = $tok_len;
my $KK = $K_to_go[$i];
$rLL->[$KK]->[_TOKEN_] = $tok;
$rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
my $line_number = 1 + $self->get_old_line_index($KK);
$self->note_added_semicolon($line_number);
}
if ( $rOpts_one_line_block_semicolons == 0 ) {
$self->delete_one_line_semicolons( $ri_first, $ri_last );
}
# The line breaks for this batch of code have been finalized. Now we
# can to package the results for further processing. We will switch
# from the local '_to_go' buffer arrays (i-index) back to the global
# token arrays (K-index) at this point.
my $rlines_K;
my $index_error;
for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) {
my $ibeg = $ri_first->[$n];
my $Kbeg = $K_to_go[$ibeg];
my $iend = $ri_last->[$n];
my $Kend = $K_to_go[$iend];
if ( $iend - $ibeg != $Kend - $Kbeg ) {
$index_error = $n unless defined($index_error);
}
push @{$rlines_K},
[ $Kbeg, $Kend, $forced_breakpoint_to_go[$iend] ];
}
# Check correctness of the mapping between the i and K token
# indexes. (The K index is the global index, the i index is the
# batch index). It is important to do this check because an error
# would be disasterous. The reason that we should never see an
# index error here is that sub 'store_token_to_go' has a check to
# make sure that the indexes in batches remain continuous. Since
# sub 'store_token_to_go' controls feeding tokens into batches, so
# no index discrepancies should occur unless a recent programming
# change has introduced a bug.
if ( defined($index_error) ) {
# Temporary debug code - should never get here
for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) {
my $ibeg = $ri_first->[$n];
my $Kbeg = $K_to_go[$ibeg];
my $iend = $ri_last->[$n];
my $Kend = $K_to_go[$iend];
my $idiff = $iend - $ibeg;
my $Kdiff = $Kend - $Kbeg;
print STDERR <<EOM;
line $n, irange $ibeg-$iend = $idiff, Krange $Kbeg-$Kend = $Kdiff;
EOM
}
Fault(
"Index error at line $index_error; i and K ranges differ");
}
$this_batch->[_rlines_K_] = $rlines_K;
$this_batch->[_ibeg0_] = $ri_first->[0];
$this_batch->[_peak_batch_size_] = $peak_batch_size;
$this_batch->[_do_not_pad_] = $do_not_pad;
$this_batch->[_batch_count_] = $batch_count;
$this_batch->[_rix_seqno_controlling_ci_] =
\@ix_seqno_controlling_ci;
$self->send_lines_to_vertical_aligner();
# Insert any requested blank lines after an opening brace. We have
# to skip back before any side comment to find the terminal token
my $iterm;
for ( $iterm = $imax ; $iterm >= $imin ; $iterm-- ) {
next if $types_to_go[$iterm] eq '#';
next if $types_to_go[$iterm] eq 'b';
last;
}
# write requested number of blank lines after an opening block brace
if ( $iterm >= $imin && $types_to_go[$iterm] eq '{' ) {
if ( $rOpts->{'blank-lines-after-opening-block'}
&& $block_type_to_go[$iterm]
&& $block_type_to_go[$iterm] =~
/$blank_lines_after_opening_block_pattern/ )
{
my $nblanks = $rOpts->{'blank-lines-after-opening-block'};
$self->flush_vertical_aligner();
$file_writer_object->require_blank_code_lines($nblanks);
}
}
}
# Remember the largest batch size processed. This is needed by the
# logical padding routine to avoid padding the first nonblank token
if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) {
$peak_batch_size = $max_index_to_go;
}
return;
}
} ## end closure grind_batch_of_CODE
{ ## begin closure match_opening_and_closing_tokens
# closure to keep track of unbalanced containers.
# arrays shared by the routines in this block:
my %saved_opening_indentation;
my @unmatched_opening_indexes_in_this_batch;
my @unmatched_closing_indexes_in_this_batch;
my %comma_arrow_count;
sub initialize_saved_opening_indentation {
%saved_opening_indentation = ();
return;
}
sub is_unbalanced_batch {
return @unmatched_opening_indexes_in_this_batch +
@unmatched_closing_indexes_in_this_batch;
}
sub match_opening_and_closing_tokens {
# Match up indexes of opening and closing braces, etc, in this batch.
# This has to be done after all tokens are stored because unstoring
# of tokens would otherwise cause trouble.
my ($self) = @_;
@unmatched_opening_indexes_in_this_batch = ();
@unmatched_closing_indexes_in_this_batch = ();
%comma_arrow_count = ();
my $comma_arrow_count_contained = 0;
foreach my $i ( 0 .. $max_index_to_go ) {
if ( $type_sequence_to_go[$i] ) {
my $token = $tokens_to_go[$i];
if ( $is_opening_sequence_token{$token} ) {
push @unmatched_opening_indexes_in_this_batch, $i;
}
elsif ( $is_closing_sequence_token{$token} ) {
my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
if ( defined($i_mate) && $i_mate >= 0 ) {
if ( $type_sequence_to_go[$i_mate] ==
$type_sequence_to_go[$i] )
{
$mate_index_to_go[$i] = $i_mate;
$mate_index_to_go[$i_mate] = $i;
my $seqno = $type_sequence_to_go[$i];
if ( $comma_arrow_count{$seqno} ) {
$comma_arrow_count_contained +=
$comma_arrow_count{$seqno};
}
}
else {
push @unmatched_opening_indexes_in_this_batch,
$i_mate;
push @unmatched_closing_indexes_in_this_batch, $i;
}
}
else {
push @unmatched_closing_indexes_in_this_batch, $i;
}
}
}
elsif ( $tokens_to_go[$i] eq '=>' ) {
if (@unmatched_opening_indexes_in_this_batch) {
my $j = $unmatched_opening_indexes_in_this_batch[-1];
my $seqno = $type_sequence_to_go[$j];
$comma_arrow_count{$seqno}++;
}
}
}
return $comma_arrow_count_contained;
}
sub save_opening_indentation {
# This should be called after each batch of tokens is output. It
# saves indentations of lines of all unmatched opening tokens.
# These will be used by sub get_opening_indentation.
my ( $self, $ri_first, $ri_last, $rindentation_list ) = @_;
# QW INDENTATION PATCH 1:
# Also save indentation for multiline qw quotes
my @i_qw;
my $seqno_qw_opening;
if ( $types_to_go[$max_index_to_go] eq 'q' ) {
my $KK = $K_to_go[$max_index_to_go];
$seqno_qw_opening =
$self->[_rstarting_multiline_qw_seqno_by_K_]->{$KK};
if ($seqno_qw_opening) {
push @i_qw, $max_index_to_go;
}
}
# we need to save indentations of any unmatched opening tokens
# in this batch because we may need them in a subsequent batch.
foreach ( @unmatched_opening_indexes_in_this_batch, @i_qw ) {
my $seqno = $type_sequence_to_go[$_];
if ( !$seqno ) {
if ( $seqno_qw_opening && $_ == $max_index_to_go ) {
$seqno = $seqno_qw_opening;
}
else {
# shouldn't happen
$seqno = 'UNKNOWN';
}
}
$saved_opening_indentation{$seqno} = [
lookup_opening_indentation(
$_, $ri_first, $ri_last, $rindentation_list
)
];
}
return;
}
sub get_saved_opening_indentation {
my ($seqno) = @_;
my ( $indent, $offset, $is_leading, $exists ) = ( 0, 0, 0, 0 );
if ($seqno) {
if ( $saved_opening_indentation{$seqno} ) {
( $indent, $offset, $is_leading ) =
@{ $saved_opening_indentation{$seqno} };
$exists = 1;
}
}
# some kind of serious error it doesn't exist
# (example is badfile.t)
return ( $indent, $offset, $is_leading, $exists );
}
} ## end closure match_opening_and_closing_tokens
sub lookup_opening_indentation {
# get the indentation of the line in the current output batch
# which output a selected opening token
#
# given:
# $i_opening - index of an opening token in the current output batch
# whose line indentation we need
# $ri_first - reference to list of the first index $i for each output
# line in this batch
# $ri_last - reference to list of the last index $i for each output line
# in this batch
# $rindentation_list - reference to a list containing the indentation
# used for each line. (NOTE: the first slot in
# this list is the last returned line number, and this is
# followed by the list of indentations).
#
# return
# -the indentation of the line which contained token $i_opening
# -and its offset (number of columns) from the start of the line
my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
if ( !@{$ri_last} ) {
# An error here implies a bug introduced by a recent program change.
# Every batch of code has lines.
Fault("Error in opening_indentation: no lines");
return;
}
my $nline = $rindentation_list->[0]; # line number of previous lookup
# reset line location if necessary
$nline = 0 if ( $i_opening < $ri_start->[$nline] );
# find the correct line
unless ( $i_opening > $ri_last->[-1] ) {
while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
}
# Error - token index is out of bounds - shouldn't happen
# A program bug has been introduced in one of the calling routines.
# We better stop here.
else {
my $i_last_line = $ri_last->[-1];
Fault(<<EOM);
Program bug in call to lookup_opening_indentation - index out of range
called with index i_opening=$i_opening > $i_last_line = max index of last line
This batch has max index = $max_index_to_go,
EOM
report_definite_bug(); # old coding, will not get here
$nline = $#{$ri_last};
}
$rindentation_list->[0] =
$nline; # save line number to start looking next call
my $ibeg = $ri_start->[$nline];
my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
my $is_leading = ( $ibeg == $i_opening );
return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
}
{ ## begin closure terminal_type_i
my %is_sort_map_grep_eval_do;
BEGIN {
my @q = qw(sort map grep eval do);
@is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);
}
sub terminal_type_i {
# returns type of last token on this line (terminal token), as follows:
# returns # for a full-line comment
# returns ' ' for a blank line
# otherwise returns final token type
my ( $ibeg, $iend ) = @_;
# Start at the end and work backwards
my $i = $iend;
my $type_i = $types_to_go[$i];
# Check for side comment
if ( $type_i eq '#' ) {
$i--;
if ( $i < $ibeg ) {
return wantarray ? ( $type_i, $ibeg ) : $type_i;
}
$type_i = $types_to_go[$i];
}
# Skip past a blank
if ( $type_i eq 'b' ) {
$i--;
if ( $i < $ibeg ) {
return wantarray ? ( $type_i, $ibeg ) : $type_i;
}
$type_i = $types_to_go[$i];
}
# Found it..make sure it is a BLOCK termination,
# but hide a terminal } after sort/grep/map because it is not
# necessarily the end of the line. (terminal.t)
my $block_type = $block_type_to_go[$i];
if (
$type_i eq '}'
&& ( !$block_type
|| ( $is_sort_map_grep_eval_do{$block_type} ) )
)
{
$type_i = 'b';
}
return wantarray ? ( $type_i, $i ) : $type_i;
}
} ## end closure terminal_type_i
sub pad_array_to_go {
# To simplify coding in scan_list and set_bond_strengths, it helps to
# create some extra blank tokens at the end of the arrays. We also add
# some undef's to help guard against using invalid data.
my ($self) = @_;
$K_to_go[ $max_index_to_go + 1 ] = undef;
$tokens_to_go[ $max_index_to_go + 1 ] = '';
$tokens_to_go[ $max_index_to_go + 2 ] = '';
$tokens_to_go[ $max_index_to_go + 3 ] = undef;
$types_to_go[ $max_index_to_go + 1 ] = 'b';
$types_to_go[ $max_index_to_go + 2 ] = 'b';
$types_to_go[ $max_index_to_go + 3 ] = undef;
$nesting_depth_to_go[ $max_index_to_go + 2 ] = undef;
$nesting_depth_to_go[ $max_index_to_go + 1 ] =
$nesting_depth_to_go[$max_index_to_go];
# /^[R\}\)\]]$/
if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
# Nesting depths are equivalent to the _SLEVEL_ variable which is
# clipped to be >=0 in sub write_line, so it should not be possible
# to get here unless the code has a bracing error which leaves a
# closing brace with zero nesting depth.
unless ( get_saw_brace_error() ) {
warning(
"Program bug in pad_array_to_go: hit nesting error which should have been caught\n"
);
report_definite_bug();
}
}
else {
$nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
}
}
# /^[L\{\(\[]$/
elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
$nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
}
return;
}
sub break_all_chain_tokens {
# scan the current breakpoints looking for breaks at certain "chain
# operators" (. : && || + etc) which often occur repeatedly in a long
# statement. If we see a break at any one, break at all similar tokens
# within the same container.
#
my ( $self, $ri_left, $ri_right ) = @_;
my %saw_chain_type;
my %left_chain_type;
my %right_chain_type;
my %interior_chain_type;
my $nmax = @{$ri_right} - 1;
# scan the left and right end tokens of all lines
my $count = 0;
for my $n ( 0 .. $nmax ) {
my $il = $ri_left->[$n];
my $ir = $ri_right->[$n];
my $typel = $types_to_go[$il];
my $typer = $types_to_go[$ir];
$typel = '+' if ( $typel eq '-' ); # treat + and - the same
$typer = '+' if ( $typer eq '-' );
$typel = '*' if ( $typel eq '/' ); # treat * and / the same
$typer = '*' if ( $typer eq '/' );
my $tokenl = $tokens_to_go[$il];
my $tokenr = $tokens_to_go[$ir];
if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
next if ( $typel eq '?' );
push @{ $left_chain_type{$typel} }, $il;
$saw_chain_type{$typel} = 1;
$count++;
}
if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
next if ( $typer eq '?' );
push @{ $right_chain_type{$typer} }, $ir;
$saw_chain_type{$typer} = 1;
$count++;
}
}
return unless $count;
# now look for any interior tokens of the same types
$count = 0;
for my $n ( 0 .. $nmax ) {
my $il = $ri_left->[$n];
my $ir = $ri_right->[$n];
foreach my $i ( $il + 1 .. $ir - 1 ) {
my $type = $types_to_go[$i];
$type = '+' if ( $type eq '-' );
$type = '*' if ( $type eq '/' );
if ( $saw_chain_type{$type} ) {
push @{ $interior_chain_type{$type} }, $i;
$count++;
}
}
}
return unless $count;
# now make a list of all new break points
my @insert_list;
# loop over all chain types
foreach my $type ( keys %saw_chain_type ) {
# quit if just ONE continuation line with leading . For example--
# print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
# . $contents;
last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
# loop over all interior chain tokens
foreach my $itest ( @{ $interior_chain_type{$type} } ) {
# loop over all left end tokens of same type
if ( $left_chain_type{$type} ) {
next if $nobreak_to_go[ $itest - 1 ];
foreach my $i ( @{ $left_chain_type{$type} } ) {
next unless $self->in_same_container_i( $i, $itest );
push @insert_list, $itest - 1;
# Break at matching ? if this : is at a different level.
# For example, the ? before $THRf_DEAD in the following
# should get a break if its : gets a break.
#
# my $flags =
# ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
# : ( $_ & 4 ) ? $THRf_R_DETACHED
# : $THRf_R_JOINABLE;
if ( $type eq ':'
&& $levels_to_go[$i] != $levels_to_go[$itest] )
{
my $i_question = $mate_index_to_go[$itest];
if ( $i_question > 0 ) {
push @insert_list, $i_question - 1;
}
}
last;
}
}
# loop over all right end tokens of same type
if ( $right_chain_type{$type} ) {
next if $nobreak_to_go[$itest];
foreach my $i ( @{ $right_chain_type{$type} } ) {
next unless $self->in_same_container_i( $i, $itest );
push @insert_list, $itest;
# break at matching ? if this : is at a different level
if ( $type eq ':'
&& $levels_to_go[$i] != $levels_to_go[$itest] )
{
my $i_question = $mate_index_to_go[$itest];
if ( $i_question >= 0 ) {
push @insert_list, $i_question;
}
}
last;
}
}
}
}
# insert any new break points
if (@insert_list) {
$self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
}
return;
}
sub insert_additional_breaks {
# this routine will add line breaks at requested locations after
# sub set_continuation_breaks has made preliminary breaks.
my ( $self, $ri_break_list, $ri_first, $ri_last ) = @_;
my $i_f;
my $i_l;
my $line_number = 0;
foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
next if ( $nobreak_to_go[$i_break_left] );
$i_f = $ri_first->[$line_number];
$i_l = $ri_last->[$line_number];
while ( $i_break_left >= $i_l ) {
$line_number++;
# shouldn't happen unless caller passes bad indexes
if ( $line_number >= @{$ri_last} ) {
warning(
"Non-fatal program bug: couldn't set break at $i_break_left\n"
);
report_definite_bug();
return;
}
$i_f = $ri_first->[$line_number];
$i_l = $ri_last->[$line_number];
}
# Do not leave a blank at the end of a line; back up if necessary
if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
my $i_break_right = $inext_to_go[$i_break_left];
if ( $i_break_left >= $i_f
&& $i_break_left < $i_l
&& $i_break_right > $i_f
&& $i_break_right <= $i_l )
{
splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
}
}
return;
}
sub in_same_container_i {
# check to see if tokens at i1 and i2 are in the
# same container, and not separated by a comma, ? or :
# This is an interface between the _to_go arrays to the rLL array
my ( $self, $i1, $i2 ) = @_;
return $self->in_same_container_K( $K_to_go[$i1], $K_to_go[$i2] );
}
{ ## begin closure in_same_container_K
my $ris_break_token;
my $ris_comma_token;
BEGIN {
# all cases break on seeing commas at same level
my @q = qw( => );
push @q, ',';
@{$ris_comma_token}{@q} = (1) x scalar(@q);
# Non-ternary text also breaks on seeing any of qw(? : || or )
# Example: we would not want to break at any of these .'s
# : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
push @q, qw( or || ? : );
@{$ris_break_token}{@q} = (1) x scalar(@q);
}
sub in_same_container_K {
# Check to see if tokens at K1 and K2 are in the same container,
# and not separated by certain characters: => , ? : || or
# This version uses the newer $rLL data structure.
my ( $self, $K1, $K2 ) = @_;
if ( $K2 < $K1 ) { ( $K1, $K2 ) = ( $K2, $K1 ) }
my $rLL = $self->[_rLL_];
my $depth_1 = $rLL->[$K1]->[_SLEVEL_];
return if ( $depth_1 < 0 );
return unless ( $rLL->[$K2]->[_SLEVEL_] == $depth_1 );
# Select character set to scan for
my $type_1 = $rLL->[$K1]->[_TYPE_];
my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
# Fast preliminary loop to verify that tokens are in the same container
my $KK = $K1;
while (1) {
$KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
last if !defined($KK);
last if ( $KK >= $K2 );
my $depth_K = $rLL->[$KK]->[_SLEVEL_];
return if ( $depth_K < $depth_1 );
next if ( $depth_K > $depth_1 );
if ( $type_1 ne ':' ) {
my $tok_K = $rLL->[$KK]->[_TOKEN_];
return if ( $tok_K eq '?' || $tok_K eq ':' );
}
}
# Slow loop checking for certain characters
###########################################################
# This is potentially a slow routine and not critical.
# For safety just give up for large differences.
# See test file 'infinite_loop.txt'
###########################################################
return if ( $K2 - $K1 > 200 );
foreach my $K ( $K1 + 1 .. $K2 - 1 ) {
my $depth_K = $rLL->[$K]->[_SLEVEL_];
next if ( $depth_K > $depth_1 );
return if ( $depth_K < $depth_1 ); # redundant, checked above
my $tok = $rLL->[$K]->[_TOKEN_];
return if ( $rbreak->{$tok} );
}
return 1;
}
} ## end closure in_same_container_K
sub break_equals {
# Look for assignment operators that could use a breakpoint.
# For example, in the following snippet
#
# $HOME = $ENV{HOME}
# || $ENV{LOGDIR}
# || $pw[7]
# || die "no home directory for user $<";
#
# we could break at the = to get this, which is a little nicer:
# $HOME =
# $ENV{HOME}
# || $ENV{LOGDIR}
# || $pw[7]
# || die "no home directory for user $<";
#
# The logic here follows the logic in set_logical_padding, which
# will add the padding in the second line to improve alignment.
#
my ( $self, $ri_left, $ri_right ) = @_;
my $nmax = @{$ri_right} - 1;
return unless ( $nmax >= 2 );
# scan the left ends of first two lines
my $tokbeg = "";
my $depth_beg;
for my $n ( 1 .. 2 ) {
my $il = $ri_left->[$n];
my $typel = $types_to_go[$il];
my $tokenl = $tokens_to_go[$il];
my $has_leading_op = ( $tokenl =~ /^\w/ )
? $is_chain_operator{$tokenl} # + - * / : ? && ||
: $is_chain_operator{$typel}; # and, or
return unless ($has_leading_op);
if ( $n > 1 ) {
return
unless ( $tokenl eq $tokbeg
&& $nesting_depth_to_go[$il] eq $depth_beg );
}
$tokbeg = $tokenl;
$depth_beg = $nesting_depth_to_go[$il];
}
# now look for any interior tokens of the same types
my $il = $ri_left->[0];
my $ir = $ri_right->[0];
# now make a list of all new break points
my @insert_list;
for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
my $type = $types_to_go[$i];
if ( $is_assignment{$type}
&& $nesting_depth_to_go[$i] eq $depth_beg )
{
if ( $want_break_before{$type} ) {
push @insert_list, $i - 1;
}
else {
push @insert_list, $i;
}
}
}
# Break after a 'return' followed by a chain of operators
# return ( $^O !~ /win32|dos/i )
# && ( $^O ne 'VMS' )
# && ( $^O ne 'OS2' )
# && ( $^O ne 'MacOS' );
# To give:
# return
# ( $^O !~ /win32|dos/i )
# && ( $^O ne 'VMS' )
# && ( $^O ne 'OS2' )
# && ( $^O ne 'MacOS' );
my $i = 0;
if ( $types_to_go[$i] eq 'k'
&& $tokens_to_go[$i] eq 'return'
&& $ir > $il
&& $nesting_depth_to_go[$i] eq $depth_beg )
{
push @insert_list, $i;
}
return unless (@insert_list);
# One final check...
# scan second and third lines and be sure there are no assignments
# we want to avoid breaking at an = to make something like this:
# unless ( $icon =
# $html_icons{"$type-$state"}
# or $icon = $html_icons{$type}
# or $icon = $html_icons{$state} )
for my $n ( 1 .. 2 ) {
my $il = $ri_left->[$n];
my $ir = $ri_right->[$n];
foreach my $i ( $il + 1 .. $ir ) {
my $type = $types_to_go[$i];
return
if ( $is_assignment{$type}
&& $nesting_depth_to_go[$i] eq $depth_beg );
}
}
# ok, insert any new break point
if (@insert_list) {
$self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
}
return;
}
{ ## begin closure recombine_breakpoints
# This routine is called once per batch to see if it would be better
# to combine some of the lines into which the batch has been broken.
my %is_amp_amp;
my %is_ternary;
my %is_math_op;
my %is_plus_minus;
my %is_mult_div;
BEGIN {
my @q;
@q = qw( && || );
@is_amp_amp{@q} = (1) x scalar(@q);
@q = qw( ? : );
@is_ternary{@q} = (1) x scalar(@q);
@q = qw( + - * / );
@is_math_op{@q} = (1) x scalar(@q);
@q = qw( + - );
@is_plus_minus{@q} = (1) x scalar(@q);
@q = qw( * / );
@is_mult_div{@q} = (1) x scalar(@q);
}
sub Debug_dump_breakpoints {
# Debug routine to dump current breakpoints...not normally called
# We are given indexes to the current lines:
# $ri_beg = ref to array of BEGinning indexes of each line
# $ri_end = ref to array of ENDing indexes of each line
my ( $self, $ri_beg, $ri_end, $msg ) = @_;
print STDERR "----Dumping breakpoints from: $msg----\n";
for my $n ( 0 .. @{$ri_end} - 1 ) {
my $ibeg = $ri_beg->[$n];
my $iend = $ri_end->[$n];
my $text = "";
foreach my $i ( $ibeg .. $iend ) {
$text .= $tokens_to_go[$i];
}
print STDERR "$n ($ibeg:$iend) $text\n";
}
print STDERR "----\n";
return;
}
sub delete_one_line_semicolons {
my ( $self, $ri_beg, $ri_end ) = @_;
my $rLL = $self->[_rLL_];
my $K_opening_container = $self->[_K_opening_container_];
# Walk down the lines of this batch and delete any semicolons
# terminating one-line blocks;
my $nmax = @{$ri_end} - 1;
foreach my $n ( 0 .. $nmax ) {
my $i_beg = $ri_beg->[$n];
my $i_e = $ri_end->[$n];
my $K_beg = $K_to_go[$i_beg];
my $K_e = $K_to_go[$i_e];
my $K_end = $K_e;
my $type_end = $rLL->[$K_end]->[_TYPE_];
if ( $type_end eq '#' ) {
$K_end = $self->K_previous_nonblank($K_end);
if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; }
}
# we are looking for a line ending in closing brace
next
unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' );
# ...and preceded by a semicolon on the same line
my $K_semicolon = $self->K_previous_nonblank($K_end);
next unless defined($K_semicolon);
my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg );
next if ( $i_semicolon <= $i_beg );
next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
# Safety check - shouldn't happen - not critical
# This is not worth throwing a Fault, except in DEVEL_MODE
if ( $types_to_go[$i_semicolon] ne ';' ) {
DEVEL_MODE
&& Fault("unexpected type looking for semicolon");
next;
}
# ... with the corresponding opening brace on the same line
my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_];
my $K_opening = $K_opening_container->{$type_sequence};
next unless ( defined($K_opening) );
my $i_opening = $i_beg + ( $K_opening - $K_beg );
next if ( $i_opening < $i_beg );
# ... and only one semicolon between these braces
my $semicolon_count = 0;
foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) {
if ( $rLL->[$K]->[_TYPE_] eq ';' ) {
$semicolon_count++;
last;
}
}
next if ($semicolon_count);
# ...ok, then make the semicolon invisible
$tokens_to_go[$i_semicolon] = "";
$token_lengths_to_go[$i_semicolon] = 0;
$rLL->[$K_semicolon]->[_TOKEN_] = "";
$rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0;
}
return;
}
sub recombine_breakpoints {
# sub set_continuation_breaks is very liberal in setting line breaks
# for long lines, always setting breaks at good breakpoints, even
# when that creates small lines. Sometimes small line fragments
# are produced which would look better if they were combined.
# That's the task of this routine.
#
# We are given indexes to the current lines:
# $ri_beg = ref to array of BEGinning indexes of each line
# $ri_end = ref to array of ENDing indexes of each line
my ( $self, $ri_beg, $ri_end ) = @_;
my $rOpts_short_concatenation_item_length =
$rOpts->{'short-concatenation-item-length'};
# Make a list of all good joining tokens between the lines
# n-1 and n.
my @joint;
my $nmax = @{$ri_end} - 1;
for my $n ( 1 .. $nmax ) {
my $ibeg_1 = $ri_beg->[ $n - 1 ];
my $iend_1 = $ri_end->[ $n - 1 ];
my $iend_2 = $ri_end->[$n];
my $ibeg_2 = $ri_beg->[$n];
my ( $itok, $itokp, $itokm );
foreach my $itest ( $iend_1, $ibeg_2 ) {
my $type = $types_to_go[$itest];
if ( $is_math_op{$type}
|| $is_amp_amp{$type}
|| $is_assignment{$type}
|| $type eq ':' )
{
$itok = $itest;
}
}
$joint[$n] = [$itok];
}
my $more_to_do = 1;
# We keep looping over all of the lines of this batch
# until there are no more possible recombinations
my $nmax_last = @{$ri_end};
my $reverse = 0;
while ($more_to_do) {
my $n_best = 0;
my $bs_best;
my $nmax = @{$ri_end} - 1;
# Safety check for infinite loop
unless ( $nmax < $nmax_last ) {
# Shouldn't happen because splice below decreases nmax on each
# iteration. An error can only be due to a recent programming
# change.
Fault("Program bug-infinite loop in recombine breakpoints\n");
}
$nmax_last = $nmax;
$more_to_do = 0;
my $skip_Section_3;
my $leading_amp_count = 0;
my $this_line_is_semicolon_terminated;
# loop over all remaining lines in this batch
for my $iter ( 1 .. $nmax ) {
# alternating sweep direction gives symmetric results
# for recombining lines which exceed the line length
# such as eval {{{{.... }}}}
my $n;
if ($reverse) { $n = 1 + $nmax - $iter; }
else { $n = $iter }
#----------------------------------------------------------
# If we join the current pair of lines,
# line $n-1 will become the left part of the joined line
# line $n will become the right part of the joined line
#
# Here are Indexes of the endpoint tokens of the two lines:
#
# -----line $n-1--- | -----line $n-----
# $ibeg_1 $iend_1 | $ibeg_2 $iend_2
# ^
# |
# We want to decide if we should remove the line break
# between the tokens at $iend_1 and $ibeg_2
#
# We will apply a number of ad-hoc tests to see if joining
# here will look ok. The code will just issue a 'next'
# command if the join doesn't look good. If we get through
# the gauntlet of tests, the lines will be recombined.
#----------------------------------------------------------
#
# beginning and ending tokens of the lines we are working on
my $ibeg_1 = $ri_beg->[ $n - 1 ];
my $iend_1 = $ri_end->[ $n - 1 ];
my $iend_2 = $ri_end->[$n];
my $ibeg_2 = $ri_beg->[$n];
my $ibeg_nmax = $ri_beg->[$nmax];
# combined line cannot be too long
my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
next if ( $excess > 0 );
my $type_iend_1 = $types_to_go[$iend_1];
my $type_iend_2 = $types_to_go[$iend_2];
my $type_ibeg_1 = $types_to_go[$ibeg_1];
my $type_ibeg_2 = $types_to_go[$ibeg_2];
# terminal token of line 2 if any side comment is ignored:
my $iend_2t = $iend_2;
my $type_iend_2t = $type_iend_2;
# some beginning indexes of other lines, which may not exist
my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
my $bs_tweak = 0;
#my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
# $nesting_depth_to_go[$ibeg_1] );
0 && do {
print STDERR
"RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
};
# If line $n is the last line, we set some flags and
# do any special checks for it
if ( $n == $nmax ) {
# a terminal '{' should stay where it is
# unless preceded by a fat comma
next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' );
if ( $type_iend_2 eq '#'
&& $iend_2 - $ibeg_2 >= 2
&& $types_to_go[ $iend_2 - 1 ] eq 'b' )
{
$iend_2t = $iend_2 - 2;
$type_iend_2t = $types_to_go[$iend_2t];
}
$this_line_is_semicolon_terminated = $type_iend_2t eq ';';
}
#----------------------------------------------------------
# Recombine Section 0:
# Examine the special token joining this line pair, if any.
# Put as many tests in this section to avoid duplicate code and
# to make formatting independent of whether breaks are to the
# left or right of an operator.
#----------------------------------------------------------
my ($itok) = @{ $joint[$n] };
if ($itok) {
# FIXME: Patch - may not be necessary
my $iend_1 =
$type_iend_1 eq 'b'
? $iend_1 - 1
: $iend_1;
my $iend_2 =
$type_iend_2 eq 'b'
? $iend_2 - 1
: $iend_2;
## END PATCH
my $type = $types_to_go[$itok];
if ( $type eq ':' ) {
# do not join at a colon unless it disobeys the break request
if ( $itok eq $iend_1 ) {
next unless $want_break_before{$type};
}
else {
$leading_amp_count++;
next if $want_break_before{$type};
}
} ## end if ':'
# handle math operators + - * /
elsif ( $is_math_op{$type} ) {
# Combine these lines if this line is a single
# number, or if it is a short term with same
# operator as the previous line. For example, in
# the following code we will combine all of the
# short terms $A, $B, $C, $D, $E, $F, together
# instead of leaving them one per line:
# my $time =
# $A * $B * $C * $D * $E * $F *
# ( 2. * $eps * $sigma * $area ) *
# ( 1. / $tcold**3 - 1. / $thot**3 );
# This can be important in math-intensive code.
my $good_combo;
my $itokp = min( $inext_to_go[$itok], $iend_2 );
my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
# check for a number on the right
if ( $types_to_go[$itokp] eq 'n' ) {
# ok if nothing else on right
if ( $itokp == $iend_2 ) {
$good_combo = 1;
}
else {
# look one more token to right..
# okay if math operator or some termination
$good_combo =
( ( $itokpp == $iend_2 )
&& $is_math_op{ $types_to_go[$itokpp] } )
|| $types_to_go[$itokpp] =~ /^[#,;]$/;
}
}
# check for a number on the left
if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
# okay if nothing else to left
if ( $itokm == $ibeg_1 ) {
$good_combo = 1;
}
# otherwise look one more token to left
else {
# okay if math operator, comma, or assignment
$good_combo = ( $itokmm == $ibeg_1 )
&& ( $is_math_op{ $types_to_go[$itokmm] }
|| $types_to_go[$itokmm] =~ /^[,]$/
|| $is_assignment{ $types_to_go[$itokmm] }
);
}
}
# look for a single short token either side of the
# operator
if ( !$good_combo ) {
# Slight adjustment factor to make results
# independent of break before or after operator in
# long summed lists. (An operator and a space make
# two spaces).
my $two = ( $itok eq $iend_1 ) ? 2 : 0;
$good_combo =
# numbers or id's on both sides of this joint
$types_to_go[$itokp] =~ /^[in]$/
&& $types_to_go[$itokm] =~ /^[in]$/
# one of the two lines must be short:
&& (
(
# no more than 2 nonblank tokens right of
# joint
$itokpp == $iend_2
# short
&& token_sequence_length( $itokp, $iend_2 )
< $two +
$rOpts_short_concatenation_item_length
)
|| (
# no more than 2 nonblank tokens left of
# joint
$itokmm == $ibeg_1
# short
&& token_sequence_length( $ibeg_1, $itokm )
< 2 - $two +
$rOpts_short_concatenation_item_length
)
)
# keep pure terms; don't mix +- with */
&& !(
$is_plus_minus{$type}
&& ( $is_mult_div{ $types_to_go[$itokmm] }
|| $is_mult_div{ $types_to_go[$itokpp] } )
)
&& !(
$is_mult_div{$type}
&& ( $is_plus_minus{ $types_to_go[$itokmm] }
|| $is_plus_minus{ $types_to_go[$itokpp] } )
)
;
}
# it is also good to combine if we can reduce to 2 lines
if ( !$good_combo ) {
# index on other line where same token would be in a
# long chain.
my $iother =
( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
$good_combo =
$n == 2
&& $n == $nmax
&& $types_to_go[$iother] ne $type;
}
next unless ($good_combo);
} ## end math
elsif ( $is_amp_amp{$type} ) {
##TBD
} ## end &&, ||
elsif ( $is_assignment{$type} ) {
##TBD
} ## end assignment
}
#----------------------------------------------------------
# Recombine Section 1:
# Join welded nested containers immediately
#----------------------------------------------------------
if (
$type_sequence_to_go[$iend_1]
&& $self->weld_len_right( $type_sequence_to_go[$iend_1],
$type_iend_1 )
|| $type_sequence_to_go[$ibeg_2] && $self->weld_len_left(
$type_sequence_to_go[$ibeg_2], $type_ibeg_2
)
)
{
$n_best = $n;
last;
}
$reverse = 0;
#----------------------------------------------------------
# Recombine Section 2:
# Examine token at $iend_1 (right end of first line of pair)
#----------------------------------------------------------
# an isolated '}' may join with a ';' terminated segment
if ( $type_iend_1 eq '}' ) {
# Check for cases where combining a semicolon terminated
# statement with a previous isolated closing paren will
# allow the combined line to be outdented. This is
# generally a good move. For example, we can join up
# the last two lines here:
# (
# $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
# $size, $atime, $mtime, $ctime, $blksize, $blocks
# )
# = stat($file);
#
# to get:
# (
# $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
# $size, $atime, $mtime, $ctime, $blksize, $blocks
# ) = stat($file);
#
# which makes the parens line up.
#
# Another example, from Joe Matarazzo, probably looks best
# with the 'or' clause appended to the trailing paren:
# $self->some_method(
# PARAM1 => 'foo',
# PARAM2 => 'bar'
# ) or die "Some_method didn't work";
#
# But we do not want to do this for something like the -lp
# option where the paren is not outdentable because the
# trailing clause will be far to the right.
#
# The logic here is synchronized with the logic in sub
# sub set_adjusted_indentation, which actually does
# the outdenting.
#
$skip_Section_3 ||= $this_line_is_semicolon_terminated
# only one token on last line
&& $ibeg_1 == $iend_1
# must be structural paren
&& $tokens_to_go[$iend_1] eq ')'
# style must allow outdenting,
&& !$closing_token_indentation{')'}
# only leading '&&', '||', and ':' if no others seen
# (but note: our count made below could be wrong
# due to intervening comments)
&& ( $leading_amp_count == 0
|| $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
# but leading colons probably line up with a
# previous colon or question (count could be wrong).
&& $type_ibeg_2 ne ':'
# only one step in depth allowed. this line must not
# begin with a ')' itself.
&& ( $nesting_depth_to_go[$iend_1] ==
$nesting_depth_to_go[$iend_2] + 1 );
# YVES patch 2 of 2:
# Allow cuddled eval chains, like this:
# eval {
# #STUFF;
# 1; # return true
# } or do {
# #handle error
# };
# This patch works together with a patch in
# setting adjusted indentation (where the closing eval
# brace is outdented if possible).
# The problem is that an 'eval' block has continuation
# indentation and it looks better to undo it in some
# cases. If we do not use this patch we would get:
# eval {
# #STUFF;
# 1; # return true
# }
# or do {
# #handle error
# };
# The alternative, for uncuddled style, is to create
# a patch in set_adjusted_indentation which undoes
# the indentation of a leading line like 'or do {'.
# This doesn't work well with -icb through
if (
$block_type_to_go[$iend_1] eq 'eval'
&& !$rOpts->{'line-up-parentheses'}
&& !$rOpts->{'indent-closing-brace'}
&& $tokens_to_go[$iend_2] eq '{'
&& (
( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ )
|| ( $type_ibeg_2 eq 'k'
&& $is_and_or{ $tokens_to_go[$ibeg_2] } )
|| $is_if_unless{ $tokens_to_go[$ibeg_2] }
)
)
{
$skip_Section_3 ||= 1;
}
next
unless (
$skip_Section_3
# handle '.' and '?' specially below
|| ( $type_ibeg_2 =~ /^[\.\?]$/ )
);
}
elsif ( $type_iend_1 eq '{' ) {
# YVES
# honor breaks at opening brace
# Added to prevent recombining something like this:
# } || eval { package main;
next if $forced_breakpoint_to_go[$iend_1];
}
# do not recombine lines with ending &&, ||,
elsif ( $is_amp_amp{$type_iend_1} ) {
next unless $want_break_before{$type_iend_1};
}
# Identify and recombine a broken ?/: chain
elsif ( $type_iend_1 eq '?' ) {
# Do not recombine different levels
next
if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
# do not recombine unless next line ends in :
next unless $type_iend_2 eq ':';
}
# for lines ending in a comma...
elsif ( $type_iend_1 eq ',' ) {
# Do not recombine at comma which is following the
# input bias.
# TODO: might be best to make a special flag
next if ( $old_breakpoint_to_go[$iend_1] );
# an isolated '},' may join with an identifier + ';'
# this is useful for the class of a 'bless' statement (bless.t)
if ( $type_ibeg_1 eq '}'
&& $type_ibeg_2 eq 'i' )
{
next
unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
&& ( $iend_2 == ( $ibeg_2 + 1 ) )
&& $this_line_is_semicolon_terminated );
# override breakpoint
$forced_breakpoint_to_go[$iend_1] = 0;
}
# but otherwise ..
else {
# do not recombine after a comma unless this will leave
# just 1 more line
next unless ( $n + 1 >= $nmax );
# do not recombine if there is a change in indentation depth
next
if (
$levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
# do not recombine a "complex expression" after a
# comma. "complex" means no parens.
my $saw_paren;
foreach my $ii ( $ibeg_2 .. $iend_2 ) {
if ( $tokens_to_go[$ii] eq '(' ) {
$saw_paren = 1;
last;
}
}
next if $saw_paren;
}
}
# opening paren..
elsif ( $type_iend_1 eq '(' ) {
# No longer doing this
}
elsif ( $type_iend_1 eq ')' ) {
# No longer doing this
}
# keep a terminal for-semicolon
elsif ( $type_iend_1 eq 'f' ) {
next;
}
# if '=' at end of line ...
elsif ( $is_assignment{$type_iend_1} ) {
# keep break after = if it was in input stream
# this helps prevent 'blinkers'
next if $old_breakpoint_to_go[$iend_1]
# don't strand an isolated '='
&& $iend_1 != $ibeg_1;
my $is_short_quote =
( $type_ibeg_2 eq 'Q'
&& $ibeg_2 == $iend_2
&& token_sequence_length( $ibeg_2, $ibeg_2 ) <
$rOpts_short_concatenation_item_length );
my $is_ternary =
( $type_ibeg_1 eq '?'
&& ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
# always join an isolated '=', a short quote, or if this
# will put ?/: at start of adjacent lines
if ( $ibeg_1 != $iend_1
&& !$is_short_quote
&& !$is_ternary )
{
next
unless (
(
# unless we can reduce this to two lines
$nmax < $n + 2
# or three lines, the last with a leading semicolon
|| ( $nmax == $n + 2
&& $types_to_go[$ibeg_nmax] eq ';' )
# or the next line ends with a here doc
|| $type_iend_2 eq 'h'
# or the next line ends in an open paren or brace
# and the break hasn't been forced [dima.t]
|| ( !$forced_breakpoint_to_go[$iend_1]
&& $type_iend_2 eq '{' )
)
# do not recombine if the two lines might align well
# this is a very approximate test for this
&& (
# RT#127633 - the leading tokens are not operators
( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
# or they are different
|| ( $ibeg_3 >= 0
&& $type_ibeg_2 ne $types_to_go[$ibeg_3] )
)
);
if (
# Recombine if we can make two lines
$nmax >= $n + 2
# -lp users often prefer this:
# my $title = function($env, $env, $sysarea,
# "bubba Borrower Entry");
# so we will recombine if -lp is used we have
# ending comma
&& ( !$rOpts_line_up_parentheses
|| $type_iend_2 ne ',' )
)
{
# otherwise, scan the rhs line up to last token for
# complexity. Note that we are not counting the last
# token in case it is an opening paren.
my $tv = 0;
my $depth = $nesting_depth_to_go[$ibeg_2];
foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
if ( $nesting_depth_to_go[$i] != $depth ) {
$tv++;
last if ( $tv > 1 );
}
$depth = $nesting_depth_to_go[$i];
}
# ok to recombine if no level changes before last token
if ( $tv > 0 ) {
# otherwise, do not recombine if more than two
# level changes.
next if ( $tv > 1 );
# check total complexity of the two adjacent lines
# that will occur if we do this join
my $istop =
( $n < $nmax )
? $ri_end->[ $n + 1 ]
: $iend_2;
foreach my $i ( $iend_2 .. $istop ) {
if ( $nesting_depth_to_go[$i] != $depth ) {
$tv++;
last if ( $tv > 2 );
}
$depth = $nesting_depth_to_go[$i];
}
# do not recombine if total is more than 2 level changes
next if ( $tv > 2 );
}
}
}
unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
$forced_breakpoint_to_go[$iend_1] = 0;
}
}
# for keywords..
elsif ( $type_iend_1 eq 'k' ) {
# make major control keywords stand out
# (recombine.t)
next
if (
#/^(last|next|redo|return)$/
$is_last_next_redo_return{ $tokens_to_go[$iend_1] }
# but only if followed by multiple lines
&& $n < $nmax
);
if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
next
unless $want_break_before{ $tokens_to_go[$iend_1] };
}
}
#----------------------------------------------------------
# Recombine Section 3:
# Examine token at $ibeg_2 (left end of second line of pair)
#----------------------------------------------------------
# join lines identified above as capable of
# causing an outdented line with leading closing paren
# Note that we are skipping the rest of this section
# and the rest of the loop to do the join
if ($skip_Section_3) {
$forced_breakpoint_to_go[$iend_1] = 0;
$n_best = $n;
last;
}
# handle lines with leading &&, ||
elsif ( $is_amp_amp{$type_ibeg_2} ) {
$leading_amp_count++;
# ok to recombine if it follows a ? or :
# and is followed by an open paren..
my $ok =
( $is_ternary{$type_ibeg_1}
&& $tokens_to_go[$iend_2] eq '(' )
# or is followed by a ? or : at same depth
#
# We are looking for something like this. We can
# recombine the && line with the line above to make the
# structure more clear:
# return
# exists $G->{Attr}->{V}
# && exists $G->{Attr}->{V}->{$u}
# ? %{ $G->{Attr}->{V}->{$u} }
# : ();
#
# We should probably leave something like this alone:
# return
# exists $G->{Attr}->{E}
# && exists $G->{Attr}->{E}->{$u}
# && exists $G->{Attr}->{E}->{$u}->{$v}
# ? %{ $G->{Attr}->{E}->{$u}->{$v} }
# : ();
# so that we either have all of the &&'s (or ||'s)
# on one line, as in the first example, or break at
# each one as in the second example. However, it
# sometimes makes things worse to check for this because
# it prevents multiple recombinations. So this is not done.
|| ( $ibeg_3 >= 0
&& $is_ternary{ $types_to_go[$ibeg_3] }
&& $nesting_depth_to_go[$ibeg_3] ==
$nesting_depth_to_go[$ibeg_2] );
next if !$ok && $want_break_before{$type_ibeg_2};
$forced_breakpoint_to_go[$iend_1] = 0;
# tweak the bond strength to give this joint priority
# over ? and :
$bs_tweak = 0.25;
}
# Identify and recombine a broken ?/: chain
elsif ( $type_ibeg_2 eq '?' ) {
# Do not recombine different levels
my $lev = $levels_to_go[$ibeg_2];
next if ( $lev ne $levels_to_go[$ibeg_1] );
# Do not recombine a '?' if either next line or
# previous line does not start with a ':'. The reasons
# are that (1) no alignment of the ? will be possible
# and (2) the expression is somewhat complex, so the
# '?' is harder to see in the interior of the line.
my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
my $precedes_colon =
$ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
next unless ( $follows_colon || $precedes_colon );
# we will always combining a ? line following a : line
if ( !$follows_colon ) {
# ...otherwise recombine only if it looks like a chain.
# we will just look at a few nearby lines to see if
# this looks like a chain.
my $local_count = 0;
foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
$local_count++
if $ii >= 0
&& $types_to_go[$ii] eq ':'
&& $levels_to_go[$ii] == $lev;
}
next unless ( $local_count > 1 );
}
$forced_breakpoint_to_go[$iend_1] = 0;
}
# do not recombine lines with leading '.'
elsif ( $type_ibeg_2 eq '.' ) {
my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
next
unless (
# ... unless there is just one and we can reduce
# this to two lines if we do. For example, this
#
#
# $bodyA .=
# '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
#
# looks better than this:
# $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
# . '$args .= $pat;'
(
$n == 2
&& $n == $nmax
&& $type_ibeg_1 ne $type_ibeg_2
)
# ... or this would strand a short quote , like this
# . "some long quote"
# . "\n";
|| ( $types_to_go[$i_next_nonblank] eq 'Q'
&& $i_next_nonblank >= $iend_2 - 1
&& $token_lengths_to_go[$i_next_nonblank] <
$rOpts_short_concatenation_item_length )
);
}
# handle leading keyword..
elsif ( $type_ibeg_2 eq 'k' ) {
# handle leading "or"
if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
next
unless (
$this_line_is_semicolon_terminated
&& (
$type_ibeg_1 eq '}'
|| (
# following 'if' or 'unless' or 'or'
$type_ibeg_1 eq 'k'
&& $is_if_unless{ $tokens_to_go[$ibeg_1] }
# important: only combine a very simple or
# statement because the step below may have
# combined a trailing 'and' with this or,
# and we do not want to then combine
# everything together
&& ( $iend_2 - $ibeg_2 <= 7 )
)
)
);
#X: RT #81854
$forced_breakpoint_to_go[$iend_1] = 0
unless $old_breakpoint_to_go[$iend_1];
}
# handle leading 'and' and 'xor'
elsif ($tokens_to_go[$ibeg_2] eq 'and'
|| $tokens_to_go[$ibeg_2] eq 'xor' )
{
# Decide if we will combine a single terminal 'and'
# after an 'if' or 'unless'.
# This looks best with the 'and' on the same
# line as the 'if':
#
# $a = 1
# if $seconds and $nu < 2;
#
# But this looks better as shown:
#
# $a = 1
# if !$this->{Parents}{$_}
# or $this->{Parents}{$_} eq $_;
#
next
unless (
$this_line_is_semicolon_terminated
&& (
# following 'if' or 'unless' or 'or'
$type_ibeg_1 eq 'k'
&& ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
|| $tokens_to_go[$ibeg_1] eq 'or' )
)
);
}
# handle leading "if" and "unless"
elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
# FIXME: This is still experimental..may not be too useful
next
unless (
$this_line_is_semicolon_terminated
# previous line begins with 'and' or 'or'
&& $type_ibeg_1 eq 'k'
&& $is_and_or{ $tokens_to_go[$ibeg_1] }
);
}
# handle all other leading keywords
else {
# keywords look best at start of lines,
# but combine things like "1 while"
unless ( $is_assignment{$type_iend_1} ) {
next
if ( ( $type_iend_1 ne 'k' )
&& ( $tokens_to_go[$ibeg_2] ne 'while' ) );
}
}
}
# similar treatment of && and || as above for 'and' and 'or':
# NOTE: This block of code is currently bypassed because
# of a previous block but is retained for possible future use.
elsif ( $is_amp_amp{$type_ibeg_2} ) {
# maybe looking at something like:
# unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
next
unless (
$this_line_is_semicolon_terminated
# previous line begins with an 'if' or 'unless' keyword
&& $type_ibeg_1 eq 'k'
&& $is_if_unless{ $tokens_to_go[$ibeg_1] }
);
}
# handle line with leading = or similar
elsif ( $is_assignment{$type_ibeg_2} ) {
next unless ( $n == 1 || $n == $nmax );
next if $old_breakpoint_to_go[$iend_1];
next
unless (
# unless we can reduce this to two lines
$nmax == 2
# or three lines, the last with a leading semicolon
|| ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
# or the next line ends with a here doc
|| $type_iend_2 eq 'h'
# or this is a short line ending in ;
|| ( $n == $nmax && $this_line_is_semicolon_terminated )
);
$forced_breakpoint_to_go[$iend_1] = 0;
}
#----------------------------------------------------------
# Recombine Section 4:
# Combine the lines if we arrive here and it is possible
#----------------------------------------------------------
# honor hard breakpoints
next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
# Require a few extra spaces before recombining lines if we are
# at an old breakpoint unless this is a simple list or terminal
# line. The goal is to avoid oscillating between two
# quasi-stable end states. For example this snippet caused
# problems:
## my $this =
## bless {
## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
## },
## $type;
next
if ( $old_breakpoint_to_go[$iend_1]
&& !$this_line_is_semicolon_terminated
&& $n < $nmax
&& $excess + 4 > 0
&& $type_iend_2 ne ',' );
# do not recombine if we would skip in indentation levels
if ( $n < $nmax ) {
my $if_next = $ri_beg->[ $n + 1 ];
next
if (
$levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
&& $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
# but an isolated 'if (' is undesirable
&& !(
$n == 1
&& $iend_1 - $ibeg_1 <= 2
&& $type_ibeg_1 eq 'k'
&& $tokens_to_go[$ibeg_1] eq 'if'
&& $tokens_to_go[$iend_1] ne '('
)
);
}
# honor no-break's
next if ( $bs >= NO_BREAK - 1 );
# remember the pair with the greatest bond strength
if ( !$n_best ) {
$n_best = $n;
$bs_best = $bs;
}
else {
if ( $bs > $bs_best ) {
$n_best = $n;
$bs_best = $bs;
}
}
}
# recombine the pair with the greatest bond strength
if ($n_best) {
splice @{$ri_beg}, $n_best, 1;
splice @{$ri_end}, $n_best - 1, 1;
splice @joint, $n_best, 1;
# keep going if we are still making progress
$more_to_do++;
}
}
return ( $ri_beg, $ri_end );
}
} ## end closure recombine_breakpoints
sub insert_final_ternary_breaks {
my ( $self, $ri_left, $ri_right ) = @_;
# Called once per batch to look for and do any final line breaks for
# long ternary chains
my $nmax = @{$ri_right} - 1;
# scan the left and right end tokens of all lines
my $count = 0;
my $i_first_colon = -1;
for my $n ( 0 .. $nmax ) {
my $il = $ri_left->[$n];
my $ir = $ri_right->[$n];
my $typel = $types_to_go[$il];
my $typer = $types_to_go[$ir];
return if ( $typel eq '?' );
return if ( $typer eq '?' );
if ( $typel eq ':' ) { $i_first_colon = $il; last; }
elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
}
# For long ternary chains,
# if the first : we see has its ? is in the interior
# of a preceding line, then see if there are any good
# breakpoints before the ?.
if ( $i_first_colon > 0 ) {
my $i_question = $mate_index_to_go[$i_first_colon];
if ( $i_question > 0 ) {
my @insert_list;
for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
my $token = $tokens_to_go[$ii];
my $type = $types_to_go[$ii];
# For now, a good break is either a comma or,
# in a long chain, a 'return'.
# Patch for RT #126633: added the $nmax>1 check to avoid
# breaking after a return for a simple ternary. For longer
# chains the break after return allows vertical alignment, so
# it is still done. So perltidy -wba='?' will not break
# immediately after the return in the following statement:
# sub x {
# return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
# 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
# }
if (
(
$type eq ','
|| $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
)
&& $self->in_same_container_i( $ii, $i_question )
)
{
push @insert_list, $ii;
last;
}
}
# insert any new break points
if (@insert_list) {
$self->insert_additional_breaks( \@insert_list, $ri_left,
$ri_right );
}
}
}
return;
}
sub insert_breaks_before_list_opening_containers {
my ( $self, $ri_left, $ri_right ) = @_;
# This routine is called once per batch to implement the parameters
# --break-before-hash-brace, etc.
# Nothing to do if none of these parameters has been set
return unless %break_before_container_types;
my $nmax = @{$ri_right} - 1;
return unless ( $nmax >= 0 );
my $rLL = $self->[_rLL_];
my $ris_broken_container = $self->[_ris_broken_container_];
my $rhas_broken_container = $self->[_rhas_broken_container_];
my $rparent_of_seqno = $self->[_rparent_of_seqno_];
# scan the ends of all lines
my @insert_list;
for my $n ( 0 .. $nmax ) {
my $il = $ri_left->[$n];
my $ir = $ri_right->[$n];
next unless ( $ir > $il );
my $Kl = $K_to_go[$il];
my $Kr = $K_to_go[$ir];
my $Kend = $Kr;
my $iend = $ir;
my $type_end = $rLL->[$Kr]->[_TYPE_];
# Backup before any side comment
if ( $type_end eq '#' ) {
$Kend = $self->K_previous_nonblank($Kr);
next unless defined($Kend);
$type_end = $rLL->[$Kend]->[_TYPE_];
$iend = $ir + ( $Kend - $Kr );
}
next unless ( $Kl < $Kend - 1 );
my $seqno = $rLL->[$Kend]->[_TYPE_SEQUENCE_];
next unless ( defined($seqno) );
# Only for types of container tokens with a non-default break option
my $token_end = $rLL->[$Kend]->[_TOKEN_];
my $break_option = $break_before_container_types{$token_end};
next unless ($break_option);
# Require previous nonblank to be certain types (= and =>)
# Note similar coding in sub adjust_container_indentation
my $Kprev = $Kend - 1;
my $prev_type = $rLL->[$Kprev]->[_TYPE_];
if ( $prev_type eq 'b' ) {
$Kprev--;
next if ( $Kprev <= $Kl );
$prev_type = $rLL->[$Kprev]->[_TYPE_];
}
next unless ( $is_equal_or_fat_comma{$prev_type} );
# This must be a list (this will exclude all code blocks)
next unless $self->is_list_by_seqno($seqno);
# Never break a weld
next if ( $self->weld_len_left( $seqno, $token_end ) );
# Final decision is based on selected option:
# Option 1 = stable, try to follow input
my $ok_to_break;
if ( $break_option == 1 ) {
if ( $ir - 2 > $il ) {
$ok_to_break = $old_breakpoint_to_go[ $ir - 2 ];
}
}
# Option 2 = only if complex list, meaning:
# - this list contains a broken container, or
# - this list is contained in a broken list
elsif ( $break_option == 2 ) {
$ok_to_break = $rhas_broken_container->{$seqno};
if ( !$ok_to_break ) {
my $parent = $rparent_of_seqno->{$seqno};
$ok_to_break = $self->is_list_by_seqno($parent);
}
}
# Option 3 = always break
elsif ( $break_option == 3 ) {
$ok_to_break = 1;
}
# Shouldn't happen! Bad flag, but make behavior same as 3
else {
$ok_to_break = 1;
}
next unless ($ok_to_break);
# This meets the criteria, so install a break before the opening token.
my $Kbreak = $self->K_previous_nonblank($Kend);
my $ibreak = $Kbreak - $Kl + $il;
next if ( $ibreak < $il );
next if ( $nobreak_to_go[$ibreak] );
push @insert_list, $ibreak;
}
# insert any new break points
if (@insert_list) {
$self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
}
return;
}
sub note_added_semicolon {
my ( $self, $line_number ) = @_;
$self->[_last_added_semicolon_at_] = $line_number;
if ( $self->[_added_semicolon_count_] == 0 ) {
$self->[_first_added_semicolon_at_] = $line_number;
}
$self->[_added_semicolon_count_]++;
write_logfile_entry("Added ';' here\n");
return;
}
sub note_deleted_semicolon {
my ( $self, $line_number ) = @_;
$self->[_last_deleted_semicolon_at_] = $line_number;
if ( $self->[_deleted_semicolon_count_] == 0 ) {
$self->[_first_deleted_semicolon_at_] = $line_number;
}
$self->[_deleted_semicolon_count_]++;
write_logfile_entry("Deleted unnecessary ';' at line $line_number\n");
return;
}
sub note_embedded_tab {
my ( $self, $line_number ) = @_;
$self->[_embedded_tab_count_]++;
$self->[_last_embedded_tab_at_] = $line_number;
if ( !$self->[_first_embedded_tab_at_] ) {
$self->[_first_embedded_tab_at_] = $line_number;
}
if ( $self->[_embedded_tab_count_] <= MAX_NAG_MESSAGES ) {
write_logfile_entry("Embedded tabs in quote or pattern\n");
}
return;
}
sub correct_lp_indentation {
# When the -lp option is used, we need to make a last pass through
# each line to correct the indentation positions in case they differ
# from the predictions. This is necessary because perltidy uses a
# predictor/corrector method for aligning with opening parens. The
# predictor is usually good, but sometimes stumbles. The corrector
# tries to patch things up once the actual opening paren locations
# are known.
my ( $self, $ri_first, $ri_last ) = @_;
my $do_not_pad = 0;
# Note on flag '$do_not_pad':
# We want to avoid a situation like this, where the aligner inserts
# whitespace before the '=' to align it with a previous '=', because
# otherwise the parens might become mis-aligned in a situation like
# this, where the '=' has become aligned with the previous line,
# pushing the opening '(' forward beyond where we want it.
#
# $mkFloor::currentRoom = '';
# $mkFloor::c_entry = $c->Entry(
# -width => '10',
# -relief => 'sunken',
# ...
# );
#
# We leave it to the aligner to decide how to do this.
# first remove continuation indentation if appropriate
my $max_line = @{$ri_first} - 1;
# looking at each line of this batch..
my ( $ibeg, $iend );
foreach my $line ( 0 .. $max_line ) {
$ibeg = $ri_first->[$line];
$iend = $ri_last->[$line];
# looking at each token in this output line..
foreach my $i ( $ibeg .. $iend ) {
# How many space characters to place before this token
# for special alignment. Actual padding is done in the
# continue block.
# looking for next unvisited indentation item
my $indentation = $leading_spaces_to_go[$i];
if ( !$indentation->get_marked() ) {
$indentation->set_marked(1);
# looking for indentation item for which we are aligning
# with parens, braces, and brackets
next unless ( $indentation->get_align_paren() );
# skip closed container on this line
if ( $i > $ibeg ) {
my $im = max( $ibeg, $iprev_to_go[$i] );
if ( $type_sequence_to_go[$im]
&& $mate_index_to_go[$im] <= $iend )
{
next;
}
}
if ( $line == 1 && $i == $ibeg ) {
$do_not_pad = 1;
}
# Ok, let's see what the error is and try to fix it
my $actual_pos;
my $predicted_pos = $indentation->get_spaces();
if ( $i > $ibeg ) {
# token is mid-line - use length to previous token
$actual_pos = total_line_length( $ibeg, $i - 1 );
# for mid-line token, we must check to see if all
# additional lines have continuation indentation,
# and remove it if so. Otherwise, we do not get
# good alignment.
my $closing_index = $indentation->get_closed();
if ( $closing_index > $iend ) {
my $ibeg_next = $ri_first->[ $line + 1 ];
if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
$self->undo_lp_ci( $line, $i, $closing_index,
$ri_first, $ri_last );
}
}
}
elsif ( $line > 0 ) {
# handle case where token starts a new line;
# use length of previous line
my $ibegm = $ri_first->[ $line - 1 ];
my $iendm = $ri_last->[ $line - 1 ];
$actual_pos = total_line_length( $ibegm, $iendm );
# follow -pt style
++$actual_pos
if ( $types_to_go[ $iendm + 1 ] eq 'b' );
}
else {
# token is first character of first line of batch
$actual_pos = $predicted_pos;
}
my $move_right = $actual_pos - $predicted_pos;
# done if no error to correct (gnu2.t)
if ( $move_right == 0 ) {
$indentation->set_recoverable_spaces($move_right);
next;
}
# if we have not seen closure for this indentation in
# this batch, we can only pass on a request to the
# vertical aligner
my $closing_index = $indentation->get_closed();
if ( $closing_index < 0 ) {
$indentation->set_recoverable_spaces($move_right);
next;
}
# If necessary, look ahead to see if there is really any
# leading whitespace dependent on this whitespace, and
# also find the longest line using this whitespace.
# Since it is always safe to move left if there are no
# dependents, we only need to do this if we may have
# dependent nodes or need to move right.
my $right_margin = 0;
my $have_child = $indentation->get_have_child();
my %saw_indentation;
my $line_count = 1;
$saw_indentation{$indentation} = $indentation;
if ( $have_child || $move_right > 0 ) {
$have_child = 0;
my $max_length = 0;
if ( $i == $ibeg ) {
$max_length = total_line_length( $ibeg, $iend );
}
# look ahead at the rest of the lines of this batch..
foreach my $line_t ( $line + 1 .. $max_line ) {
my $ibeg_t = $ri_first->[$line_t];
my $iend_t = $ri_last->[$line_t];
last if ( $closing_index <= $ibeg_t );
# remember all different indentation objects
my $indentation_t = $leading_spaces_to_go[$ibeg_t];
$saw_indentation{$indentation_t} = $indentation_t;
$line_count++;
# remember longest line in the group
my $length_t = total_line_length( $ibeg_t, $iend_t );
if ( $length_t > $max_length ) {
$max_length = $length_t;
}
}
$right_margin =
$maximum_line_length[ $levels_to_go[$ibeg] ] -
$max_length;
if ( $right_margin < 0 ) { $right_margin = 0 }
}
my $first_line_comma_count =
grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
my $comma_count = $indentation->get_comma_count();
my $arrow_count = $indentation->get_arrow_count();
# This is a simple approximate test for vertical alignment:
# if we broke just after an opening paren, brace, bracket,
# and there are 2 or more commas in the first line,
# and there are no '=>'s,
# then we are probably vertically aligned. We could set
# an exact flag in sub scan_list, but this is good
# enough.
my $indentation_count = keys %saw_indentation;
my $is_vertically_aligned =
( $i == $ibeg
&& $first_line_comma_count > 1
&& $indentation_count == 1
&& ( $arrow_count == 0 || $arrow_count == $line_count ) );
# Make the move if possible ..
if (
# we can always move left
$move_right < 0
# but we should only move right if we are sure it will
# not spoil vertical alignment
|| ( $comma_count == 0 )
|| ( $comma_count > 0 && !$is_vertically_aligned )
)
{
my $move =
( $move_right <= $right_margin )
? $move_right
: $right_margin;
foreach ( keys %saw_indentation ) {
$saw_indentation{$_}
->permanently_decrease_available_spaces( -$move );
}
}
# Otherwise, record what we want and the vertical aligner
# will try to recover it.
else {
$indentation->set_recoverable_spaces($move_right);
}
}
}
}
return $do_not_pad;
}
sub undo_lp_ci {
# If there is a single, long parameter within parens, like this:
#
# $self->command( "/msg "
# . $infoline->chan
# . " You said $1, but did you know that it's square was "
# . $1 * $1 . " ?" );
#
# we can remove the continuation indentation of the 2nd and higher lines
# to achieve this effect, which is more pleasing:
#
# $self->command("/msg "
# . $infoline->chan
# . " You said $1, but did you know that it's square was "
# . $1 * $1 . " ?");
my ( $self, $line_open, $i_start, $closing_index, $ri_first, $ri_last ) =
@_;
my $max_line = @{$ri_first} - 1;
# must be multiple lines
return unless $max_line > $line_open;
my $lev_start = $levels_to_go[$i_start];
my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
# see if all additional lines in this container have continuation
# indentation
my $n;
my $line_1 = 1 + $line_open;
for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
my $ibeg = $ri_first->[$n];
my $iend = $ri_last->[$n];
if ( $ibeg eq $closing_index ) { $n--; last }
return if ( $lev_start != $levels_to_go[$ibeg] );
return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
last if ( $closing_index <= $iend );
}
# we can reduce the indentation of all continuation lines
my $continuation_line_count = $n - $line_open;
@ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
(0) x ($continuation_line_count);
@leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
@reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
return;
}
###############################################
# CODE SECTION 10: Code to break long statments
###############################################
sub set_continuation_breaks {
# Called once per batch to set breaks in long lines.
# Define an array of indexes for inserting newline characters to
# keep the line lengths below the maximum desired length. There is
# an implied break after the last token, so it need not be included.
# Method:
# This routine is part of series of routines which adjust line
# lengths. It is only called if a statement is longer than the
# maximum line length, or if a preliminary scanning located
# desirable break points. Sub scan_list has already looked at
# these tokens and set breakpoints (in array
# $forced_breakpoint_to_go[$i]) where it wants breaks (for example
# after commas, after opening parens, and before closing parens).
# This routine will honor these breakpoints and also add additional
# breakpoints as necessary to keep the line length below the maximum
# requested. It bases its decision on where the 'bond strength' is
# lowest.
# Output: returns references to the arrays:
# @i_first
# @i_last
# which contain the indexes $i of the first and last tokens on each
# line.
# In addition, the array:
# $forced_breakpoint_to_go[$i]
# may be updated to be =1 for any index $i after which there must be
# a break. This signals later routines not to undo the breakpoint.
my ( $self, $saw_good_break, $rcolon_list ) = @_;
# @{$rcolon_list} is a list of all the ? and : tokens in the batch, in
# order.
use constant DEBUG_BREAKPOINTS => 0;
my @i_first = (); # the first index to output
my @i_last = (); # the last index to output
my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
my $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
$self->set_bond_strengths();
my $imin = 0;
my $imax = $max_index_to_go;
if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
my $i_begin = $imin; # index for starting next iteration
my $leading_spaces = leading_spaces_to_go($imin);
my $line_count = 0;
my $last_break_strength = NO_BREAK;
my $i_last_break = -1;
my $max_bias = 0.001;
my $tiny_bias = 0.0001;
my $leading_alignment_token = "";
my $leading_alignment_type = "";
# see if any ?/:'s are in order
my $colons_in_order = 1;
my $last_tok = "";
foreach ( @{$rcolon_list} ) {
if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
$last_tok = $_;
}
# This is a sufficient but not necessary condition for colon chain
my $is_colon_chain = ( $colons_in_order && @{$rcolon_list} > 2 );
#-------------------------------------------------------
# BEGINNING of main loop to set continuation breakpoints
# Keep iterating until we reach the end
#-------------------------------------------------------
while ( $i_begin <= $imax ) {
my $lowest_strength = NO_BREAK;
my $starting_sum = $summed_lengths_to_go[$i_begin];
my $i_lowest = -1;
my $i_test = -1;
my $lowest_next_token = '';
my $lowest_next_type = 'b';
my $i_lowest_next_nonblank = -1;
my $maximum_line_length =
$maximum_line_length[ $levels_to_go[$i_begin] ];
#-------------------------------------------------------
# BEGINNING of inner loop to find the best next breakpoint
#-------------------------------------------------------
my $strength = NO_BREAK;
for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
my $type = $types_to_go[$i_test];
my $token = $tokens_to_go[$i_test];
my $next_type = $types_to_go[ $i_test + 1 ];
my $next_token = $tokens_to_go[ $i_test + 1 ];
my $i_next_nonblank = $inext_to_go[$i_test];
my $next_nonblank_type = $types_to_go[$i_next_nonblank];
my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
# adjustments to the previous bond strength may have been made, and
# we must keep the bond strength of a token and its following blank
# the same;
my $last_strength = $strength;
$strength = $bond_strength_to_go[$i_test];
if ( $type eq 'b' ) { $strength = $last_strength }
# use old breaks as a tie-breaker. For example to
# prevent blinkers with -pbp in this code:
##@keywords{
## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/}
## = ();
# At the same time try to prevent a leading * in this code
# with the default formatting:
#
## return
## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 )
## * ( $x**( $a - 1 ) )
## * ( ( 1 - $x )**( $b - 1 ) );
# reduce strength a bit to break ties at an old breakpoint ...
if (
$old_breakpoint_to_go[$i_test]
# which is a 'good' breakpoint, meaning ...
# we don't want to break before it
&& !$want_break_before{$type}
# and either we want to break before the next token
# or the next token is not short (i.e. not a '*', '/' etc.)
&& $i_next_nonblank <= $imax
&& ( $want_break_before{$next_nonblank_type}
|| $token_lengths_to_go[$i_next_nonblank] > 2
|| $next_nonblank_type eq ','
|| $is_opening_type{$next_nonblank_type} )
)
{
$strength -= $tiny_bias;
}
# otherwise increase strength a bit if this token would be at the
# maximum line length. This is necessary to avoid blinking
# in the above example when the -iob flag is added.
else {
my $len =
$leading_spaces +
$summed_lengths_to_go[ $i_test + 1 ] -
$starting_sum;
if ( $len >= $maximum_line_length ) {
$strength += $tiny_bias;
}
}
my $must_break = 0;
# Force an immediate break at certain operators
# with lower level than the start of the line,
# unless we've already seen a better break.
#
##############################################
# Note on an issue with a preceding ?
##############################################
# We don't include a ? in the above list, but there may
# be a break at a previous ? if the line is long.
# Because of this we do not want to force a break if
# there is a previous ? on this line. For now the best way
# to do this is to not break if we have seen a lower strength
# point, which is probably a ?.
#
# Example of unwanted breaks we are avoiding at a '.' following a ?
# from pod2html using perltidy -gnu:
# )
# ? "\n<A NAME=\""
# . $value
# . "\">\n$text</A>\n"
# : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
if (
( $strength <= $lowest_strength )
&& ( $nesting_depth_to_go[$i_begin] >
$nesting_depth_to_go[$i_next_nonblank] )
&& (
$next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
|| ( $next_nonblank_type eq 'k'
&& $next_nonblank_token =~ /^(and|or)$/ )
)
)
{
$self->set_forced_breakpoint($i_next_nonblank);
}
if (
# Try to put a break where requested by scan_list
$forced_breakpoint_to_go[$i_test]
# break between ) { in a continued line so that the '{' can
# be outdented
# See similar logic in scan_list which catches instances
# where a line is just something like ') {'. We have to
# be careful because the corresponding block keyword might
# not be on the first line, such as 'for' here:
#
# eval {
# for ("a") {
# for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
# }
# };
#
|| (
$line_count
&& ( $token eq ')' )
&& ( $next_nonblank_type eq '{' )
&& ($next_nonblank_block_type)
&& ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
# RT #104427: Dont break before opening sub brace because
# sub block breaks handled at higher level, unless
# it looks like the preceding list is long and broken
&& !(
$next_nonblank_block_type =~ /$ANYSUB_PATTERN/
&& ( $nesting_depth_to_go[$i_begin] ==
$nesting_depth_to_go[$i_next_nonblank] )
)
&& !$rOpts->{'opening-brace-always-on-right'}
)
# There is an implied forced break at a terminal opening brace
|| ( ( $type eq '{' ) && ( $i_test == $imax ) )
)
{
# Forced breakpoints must sometimes be overridden, for example
# because of a side comment causing a NO_BREAK. It is easier
# to catch this here than when they are set.
if ( $strength < NO_BREAK - 1 ) {
$strength = $lowest_strength - $tiny_bias;
$must_break = 1;
}
}
# quit if a break here would put a good terminal token on
# the next line and we already have a possible break
if (
!$must_break
&& ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
&& (
(
$leading_spaces +
$summed_lengths_to_go[ $i_next_nonblank + 1 ] -
$starting_sum
) > $maximum_line_length
)
)
{
last if ( $i_lowest >= 0 );
}
# Avoid a break which would strand a single punctuation
# token. For example, we do not want to strand a leading
# '.' which is followed by a long quoted string.
# But note that we do want to do this with -extrude (l=1)
# so please test any changes to this code on -extrude.
if (
!$must_break
&& ( $i_test == $i_begin )
&& ( $i_test < $imax )
&& ( $token eq $type )
&& (
(
$leading_spaces +
$summed_lengths_to_go[ $i_test + 1 ] -
$starting_sum
) < $maximum_line_length
)
)
{
$i_test = min( $imax, $inext_to_go[$i_test] );
redo;
}
if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
{
# break at previous best break if it would have produced
# a leading alignment of certain common tokens, and it
# is different from the latest candidate break
last
if ($leading_alignment_type);
# Force at least one breakpoint if old code had good
# break It is only called if a breakpoint is required or
# desired. This will probably need some adjustments
# over time. A goal is to try to be sure that, if a new
# side comment is introduced into formatted text, then
# the same breakpoints will occur. scbreak.t
last
if (
$i_test == $imax # we are at the end
&& !get_forced_breakpoint_count()
&& $saw_good_break # old line had good break
&& $type =~ /^[#;\{]$/ # and this line ends in
# ';' or side comment
&& $i_last_break < 0 # and we haven't made a break
&& $i_lowest >= 0 # and we saw a possible break
&& $i_lowest < $imax - 1 # (but not just before this ;)
&& $strength - $lowest_strength < 0.5 * WEAK # and it's good
);
# Do not skip past an important break point in a short final
# segment. For example, without this check we would miss the
# break at the final / in the following code:
#
# $depth_stop =
# ( $tau * $mass_pellet * $q_0 *
# ( 1. - exp( -$t_stop / $tau ) ) -
# 4. * $pi * $factor * $k_ice *
# ( $t_melt - $t_ice ) *
# $r_pellet *
# $t_stop ) /
# ( $rho_ice * $Qs * $pi * $r_pellet**2 );
#
if (
$line_count > 2
&& $i_lowest >= 0 # and we saw a possible break
&& $i_lowest < $i_test
&& $i_test > $imax - 2
&& $nesting_depth_to_go[$i_begin] >
$nesting_depth_to_go[$i_lowest]
&& $lowest_strength < $last_break_strength - .5 * WEAK
)
{
# Make this break for math operators for now
my $ir = $inext_to_go[$i_lowest];
my $il = $iprev_to_go[$ir];
last
if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
|| $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ );
}
# Update the minimum bond strength location
$lowest_strength = $strength;
$i_lowest = $i_test;
$lowest_next_token = $next_nonblank_token;
$lowest_next_type = $next_nonblank_type;
$i_lowest_next_nonblank = $i_next_nonblank;
last if $must_break;
# set flags to remember if a break here will produce a
# leading alignment of certain common tokens
if ( $line_count > 0
&& $i_test < $imax
&& ( $lowest_strength - $last_break_strength <= $max_bias )
)
{
my $i_last_end = $iprev_to_go[$i_begin];
my $tok_beg = $tokens_to_go[$i_begin];
my $type_beg = $types_to_go[$i_begin];
if (
# check for leading alignment of certain tokens
(
$tok_beg eq $next_nonblank_token
&& $is_chain_operator{$tok_beg}
&& ( $type_beg eq 'k'
|| $type_beg eq $tok_beg )
&& $nesting_depth_to_go[$i_begin] >=
$nesting_depth_to_go[$i_next_nonblank]
)
|| ( $tokens_to_go[$i_last_end] eq $token
&& $is_chain_operator{$token}
&& ( $type eq 'k' || $type eq $token )
&& $nesting_depth_to_go[$i_last_end] >=
$nesting_depth_to_go[$i_test] )
)
{
$leading_alignment_token = $next_nonblank_token;
$leading_alignment_type = $next_nonblank_type;
}
}
}
my $too_long = ( $i_test >= $imax );
if ( !$too_long ) {
my $next_length =
$leading_spaces +
$summed_lengths_to_go[ $i_test + 2 ] -
$starting_sum;
$too_long = $next_length > $maximum_line_length;
# To prevent blinkers we will avoid leaving a token exactly at
# the line length limit unless it is the last token or one of
# several "good" types.
#
# The following code was a blinker with -pbp before this
# modification:
## $last_nonblank_token eq '('
## && $is_indirect_object_taker{ $paren_type
## [$paren_depth] }
# The issue causing the problem is that if the
# term [$paren_depth] gets broken across a line then
# the whitespace routine doesn't see both opening and closing
# brackets and will format like '[ $paren_depth ]'. This
# leads to an oscillation in length depending if we break
# before the closing bracket or not.
if ( !$too_long
&& $i_test + 1 < $imax
&& $next_nonblank_type ne ','
&& !$is_closing_type{$next_nonblank_type} )
{
$too_long = $next_length >= $maximum_line_length;
}
}
DEBUG_BREAKPOINTS && do {
my $ltok = $token;
my $rtok = $next_nonblank_token ? $next_nonblank_token : "";
my $i_testp2 = $i_test + 2;
if ( $i_testp2 > $max_index_to_go + 1 ) {
$i_testp2 = $max_index_to_go + 1;
}
if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
print STDOUT
"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n";
};
# allow one extra terminal token after exceeding line length
# if it would strand this token.
if ( $rOpts_fuzzy_line_length
&& $too_long
&& $i_lowest == $i_test
&& $token_lengths_to_go[$i_test] > 1
&& ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
)
{
$too_long = 0;
}
# we are done if...
last
if (
# ... no more space and we have a break
$too_long && $i_lowest >= 0
# ... or no more tokens
|| $i_test == $imax
);
}
#-------------------------------------------------------
# END of inner loop to find the best next breakpoint
# Now decide exactly where to put the breakpoint
#-------------------------------------------------------
# it's always ok to break at imax if no other break was found
if ( $i_lowest < 0 ) { $i_lowest = $imax }
# semi-final index calculation
my $i_next_nonblank = $inext_to_go[$i_lowest];
my $next_nonblank_type = $types_to_go[$i_next_nonblank];
my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
#-------------------------------------------------------
# ?/: rule 1 : if a break here will separate a '?' on this
# line from its closing ':', then break at the '?' instead.
#-------------------------------------------------------
foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
next unless ( $tokens_to_go[$i] eq '?' );
# do not break if probable sequence of ?/: statements
next if ($is_colon_chain);
# do not break if statement is broken by side comment
next
if ( $tokens_to_go[$max_index_to_go] eq '#'
&& terminal_type_i( 0, $max_index_to_go ) !~ /^[\;\}]$/ );
# no break needed if matching : is also on the line
next
if ( $mate_index_to_go[$i] >= 0
&& $mate_index_to_go[$i] <= $i_next_nonblank );
$i_lowest = $i;
if ( $want_break_before{'?'} ) { $i_lowest-- }
last;
}
#-------------------------------------------------------
# END of inner loop to find the best next breakpoint:
# Break the line after the token with index i=$i_lowest
#-------------------------------------------------------
# final index calculation
$i_next_nonblank = $inext_to_go[$i_lowest];
$next_nonblank_type = $types_to_go[$i_next_nonblank];
$next_nonblank_token = $tokens_to_go[$i_next_nonblank];
DEBUG_BREAKPOINTS
&& print STDOUT
"BREAK: best is i = $i_lowest strength = $lowest_strength\n";
#-------------------------------------------------------
# ?/: rule 2 : if we break at a '?', then break at its ':'
#
# Note: this rule is also in sub scan_list to handle a break
# at the start and end of a line (in case breaks are dictated
# by side comments).
#-------------------------------------------------------
if ( $next_nonblank_type eq '?' ) {
$self->set_closing_breakpoint($i_next_nonblank);
}
elsif ( $types_to_go[$i_lowest] eq '?' ) {
$self->set_closing_breakpoint($i_lowest);
}
#-------------------------------------------------------
# ?/: rule 3 : if we break at a ':' then we save
# its location for further work below. We may need to go
# back and break at its '?'.
#-------------------------------------------------------
if ( $next_nonblank_type eq ':' ) {
push @i_colon_breaks, $i_next_nonblank;
}
elsif ( $types_to_go[$i_lowest] eq ':' ) {
push @i_colon_breaks, $i_lowest;
}
# here we should set breaks for all '?'/':' pairs which are
# separated by this line
$line_count++;
# save this line segment, after trimming blanks at the ends
push( @i_first,
( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
push( @i_last,
( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
# set a forced breakpoint at a container opening, if necessary, to
# signal a break at a closing container. Excepting '(' for now.
if (
(
$tokens_to_go[$i_lowest] eq '{'
|| $tokens_to_go[$i_lowest] eq '['
)
&& !$forced_breakpoint_to_go[$i_lowest]
)
{
$self->set_closing_breakpoint($i_lowest);
}
# get ready to go again
$i_begin = $i_lowest + 1;
$last_break_strength = $lowest_strength;
$i_last_break = $i_lowest;
$leading_alignment_token = "";
$leading_alignment_type = "";
$lowest_next_token = '';
$lowest_next_type = 'b';
if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
$i_begin++;
}
# update indentation size
if ( $i_begin <= $imax ) {
$leading_spaces = leading_spaces_to_go($i_begin);
}
}
#-------------------------------------------------------
# END of main loop to set continuation breakpoints
# Now go back and make any necessary corrections
#-------------------------------------------------------
#-------------------------------------------------------
# ?/: rule 4 -- if we broke at a ':', then break at
# corresponding '?' unless this is a chain of ?: expressions
#-------------------------------------------------------
if (@i_colon_breaks) {
# using a simple method for deciding if we are in a ?/: chain --
# this is a chain if it has multiple ?/: pairs all in order;
# otherwise not.
# Note that if line starts in a ':' we count that above as a break
my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
unless ($is_chain) {
my @insert_list = ();
foreach (@i_colon_breaks) {
my $i_question = $mate_index_to_go[$_];
if ( $i_question >= 0 ) {
if ( $want_break_before{'?'} ) {
$i_question = $iprev_to_go[$i_question];
}
if ( $i_question >= 0 ) {
push @insert_list, $i_question;
}
}
$self->insert_additional_breaks( \@insert_list, \@i_first,
\@i_last );
}
}
}
return ( \@i_first, \@i_last );
}
###########################################
# CODE SECTION 11: Code to break long lists
###########################################
{ ## begin closure scan_list
# These routines and variables are involved in finding good
# places to break long lists.
my (
$block_type, $current_depth,
$depth, $i,
$i_last_nonblank_token, $last_colon_sequence_number,
$last_nonblank_token, $last_nonblank_type,
$last_nonblank_block_type, $last_old_breakpoint_count,
$minimum_depth, $next_nonblank_block_type,
$next_nonblank_token, $next_nonblank_type,
$old_breakpoint_count, $starting_breakpoint_count,
$starting_depth, $token,
$type, $type_sequence,
);
my (
@breakpoint_stack, @breakpoint_undo_stack,
@comma_index, @container_type,
@identifier_count_stack, @index_before_arrow,
@interrupted_list, @item_count_stack,
@last_comma_index, @last_dot_index,
@last_nonblank_type, @old_breakpoint_count_stack,
@opening_structure_index_stack, @rfor_semicolon_list,
@has_old_logical_breakpoints, @rand_or_list,
@i_equals,
);
# these arrays must retain values between calls
my ( @has_broken_sublist, @dont_align, @want_comma_break );
sub initialize_scan_list {
@dont_align = ();
@has_broken_sublist = ();
@want_comma_break = ();
return;
}
# routine to define essential variables when we go 'up' to
# a new depth
sub check_for_new_minimum_depth {
my $depth = shift;
if ( $depth < $minimum_depth ) {
$minimum_depth = $depth;
# these arrays need not retain values between calls
$breakpoint_stack[$depth] = $starting_breakpoint_count;
$container_type[$depth] = "";
$identifier_count_stack[$depth] = 0;
$index_before_arrow[$depth] = -1;
$interrupted_list[$depth] = 1;
$item_count_stack[$depth] = 0;
$last_nonblank_type[$depth] = "";
$opening_structure_index_stack[$depth] = -1;
$breakpoint_undo_stack[$depth] = undef;
$comma_index[$depth] = undef;
$last_comma_index[$depth] = undef;
$last_dot_index[$depth] = undef;
$old_breakpoint_count_stack[$depth] = undef;
$has_old_logical_breakpoints[$depth] = 0;
$rand_or_list[$depth] = [];
$rfor_semicolon_list[$depth] = [];
$i_equals[$depth] = -1;
# these arrays must retain values between calls
if ( !defined( $has_broken_sublist[$depth] ) ) {
$dont_align[$depth] = 0;
$has_broken_sublist[$depth] = 0;
$want_comma_break[$depth] = 0;
}
}
return;
}
# routine to decide which commas to break at within a container;
# returns:
# $bp_count = number of comma breakpoints set
# $do_not_break_apart = a flag indicating if container need not
# be broken open
sub set_comma_breakpoints {
my ( $self, $dd ) = @_;
my $bp_count = 0;
my $do_not_break_apart = 0;
# anything to do?
if ( $item_count_stack[$dd] ) {
# handle commas not in containers...
if ( $dont_align[$dd] ) {
$self->do_uncontained_comma_breaks($dd);
}
# handle commas within containers...
else {
my $fbc = get_forced_breakpoint_count();
# always open comma lists not preceded by keywords,
# barewords, identifiers (that is, anything that doesn't
# look like a function call)
my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
$self->set_comma_breakpoints_do(
{
depth => $dd,
i_opening_paren => $opening_structure_index_stack[$dd],
i_closing_paren => $i,
item_count => $item_count_stack[$dd],
identifier_count => $identifier_count_stack[$dd],
rcomma_index => $comma_index[$dd],
next_nonblank_type => $next_nonblank_type,
list_type => $container_type[$dd],
interrupted => $interrupted_list[$dd],
rdo_not_break_apart => \$do_not_break_apart,
must_break_open => $must_break_open,
has_broken_sublist => $has_broken_sublist[$dd],
}
);
$bp_count = get_forced_breakpoint_count() - $fbc;
$do_not_break_apart = 0 if $must_break_open;
}
}
return ( $bp_count, $do_not_break_apart );
}
sub do_uncontained_comma_breaks {
# Handle commas not in containers...
# This is a catch-all routine for commas that we
# don't know what to do with because the don't fall
# within containers. We will bias the bond strength
# to break at commas which ended lines in the input
# file. This usually works better than just trying
# to put as many items on a line as possible. A
# downside is that if the input file is garbage it
# won't work very well. However, the user can always
# prevent following the old breakpoints with the
# -iob flag.
my ( $self, $dd ) = @_;
my $bias = -.01;
my $old_comma_break_count = 0;
foreach my $ii ( @{ $comma_index[$dd] } ) {
if ( $old_breakpoint_to_go[$ii] ) {
$old_comma_break_count++;
$bond_strength_to_go[$ii] = $bias;
# reduce bias magnitude to force breaks in order
$bias *= 0.99;
}
}
# Also put a break before the first comma if
# (1) there was a break there in the input, and
# (2) there was exactly one old break before the first comma break
# (3) OLD: there are multiple old comma breaks
# (3) NEW: there are one or more old comma breaks (see return example)
#
# For example, we will follow the user and break after
# 'print' in this snippet:
# print
# "conformability (Not the same dimension)\n",
# "\t", $have, " is ", text_unit($hu), "\n",
# "\t", $want, " is ", text_unit($wu), "\n",
# ;
#
# Another example, just one comma, where we will break after
# the return:
# return
# $x * cos($a) - $y * sin($a),
# $x * sin($a) + $y * cos($a);
# Breaking a print statement:
# print SAVEOUT
# ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
# ( $? & 128 ) ? " -- core dumped" : "", "\n";
#
# But we will not force a break after the opening paren here
# (causes a blinker):
# $heap->{stream}->set_output_filter(
# poe::filter::reference->new('myotherfreezer') ),
# ;
#
my $i_first_comma = $comma_index[$dd]->[0];
if ( $old_breakpoint_to_go[$i_first_comma] ) {
my $level_comma = $levels_to_go[$i_first_comma];
my $ibreak = -1;
my $obp_count = 0;
for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
if ( $old_breakpoint_to_go[$ii] ) {
$obp_count++;
last if ( $obp_count > 1 );
$ibreak = $ii
if ( $levels_to_go[$ii] == $level_comma );
}
}
# Changed rule from multiple old commas to just one here:
if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
{
# Do not to break before an opening token because
# it can lead to "blinkers".
my $ibreakm = $ibreak;
$ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' );
if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ )
{
$self->set_forced_breakpoint($ibreak);
}
}
}
return;
}
my %is_logical_container;
my %quick_filter;
BEGIN {
my @q = qw# if elsif unless while and or err not && | || ? : ! #;
@is_logical_container{@q} = (1) x scalar(@q);
# This filter will allow most tokens to skip past a section of code
%quick_filter = %is_assignment;
@q = qw# => . ; < > ~ #;
push @q, ',';
@quick_filter{@q} = (1) x scalar(@q);
}
sub set_for_semicolon_breakpoints {
my ( $self, $dd ) = @_;
foreach ( @{ $rfor_semicolon_list[$dd] } ) {
$self->set_forced_breakpoint($_);
}
return;
}
sub set_logical_breakpoints {
my ( $self, $dd ) = @_;
if (
$item_count_stack[$dd] == 0
&& $is_logical_container{ $container_type[$dd] }
|| $has_old_logical_breakpoints[$dd]
)
{
# Look for breaks in this order:
# 0 1 2 3
# or and || &&
foreach my $i ( 0 .. 3 ) {
if ( $rand_or_list[$dd][$i] ) {
foreach ( @{ $rand_or_list[$dd][$i] } ) {
$self->set_forced_breakpoint($_);
}
# break at any 'if' and 'unless' too
foreach ( @{ $rand_or_list[$dd][4] } ) {
$self->set_forced_breakpoint($_);
}
$rand_or_list[$dd] = [];
last;
}
}
}
return;
}
sub is_unbreakable_container {
# never break a container of one of these types
# because bad things can happen (map1.t)
my $dd = shift;
return $is_sort_map_grep{ $container_type[$dd] };
}
sub scan_list {
my ( $self, $is_long_line ) = @_;
# This routine is responsible for setting line breaks for all lists,
# so that hierarchical structure can be displayed and so that list
# items can be vertically aligned. The output of this routine is
# stored in the array @forced_breakpoint_to_go, which is used to set
# final breakpoints.
# It is called once per batch if the batch is a list.
my $rOpts_break_at_old_attribute_breakpoints =
$rOpts->{'break-at-old-attribute-breakpoints'};
my $rOpts_break_at_old_keyword_breakpoints =
$rOpts->{'break-at-old-keyword-breakpoints'};
my $rOpts_break_at_old_logical_breakpoints =
$rOpts->{'break-at-old-logical-breakpoints'};
my $rOpts_break_at_old_method_breakpoints =
$rOpts->{'break-at-old-method-breakpoints'};
my $rOpts_break_at_old_ternary_breakpoints =
$rOpts->{'break-at-old-ternary-breakpoints'};
$starting_depth = $nesting_depth_to_go[0];
$block_type = ' ';
$current_depth = $starting_depth;
$i = -1;
$last_colon_sequence_number = -1;
$last_nonblank_token = ';';
$last_nonblank_type = ';';
$last_nonblank_block_type = ' ';
$last_old_breakpoint_count = 0;
$minimum_depth = $current_depth + 1; # forces update in check below
$old_breakpoint_count = 0;
$starting_breakpoint_count = get_forced_breakpoint_count();
$token = ';';
$type = ';';
$type_sequence = '';
my $total_depth_variation = 0;
my $i_old_assignment_break;
my $depth_last = $starting_depth;
check_for_new_minimum_depth($current_depth);
my $want_previous_breakpoint = -1;
my $saw_good_breakpoint;
my $i_line_end = -1;
my $i_line_start = -1;
# loop over all tokens in this batch
while ( ++$i <= $max_index_to_go ) {
if ( $type ne 'b' ) {
$i_last_nonblank_token = $i - 1;
$last_nonblank_type = $type;
$last_nonblank_token = $token;
$last_nonblank_block_type = $block_type;
} ## end if ( $type ne 'b' )
$type = $types_to_go[$i];
$block_type = $block_type_to_go[$i];
$token = $tokens_to_go[$i];
$type_sequence = $type_sequence_to_go[$i];
my $next_type = $types_to_go[ $i + 1 ];
my $next_token = $tokens_to_go[ $i + 1 ];
my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
$next_nonblank_type = $types_to_go[$i_next_nonblank];
$next_nonblank_token = $tokens_to_go[$i_next_nonblank];
$next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
# set break if flag was set
if ( $want_previous_breakpoint >= 0 ) {
$self->set_forced_breakpoint($want_previous_breakpoint);
$want_previous_breakpoint = -1;
}
$last_old_breakpoint_count = $old_breakpoint_count;
if ( $old_breakpoint_to_go[$i] ) {
$i_line_end = $i;
$i_line_start = $i_next_nonblank;
$old_breakpoint_count++;
# Break before certain keywords if user broke there and
# this is a 'safe' break point. The idea is to retain
# any preferred breaks for sequential list operations,
# like a schwartzian transform.
if ($rOpts_break_at_old_keyword_breakpoints) {
if (
$next_nonblank_type eq 'k'
&& $is_keyword_returning_list{$next_nonblank_token}
&& ( $type =~ /^[=\)\]\}Riw]$/
|| $type eq 'k'
&& $is_keyword_returning_list{$token} )
)
{
# we actually have to set this break next time through
# the loop because if we are at a closing token (such
# as '}') which forms a one-line block, this break might
# get undone.
$want_previous_breakpoint = $i;
} ## end if ( $next_nonblank_type...)
} ## end if ($rOpts_break_at_old_keyword_breakpoints)
# Break before attributes if user broke there
if ($rOpts_break_at_old_attribute_breakpoints) {
if ( $next_nonblank_type eq 'A' ) {
$want_previous_breakpoint = $i;
}
}
# remember an = break as possible good break point
if ( $is_assignment{$type} ) {
$i_old_assignment_break = $i;
}
elsif ( $is_assignment{$next_nonblank_type} ) {
$i_old_assignment_break = $i_next_nonblank;
}
} ## end if ( $old_breakpoint_to_go...)
next if ( $type eq 'b' );
$depth = $nesting_depth_to_go[ $i + 1 ];
$total_depth_variation += abs( $depth - $depth_last );
$depth_last = $depth;
# safety check - be sure we always break after a comment
# Shouldn't happen .. an error here probably means that the
# nobreak flag did not get turned off correctly during
# formatting.
if ( $type eq '#' ) {
if ( $i != $max_index_to_go ) {
warning(
"Non-fatal program bug: backup logic required to break after a comment\n"
);
report_definite_bug();
$nobreak_to_go[$i] = 0;
$self->set_forced_breakpoint($i);
} ## end if ( $i != $max_index_to_go)
} ## end if ( $type eq '#' )
# Force breakpoints at certain tokens in long lines.
# Note that such breakpoints will be undone later if these tokens
# are fully contained within parens on a line.
if (
# break before a keyword within a line
$type eq 'k'
&& $i > 0
# if one of these keywords:
# /^(if|unless|while|until|for)$/
&& $is_if_unless_while_until_for{$token}
# but do not break at something like '1 while'
&& ( $last_nonblank_type ne 'n' || $i > 2 )
# and let keywords follow a closing 'do' brace
&& $last_nonblank_block_type ne 'do'
&& (
$is_long_line
# or container is broken (by side-comment, etc)
|| ( $next_nonblank_token eq '('
&& $mate_index_to_go[$i_next_nonblank] < $i )
)
)
{
$self->set_forced_breakpoint( $i - 1 );
} ## end if ( $type eq 'k' && $i...)
# remember locations of -> if this is a pre-broken method chain
if ( $type eq '->' ) {
if ($rOpts_break_at_old_method_breakpoints) {
# Case 1: look for lines with leading pointers
if ( $i == $i_line_start ) {
$self->set_forced_breakpoint( $i - 1 );
}
# Case 2: look for cuddled pointer calls
else {
# look for old lines with leading ')->' or ') ->'
# and, when found, force a break before the
# opening paren and after the previous closing paren.
if (
$i_line_start >= 0
&& $types_to_go[$i_line_start] eq '}'
&& ( $i == $i_line_start + 1
|| $i == $i_line_start + 2
&& $types_to_go[ $i - 1 ] eq 'b' )
)
{
$self->set_forced_breakpoint( $i_line_start - 1 );
$self->set_forced_breakpoint(
$mate_index_to_go[$i_line_start] );
}
}
}
} ## end if ( $type eq '->' )
# remember locations of '||' and '&&' for possible breaks if we
# decide this is a long logical expression.
elsif ( $type eq '||' ) {
push @{ $rand_or_list[$depth][2] }, $i;
++$has_old_logical_breakpoints[$depth]
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints );
} ## end elsif ( $type eq '||' )
elsif ( $type eq '&&' ) {
push @{ $rand_or_list[$depth][3] }, $i;
++$has_old_logical_breakpoints[$depth]
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints );
} ## end elsif ( $type eq '&&' )
elsif ( $type eq 'f' ) {
push @{ $rfor_semicolon_list[$depth] }, $i;
}
elsif ( $type eq 'k' ) {
if ( $token eq 'and' ) {
push @{ $rand_or_list[$depth][1] }, $i;
++$has_old_logical_breakpoints[$depth]
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints );
} ## end if ( $token eq 'and' )
# break immediately at 'or's which are probably not in a logical
# block -- but we will break in logical breaks below so that
# they do not add to the forced_breakpoint_count
elsif ( $token eq 'or' ) {
push @{ $rand_or_list[$depth][0] }, $i;
++$has_old_logical_breakpoints[$depth]
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints );
if ( $is_logical_container{ $container_type[$depth] } ) {
}
else {
if ($is_long_line) { $self->set_forced_breakpoint($i) }
elsif ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints )
{
$saw_good_breakpoint = 1;
}
} ## end else [ if ( $is_logical_container...)]
} ## end elsif ( $token eq 'or' )
elsif ( $token eq 'if' || $token eq 'unless' ) {
push @{ $rand_or_list[$depth][4] }, $i;
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints )
{
$self->set_forced_breakpoint($i);
}
} ## end elsif ( $token eq 'if' ||...)
} ## end elsif ( $type eq 'k' )
elsif ( $is_assignment{$type} ) {
$i_equals[$depth] = $i;
}
if ($type_sequence) {
# handle any postponed closing breakpoints
if ( $is_closing_sequence_token{$token} ) {
if ( $type eq ':' ) {
$last_colon_sequence_number = $type_sequence;
# retain break at a ':' line break
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_ternary_breakpoints )
{
$self->set_forced_breakpoint($i);
# break at previous '='
if ( $i_equals[$depth] > 0 ) {
$self->set_forced_breakpoint(
$i_equals[$depth] );
$i_equals[$depth] = -1;
}
} ## end if ( ( $i == $i_line_start...))
} ## end if ( $type eq ':' )
if ( has_postponed_breakpoint($type_sequence) ) {
my $inc = ( $type eq ':' ) ? 0 : 1;
$self->set_forced_breakpoint( $i - $inc );
}
} ## end if ( $is_closing_sequence_token{$token} )
# set breaks at ?/: if they will get separated (and are
# not a ?/: chain), or if the '?' is at the end of the
# line
elsif ( $token eq '?' ) {
my $i_colon = $mate_index_to_go[$i];
if (
$i_colon <= 0 # the ':' is not in this batch
|| $i == 0 # this '?' is the first token of the line
|| $i ==
$max_index_to_go # or this '?' is the last token
)
{
# don't break at a '?' if preceded by ':' on
# this line of previous ?/: pair on this line.
# This is an attempt to preserve a chain of ?/:
# expressions (elsif2.t). And don't break if
# this has a side comment.
$self->set_forced_breakpoint($i)
unless (
$type_sequence == (
$last_colon_sequence_number +
TYPE_SEQUENCE_INCREMENT
)
|| $tokens_to_go[$max_index_to_go] eq '#'
);
$self->set_closing_breakpoint($i);
} ## end if ( $i_colon <= 0 ||...)
} ## end elsif ( $token eq '?' )
} ## end if ($type_sequence)
#print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
#------------------------------------------------------------
# Handle Increasing Depth..
#
# prepare for a new list when depth increases
# token $i is a '(','{', or '['
#------------------------------------------------------------
if ( $depth > $current_depth ) {
$breakpoint_stack[$depth] = get_forced_breakpoint_count();
$breakpoint_undo_stack[$depth] =
get_forced_breakpoint_undo_count();
$has_broken_sublist[$depth] = 0;
$identifier_count_stack[$depth] = 0;
$index_before_arrow[$depth] = -1;
$interrupted_list[$depth] = 0;
$item_count_stack[$depth] = 0;
$last_comma_index[$depth] = undef;
$last_dot_index[$depth] = undef;
$last_nonblank_type[$depth] = $last_nonblank_type;
$old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
$opening_structure_index_stack[$depth] = $i;
$rand_or_list[$depth] = [];
$rfor_semicolon_list[$depth] = [];
$i_equals[$depth] = -1;
$want_comma_break[$depth] = 0;
$container_type[$depth] =
# k => && || ? : .
$is_container_label_type{$last_nonblank_type}
? $last_nonblank_token
: "";
$has_old_logical_breakpoints[$depth] = 0;
# if line ends here then signal closing token to break
if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
{
$self->set_closing_breakpoint($i);
}
# Not all lists of values should be vertically aligned..
$dont_align[$depth] =
# code BLOCKS are handled at a higher level
( $block_type ne "" )
# certain paren lists
|| ( $type eq '(' ) && (
# it does not usually look good to align a list of
# identifiers in a parameter list, as in:
# my($var1, $var2, ...)
# (This test should probably be refined, for now I'm just
# testing for any keyword)
( $last_nonblank_type eq 'k' )
# a trailing '(' usually indicates a non-list
|| ( $next_nonblank_type eq '(' )
);
# patch to outdent opening brace of long if/for/..
# statements (like this one). See similar coding in
# set_continuation breaks. We have also catch it here for
# short line fragments which otherwise will not go through
# set_continuation_breaks.
if (
$block_type
# if we have the ')' but not its '(' in this batch..
&& ( $last_nonblank_token eq ')' )
&& $mate_index_to_go[$i_last_nonblank_token] < 0
# and user wants brace to left
&& !$rOpts->{'opening-brace-always-on-right'}
&& ( $type eq '{' ) # should be true
&& ( $token eq '{' ) # should be true
)
{
$self->set_forced_breakpoint( $i - 1 );
} ## end if ( $block_type && ( ...))
} ## end if ( $depth > $current_depth)
#------------------------------------------------------------
# Handle Decreasing Depth..
#
# finish off any old list when depth decreases
# token $i is a ')','}', or ']'
#------------------------------------------------------------
elsif ( $depth < $current_depth ) {
check_for_new_minimum_depth($depth);
# force all outer logical containers to break after we see on
# old breakpoint
$has_old_logical_breakpoints[$depth] ||=
$has_old_logical_breakpoints[$current_depth];
# Patch to break between ') {' if the paren list is broken.
# There is similar logic in set_continuation_breaks for
# non-broken lists.
if ( $token eq ')'
&& $next_nonblank_block_type
&& $interrupted_list[$current_depth]
&& $next_nonblank_type eq '{'
&& !$rOpts->{'opening-brace-always-on-right'} )
{
$self->set_forced_breakpoint($i);
} ## end if ( $token eq ')' && ...
#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
# set breaks at commas if necessary
my ( $bp_count, $do_not_break_apart ) =
$self->set_comma_breakpoints($current_depth);
my $i_opening = $opening_structure_index_stack[$current_depth];
my $saw_opening_structure = ( $i_opening >= 0 );
# this term is long if we had to break at interior commas..
my $is_long_term = $bp_count > 0;
# If this is a short container with one or more comma arrows,
# then we will mark it as a long term to open it if requested.
# $rOpts_comma_arrow_breakpoints =
# 0 - open only if comma precedes closing brace
# 1 - stable: except for one line blocks
# 2 - try to form 1 line blocks
# 3 - ignore =>
# 4 - always open up if vt=0
# 5 - stable: even for one line blocks if vt=0
if ( !$is_long_term
&& $saw_opening_structure
&& $is_opening_token{ $tokens_to_go[$i_opening] }
&& $index_before_arrow[ $depth + 1 ] > 0
&& !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
)
{
$is_long_term = $rOpts_comma_arrow_breakpoints == 4
|| ( $rOpts_comma_arrow_breakpoints == 0
&& $last_nonblank_token eq ',' )
|| ( $rOpts_comma_arrow_breakpoints == 5
&& $old_breakpoint_to_go[$i_opening] );
} ## end if ( !$is_long_term &&...)
# mark term as long if the length between opening and closing
# parens exceeds allowed line length
if ( !$is_long_term && $saw_opening_structure ) {
my $i_opening_minus =
$self->find_token_starting_list($i_opening);
# Note: we have to allow for one extra space after a
# closing token so that we do not strand a comma or
# semicolon, hence the '>=' here (oneline.t)
$is_long_term =
$self->excess_line_length( $i_opening_minus, $i ) >= 0;
} ## end if ( !$is_long_term &&...)
# We've set breaks after all comma-arrows. Now we have to
# undo them if this can be a one-line block
# (the only breakpoints set will be due to comma-arrows)
if (
# user doesn't require breaking after all comma-arrows
( $rOpts_comma_arrow_breakpoints != 0 )
&& ( $rOpts_comma_arrow_breakpoints != 4 )
# and if the opening structure is in this batch
&& $saw_opening_structure
# and either on the same old line
&& (
$old_breakpoint_count_stack[$current_depth] ==
$last_old_breakpoint_count
# or user wants to form long blocks with arrows
|| $rOpts_comma_arrow_breakpoints == 2
)
# and we made some breakpoints between the opening and closing
&& ( $breakpoint_undo_stack[$current_depth] <
get_forced_breakpoint_undo_count() )
# and this block is short enough to fit on one line
# Note: use < because need 1 more space for possible comma
&& !$is_long_term
)
{
$self->undo_forced_breakpoint_stack(
$breakpoint_undo_stack[$current_depth] );
} ## end if ( ( $rOpts_comma_arrow_breakpoints...))
# now see if we have any comma breakpoints left
my $has_comma_breakpoints =
( $breakpoint_stack[$current_depth] !=
get_forced_breakpoint_count() );
# update broken-sublist flag of the outer container
$has_broken_sublist[$depth] =
$has_broken_sublist[$depth]
|| $has_broken_sublist[$current_depth]
|| $is_long_term
|| $has_comma_breakpoints;
# Having come to the closing ')', '}', or ']', now we have to decide if we
# should 'open up' the structure by placing breaks at the opening and
# closing containers. This is a tricky decision. Here are some of the
# basic considerations:
#
# -If this is a BLOCK container, then any breakpoints will have already
# been set (and according to user preferences), so we need do nothing here.
#
# -If we have a comma-separated list for which we can align the list items,
# then we need to do so because otherwise the vertical aligner cannot
# currently do the alignment.
#
# -If this container does itself contain a container which has been broken
# open, then it should be broken open to properly show the structure.
#
# -If there is nothing to align, and no other reason to break apart,
# then do not do it.
#
# We will not break open the parens of a long but 'simple' logical expression.
# For example:
#
# This is an example of a simple logical expression and its formatting:
#
# if ( $bigwasteofspace1 && $bigwasteofspace2
# || $bigwasteofspace3 && $bigwasteofspace4 )
#
# Most people would prefer this than the 'spacey' version:
#
# if (
# $bigwasteofspace1 && $bigwasteofspace2
# || $bigwasteofspace3 && $bigwasteofspace4
# )
#
# To illustrate the rules for breaking logical expressions, consider:
#
# FULLY DENSE:
# if ( $opt_excl
# and ( exists $ids_excl_uc{$id_uc}
# or grep $id_uc =~ /$_/, @ids_excl_uc ))
#
# This is on the verge of being difficult to read. The current default is to
# open it up like this:
#
# DEFAULT:
# if (
# $opt_excl
# and ( exists $ids_excl_uc{$id_uc}
# or grep $id_uc =~ /$_/, @ids_excl_uc )
# )
#
# This is a compromise which tries to avoid being too dense and to spacey.
# A more spaced version would be:
#
# SPACEY:
# if (
# $opt_excl
# and (
# exists $ids_excl_uc{$id_uc}
# or grep $id_uc =~ /$_/, @ids_excl_uc
# )
# )
#
# Some people might prefer the spacey version -- an option could be added. The
# innermost expression contains a long block '( exists $ids_... ')'.
#
# Here is how the logic goes: We will force a break at the 'or' that the
# innermost expression contains, but we will not break apart its opening and
# closing containers because (1) it contains no multi-line sub-containers itself,
# and (2) there is no alignment to be gained by breaking it open like this
#
# and (
# exists $ids_excl_uc{$id_uc}
# or grep $id_uc =~ /$_/, @ids_excl_uc
# )
#
# (although this looks perfectly ok and might be good for long expressions). The
# outer 'if' container, though, contains a broken sub-container, so it will be
# broken open to avoid too much density. Also, since it contains no 'or's, there
# will be a forced break at its 'and'.
# set some flags telling something about this container..
my $is_simple_logical_expression = 0;
if ( $item_count_stack[$current_depth] == 0
&& $saw_opening_structure
&& $tokens_to_go[$i_opening] eq '('
&& $is_logical_container{ $container_type[$current_depth] }
)
{
# This seems to be a simple logical expression with
# no existing breakpoints. Set a flag to prevent
# opening it up.
if ( !$has_comma_breakpoints ) {
$is_simple_logical_expression = 1;
}
# This seems to be a simple logical expression with
# breakpoints (broken sublists, for example). Break
# at all 'or's and '||'s.
else {
$self->set_logical_breakpoints($current_depth);
}
} ## end if ( $item_count_stack...)
if ( $is_long_term
&& @{ $rfor_semicolon_list[$current_depth] } )
{
$self->set_for_semicolon_breakpoints($current_depth);
# open up a long 'for' or 'foreach' container to allow
# leading term alignment unless -lp is used.
$has_comma_breakpoints = 1
unless $rOpts_line_up_parentheses;
} ## end if ( $is_long_term && ...)
if (
# breaks for code BLOCKS are handled at a higher level
!$block_type
# we do not need to break at the top level of an 'if'
# type expression
&& !$is_simple_logical_expression
## modification to keep ': (' containers vertically tight;
## but probably better to let user set -vt=1 to avoid
## inconsistency with other paren types
## && ($container_type[$current_depth] ne ':')
# otherwise, we require one of these reasons for breaking:
&& (
# - this term has forced line breaks
$has_comma_breakpoints
# - the opening container is separated from this batch
# for some reason (comment, blank line, code block)
# - this is a non-paren container spanning multiple lines
|| !$saw_opening_structure
# - this is a long block contained in another breakable
# container
|| ( $is_long_term
&& $container_environment_to_go[$i_opening] ne
'BLOCK' )
)
)
{
# For -lp option, we must put a breakpoint before
# the token which has been identified as starting
# this indentation level. This is necessary for
# proper alignment.
if ( $rOpts_line_up_parentheses && $saw_opening_structure )
{
my $item = $leading_spaces_to_go[ $i_opening + 1 ];
if ( $i_opening + 1 < $max_index_to_go
&& $types_to_go[ $i_opening + 1 ] eq 'b' )
{
$item = $leading_spaces_to_go[ $i_opening + 2 ];
}
if ( defined($item) ) {
my $i_start_2;
my $K_start_2 = $item->get_starting_index_K();
if ( defined($K_start_2) ) {
$i_start_2 = $K_start_2 - $K_to_go[0];
}
if (
defined($i_start_2)
# we are breaking after an opening brace, paren,
# so don't break before it too
&& $i_start_2 ne $i_opening
&& $i_start_2 >= 0
&& $i_start_2 <= $max_index_to_go
)
{
# Only break for breakpoints at the same
# indentation level as the opening paren
my $test1 = $nesting_depth_to_go[$i_opening];
my $test2 = $nesting_depth_to_go[$i_start_2];
if ( $test2 == $test1 ) {
$self->set_forced_breakpoint(
$i_start_2 - 1 );
}
} ## end if ( defined($i_start_2...))
} ## end if ( defined($item) )
} ## end if ( $rOpts_line_up_parentheses...)
# break after opening structure.
# note: break before closing structure will be automatic
if ( $minimum_depth <= $current_depth ) {
$self->set_forced_breakpoint($i_opening)
unless ( $do_not_break_apart
|| is_unbreakable_container($current_depth) );
# break at ',' of lower depth level before opening token
if ( $last_comma_index[$depth] ) {
$self->set_forced_breakpoint(
$last_comma_index[$depth] );
}
# break at '.' of lower depth level before opening token
if ( $last_dot_index[$depth] ) {
$self->set_forced_breakpoint(
$last_dot_index[$depth] );
}
# break before opening structure if preceded by another
# closing structure and a comma. This is normally
# done by the previous closing brace, but not
# if it was a one-line block.
if ( $i_opening > 2 ) {
my $i_prev =
( $types_to_go[ $i_opening - 1 ] eq 'b' )
? $i_opening - 2
: $i_opening - 1;
if (
$types_to_go[$i_prev] eq ','
&& ( $types_to_go[ $i_prev - 1 ] eq ')'
|| $types_to_go[ $i_prev - 1 ] eq '}' )
)
{
$self->set_forced_breakpoint($i_prev);
}
# also break before something like ':(' or '?('
# if appropriate.
elsif (
$types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
{
my $token_prev = $tokens_to_go[$i_prev];
if ( $want_break_before{$token_prev} ) {
$self->set_forced_breakpoint($i_prev);
}
} ## end elsif ( $types_to_go[$i_prev...])
} ## end if ( $i_opening > 2 )
} ## end if ( $minimum_depth <=...)
# break after comma following closing structure
if ( $next_type eq ',' ) {
$self->set_forced_breakpoint( $i + 1 );
}
# break before an '=' following closing structure
if (
$is_assignment{$next_nonblank_type}
&& ( $breakpoint_stack[$current_depth] !=
get_forced_breakpoint_count() )
)
{
$self->set_forced_breakpoint($i);
} ## end if ( $is_assignment{$next_nonblank_type...})
# break at any comma before the opening structure Added
# for -lp, but seems to be good in general. It isn't
# obvious how far back to look; the '5' below seems to
# work well and will catch the comma in something like
# push @list, myfunc( $param, $param, ..
my $icomma = $last_comma_index[$depth];
if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
unless ( $forced_breakpoint_to_go[$icomma] ) {
$self->set_forced_breakpoint($icomma);
}
}
} # end logic to open up a container
# Break open a logical container open if it was already open
elsif ($is_simple_logical_expression
&& $has_old_logical_breakpoints[$current_depth] )
{
$self->set_logical_breakpoints($current_depth);
}
# Handle long container which does not get opened up
elsif ($is_long_term) {
# must set fake breakpoint to alert outer containers that
# they are complex
set_fake_breakpoint();
} ## end elsif ($is_long_term)
} ## end elsif ( $depth < $current_depth)
#------------------------------------------------------------
# Handle this token
#------------------------------------------------------------
$current_depth = $depth;
# most token types can skip the rest of this loop
next unless ( $quick_filter{$type} );
# handle comma-arrow
if ( $type eq '=>' ) {
next if ( $last_nonblank_type eq '=>' );
next if $rOpts_break_at_old_comma_breakpoints;
next if $rOpts_comma_arrow_breakpoints == 3;
$want_comma_break[$depth] = 1;
$index_before_arrow[$depth] = $i_last_nonblank_token;
next;
} ## end if ( $type eq '=>' )
elsif ( $type eq '.' ) {
$last_dot_index[$depth] = $i;
}
# Turn off alignment if we are sure that this is not a list
# environment. To be safe, we will do this if we see certain
# non-list tokens, such as ';', and also the environment is
# not a list. Note that '=' could be in any of the = operators
# (lextest.t). We can't just use the reported environment
# because it can be incorrect in some cases.
elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
&& $container_environment_to_go[$i] ne 'LIST' )
{
$dont_align[$depth] = 1;
$want_comma_break[$depth] = 0;
$index_before_arrow[$depth] = -1;
} ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
# now just handle any commas
next unless ( $type eq ',' );
$last_dot_index[$depth] = undef;
$last_comma_index[$depth] = $i;
# break here if this comma follows a '=>'
# but not if there is a side comment after the comma
if ( $want_comma_break[$depth] ) {
if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
if ($rOpts_comma_arrow_breakpoints) {
$want_comma_break[$depth] = 0;
next;
}
}
$self->set_forced_breakpoint($i)
unless ( $next_nonblank_type eq '#' );
# break before the previous token if it looks safe
# Example of something that we will not try to break before:
# DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
# Also we don't want to break at a binary operator (like +):
# $c->createOval(
# $x + $R, $y +
# $R => $x - $R,
# $y - $R, -fill => 'black',
# );
my $ibreak = $index_before_arrow[$depth] - 1;
if ( $ibreak > 0
&& $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
{
if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
# don't break pointer calls, such as the following:
# File::Spec->curdir => 1,
# (This is tokenized as adjacent 'w' tokens)
##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
# And don't break before a comma, as in the following:
# ( LONGER_THAN,=> 1,
# EIGHTY_CHARACTERS,=> 2,
# CAUSES_FORMATTING,=> 3,
# LIKE_THIS,=> 4,
# );
# This example is for -tso but should be general rule
if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
&& $tokens_to_go[ $ibreak + 1 ] ne ',' )
{
$self->set_forced_breakpoint($ibreak);
}
} ## end if ( $types_to_go[$ibreak...])
} ## end if ( $ibreak > 0 && $tokens_to_go...)
$want_comma_break[$depth] = 0;
$index_before_arrow[$depth] = -1;
# handle list which mixes '=>'s and ','s:
# treat any list items so far as an interrupted list
$interrupted_list[$depth] = 1;
next;
} ## end if ( $want_comma_break...)
# break after all commas above starting depth
if ( $depth < $starting_depth && !$dont_align[$depth] ) {
$self->set_forced_breakpoint($i)
unless ( $next_nonblank_type eq '#' );
next;
}
# add this comma to the list..
my $item_count = $item_count_stack[$depth];
if ( $item_count == 0 ) {
# but do not form a list with no opening structure
# for example:
# open INFILE_COPY, ">$input_file_copy"
# or die ("very long message");
if ( ( $opening_structure_index_stack[$depth] < 0 )
&& $container_environment_to_go[$i] eq 'BLOCK' )
{
$dont_align[$depth] = 1;
}
} ## end if ( $item_count == 0 )
$comma_index[$depth][$item_count] = $i;
++$item_count_stack[$depth];
if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
$identifier_count_stack[$depth]++;
}
} ## end while ( ++$i <= $max_index_to_go)
#-------------------------------------------
# end of loop over all tokens in this batch
#-------------------------------------------
# set breaks for any unfinished lists ..
for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
$interrupted_list[$dd] = 1;
$has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
$self->set_comma_breakpoints($dd);
$self->set_logical_breakpoints($dd)
if ( $has_old_logical_breakpoints[$dd] );
$self->set_for_semicolon_breakpoints($dd);
# break open container...
my $i_opening = $opening_structure_index_stack[$dd];
$self->set_forced_breakpoint($i_opening)
unless (
is_unbreakable_container($dd)
# Avoid a break which would place an isolated ' or "
# on a line
|| ( $type eq 'Q'
&& $i_opening >= $max_index_to_go - 2
&& ( $token eq "'" || $token eq '"' ) )
);
} ## end for ( my $dd = $current_depth...)
# Return a flag indicating if the input file had some good breakpoints.
# This flag will be used to force a break in a line shorter than the
# allowed line length.
if ( $has_old_logical_breakpoints[$current_depth] ) {
$saw_good_breakpoint = 1;
}
# A complex line with one break at an = has a good breakpoint.
# This is not complex ($total_depth_variation=0):
# $res1
# = 10;
#
# This is complex ($total_depth_variation=6):
# $res2 =
# (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
elsif ($i_old_assignment_break
&& $total_depth_variation > 4
&& $old_breakpoint_count == 1 )
{
$saw_good_breakpoint = 1;
} ## end elsif ( $i_old_assignment_break...)
return $saw_good_breakpoint;
} ## end sub scan_list
} ## end closure scan_list
sub find_token_starting_list {
# When testing to see if a block will fit on one line, some
# previous token(s) may also need to be on the line; particularly
# if this is a sub call. So we will look back at least one
# token. NOTE: This isn't perfect, but not critical, because
# if we mis-identify a block, it will be wrapped and therefore
# fixed the next time it is formatted.
my ( $self, $i_opening_paren ) = @_;
my $i_opening_minus = $i_opening_paren;
my $im1 = $i_opening_paren - 1;
my $im2 = $i_opening_paren - 2;
my $typem1 = $im1 >= 0 ? $types_to_go[$im1] : 'b';
my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';
if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
$i_opening_minus = $i_opening_paren;
}
elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
$i_opening_minus = $im1 if $im1 >= 0;
# walk back to improve length estimate
for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
$i_opening_minus = $j;
}
if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
}
elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
$i_opening_minus = $im2;
}
return $i_opening_minus;
}
{ ## begin closure set_comma_breakpoints_do
my %is_keyword_with_special_leading_term;
BEGIN {
# These keywords have prototypes which allow a special leading item
# followed by a list
my @q =
qw(formline grep kill map printf sprintf push chmod join pack unshift);
@is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
}
use constant DEBUG_SPARSE => 0;
sub set_comma_breakpoints_do {
# Given a list with some commas, set breakpoints at some of the
# commas, if necessary, to make it easy to read.
my ( $self, $rinput_hash ) = @_;
my $depth = $rinput_hash->{depth};
my $i_opening_paren = $rinput_hash->{i_opening_paren};
my $i_closing_paren = $rinput_hash->{i_closing_paren};
my $item_count = $rinput_hash->{item_count};
my $identifier_count = $rinput_hash->{identifier_count};
my $rcomma_index = $rinput_hash->{rcomma_index};
my $next_nonblank_type = $rinput_hash->{next_nonblank_type};
my $list_type = $rinput_hash->{list_type};
my $interrupted = $rinput_hash->{interrupted};
my $rdo_not_break_apart = $rinput_hash->{rdo_not_break_apart};
my $must_break_open = $rinput_hash->{must_break_open};
my $has_broken_sublist = $rinput_hash->{has_broken_sublist};
# nothing to do if no commas seen
return if ( $item_count < 1 );
my $i_first_comma = $rcomma_index->[0];
my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
my $i_last_comma = $i_true_last_comma;
if ( $i_last_comma >= $max_index_to_go ) {
$i_last_comma = $rcomma_index->[ --$item_count - 1 ];
return if ( $item_count < 1 );
}
#---------------------------------------------------------------
# find lengths of all items in the list to calculate page layout
#---------------------------------------------------------------
my $comma_count = $item_count;
my @item_lengths;
my @i_term_begin;
my @i_term_end;
my @i_term_comma;
my $i_prev_plus;
my @max_length = ( 0, 0 );
my $first_term_length;
my $i = $i_opening_paren;
my $is_odd = 1;
foreach my $j ( 0 .. $comma_count - 1 ) {
$is_odd = 1 - $is_odd;
$i_prev_plus = $i + 1;
$i = $rcomma_index->[$j];
my $i_term_end =
( $i == 0 || $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
my $i_term_begin =
( $types_to_go[$i_prev_plus] eq 'b' )
? $i_prev_plus + 1
: $i_prev_plus;
push @i_term_begin, $i_term_begin;
push @i_term_end, $i_term_end;
push @i_term_comma, $i;
# note: currently adding 2 to all lengths (for comma and space)
my $length =
2 + token_sequence_length( $i_term_begin, $i_term_end );
push @item_lengths, $length;
if ( $j == 0 ) {
$first_term_length = $length;
}
else {
if ( $length > $max_length[$is_odd] ) {
$max_length[$is_odd] = $length;
}
}
}
# now we have to make a distinction between the comma count and item
# count, because the item count will be one greater than the comma
# count if the last item is not terminated with a comma
my $i_b =
( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
? $i_last_comma + 1
: $i_last_comma;
my $i_e =
( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
? $i_closing_paren - 2
: $i_closing_paren - 1;
my $i_effective_last_comma = $i_last_comma;
my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
if ( $last_item_length > 0 ) {
# add 2 to length because other lengths include a comma and a blank
$last_item_length += 2;
push @item_lengths, $last_item_length;
push @i_term_begin, $i_b + 1;
push @i_term_end, $i_e;
push @i_term_comma, undef;
my $i_odd = $item_count % 2;
if ( $last_item_length > $max_length[$i_odd] ) {
$max_length[$i_odd] = $last_item_length;
}
$item_count++;
$i_effective_last_comma = $i_e + 1;
if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
$identifier_count++;
}
}
#---------------------------------------------------------------
# End of length calculations
#---------------------------------------------------------------
#---------------------------------------------------------------
# Compound List Rule 1:
# Break at (almost) every comma for a list containing a broken
# sublist. This has higher priority than the Interrupted List
# Rule.
#---------------------------------------------------------------
if ($has_broken_sublist) {
# Break at every comma except for a comma between two
# simple, small terms. This prevents long vertical
# columns of, say, just 0's.
my $small_length = 10; # 2 + actual maximum length wanted
# We'll insert a break in long runs of small terms to
# allow alignment in uniform tables.
my $skipped_count = 0;
my $columns = table_columns_available($i_first_comma);
my $fields = int( $columns / $small_length );
if ( $rOpts_maximum_fields_per_table
&& $fields > $rOpts_maximum_fields_per_table )
{
$fields = $rOpts_maximum_fields_per_table;
}
my $max_skipped_count = $fields - 1;
my $is_simple_last_term = 0;
my $is_simple_next_term = 0;
foreach my $j ( 0 .. $item_count ) {
$is_simple_last_term = $is_simple_next_term;
$is_simple_next_term = 0;
if ( $j < $item_count
&& $i_term_end[$j] == $i_term_begin[$j]
&& $item_lengths[$j] <= $small_length )
{
$is_simple_next_term = 1;
}
next if $j == 0;
if ( $is_simple_last_term
&& $is_simple_next_term
&& $skipped_count < $max_skipped_count )
{
$skipped_count++;
}
else {
$skipped_count = 0;
my $i = $i_term_comma[ $j - 1 ];
last unless defined $i;
$self->set_forced_breakpoint($i);
}
}
# always break at the last comma if this list is
# interrupted; we wouldn't want to leave a terminal '{', for
# example.
if ($interrupted) {
$self->set_forced_breakpoint($i_true_last_comma);
}
return;
}
#my ( $a, $b, $c ) = caller();
#print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
#i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
#---------------------------------------------------------------
# Interrupted List Rule:
# A list is forced to use old breakpoints if it was interrupted
# by side comments or blank lines, or requested by user.
#---------------------------------------------------------------
if ( $rOpts_break_at_old_comma_breakpoints
|| $interrupted
|| $i_opening_paren < 0 )
{
$self->copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
return;
}
#---------------------------------------------------------------
# Looks like a list of items. We have to look at it and size it up.
#---------------------------------------------------------------
my $opening_token = $tokens_to_go[$i_opening_paren];
my $opening_environment =
$container_environment_to_go[$i_opening_paren];
#-------------------------------------------------------------------
# Return if this will fit on one line
#-------------------------------------------------------------------
my $i_opening_minus = $self->find_token_starting_list($i_opening_paren);
return
unless $self->excess_line_length( $i_opening_minus, $i_closing_paren )
> 0;
#-------------------------------------------------------------------
# Now we know that this block spans multiple lines; we have to set
# at least one breakpoint -- real or fake -- as a signal to break
# open any outer containers.
#-------------------------------------------------------------------
set_fake_breakpoint();
# be sure we do not extend beyond the current list length
if ( $i_effective_last_comma >= $max_index_to_go ) {
$i_effective_last_comma = $max_index_to_go - 1;
}
# Set a flag indicating if we need to break open to keep -lp
# items aligned. This is necessary if any of the list terms
# exceeds the available space after the '('.
my $need_lp_break_open = $must_break_open;
if ( $rOpts_line_up_parentheses && !$must_break_open ) {
my $columns_if_unbroken =
$maximum_line_length[ $levels_to_go[$i_opening_minus] ] -
total_line_length( $i_opening_minus, $i_opening_paren );
$need_lp_break_open =
( $max_length[0] > $columns_if_unbroken )
|| ( $max_length[1] > $columns_if_unbroken )
|| ( $first_term_length > $columns_if_unbroken );
}
# Specify if the list must have an even number of fields or not.
# It is generally safest to assume an even number, because the
# list items might be a hash list. But if we can be sure that
# it is not a hash, then we can allow an odd number for more
# flexibility.
my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
if ( $identifier_count >= $item_count - 1
|| $is_assignment{$next_nonblank_type}
|| ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
)
{
$odd_or_even = 1;
}
# do we have a long first term which should be
# left on a line by itself?
my $use_separate_first_term = (
$odd_or_even == 1 # only if we can use 1 field/line
&& $item_count > 3 # need several items
&& $first_term_length >
2 * $max_length[0] - 2 # need long first term
&& $first_term_length >
2 * $max_length[1] - 2 # need long first term
);
# or do we know from the type of list that the first term should
# be placed alone?
if ( !$use_separate_first_term ) {
if ( $is_keyword_with_special_leading_term{$list_type} ) {
$use_separate_first_term = 1;
# should the container be broken open?
if ( $item_count < 3 ) {
if ( $i_first_comma - $i_opening_paren < 4 ) {
${$rdo_not_break_apart} = 1;
}
}
elsif ($first_term_length < 20
&& $i_first_comma - $i_opening_paren < 4 )
{
my $columns = table_columns_available($i_first_comma);
if ( $first_term_length < $columns ) {
${$rdo_not_break_apart} = 1;
}
}
}
}
# if so,
if ($use_separate_first_term) {
# ..set a break and update starting values
$use_separate_first_term = 1;
$self->set_forced_breakpoint($i_first_comma);
$i_opening_paren = $i_first_comma;
$i_first_comma = $rcomma_index->[1];
$item_count--;
return if $comma_count == 1;
shift @item_lengths;
shift @i_term_begin;
shift @i_term_end;
shift @i_term_comma;
}
# if not, update the metrics to include the first term
else {
if ( $first_term_length > $max_length[0] ) {
$max_length[0] = $first_term_length;
}
}
# Field width parameters
my $pair_width = ( $max_length[0] + $max_length[1] );
my $max_width =
( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
# Number of free columns across the page width for laying out tables
my $columns = table_columns_available($i_first_comma);
# Estimated maximum number of fields which fit this space
# This will be our first guess
my $number_of_fields_max =
maximum_number_of_fields( $columns, $odd_or_even, $max_width,
$pair_width );
my $number_of_fields = $number_of_fields_max;
# Find the best-looking number of fields
# and make this our second guess if possible
my ( $number_of_fields_best, $ri_ragged_break_list,
$new_identifier_count )
= $self->study_list_complexity( \@i_term_begin, \@i_term_end,
\@item_lengths, $max_width );
if ( $number_of_fields_best != 0
&& $number_of_fields_best < $number_of_fields_max )
{
$number_of_fields = $number_of_fields_best;
}
# ----------------------------------------------------------------------
# If we are crowded and the -lp option is being used, try to
# undo some indentation
# ----------------------------------------------------------------------
if (
$rOpts_line_up_parentheses
&& (
$number_of_fields == 0
|| ( $number_of_fields == 1
&& $number_of_fields != $number_of_fields_best )
)
)
{
my $available_spaces =
$self->get_available_spaces_to_go($i_first_comma);
if ( $available_spaces > 0 ) {
my $spaces_wanted = $max_width - $columns; # for 1 field
if ( $number_of_fields_best == 0 ) {
$number_of_fields_best =
get_maximum_fields_wanted( \@item_lengths );
}
if ( $number_of_fields_best != 1 ) {
my $spaces_wanted_2 =
1 + $pair_width - $columns; # for 2 fields
if ( $available_spaces > $spaces_wanted_2 ) {
$spaces_wanted = $spaces_wanted_2;
}
}
if ( $spaces_wanted > 0 ) {
my $deleted_spaces =
$self->reduce_lp_indentation( $i_first_comma,
$spaces_wanted );
# redo the math
if ( $deleted_spaces > 0 ) {
$columns = table_columns_available($i_first_comma);
$number_of_fields_max =
maximum_number_of_fields( $columns, $odd_or_even,
$max_width, $pair_width );
$number_of_fields = $number_of_fields_max;
if ( $number_of_fields_best == 1
&& $number_of_fields >= 1 )
{
$number_of_fields = $number_of_fields_best;
}
}
}
}
}
# try for one column if two won't work
if ( $number_of_fields <= 0 ) {
$number_of_fields = int( $columns / $max_width );
}
# The user can place an upper bound on the number of fields,
# which can be useful for doing maintenance on tables
if ( $rOpts_maximum_fields_per_table
&& $number_of_fields > $rOpts_maximum_fields_per_table )
{
$number_of_fields = $rOpts_maximum_fields_per_table;
}
# How many columns (characters) and lines would this container take
# if no additional whitespace were added?
my $packed_columns = token_sequence_length( $i_opening_paren + 1,
$i_effective_last_comma + 1 );
if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
my $packed_lines = 1 + int( $packed_columns / $columns );
# are we an item contained in an outer list?
my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
if ( $number_of_fields <= 0 ) {
# #---------------------------------------------------------------
# # We're in trouble. We can't find a single field width that works.
# # There is no simple answer here; we may have a single long list
# # item, or many.
# #---------------------------------------------------------------
#
# In many cases, it may be best to not force a break if there is just one
# comma, because the standard continuation break logic will do a better
# job without it.
#
# In the common case that all but one of the terms can fit
# on a single line, it may look better not to break open the
# containing parens. Consider, for example
#
# $color =
# join ( '/',
# sort { $color_value{$::a} <=> $color_value{$::b}; }
# keys %colors );
#
# which will look like this with the container broken:
#
# $color = join (
# '/',
# sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
# );
#
# Here is an example of this rule for a long last term:
#
# log_message( 0, 256, 128,
# "Number of routes in adj-RIB-in to be considered: $peercount" );
#
# And here is an example with a long first term:
#
# $s = sprintf(
# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
# $r, $pu, $ps, $cu, $cs, $tt
# )
# if $style eq 'all';
my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
my $long_last_term =
$self->excess_line_length( 0, $i_last_comma ) <= 0;
my $long_first_term =
$self->excess_line_length( $i_first_comma + 1, $max_index_to_go )
<= 0;
# break at every comma ...
if (
# if requested by user or is best looking
$number_of_fields_best == 1
# or if this is a sublist of a larger list
|| $in_hierarchical_list
# or if multiple commas and we don't have a long first or last
# term
|| ( $comma_count > 1
&& !( $long_last_term || $long_first_term ) )
)
{
foreach ( 0 .. $comma_count - 1 ) {
$self->set_forced_breakpoint( $rcomma_index->[$_] );
}
}
elsif ($long_last_term) {
$self->set_forced_breakpoint($i_last_comma);
${$rdo_not_break_apart} = 1 unless $must_break_open;
}
elsif ($long_first_term) {
$self->set_forced_breakpoint($i_first_comma);
}
else {
# let breaks be defined by default bond strength logic
}
return;
}
# --------------------------------------------------------
# We have a tentative field count that seems to work.
# How many lines will this require?
# --------------------------------------------------------
my $formatted_lines = $item_count / ($number_of_fields);
if ( $formatted_lines != int $formatted_lines ) {
$formatted_lines = 1 + int $formatted_lines;
}
# So far we've been trying to fill out to the right margin. But
# compact tables are easier to read, so let's see if we can use fewer
# fields without increasing the number of lines.
$number_of_fields =
compactify_table( $item_count, $number_of_fields, $formatted_lines,
$odd_or_even );
# How many spaces across the page will we fill?
my $columns_per_line =
( int $number_of_fields / 2 ) * $pair_width +
( $number_of_fields % 2 ) * $max_width;
my $formatted_columns;
if ( $number_of_fields > 1 ) {
$formatted_columns =
( $pair_width * ( int( $item_count / 2 ) ) +
( $item_count % 2 ) * $max_width );
}
else {
$formatted_columns = $max_width * $item_count;
}
if ( $formatted_columns < $packed_columns ) {
$formatted_columns = $packed_columns;
}
my $unused_columns = $formatted_columns - $packed_columns;
# set some empirical parameters to help decide if we should try to
# align; high sparsity does not look good, especially with few lines
my $sparsity = ($unused_columns) / ($formatted_columns);
my $max_allowed_sparsity =
( $item_count < 3 ) ? 0.1
: ( $packed_lines == 1 ) ? 0.15
: ( $packed_lines == 2 ) ? 0.4
: 0.7;
# Begin check for shortcut methods, which avoid treating a list
# as a table for relatively small parenthesized lists. These
# are usually easier to read if not formatted as tables.
if (
$packed_lines <= 2 # probably can fit in 2 lines
&& $item_count < 9 # doesn't have too many items
&& $opening_environment eq 'BLOCK' # not a sub-container
&& $opening_token eq '(' # is paren list
)
{
# Shortcut method 1: for -lp and just one comma:
# This is a no-brainer, just break at the comma.
if (
$rOpts_line_up_parentheses # -lp
&& $item_count == 2 # two items, one comma
&& !$must_break_open
)
{
my $i_break = $rcomma_index->[0];
$self->set_forced_breakpoint($i_break);
${$rdo_not_break_apart} = 1;
return;
}
# method 2 is for most small ragged lists which might look
# best if not displayed as a table.
if (
( $number_of_fields == 2 && $item_count == 3 )
|| (
$new_identifier_count > 0 # isn't all quotes
&& $sparsity > 0.15
) # would be fairly spaced gaps if aligned
)
{
my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
$ri_ragged_break_list );
++$break_count if ($use_separate_first_term);
# NOTE: we should really use the true break count here,
# which can be greater if there are large terms and
# little space, but usually this will work well enough.
unless ($must_break_open) {
if ( $break_count <= 1 ) {
${$rdo_not_break_apart} = 1;
}
elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
{
${$rdo_not_break_apart} = 1;
}
}
return;
}
} # end shortcut methods
# debug stuff
DEBUG_SPARSE && do {
print STDOUT
"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
};
#---------------------------------------------------------------
# Compound List Rule 2:
# If this list is too long for one line, and it is an item of a
# larger list, then we must format it, regardless of sparsity
# (ian.t). One reason that we have to do this is to trigger
# Compound List Rule 1, above, which causes breaks at all commas of
# all outer lists. In this way, the structure will be properly
# displayed.
#---------------------------------------------------------------
# Decide if this list is too long for one line unless broken
my $total_columns = table_columns_available($i_opening_paren);
my $too_long = $packed_columns > $total_columns;
# For a paren list, include the length of the token just before the
# '(' because this is likely a sub call, and we would have to
# include the sub name on the same line as the list. This is still
# imprecise, but not too bad. (steve.t)
if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
$too_long = $self->excess_line_length( $i_opening_minus,
$i_effective_last_comma + 1 ) > 0;
}
# FIXME: For an item after a '=>', try to include the length of the
# thing before the '=>'. This is crude and should be improved by
# actually looking back token by token.
if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
my $i_opening_minus = $i_opening_paren - 4;
if ( $i_opening_minus >= 0 ) {
$too_long = $self->excess_line_length( $i_opening_minus,
$i_effective_last_comma + 1 ) > 0;
}
}
# Always break lists contained in '[' and '{' if too long for 1 line,
# and always break lists which are too long and part of a more complex
# structure.
my $must_break_open_container = $must_break_open
|| ( $too_long
&& ( $in_hierarchical_list || $opening_token ne '(' ) );
#print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
#---------------------------------------------------------------
# The main decision:
# Now decide if we will align the data into aligned columns. Do not
# attempt to align columns if this is a tiny table or it would be
# too spaced. It seems that the more packed lines we have, the
# sparser the list that can be allowed and still look ok.
#---------------------------------------------------------------
if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
|| ( $formatted_lines < 2 )
|| ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
)
{
#---------------------------------------------------------------
# too sparse: would look ugly if aligned in a table;
#---------------------------------------------------------------
# use old breakpoints if this is a 'big' list
# FIXME: See if this is still necessary. sub sweep_left_to_right
# now fixes a lot of problems.
if ( $packed_lines > 2 && $item_count > 10 ) {
write_logfile_entry("List sparse: using old breakpoints\n");
$self->copy_old_breakpoints( $i_first_comma, $i_last_comma );
}
# let the continuation logic handle it if 2 lines
else {
my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
$ri_ragged_break_list );
++$break_count if ($use_separate_first_term);
unless ($must_break_open_container) {
if ( $break_count <= 1 ) {
${$rdo_not_break_apart} = 1;
}
elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
{
${$rdo_not_break_apart} = 1;
}
}
}
return;
}
#---------------------------------------------------------------
# go ahead and format as a table
#---------------------------------------------------------------
write_logfile_entry(
"List: auto formatting with $number_of_fields fields/row\n");
my $j_first_break =
$use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
for (
my $j = $j_first_break ;
$j < $comma_count ;
$j += $number_of_fields
)
{
my $i = $rcomma_index->[$j];
$self->set_forced_breakpoint($i);
}
return;
}
} ## end closure set_comma_breakpoints_do
sub study_list_complexity {
# Look for complex tables which should be formatted with one term per line.
# Returns the following:
#
# \@i_ragged_break_list = list of good breakpoints to avoid lines
# which are hard to read
# $number_of_fields_best = suggested number of fields based on
# complexity; = 0 if any number may be used.
#
my ( $self, $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
my $item_count = @{$ri_term_begin};
my $complex_item_count = 0;
my $number_of_fields_best = $rOpts_maximum_fields_per_table;
my $i_max = @{$ritem_lengths} - 1;
##my @item_complexity;
my $i_last_last_break = -3;
my $i_last_break = -2;
my @i_ragged_break_list;
my $definitely_complex = 30;
my $definitely_simple = 12;
my $quote_count = 0;
for my $i ( 0 .. $i_max ) {
my $ib = $ri_term_begin->[$i];
my $ie = $ri_term_end->[$i];
# define complexity: start with the actual term length
my $weighted_length = ( $ritem_lengths->[$i] - 2 );
##TBD: join types here and check for variations
##my $str=join "", @tokens_to_go[$ib..$ie];
my $is_quote = 0;
if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
$is_quote = 1;
$quote_count++;
}
elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
$quote_count++;
}
if ( $ib eq $ie ) {
if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
$complex_item_count++;
$weighted_length *= 2;
}
else {
}
}
else {
if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
$complex_item_count++;
$weighted_length *= 2;
}
if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
$weighted_length += 4;
}
}
# add weight for extra tokens.
$weighted_length += 2 * ( $ie - $ib );
## my $BUB = join '', @tokens_to_go[$ib..$ie];
## print "# COMPLEXITY:$weighted_length $BUB\n";
##push @item_complexity, $weighted_length;
# now mark a ragged break after this item it if it is 'long and
# complex':
if ( $weighted_length >= $definitely_complex ) {
# if we broke after the previous term
# then break before it too
if ( $i_last_break == $i - 1
&& $i > 1
&& $i_last_last_break != $i - 2 )
{
## FIXME: don't strand a small term
pop @i_ragged_break_list;
push @i_ragged_break_list, $i - 2;
push @i_ragged_break_list, $i - 1;
}
push @i_ragged_break_list, $i;
$i_last_last_break = $i_last_break;
$i_last_break = $i;
}
# don't break before a small last term -- it will
# not look good on a line by itself.
elsif ($i == $i_max
&& $i_last_break == $i - 1
&& $weighted_length <= $definitely_simple )
{
pop @i_ragged_break_list;
}
}
my $identifier_count = $i_max + 1 - $quote_count;
# Need more tuning here..
if ( $max_width > 12
&& $complex_item_count > $item_count / 2
&& $number_of_fields_best != 2 )
{
$number_of_fields_best = 1;
}
return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
}
sub get_maximum_fields_wanted {
# Not all tables look good with more than one field of items.
# This routine looks at a table and decides if it should be
# formatted with just one field or not.
# This coding is still under development.
my ($ritem_lengths) = @_;
my $number_of_fields_best = 0;
# For just a few items, we tentatively assume just 1 field.
my $item_count = @{$ritem_lengths};
if ( $item_count <= 5 ) {
$number_of_fields_best = 1;
}
# For larger tables, look at it both ways and see what looks best
else {
my $is_odd = 1;
my @max_length = ( 0, 0 );
my @last_length_2 = ( undef, undef );
my @first_length_2 = ( undef, undef );
my $last_length = undef;
my $total_variation_1 = 0;
my $total_variation_2 = 0;
my @total_variation_2 = ( 0, 0 );
foreach my $j ( 0 .. $item_count - 1 ) {
$is_odd = 1 - $is_odd;
my $length = $ritem_lengths->[$j];
if ( $length > $max_length[$is_odd] ) {
$max_length[$is_odd] = $length;
}
if ( defined($last_length) ) {
my $dl = abs( $length - $last_length );
$total_variation_1 += $dl;
}
$last_length = $length;
my $ll = $last_length_2[$is_odd];
if ( defined($ll) ) {
my $dl = abs( $length - $ll );
$total_variation_2[$is_odd] += $dl;
}
else {
$first_length_2[$is_odd] = $length;
}
$last_length_2[$is_odd] = $length;
}
$total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
unless ( $total_variation_2 < $factor * $total_variation_1 ) {
$number_of_fields_best = 1;
}
}
return ($number_of_fields_best);
}
sub table_columns_available {
my $i_first_comma = shift;
my $columns =
$maximum_line_length[ $levels_to_go[$i_first_comma] ] -
leading_spaces_to_go($i_first_comma);
# Patch: the vertical formatter does not line up lines whose lengths
# exactly equal the available line length because of allowances
# that must be made for side comments. Therefore, the number of
# available columns is reduced by 1 character.
$columns -= 1;
return $columns;
}
sub maximum_number_of_fields {
# how many fields will fit in the available space?
my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
my $max_pairs = int( $columns / $pair_width );
my $number_of_fields = $max_pairs * 2;
if ( $odd_or_even == 1
&& $max_pairs * $pair_width + $max_width <= $columns )
{
$number_of_fields++;
}
return $number_of_fields;
}
sub compactify_table {
# given a table with a certain number of fields and a certain number
# of lines, see if reducing the number of fields will make it look
# better.
my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
my $min_fields;
for (
$min_fields = $number_of_fields ;
$min_fields >= $odd_or_even
&& $min_fields * $formatted_lines >= $item_count ;
$min_fields -= $odd_or_even
)
{
$number_of_fields = $min_fields;
}
}
return $number_of_fields;
}
sub set_ragged_breakpoints {
# Set breakpoints in a list that cannot be formatted nicely as a
# table.
my ( $self, $ri_term_comma, $ri_ragged_break_list ) = @_;
my $break_count = 0;
foreach ( @{$ri_ragged_break_list} ) {
my $j = $ri_term_comma->[$_];
if ($j) {
$self->set_forced_breakpoint($j);
$break_count++;
}
}
return $break_count;
}
sub copy_old_breakpoints {
my ( $self, $i_first_comma, $i_last_comma ) = @_;
for my $i ( $i_first_comma .. $i_last_comma ) {
if ( $old_breakpoint_to_go[$i] ) {
$self->set_forced_breakpoint($i);
}
}
return;
}
sub set_nobreaks {
my ( $self, $i, $j ) = @_;
if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
0 && do {
my ( $a, $b, $c ) = caller();
my $forced_breakpoint_count = get_forced_breakpoint_count();
print STDOUT
"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
};
@nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
}
# shouldn't happen; non-critical error
else {
0 && do {
my ( $a, $b, $c ) = caller();
print STDOUT
"NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
};
}
return;
}
###############################################
# CODE SECTION 12: Code for setting indentation
###############################################
sub token_sequence_length {
# return length of tokens ($ibeg .. $iend) including $ibeg & $iend
# returns 0 if $ibeg > $iend (shouldn't happen)
my ( $ibeg, $iend ) = @_;
return 0 if ( !defined($iend) || $iend < 0 || $ibeg > $iend );
return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 );
return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
}
sub total_line_length {
# return length of a line of tokens ($ibeg .. $iend)
my ( $ibeg, $iend ) = @_;
# original coding:
#return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
# this is basically sub 'leading_spaces_to_go':
my $indentation = $leading_spaces_to_go[$ibeg];
if ( ref($indentation) ) { $indentation = $indentation->get_spaces() }
return $indentation + $summed_lengths_to_go[ $iend + 1 ] -
$summed_lengths_to_go[$ibeg];
}
sub excess_line_length {
# return number of characters by which a line of tokens ($ibeg..$iend)
# exceeds the allowable line length.
# NOTE: Profiling shows that this is a critical routine for efficiency.
# Therefore I have eliminated additional calls to subs from it.
my ( $self, $ibeg, $iend, $ignore_right_weld ) = @_;
# Original expression for line length
##$length = leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
# This is basically sub 'leading_spaces_to_go':
my $indentation = $leading_spaces_to_go[$ibeg];
if ( ref($indentation) ) { $indentation = $indentation->get_spaces() }
my $length =
$indentation +
$summed_lengths_to_go[ $iend + 1 ] -
$summed_lengths_to_go[$ibeg];
# Include right weld lengths unless requested not to.
if ( !$ignore_right_weld
&& $type_sequence_to_go[$iend]
&& $total_weld_count )
{
my $wr = $self->weld_len_right( $type_sequence_to_go[$iend],
$types_to_go[$iend] );
$length += $wr;
}
# return the excess
return $length - $maximum_line_length[ $levels_to_go[$ibeg] ];
}
sub get_spaces {
# return the number of leading spaces associated with an indentation
# variable $indentation is either a constant number of spaces or an object
# with a get_spaces method.
my $indentation = shift;
return ref($indentation) ? $indentation->get_spaces() : $indentation;
}
sub get_recoverable_spaces {
# return the number of spaces (+ means shift right, - means shift left)
# that we would like to shift a group of lines with the same indentation
# to get them to line up with their opening parens
my $indentation = shift;
return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
}
sub get_available_spaces_to_go {
my ( $self, $ii ) = @_;
my $item = $leading_spaces_to_go[$ii];
# return the number of available leading spaces associated with an
# indentation variable. $indentation is either a constant number of
# spaces or an object with a get_available_spaces method.
return ref($item) ? $item->get_available_spaces() : 0;
}
{ ## begin closure set_leading_whitespace (for -lp indentation)
# These routines are called batch-by-batch to handle the -lp indentation
# option. The coding is rather complex, but is only for -lp.
my $gnu_position_predictor;
my $gnu_sequence_number;
my $line_start_index_to_go;
my $max_gnu_item_index;
my $max_gnu_stack_index;
my %gnu_arrow_count;
my %gnu_comma_count;
my %last_gnu_equals;
my @gnu_item_list;
my @gnu_stack;
sub initialize_gnu_vars {
# initialize gnu variables for a new file;
# must be called once at the start of a new file.
# initialize the leading whitespace stack to negative levels
# so that we can never run off the end of the stack
$gnu_position_predictor =
0; # where the current token is predicted to be
$max_gnu_stack_index = 0;
$max_gnu_item_index = -1;
$gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
@gnu_item_list = ();
return;
}
sub initialize_gnu_batch_vars {
# initialize gnu variables for a new batch;
# must be called before each new batch
$gnu_sequence_number++; # increment output batch counter
%last_gnu_equals = ();
%gnu_comma_count = ();
%gnu_arrow_count = ();
$line_start_index_to_go = 0;
$max_gnu_item_index = UNDEFINED_INDEX;
return;
}
sub new_lp_indentation_item {
# this is an interface to the IndentationItem class
my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
# A negative level implies not to store the item in the item_list
my $index = 0;
if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
my $starting_index_K = 0;
if ( defined($line_start_index_to_go)
&& $line_start_index_to_go >= 0
&& $line_start_index_to_go <= $max_index_to_go )
{
$starting_index_K = $K_to_go[$line_start_index_to_go];
}
my $item = Perl::Tidy::IndentationItem->new(
spaces => $spaces,
level => $level,
ci_level => $ci_level,
available_spaces => $available_spaces,
index => $index,
gnu_sequence_number => $gnu_sequence_number,
align_paren => $align_paren,
stack_depth => $max_gnu_stack_index,
starting_index_K => $starting_index_K,
);
if ( $level >= 0 ) {
$gnu_item_list[$max_gnu_item_index] = $item;
}
return $item;
}
sub set_leading_whitespace {
# This routine defines leading whitespace for the case of -lp formatting
# given: the level and continuation_level of a token,
# define: space count of leading string which would apply if it
# were the first token of a new line.
my ( $self, $Kj, $K_last_nonblank, $K_last_last_nonblank,
$level_abs, $ci_level, $in_continued_quote )
= @_;
return unless ($rOpts_line_up_parentheses);
return unless ( defined($max_index_to_go) && $max_index_to_go >= 0 );
my $rbreak_container = $self->[_rbreak_container_];
my $rshort_nested = $self->[_rshort_nested_];
my $rLL = $self->[_rLL_];
# find needed previous nonblank tokens
my $last_nonblank_token = '';
my $last_nonblank_type = '';
my $last_nonblank_block_type = '';
# and previous nonblank tokens, just in this batch:
my $last_nonblank_token_in_batch = '';
my $last_nonblank_type_in_batch = '';
my $last_last_nonblank_type_in_batch = '';
if ( defined($K_last_nonblank) ) {
$last_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
$last_nonblank_type = $rLL->[$K_last_nonblank]->[_TYPE_];
$last_nonblank_block_type =
$rLL->[$K_last_nonblank]->[_BLOCK_TYPE_];
if ( $K_last_nonblank >= $K_to_go[0] ) {
$last_nonblank_token_in_batch = $last_nonblank_token;
$last_nonblank_type_in_batch = $last_nonblank_type;
if ( defined($K_last_last_nonblank)
&& $K_last_last_nonblank > $K_to_go[0] )
{
$last_last_nonblank_type_in_batch =
$rLL->[$K_last_last_nonblank]->[_TYPE_];
}
}
}
################################################################
# Adjust levels if necessary to recycle whitespace:
my $level = $level_abs;
my $radjusted_levels = $self->[_radjusted_levels_];
my $nK = @{$rLL};
my $nws = @{$radjusted_levels};
if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
$level = $radjusted_levels->[$Kj];
if ( $level < 0 ) { $level = 0 } # note: this should not happen
}
# The continued_quote flag means that this is the first token of a
# line, and it is the continuation of some kind of multi-line quote
# or pattern. It requires special treatment because it must have no
# added leading whitespace. So we create a special indentation item
# which is not in the stack.
if ($in_continued_quote) {
my $space_count = 0;
my $available_space = 0;
$level = -1; # flag to prevent storing in item_list
$leading_spaces_to_go[$max_index_to_go] =
$reduced_spaces_to_go[$max_index_to_go] =
new_lp_indentation_item( $space_count, $level, $ci_level,
$available_space, 0 );
return;
}
# get the top state from the stack
my $space_count = $gnu_stack[$max_gnu_stack_index]->get_spaces();
my $current_level = $gnu_stack[$max_gnu_stack_index]->get_level();
my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
my $type = $types_to_go[$max_index_to_go];
my $token = $tokens_to_go[$max_index_to_go];
my $total_depth = $nesting_depth_to_go[$max_index_to_go];
if ( $type eq '{' || $type eq '(' ) {
$gnu_comma_count{ $total_depth + 1 } = 0;
$gnu_arrow_count{ $total_depth + 1 } = 0;
# If we come to an opening token after an '=' token of some type,
# see if it would be helpful to 'break' after the '=' to save space
my $last_equals = $last_gnu_equals{$total_depth};
if ( $last_equals && $last_equals > $line_start_index_to_go ) {
# find the position if we break at the '='
my $i_test = $last_equals;
if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
# TESTING
##my $too_close = ($i_test==$max_index_to_go-1);
my $test_position =
total_line_length( $i_test, $max_index_to_go );
my $mll = $maximum_line_length[ $levels_to_go[$i_test] ];
if (
# the equals is not just before an open paren (testing)
##!$too_close &&
# if we are beyond the midpoint
$gnu_position_predictor >
$mll - $rOpts_maximum_line_length / 2
# or we are beyond the 1/4 point and there was an old
# break at the equals
|| (
$gnu_position_predictor >
$mll - $rOpts_maximum_line_length * 3 / 4
&& (
$old_breakpoint_to_go[$last_equals]
|| ( $last_equals > 0
&& $old_breakpoint_to_go[ $last_equals - 1 ] )
|| ( $last_equals > 1
&& $types_to_go[ $last_equals - 1 ] eq 'b'
&& $old_breakpoint_to_go[ $last_equals - 2 ] )
)
)
)
{
# then make the switch -- note that we do not set a real
# breakpoint here because we may not really need one; sub
# scan_list will do that if necessary
$line_start_index_to_go = $i_test + 1;
$gnu_position_predictor = $test_position;
}
}
}
my $halfway =
$maximum_line_length[$level] - $rOpts_maximum_line_length / 2;
# Check for decreasing depth ..
# Note that one token may have both decreasing and then increasing
# depth. For example, (level, ci) can go from (1,1) to (2,0). So,
# in this example we would first go back to (1,0) then up to (2,0)
# in a single call.
if ( $level < $current_level || $ci_level < $current_ci_level ) {
# loop to find the first entry at or completely below this level
my ( $lev, $ci_lev );
while (1) {
if ($max_gnu_stack_index) {
# save index of token which closes this level
$gnu_stack[$max_gnu_stack_index]
->set_closed($max_index_to_go);
# Undo any extra indentation if we saw no commas
my $available_spaces =
$gnu_stack[$max_gnu_stack_index]->get_available_spaces();
my $comma_count = 0;
my $arrow_count = 0;
if ( $type eq '}' || $type eq ')' ) {
$comma_count = $gnu_comma_count{$total_depth};
$arrow_count = $gnu_arrow_count{$total_depth};
$comma_count = 0 unless $comma_count;
$arrow_count = 0 unless $arrow_count;
}
$gnu_stack[$max_gnu_stack_index]
->set_comma_count($comma_count);
$gnu_stack[$max_gnu_stack_index]
->set_arrow_count($arrow_count);
if ( $available_spaces > 0 ) {
if ( $comma_count <= 0 || $arrow_count > 0 ) {
my $i =
$gnu_stack[$max_gnu_stack_index]->get_index();
my $seqno =
$gnu_stack[$max_gnu_stack_index]
->get_sequence_number();
# Be sure this item was created in this batch. This
# should be true because we delete any available
# space from open items at the end of each batch.
if ( $gnu_sequence_number != $seqno
|| $i > $max_gnu_item_index )
{
warning(
"Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
);
report_definite_bug();
}
else {
if ( $arrow_count == 0 ) {
$gnu_item_list[$i]
->permanently_decrease_available_spaces(
$available_spaces);
}
else {
$gnu_item_list[$i]
->tentatively_decrease_available_spaces(
$available_spaces);
}
foreach my $j ( $i + 1 .. $max_gnu_item_index )
{
$gnu_item_list[$j]
->decrease_SPACES($available_spaces);
}
}
}
}
# go down one level
--$max_gnu_stack_index;
$lev = $gnu_stack[$max_gnu_stack_index]->get_level();
$ci_lev = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
# stop when we reach a level at or below the current level
if ( $lev <= $level && $ci_lev <= $ci_level ) {
$space_count =
$gnu_stack[$max_gnu_stack_index]->get_spaces();
$current_level = $lev;
$current_ci_level = $ci_lev;
last;
}
}
# reached bottom of stack .. should never happen because
# only negative levels can get here, and $level was forced
# to be positive above.
else {
warning(
"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
);
report_definite_bug();
last;
}
}
}
# handle increasing depth
if ( $level > $current_level || $ci_level > $current_ci_level ) {
# Compute the standard incremental whitespace. This will be
# the minimum incremental whitespace that will be used. This
# choice results in a smooth transition between the gnu-style
# and the standard style.
my $standard_increment =
( $level - $current_level ) *
$rOpts_indent_columns +
( $ci_level - $current_ci_level ) *
$rOpts_continuation_indentation;
# Now we have to define how much extra incremental space
# ("$available_space") we want. This extra space will be
# reduced as necessary when long lines are encountered or when
# it becomes clear that we do not have a good list.
my $available_space = 0;
my $align_paren = 0;
my $excess = 0;
my $last_nonblank_seqno;
if ( defined($K_last_nonblank) ) {
$last_nonblank_seqno =
$rLL->[$K_last_nonblank]->[_TYPE_SEQUENCE_];
}
# initialization on empty stack..
if ( $max_gnu_stack_index == 0 ) {
$space_count = $level * $rOpts_indent_columns;
}
# if this is a BLOCK, add the standard increment
elsif ($last_nonblank_block_type) {
$space_count += $standard_increment;
}
# if last nonblank token was not structural indentation,
# just use standard increment
elsif ( $last_nonblank_type ne '{' ) {
$space_count += $standard_increment;
}
# if this container holds a qw, add the standard increment
elsif ($last_nonblank_seqno
&& $self->[_rcontains_multiline_qw_by_seqno_]
->{$last_nonblank_seqno} )
{
$space_count += $standard_increment;
}
# otherwise use the space to the first non-blank level change token
else {
$space_count = $gnu_position_predictor;
my $min_gnu_indentation =
$gnu_stack[$max_gnu_stack_index]->get_spaces();
$available_space = $space_count - $min_gnu_indentation;
if ( $available_space >= $standard_increment ) {
$min_gnu_indentation += $standard_increment;
}
elsif ( $available_space > 1 ) {
$min_gnu_indentation += $available_space + 1;
}
elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
$min_gnu_indentation += 2;
}
else {
$min_gnu_indentation += 1;
}
}
else {
$min_gnu_indentation += $standard_increment;
}
$available_space = $space_count - $min_gnu_indentation;
if ( $available_space < 0 ) {
$space_count = $min_gnu_indentation;
$available_space = 0;
}
$align_paren = 1;
}
# update state, but not on a blank token
if ( $types_to_go[$max_index_to_go] ne 'b' ) {
$gnu_stack[$max_gnu_stack_index]->set_have_child(1);
++$max_gnu_stack_index;
$gnu_stack[$max_gnu_stack_index] =
new_lp_indentation_item( $space_count, $level, $ci_level,
$available_space, $align_paren );
# If the opening paren is beyond the half-line length, then
# we will use the minimum (standard) indentation. This will
# help avoid problems associated with running out of space
# near the end of a line. As a result, in deeply nested
# lists, there will be some indentations which are limited
# to this minimum standard indentation. But the most deeply
# nested container will still probably be able to shift its
# parameters to the right for proper alignment, so in most
# cases this will not be noticeable.
if ( $available_space > 0 && $space_count > $halfway ) {
$gnu_stack[$max_gnu_stack_index]
->tentatively_decrease_available_spaces($available_space);
}
}
}
# Count commas and look for non-list characters. Once we see a
# non-list character, we give up and don't look for any more commas.
if ( $type eq '=>' ) {
$gnu_arrow_count{$total_depth}++;
# tentatively treating '=>' like '=' for estimating breaks
# TODO: this could use some experimentation
$last_gnu_equals{$total_depth} = $max_index_to_go;
}
elsif ( $type eq ',' ) {
$gnu_comma_count{$total_depth}++;
}
elsif ( $is_assignment{$type} ) {
$last_gnu_equals{$total_depth} = $max_index_to_go;
}
# this token might start a new line
# if this is a non-blank..
if ( $type ne 'b' ) {
# and if ..
if (
# this is the first nonblank token of the line
$max_index_to_go == 1 && $types_to_go[0] eq 'b'
# or previous character was one of these:
|| $last_nonblank_type_in_batch =~ /^([\:\?\,f])$/
# or previous character was opening and this does not close it
|| ( $last_nonblank_type_in_batch eq '{' && $type ne '}' )
|| ( $last_nonblank_type_in_batch eq '(' and $type ne ')' )
# or this token is one of these:
|| $type =~ /^([\.]|\|\||\&\&)$/
# or this is a closing structure
|| ( $last_nonblank_type_in_batch eq '}'
&& $last_nonblank_token_in_batch eq
$last_nonblank_type_in_batch )
# or previous token was keyword 'return'
|| (
$last_nonblank_type_in_batch eq 'k'
&& ( $last_nonblank_token_in_batch eq 'return'
&& $type ne '{' )
)
# or starting a new line at certain keywords is fine
|| ( $type eq 'k'
&& $is_if_unless_and_or_last_next_redo_return{$token} )
# or this is after an assignment after a closing structure
|| (
$is_assignment{$last_nonblank_type_in_batch}
&& (
$last_last_nonblank_type_in_batch =~ /^[\}\)\]]$/
# and it is significantly to the right
|| $gnu_position_predictor > $halfway
)
)
)
{
check_for_long_gnu_style_lines($max_index_to_go);
$line_start_index_to_go = $max_index_to_go;
# back up 1 token if we want to break before that type
# otherwise, we may strand tokens like '?' or ':' on a line
if ( $line_start_index_to_go > 0 ) {
if ( $last_nonblank_type_in_batch eq 'k' ) {
if ( $want_break_before{$last_nonblank_token_in_batch} )
{
$line_start_index_to_go--;
}
}
elsif ( $want_break_before{$last_nonblank_type_in_batch} ) {
$line_start_index_to_go--;
}
}
}
}
# remember the predicted position of this token on the output line
if ( $max_index_to_go > $line_start_index_to_go ) {
$gnu_position_predictor =
total_line_length( $line_start_index_to_go, $max_index_to_go );
}
else {
$gnu_position_predictor =
$space_count + $token_lengths_to_go[$max_index_to_go];
}
# store the indentation object for this token
# this allows us to manipulate the leading whitespace
# (in case we have to reduce indentation to fit a line) without
# having to change any token values
$leading_spaces_to_go[$max_index_to_go] =
$gnu_stack[$max_gnu_stack_index];
$reduced_spaces_to_go[$max_index_to_go] =
( $max_gnu_stack_index > 0 && $ci_level )
? $gnu_stack[ $max_gnu_stack_index - 1 ]
: $gnu_stack[$max_gnu_stack_index];
return;
}
sub check_for_long_gnu_style_lines {
# look at the current estimated maximum line length, and
# remove some whitespace if it exceeds the desired maximum
my ($mx_index_to_go) = @_;
# this is only for the '-lp' style
return unless ($rOpts_line_up_parentheses);
# nothing can be done if no stack items defined for this line
return if ( $max_gnu_item_index == UNDEFINED_INDEX );
# see if we have exceeded the maximum desired line length
# keep 2 extra free because they are needed in some cases
# (result of trial-and-error testing)
my $spaces_needed =
$gnu_position_predictor -
$maximum_line_length[ $levels_to_go[$mx_index_to_go] ] + 2;
return if ( $spaces_needed <= 0 );
# We are over the limit, so try to remove a requested number of
# spaces from leading whitespace. We are only allowed to remove
# from whitespace items created on this batch, since others have
# already been used and cannot be undone.
my @candidates = ();
my $i;
# loop over all whitespace items created for the current batch
for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
my $item = $gnu_item_list[$i];
# item must still be open to be a candidate (otherwise it
# cannot influence the current token)
next if ( $item->get_closed() >= 0 );
my $available_spaces = $item->get_available_spaces();
if ( $available_spaces > 0 ) {
push( @candidates, [ $i, $available_spaces ] );
}
}
return unless (@candidates);
# sort by available whitespace so that we can remove whitespace
# from the maximum available first
@candidates = sort { $b->[1] <=> $a->[1] } @candidates;
# keep removing whitespace until we are done or have no more
foreach my $candidate (@candidates) {
my ( $i, $available_spaces ) = @{$candidate};
my $deleted_spaces =
( $available_spaces > $spaces_needed )
? $spaces_needed
: $available_spaces;
# remove the incremental space from this item
$gnu_item_list[$i]->decrease_available_spaces($deleted_spaces);
my $i_debug = $i;
# update the leading whitespace of this item and all items
# that came after it
for ( ; $i <= $max_gnu_item_index ; $i++ ) {
my $old_spaces = $gnu_item_list[$i]->get_spaces();
if ( $old_spaces >= $deleted_spaces ) {
$gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
}
# shouldn't happen except for code bug:
else {
my $level = $gnu_item_list[$i_debug]->get_level();
my $ci_level = $gnu_item_list[$i_debug]->get_ci_level();
my $old_level = $gnu_item_list[$i]->get_level();
my $old_ci_level = $gnu_item_list[$i]->get_ci_level();
warning(
"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n"
);
report_definite_bug();
}
}
$gnu_position_predictor -= $deleted_spaces;
$spaces_needed -= $deleted_spaces;
last unless ( $spaces_needed > 0 );
}
return;
}
sub finish_lp_batch {
# This routine is called once after each output stream batch is
# finished to undo indentation for all incomplete -lp
# indentation levels. It is too risky to leave a level open,
# because then we can't backtrack in case of a long line to follow.
# This means that comments and blank lines will disrupt this
# indentation style. But the vertical aligner may be able to
# get the space back if there are side comments.
# this is only for the 'lp' style
return unless ($rOpts_line_up_parentheses);
# nothing can be done if no stack items defined for this line
return if ( $max_gnu_item_index == UNDEFINED_INDEX );
# loop over all whitespace items created for the current batch
foreach my $i ( 0 .. $max_gnu_item_index ) {
my $item = $gnu_item_list[$i];
# only look for open items
next if ( $item->get_closed() >= 0 );
# Tentatively remove all of the available space
# (The vertical aligner will try to get it back later)
my $available_spaces = $item->get_available_spaces();
if ( $available_spaces > 0 ) {
# delete incremental space for this item
$gnu_item_list[$i]
->tentatively_decrease_available_spaces($available_spaces);
# Reduce the total indentation space of any nodes that follow
# Note that any such nodes must necessarily be dependents
# of this node.
foreach ( $i + 1 .. $max_gnu_item_index ) {
$gnu_item_list[$_]->decrease_SPACES($available_spaces);
}
}
}
return;
}
} ## end closure set_leading_whitespace
sub reduce_lp_indentation {
# reduce the leading whitespace at token $i if possible by $spaces_needed
# (a large value of $spaces_needed will remove all excess space)
# NOTE: to be called from scan_list only for a sequence of tokens
# contained between opening and closing parens/braces/brackets
my ( $self, $i, $spaces_wanted ) = @_;
my $deleted_spaces = 0;
my $item = $leading_spaces_to_go[$i];
my $available_spaces = $item->get_available_spaces();
if (
$available_spaces > 0
&& ( ( $spaces_wanted <= $available_spaces )
|| !$item->get_have_child() )
)
{
# we'll remove these spaces, but mark them as recoverable
$deleted_spaces =
$item->tentatively_decrease_available_spaces($spaces_wanted);
}
return $deleted_spaces;
}
###########################################################
# CODE SECTION 13: Preparing batches for vertical alignment
###########################################################
sub send_lines_to_vertical_aligner {
my ($self) = @_;
# This routine receives a batch of code for which the final line breaks
# have been defined. Here we prepare the lines for passing to the vertical
# aligner. We do the following tasks:
# - mark certain vertical alignment tokens, such as '=', in each line
# - make minor indentation adjustments
# - do logical padding: insert extra blank spaces to help display certain
# logical constructions
my $this_batch = $self->[_this_batch_];
my $rlines_K = $this_batch->[_rlines_K_];
if ( !@{$rlines_K} ) {
# This can't happen because sub grind_batch_of_CODE always receives
# tokens which it turns into one or more lines. If we get here it means
# that a programming error has caused those lines to be lost.
Fault("Unexpected call with no lines");
return;
}
my $n_last_line = @{$rlines_K} - 1;
my $do_not_pad = $this_batch->[_do_not_pad_];
my $peak_batch_size = $this_batch->[_peak_batch_size_];
my $starting_in_quote = $this_batch->[_starting_in_quote_];
my $ending_in_quote = $this_batch->[_ending_in_quote_];
my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
my $ibeg0 = $this_batch->[_ibeg0_];
my $rK_to_go = $this_batch->[_rK_to_go_];
my $batch_count = $this_batch->[_batch_count_];
my $rix_seqno_controlling_ci = $this_batch->[_rix_seqno_controlling_ci_];
my $rLL = $self->[_rLL_];
my $Klimit = $self->[_Klimit_];
my ( $Kbeg_next, $Kend_next ) = @{ $rlines_K->[0] };
my $type_beg_next = $rLL->[$Kbeg_next]->[_TYPE_];
my $token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_];
my $type_end_next = $rLL->[$Kend_next]->[_TYPE_];
# Construct indexes to the global_to_go arrays so that called routines can
# still access those arrays. This might eventually be removed
# when all called routines have been converted to access token values
# in the rLL array instead.
my $Kbeg0 = $Kbeg_next;
my ( $ri_first, $ri_last );
foreach my $rline ( @{$rlines_K} ) {
my ( $Kbeg, $Kend ) = @{$rline};
my $ibeg = $ibeg0 + $Kbeg - $Kbeg0;
my $iend = $ibeg0 + $Kend - $Kbeg0;
push @{$ri_first}, $ibeg;
push @{$ri_last}, $iend;
}
my ( $cscw_block_comment, $closing_side_comment );
if ( $rOpts->{'closing-side-comments'} ) {
( $closing_side_comment, $cscw_block_comment ) =
$self->add_closing_side_comment();
}
my $rindentation_list = [0]; # ref to indentations for each line
# define the array @{$ralignment_type_to_go} for the output tokens
# which will be non-blank for each special token (such as =>)
# for which alignment is required.
my $ralignment_type_to_go =
$self->set_vertical_alignment_markers( $ri_first, $ri_last );
# flush before a long if statement to avoid unwanted alignment
if ( $n_last_line > 0
&& $type_beg_next eq 'k'
&& $token_beg_next =~ /^(if|unless)$/ )
{
$self->flush_vertical_aligner();
}
$self->undo_ci( $ri_first, $ri_last, $rix_seqno_controlling_ci );
$self->set_logical_padding( $ri_first, $ri_last, $peak_batch_size,
$starting_in_quote )
if ( $rOpts->{'logical-padding'} );
# Resum lengths. We need accurate lengths for making alignment patterns,
# and we may have unmasked a semicolon which was not included at the start.
for ( 0 .. $max_index_to_go ) {
$summed_lengths_to_go[ $_ + 1 ] =
$summed_lengths_to_go[$_] + $token_lengths_to_go[$_];
}
# loop to prepare each line for shipment
my ( $Kbeg, $type_beg, $token_beg );
my ( $Kend, $type_end );
for my $n ( 0 .. $n_last_line ) {
my $ibeg = $ri_first->[$n];
my $iend = $ri_last->[$n];
my $rline = $rlines_K->[$n];
my $forced_breakpoint = $rline->[2];
# we may need to look at variables on three consecutive lines ...
# Some vars on line [n-1], if any:
my $Kbeg_last = $Kbeg;
my $type_beg_last = $type_beg;
my $token_beg_last = $token_beg;
my $Kend_last = $Kend;
my $type_end_last = $type_end;
# Some vars on line [n]:
$Kbeg = $Kbeg_next;
$type_beg = $type_beg_next;
$token_beg = $token_beg_next;
$Kend = $Kend_next;
$type_end = $type_end_next;
# Only forward ending K values of non-comments down the pipeline.
# This is equivalent to checking that the last CODE_type is blank or
# equal to 'VER'. See also sub resync_lines_and_tokens for related
# coding. Note that '$batch_CODE_type' is the code type of the line
# to which the ending token belongs.
my $batch_CODE_type = $this_batch->[_batch_CODE_type_];
my $Kend_code =
$batch_CODE_type && $batch_CODE_type ne 'VER' ? undef : $Kend;
# We use two slightly different definitions of level jump at the end
# of line:
# $ljump is the level jump needed by 'sub set_adjusted_indentation'
# $level_jump is the level jump needed by the vertical aligner.
my $ljump = 0; # level jump at end of line
# Get some vars on line [n+1], if any:
if ( $n < $n_last_line ) {
( $Kbeg_next, $Kend_next ) =
@{ $rlines_K->[ $n + 1 ] };
$type_beg_next = $rLL->[$Kbeg_next]->[_TYPE_];
$token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_];
$type_end_next = $rLL->[$Kend_next]->[_TYPE_];
$ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
}
# level jump at end of line for the vertical aligner:
my $level_jump =
$Kend >= $Klimit
? 0
: $rLL->[ $Kend + 1 ]->[_SLEVEL_] - $rLL->[$Kbeg]->[_SLEVEL_];
$self->delete_needless_alignments( $ibeg, $iend,
$ralignment_type_to_go );
my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
$self->make_alignment_patterns( $ibeg, $iend,
$ralignment_type_to_go );
my ( $indentation, $lev, $level_end, $terminal_type,
$terminal_block_type, $is_semicolon_terminated, $is_outdented_line )
= $self->set_adjusted_indentation( $ibeg, $iend, $rfields,
$rpatterns, $ri_first, $ri_last,
$rindentation_list, $ljump, $starting_in_quote,
$is_static_block_comment, );
# we will allow outdenting of long lines..
my $outdent_long_lines = (
# which are long quotes, if allowed
( $type_beg eq 'Q' && $rOpts->{'outdent-long-quotes'} )
# which are long block comments, if allowed
|| (
$type_beg eq '#'
&& $rOpts->{'outdent-long-comments'}
# but not if this is a static block comment
&& !$is_static_block_comment
)
);
my $break_alignment_before = $is_outdented_line || $do_not_pad;
my $break_alignment_after = $is_outdented_line;
# flush at an 'if' which follows a line with (1) terminal semicolon
# or (2) terminal block_type which is not an 'if'. This prevents
# unwanted alignment between the lines.
if ( $type_beg eq 'k' && $token_beg eq 'if' ) {
my $Km = $self->K_previous_code($Kbeg);
my $type_m = 'b';
my $block_type_m = 'b';
if ( defined($Km) ) {
$type_m = $rLL->[$Km]->[_TYPE_];
$block_type_m = $rLL->[$Km]->[_BLOCK_TYPE_];
}
# break after anything that is not if-like
$break_alignment_before ||= $type_m eq ';'
|| ( $type_m eq '}'
&& $block_type_m ne 'if'
&& $block_type_m ne 'unless'
&& $block_type_m ne 'elsif'
&& $block_type_m ne 'else' );
}
my $rvertical_tightness_flags =
$self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
$ri_first, $ri_last, $ending_in_quote, $closing_side_comment );
# Set a flag at the final ':' of a ternary chain to request
# vertical alignment of the final term. Here is a
# slightly complex example:
#
# $self->{_text} = (
# !$section ? ''
# : $type eq 'item' ? "the $section entry"
# : "the section on $section"
# )
# . (
# $page
# ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
# : ' elsewhere in this document'
# );
#
my $is_terminal_ternary = 0;
if ( $type_beg eq ':' || $n > 0 && $type_end_last eq ':' ) {
my $last_leading_type = $n > 0 ? $type_beg_last : ':';
if ( $terminal_type ne ';'
&& $n_last_line > $n
&& $level_end == $lev )
{
$level_end = $rLL->[$Kbeg_next]->[_LEVEL_];
$terminal_type = $rLL->[$Kbeg_next]->[_TYPE_];
}
if (
$last_leading_type eq ':'
&& ( ( $terminal_type eq ';' && $level_end <= $lev )
|| ( $terminal_type ne ':' && $level_end < $lev ) )
)
{
# the terminal term must not contain any ternary terms, as in
# my $ECHO = (
# $Is_MSWin32 ? ".\\echo$$"
# : $Is_MacOS ? ":echo$$"
# : ( $Is_NetWare ? "echo$$" : "./echo$$" )
# );
$is_terminal_ternary = 1;
my $KP = $rLL->[$Kbeg]->[_KNEXT_SEQ_ITEM_];
while ( defined($KP) && $KP <= $Kend ) {
my $type_KP = $rLL->[$KP]->[_TYPE_];
if ( $type_KP eq '?' || $type_KP eq ':' ) {
$is_terminal_ternary = 0;
last;
}
$KP = $rLL->[$KP]->[_KNEXT_SEQ_ITEM_];
}
}
}
my $level_adj = $lev;
my $radjusted_levels = $self->[_radjusted_levels_];
if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
$level_adj = $radjusted_levels->[$Kbeg];
if ( $level_adj < 0 ) { $level_adj = 0 }
}
# add any new closing side comment to the last line
if ( $closing_side_comment && $n == $n_last_line && @{$rfields} ) {
$rfields->[-1] .= " $closing_side_comment";
# NOTE: Patch for csc. We can just use 1 for the length of the csc
# because its length should not be a limiting factor from here on.
$rfield_lengths->[-1] += 2;
}
# Set flag which tells if this line is contained in a multi-line list
my $list_seqno = $self->is_list_by_K($Kbeg);
# send this new line down the pipe
my $rvalign_hash = {};
$rvalign_hash->{level} = $lev;
$rvalign_hash->{level_end} = $level_end;
$rvalign_hash->{level_adj} = $level_adj;
$rvalign_hash->{indentation} = $indentation;
$rvalign_hash->{list_seqno} = $list_seqno;
$rvalign_hash->{outdent_long_lines} = $outdent_long_lines;
$rvalign_hash->{is_terminal_ternary} = $is_terminal_ternary;
$rvalign_hash->{is_terminal_statement} = $is_semicolon_terminated;
$rvalign_hash->{rvertical_tightness_flags} = $rvertical_tightness_flags;
$rvalign_hash->{level_jump} = $level_jump;
$rvalign_hash->{rfields} = $rfields;
$rvalign_hash->{rpatterns} = $rpatterns;
$rvalign_hash->{rtokens} = $rtokens;
$rvalign_hash->{rfield_lengths} = $rfield_lengths;
$rvalign_hash->{terminal_block_type} = $terminal_block_type;
$rvalign_hash->{batch_count} = $batch_count;
$rvalign_hash->{break_alignment_before} = $break_alignment_before;
$rvalign_hash->{break_alignment_after} = $break_alignment_after;
$rvalign_hash->{Kend} = $Kend_code;
$rvalign_hash->{ci_level} = $ci_levels_to_go[$ibeg];
my $vao = $self->[_vertical_aligner_object_];
$vao->valign_input($rvalign_hash);
$do_not_pad = 0;
# Set flag indicating if this line ends in an opening
# token and is very short, so that a blank line is not
# needed if the subsequent line is a comment.
# Examples of what we are looking for:
# {
# && (
# BEGIN {
# default {
# sub {
$self->[_last_output_short_opening_token_]
# line ends in opening token
# /^[\{\(\[L]$/
= $is_opening_type{$type_end}
# and either
&& (
# line has either single opening token
$Kend == $Kbeg
# or is a single token followed by opening token.
# Note that sub identifiers have blanks like 'sub doit'
# $token_beg !~ /\s+/
|| ( $Kend - $Kbeg <= 2 && index( $token_beg, ' ' ) < 0 )
)
# and limit total to 10 character widths
&& token_sequence_length( $ibeg, $iend ) <= 10;
} # end of loop to output each line
# remember indentation of lines containing opening containers for
# later use by sub set_adjusted_indentation
$self->save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
# output any new -cscw block comment
if ($cscw_block_comment) {
$self->flush_vertical_aligner();
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->write_code_line( $cscw_block_comment . "\n" );
}
return;
}
{ ## begin closure set_vertical_alignment_markers
my %is_vertical_alignment_type;
my %is_not_vertical_alignment_token;
my %is_vertical_alignment_keyword;
my %is_terminal_alignment_type;
my %is_low_level_alignment_token;
BEGIN {
my @q;
# Replaced =~ and // in the list. // had been removed in RT 119588
@q = qw#
= **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
{ ? : => && || ~~ !~~ =~ !~ // <=> ->
#;
@is_vertical_alignment_type{@q} = (1) x scalar(@q);
# These 'tokens' are not aligned. We need this to remove [
# from the above list because it has type ='{'
@q = qw([);
@is_not_vertical_alignment_token{@q} = (1) x scalar(@q);
# these are the only types aligned at a line end
@q = qw(&& || =>);
@is_terminal_alignment_type{@q} = (1) x scalar(@q);
# these tokens only align at line level
@q = ( '{', '(' );
@is_low_level_alignment_token{@q} = (1) x scalar(@q);
# eq and ne were removed from this list to improve alignment chances
@q = qw(if unless and or err for foreach while until);
@is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
}
sub set_vertical_alignment_markers {
# This routine takes the first step toward vertical alignment of the
# lines of output text. It looks for certain tokens which can serve as
# vertical alignment markers (such as an '=').
#
# Method: We look at each token $i in this output batch and set
# $ralignment_type_to_go->[$i] equal to those tokens at which we would
# accept vertical alignment.
my ( $self, $ri_first, $ri_last ) = @_;
my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
my $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
my $ralignment_type_to_go;
# Initialize the alignment array. Note that closing side comments can
# insert up to 2 additional tokens beyond the original
# $max_index_to_go, so we need to check ri_last for the last index.
my $max_line = @{$ri_first} - 1;
my $iend = $ri_last->[$max_line];
if ( $iend < $max_index_to_go ) { $iend = $max_index_to_go }
# nothing to do if we aren't allowed to change whitespace
# or there is only 1 token
if ( $iend == 0 || !$rOpts_add_whitespace ) {
for my $i ( 0 .. $iend ) {
$ralignment_type_to_go->[$i] = '';
}
return $ralignment_type_to_go;
}
# remember the index of last nonblank token before any sidecomment
my $i_terminal = $max_index_to_go;
if ( $types_to_go[$i_terminal] eq '#' ) {
if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
if ( $i_terminal > 0 ) { --$i_terminal }
}
}
# look at each line of this batch..
my $last_vertical_alignment_before_index;
my $vert_last_nonblank_type;
my $vert_last_nonblank_token;
my $vert_last_nonblank_block_type;
foreach my $line ( 0 .. $max_line ) {
my $ibeg = $ri_first->[$line];
my $iend = $ri_last->[$line];
$last_vertical_alignment_before_index = -1;
$vert_last_nonblank_type = '';
$vert_last_nonblank_token = '';
$vert_last_nonblank_block_type = '';
# look at each token in this output line..
my $level_beg = $levels_to_go[$ibeg];
foreach my $i ( $ibeg .. $iend ) {
my $alignment_type = '';
my $type = $types_to_go[$i];
my $block_type = $block_type_to_go[$i];
my $token = $tokens_to_go[$i];
# do not align tokens at lower level then start of line
# except for side comments
if ( $levels_to_go[$i] < $levels_to_go[$ibeg]
&& $type ne '#' )
{
$ralignment_type_to_go->[$i] = '';
next;
}
#--------------------------------------------------------
# First see if we want to align BEFORE this token
#--------------------------------------------------------
# The first possible token that we can align before
# is index 2 because: 1) it doesn't normally make sense to
# align before the first token and 2) the second
# token must be a blank if we are to align before
# the third
if ( $i < $ibeg + 2 ) { }
# must follow a blank token
elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
# align a side comment --
elsif ( $type eq '#' ) {
my $KK = $K_to_go[$i];
my $sc_type = $rspecial_side_comment_type->{$KK};
unless (
# it is any specially marked side comment
$sc_type
# or it is a static side comment
|| ( $rOpts->{'static-side-comments'}
&& $token =~ /$static_side_comment_pattern/ )
# or a closing side comment
|| ( $vert_last_nonblank_block_type
&& $token =~
/$closing_side_comment_prefix_pattern/ )
)
{
$alignment_type = $type;
} ## Example of a static side comment
}
# otherwise, do not align two in a row to create a
# blank field
elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
# align before one of these keywords
# (within a line, since $i>1)
elsif ( $type eq 'k' ) {
# /^(if|unless|and|or|eq|ne)$/
if ( $is_vertical_alignment_keyword{$token} ) {
$alignment_type = $token;
}
}
# align before one of these types..
# Note: add '.' after new vertical aligner is operational
elsif ( $is_vertical_alignment_type{$type}
&& !$is_not_vertical_alignment_token{$token} )
{
$alignment_type = $token;
# Do not align a terminal token. Although it might
# occasionally look ok to do this, this has been found to be
# a good general rule. The main problems are:
# (1) that the terminal token (such as an = or :) might get
# moved far to the right where it is hard to see because
# nothing follows it, and
# (2) doing so may prevent other good alignments.
# Current exceptions are && and || and =>
if ( $i == $iend || $i >= $i_terminal ) {
$alignment_type = ""
unless ( $is_terminal_alignment_type{$type} );
}
# Do not align leading ': (' or '. ('. This would prevent
# alignment in something like the following:
# $extra_space .=
# ( $input_line_number < 10 ) ? " "
# : ( $input_line_number < 100 ) ? " "
# : "";
# or
# $code =
# ( $case_matters ? $accessor : " lc($accessor) " )
# . ( $yesno ? " eq " : " ne " )
# Also, do not align a ( following a leading ? so we can
# align something like this:
# $converter{$_}->{ushortok} =
# $PDL::IO::Pic::biggrays
# ? ( m/GIF/ ? 0 : 1 )
# : ( m/GIF|RAST|IFF/ ? 0 : 1 );
if (
$i == $ibeg + 2
&& $types_to_go[ $i - 1 ] eq 'b'
&& ( $types_to_go[$ibeg] eq '.'
|| $types_to_go[$ibeg] eq ':'
|| $types_to_go[$ibeg] eq '?' )
)
{
$alignment_type = "";
}
# Certain tokens only align at the same level as the
# initial line level
if ( $is_low_level_alignment_token{$token}
&& $levels_to_go[$i] != $level_beg )
{
$alignment_type = "";
}
# For a paren after keyword, only align something like this:
# if ( $a ) { &a }
# elsif ( $b ) { &b }
if ( $token eq '(' ) {
if ( $vert_last_nonblank_type eq 'k' ) {
$alignment_type = ""
unless $vert_last_nonblank_token =~
/^(if|unless|elsif)$/;
}
}
# be sure the alignment tokens are unique
# This didn't work well: reason not determined
# if ($token ne $type) {$alignment_type .= $type}
}
# NOTE: This is deactivated because it causes the previous
# if/elsif alignment to fail
#elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
#{ $alignment_type = $type; }
if ($alignment_type) {
$last_vertical_alignment_before_index = $i;
}
#--------------------------------------------------------
# Next see if we want to align AFTER the previous nonblank
#--------------------------------------------------------
# We want to line up ',' and interior ';' tokens, with the added
# space AFTER these tokens. (Note: interior ';' is included
# because it may occur in short blocks).
if (
# we haven't already set it
!$alignment_type
# and its not the first token of the line
&& ( $i > $ibeg )
# and it follows a blank
&& $types_to_go[ $i - 1 ] eq 'b'
# and previous token IS one of these:
&& ( $vert_last_nonblank_type eq ','
|| $vert_last_nonblank_type eq ';' )
# and it's NOT one of these
&& ( $type ne 'b'
&& $type ne '#'
&& !$is_closing_token{$type} )
# then go ahead and align
)
{
$alignment_type = $vert_last_nonblank_type;
}
#--------------------------------------------------------
# Undo alignment in special cases
#--------------------------------------------------------
if ($alignment_type) {
# do not align the opening brace of an anonymous sub
if ( $token eq '{' && $block_type =~ /$ASUB_PATTERN/ ) {
$alignment_type = "";
}
}
#--------------------------------------------------------
# then store the value
#--------------------------------------------------------
$ralignment_type_to_go->[$i] = $alignment_type;
if ( $type ne 'b' ) {
$vert_last_nonblank_type = $type;
$vert_last_nonblank_token = $token;
$vert_last_nonblank_block_type = $block_type;
}
}
}
return $ralignment_type_to_go;
}
} ## end closure set_vertical_alignment_markers
sub get_seqno {
# get opening and closing sequence numbers of a token for the vertical
# aligner. Assign qw quotes a value to allow qw opening and closing tokens
# to be treated somewhat like opening and closing tokens for stacking
# tokens by the vertical aligner.
my ( $self, $ii, $ending_in_quote ) = @_;
my $rLL = $self->[_rLL_];
my $this_batch = $self->[_this_batch_];
my $rK_to_go = $this_batch->[_rK_to_go_];
my $KK = $rK_to_go->[$ii];
my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
if ( $rLL->[$KK]->[_TYPE_] eq 'q' ) {
my $SEQ_QW = -1;
my $token = $rLL->[$KK]->[_TOKEN_];
if ( $ii > 0 ) {
$seqno = $SEQ_QW if ( $token =~ /^qw\s*[\(\{\[]/ );
}
else {
if ( !$ending_in_quote ) {
$seqno = $SEQ_QW if ( $token =~ /[\)\}\]]$/ );
}
}
}
return ($seqno);
}
{
my %undo_extended_ci;
sub initialize_undo_ci {
%undo_extended_ci = ();
return;
}
sub undo_ci {
# Undo continuation indentation in certain sequences
my ( $self, $ri_first, $ri_last, $rix_seqno_controlling_ci ) = @_;
my ( $line_1, $line_2, $lev_last );
my $this_line_is_semicolon_terminated;
my $max_line = @{$ri_first} - 1;
my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
# Prepare a list of controlling indexes for each line if required.
# This is used for efficient processing below. Note: this is
# critical for speed. In the initial implementation I just looped
# through the @$rix_seqno_controlling_ci list below. Using NYT_prof, I
# found that this routine was causing a huge run time in large lists.
# On a very large list test case, this new coding dropped the run time
# of this routine from 30 seconds to 169 milliseconds.
my @i_controlling_ci;
if ( @{$rix_seqno_controlling_ci} ) {
my @tmp = reverse @{$rix_seqno_controlling_ci};
my $ix_next = pop @tmp;
foreach my $line ( 0 .. $max_line ) {
my $iend = $ri_last->[$line];
while ( defined($ix_next) && $ix_next <= $iend ) {
push @{ $i_controlling_ci[$line] }, $ix_next;
$ix_next = pop @tmp;
}
}
}
# Loop over all lines of the batch ...
foreach my $line ( 0 .. $max_line ) {
####################################
# SECTION 1: Undo needless common CI
####################################
# We are looking at leading tokens and looking for a sequence all
# at the same level and all at a higher level than enclosing lines.
# For example, we can undo continuation indentation in sort/map/grep
# chains
# my $dat1 = pack( "n*",
# map { $_, $lookup->{$_} }
# sort { $a <=> $b }
# grep { $lookup->{$_} ne $default } keys %$lookup );
# to become
# my $dat1 = pack( "n*",
# map { $_, $lookup->{$_} }
# sort { $a <=> $b }
# grep { $lookup->{$_} ne $default } keys %$lookup );
my $ibeg = $ri_first->[$line];
my $iend = $ri_last->[$line];
my $lev = $levels_to_go[$ibeg];
if ( $line > 0 ) {
# if we have started a chain..
if ($line_1) {
# see if it continues..
if ( $lev == $lev_last ) {
if ( $types_to_go[$ibeg] eq 'k'
&& $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
{
# chain continues...
# check for chain ending at end of a statement
if ( $line == $max_line ) {
# see of this line ends a statement
$this_line_is_semicolon_terminated =
$types_to_go[$iend] eq ';'
# with possible side comment
|| ( $types_to_go[$iend] eq '#'
&& $iend - $ibeg >= 2
&& $types_to_go[ $iend - 2 ] eq ';'
&& $types_to_go[ $iend - 1 ] eq 'b' );
}
$line_2 = $line
if ($this_line_is_semicolon_terminated);
}
else {
# kill chain
$line_1 = undef;
}
}
elsif ( $lev < $lev_last ) {
# chain ends with previous line
$line_2 = $line - 1;
}
elsif ( $lev > $lev_last ) {
# kill chain
$line_1 = undef;
}
# undo the continuation indentation if a chain ends
if ( defined($line_2) && defined($line_1) ) {
my $continuation_line_count = $line_2 - $line_1 + 1;
@ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
= (0) x ($continuation_line_count)
if ( $continuation_line_count >= 0 );
@leading_spaces_to_go[ @{$ri_first}
[ $line_1 .. $line_2 ] ] =
@reduced_spaces_to_go[ @{$ri_first}
[ $line_1 .. $line_2 ] ];
$line_1 = undef;
}
}
# not in a chain yet..
else {
# look for start of a new sort/map/grep chain
if ( $lev > $lev_last ) {
if ( $types_to_go[$ibeg] eq 'k'
&& $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
{
$line_1 = $line;
}
}
}
}
######################################
# SECTION 2: Undo ci at cuddled blocks
######################################
# Note that sub set_adjusted_indentation will be called later to
# actually do this, but for now we will tentatively mark cuddled
# lines with ci=0 so that the the -xci loop which follows will be
# correct at cuddles.
if (
$types_to_go[$ibeg] eq '}'
&& ( $nesting_depth_to_go[$iend] + 1 ==
$nesting_depth_to_go[$ibeg] )
)
{
my $terminal_type = $types_to_go[$iend];
if ( $terminal_type eq '#' && $iend > $ibeg ) {
$terminal_type = $types_to_go[ $iend - 1 ];
if ( $terminal_type eq '#' && $iend - 1 > $ibeg ) {
$terminal_type = $types_to_go[ $iend - 2 ];
}
}
if ( $terminal_type eq '{' ) {
my $Kbeg = $K_to_go[$ibeg];
$ci_levels_to_go[$ibeg] = 0;
}
}
#########################################################
# SECTION 3: Undo ci set by sub extended_ci if not needed
#########################################################
# Undo the ci of the leading token if its controlling token
# went out on a previous line without ci
if ( $ci_levels_to_go[$ibeg] ) {
my $Kbeg = $K_to_go[$ibeg];
my $seqno = $rseqno_controlling_my_ci->{$Kbeg};
if ( $seqno && $undo_extended_ci{$seqno} ) {
# but do not undo ci set by the -lp flag
if ( !ref( $reduced_spaces_to_go[$ibeg] ) ) {
$ci_levels_to_go[$ibeg] = 0;
$leading_spaces_to_go[$ibeg] =
$reduced_spaces_to_go[$ibeg];
}
}
}
# Flag any controlling opening tokens in lines without ci. This
# will be used later in the above if statement to undo the ci which
# they added. The array i_controlling_ci[$line] was prepared at
# the top of this routine.
if ( !$ci_levels_to_go[$ibeg]
&& defined( $i_controlling_ci[$line] ) )
{
foreach my $i ( @{ $i_controlling_ci[$line] } ) {
my $seqno = $type_sequence_to_go[$i];
$undo_extended_ci{$seqno} = 1;
}
}
$lev_last = $lev;
}
return;
}
}
{ ## begin closure set_logical_padding
my %is_math_op;
BEGIN {
my @q = qw( + - * / );
@is_math_op{@q} = (1) x scalar(@q);
}
sub set_logical_padding {
# Look at a batch of lines and see if extra padding can improve the
# alignment when there are certain leading operators. Here is an
# example, in which some extra space is introduced before
# '( $year' to make it line up with the subsequent lines:
#
# if ( ( $Year < 1601 )
# || ( $Year > 2899 )
# || ( $EndYear < 1601 )
# || ( $EndYear > 2899 ) )
# {
# &Error_OutOfRange;
# }
#
my ( $self, $ri_first, $ri_last, $peak_batch_size, $starting_in_quote )
= @_;
my $max_line = @{$ri_first} - 1;
my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
$tok_next, $type_next, $has_leading_op_next, $has_leading_op );
# looking at each line of this batch..
foreach my $line ( 0 .. $max_line - 1 ) {
# see if the next line begins with a logical operator
$ibeg = $ri_first->[$line];
$iend = $ri_last->[$line];
$ibeg_next = $ri_first->[ $line + 1 ];
$tok_next = $tokens_to_go[$ibeg_next];
$type_next = $types_to_go[$ibeg_next];
$has_leading_op_next = ( $tok_next =~ /^\w/ )
? $is_chain_operator{$tok_next} # + - * / : ? && ||
: $is_chain_operator{$type_next}; # and, or
next unless ($has_leading_op_next);
# next line must not be at lesser depth
next
if ( $nesting_depth_to_go[$ibeg] >
$nesting_depth_to_go[$ibeg_next] );
# identify the token in this line to be padded on the left
$ipad = undef;
# handle lines at same depth...
if ( $nesting_depth_to_go[$ibeg] ==
$nesting_depth_to_go[$ibeg_next] )
{
# if this is not first line of the batch ...
if ( $line > 0 ) {
# and we have leading operator..
next if $has_leading_op;
# Introduce padding if..
# 1. the previous line is at lesser depth, or
# 2. the previous line ends in an assignment
# 3. the previous line ends in a 'return'
# 4. the previous line ends in a comma
# Example 1: previous line at lesser depth
# if ( ( $Year < 1601 ) # <- we are here but
# || ( $Year > 2899 ) # list has not yet
# || ( $EndYear < 1601 ) # collapsed vertically
# || ( $EndYear > 2899 ) )
# {
#
# Example 2: previous line ending in assignment:
# $leapyear =
# $year % 4 ? 0 # <- We are here
# : $year % 100 ? 1
# : $year % 400 ? 0
# : 1;
#
# Example 3: previous line ending in comma:
# push @expr,
# /test/ ? undef
# : eval($_) ? 1
# : eval($_) ? 1
# : 0;
# be sure levels agree (do not indent after an indented 'if')
next
if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
# allow padding on first line after a comma but only if:
# (1) this is line 2 and
# (2) there are at more than three lines and
# (3) lines 3 and 4 have the same leading operator
# These rules try to prevent padding within a long
# comma-separated list.
my $ok_comma;
if ( $types_to_go[$iendm] eq ','
&& $line == 1
&& $max_line > 2 )
{
my $ibeg_next_next = $ri_first->[ $line + 2 ];
my $tok_next_next = $tokens_to_go[$ibeg_next_next];
$ok_comma = $tok_next_next eq $tok_next;
}
next
unless (
$is_assignment{ $types_to_go[$iendm] }
|| $ok_comma
|| ( $nesting_depth_to_go[$ibegm] <
$nesting_depth_to_go[$ibeg] )
|| ( $types_to_go[$iendm] eq 'k'
&& $tokens_to_go[$iendm] eq 'return' )
);
# we will add padding before the first token
$ipad = $ibeg;
}
# for first line of the batch..
else {
# WARNING: Never indent if first line is starting in a
# continued quote, which would change the quote.
next if $starting_in_quote;
# if this is text after closing '}'
# then look for an interior token to pad
if ( $types_to_go[$ibeg] eq '}' ) {
}
# otherwise, we might pad if it looks really good
else {
# we might pad token $ibeg, so be sure that it
# is at the same depth as the next line.
next
if ( $nesting_depth_to_go[$ibeg] !=
$nesting_depth_to_go[$ibeg_next] );
# We can pad on line 1 of a statement if at least 3
# lines will be aligned. Otherwise, it
# can look very confusing.
# We have to be careful not to pad if there are too few
# lines. The current rule is:
# (1) in general we require at least 3 consecutive lines
# with the same leading chain operator token,
# (2) but an exception is that we only require two lines
# with leading colons if there are no more lines. For example,
# the first $i in the following snippet would get padding
# by the second rule:
#
# $i == 1 ? ( "First", "Color" )
# : $i == 2 ? ( "Then", "Rarity" )
# : ( "Then", "Name" );
if ( $max_line > 1 ) {
my $leading_token = $tokens_to_go[$ibeg_next];
my $tokens_differ;
# never indent line 1 of a '.' series because
# previous line is most likely at same level.
# TODO: we should also look at the leasing_spaces
# of the last output line and skip if it is same
# as this line.
next if ( $leading_token eq '.' );
my $count = 1;
foreach my $l ( 2 .. 3 ) {
last if ( $line + $l > $max_line );
my $ibeg_next_next = $ri_first->[ $line + $l ];
if ( $tokens_to_go[$ibeg_next_next] ne
$leading_token )
{
$tokens_differ = 1;
last;
}
$count++;
}
next if ($tokens_differ);
next if ( $count < 3 && $leading_token ne ':' );
$ipad = $ibeg;
}
else {
next;
}
}
}
}
# find interior token to pad if necessary
if ( !defined($ipad) ) {
for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
# find any unclosed container
next
unless ( $type_sequence_to_go[$i]
&& $mate_index_to_go[$i] > $iend );
# find next nonblank token to pad
$ipad = $inext_to_go[$i];
last if ( $ipad > $iend );
}
last unless $ipad;
}
# We cannot pad the first leading token of a file because
# it could cause a bug in which the starting indentation
# level is guessed incorrectly each time the code is run
# though perltidy, thus causing the code to march off to
# the right. For example, the following snippet would have
# this problem:
## ov_method mycan( $package, '(""' ), $package
## or ov_method mycan( $package, '(0+' ), $package
## or ov_method mycan( $package, '(bool' ), $package
## or ov_method mycan( $package, '(nomethod' ), $package;
# If this snippet is within a block this won't happen
# unless the user just processes the snippet alone within
# an editor. In that case either the user will see and
# fix the problem or it will be corrected next time the
# entire file is processed with perltidy.
next if ( $ipad == 0 && $peak_batch_size <= 1 );
## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
## IT DID MORE HARM THAN GOOD
## ceil(
## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
## / $upem
## ),
##? # do not put leading padding for just 2 lines of math
##? if ( $ipad == $ibeg
##? && $line > 0
##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
##? && $is_math_op{$type_next}
##? && $line + 2 <= $max_line )
##? {
##? my $ibeg_next_next = $ri_first->[ $line + 2 ];
##? my $type_next_next = $types_to_go[$ibeg_next_next];
##? next if !$is_math_op{$type_next_next};
##? }
# next line must not be at greater depth
my $iend_next = $ri_last->[ $line + 1 ];
next
if ( $nesting_depth_to_go[ $iend_next + 1 ] >
$nesting_depth_to_go[$ipad] );
# lines must be somewhat similar to be padded..
my $inext_next = $inext_to_go[$ibeg_next];
my $type = $types_to_go[$ipad];
my $type_next = $types_to_go[ $ipad + 1 ];
# see if there are multiple continuation lines
my $logical_continuation_lines = 1;
if ( $line + 2 <= $max_line ) {
my $leading_token = $tokens_to_go[$ibeg_next];
my $ibeg_next_next = $ri_first->[ $line + 2 ];
if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
&& $nesting_depth_to_go[$ibeg_next] eq
$nesting_depth_to_go[$ibeg_next_next] )
{
$logical_continuation_lines++;
}
}
# see if leading types match
my $types_match = $types_to_go[$inext_next] eq $type;
my $matches_without_bang;
# if first line has leading ! then compare the following token
if ( !$types_match && $type eq '!' ) {
$types_match = $matches_without_bang =
$types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
}
if (
# either we have multiple continuation lines to follow
# and we are not padding the first token
( $logical_continuation_lines > 1 && $ipad > 0 )
# or..
|| (
# types must match
$types_match
# and keywords must match if keyword
&& !(
$type eq 'k'
&& $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
)
)
)
{
#----------------------begin special checks--------------
#
# SPECIAL CHECK 1:
# A check is needed before we can make the pad.
# If we are in a list with some long items, we want each
# item to stand out. So in the following example, the
# first line beginning with '$casefold->' would look good
# padded to align with the next line, but then it
# would be indented more than the last line, so we
# won't do it.
#
# ok(
# $casefold->{code} eq '0041'
# && $casefold->{status} eq 'C'
# && $casefold->{mapping} eq '0061',
# 'casefold 0x41'
# );
#
# Note:
# It would be faster, and almost as good, to use a comma
# count, and not pad if comma_count > 1 and the previous
# line did not end with a comma.
#
my $ok_to_pad = 1;
my $ibg = $ri_first->[ $line + 1 ];
my $depth = $nesting_depth_to_go[ $ibg + 1 ];
# just use simplified formula for leading spaces to avoid
# needless sub calls
my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
# look at each line beyond the next ..
my $l = $line + 1;
foreach my $ltest ( $line + 2 .. $max_line ) {
$l = $ltest;
my $ibg = $ri_first->[$l];
# quit looking at the end of this container
last
if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
|| ( $nesting_depth_to_go[$ibg] < $depth );
# cannot do the pad if a later line would be
# outdented more
if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
$ok_to_pad = 0;
last;
}
}
# don't pad if we end in a broken list
if ( $l == $max_line ) {
my $i2 = $ri_last->[$l];
if ( $types_to_go[$i2] eq '#' ) {
my $i1 = $ri_first->[$l];
next if terminal_type_i( $i1, $i2 ) eq ',';
}
}
# SPECIAL CHECK 2:
# a minus may introduce a quoted variable, and we will
# add the pad only if this line begins with a bare word,
# such as for the word 'Button' here:
# [
# Button => "Print letter \"~$_\"",
# -command => [ sub { print "$_[0]\n" }, $_ ],
# -accelerator => "Meta+$_"
# ];
#
# On the other hand, if 'Button' is quoted, it looks best
# not to pad:
# [
# 'Button' => "Print letter \"~$_\"",
# -command => [ sub { print "$_[0]\n" }, $_ ],
# -accelerator => "Meta+$_"
# ];
if ( $types_to_go[$ibeg_next] eq 'm' ) {
$ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
}
next unless $ok_to_pad;
#----------------------end special check---------------
my $length_1 = total_line_length( $ibeg, $ipad - 1 );
my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
$pad_spaces = $length_2 - $length_1;
# If the first line has a leading ! and the second does
# not, then remove one space to try to align the next
# leading characters, which are often the same. For example:
# if ( !$ts
# || $ts == $self->Holder
# || $self->Holder->Type eq "Arena" )
#
# This usually helps readability, but if there are subsequent
# ! operators things will still get messed up. For example:
#
# if ( !exists $Net::DNS::typesbyname{$qtype}
# && exists $Net::DNS::classesbyname{$qtype}
# && !exists $Net::DNS::classesbyname{$qclass}
# && exists $Net::DNS::typesbyname{$qclass} )
# We can't fix that.
if ($matches_without_bang) { $pad_spaces-- }
# make sure this won't change if -lp is used
my $indentation_1 = $leading_spaces_to_go[$ibeg];
if ( ref($indentation_1) ) {
if ( $indentation_1->get_recoverable_spaces() == 0 ) {
my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
unless ( $indentation_2->get_recoverable_spaces() == 0 )
{
$pad_spaces = 0;
}
}
}
# we might be able to handle a pad of -1 by removing a blank
# token
if ( $pad_spaces < 0 ) {
# Deactivated for -kpit due to conflict. This block deletes
# a space in an attempt to improve alignment in some cases,
# but it may conflict with user spacing requests. For now
# it is just deactivated if the -kpit option is used.
if ( $pad_spaces == -1 ) {
if ( $ipad > $ibeg
&& $types_to_go[ $ipad - 1 ] eq 'b'
&& !%keyword_paren_inner_tightness )
{
$self->pad_token( $ipad - 1, $pad_spaces );
}
}
$pad_spaces = 0;
}
# now apply any padding for alignment
if ( $ipad >= 0 && $pad_spaces ) {
my $length_t = total_line_length( $ibeg, $iend );
if ( $pad_spaces + $length_t <=
$maximum_line_length[ $levels_to_go[$ibeg] ] )
{
$self->pad_token( $ipad, $pad_spaces );
}
}
}
}
continue {
$iendm = $iend;
$ibegm = $ibeg;
$has_leading_op = $has_leading_op_next;
} # end of loop over lines
return;
}
} ## end closure set_logical_padding
sub pad_token {
# insert $pad_spaces before token number $ipad
my ( $self, $ipad, $pad_spaces ) = @_;
my $rLL = $self->[_rLL_];
my $KK = $K_to_go[$ipad];
my $tok = $rLL->[$KK]->[_TOKEN_];
my $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_];
if ( $pad_spaces > 0 ) {
$tok = ' ' x $pad_spaces . $tok;
$tok_len += $pad_spaces;
}
elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
$tok = "";
$tok_len = 0;
}
else {
# shouldn't happen
return;
}
$tok = $rLL->[$KK]->[_TOKEN_] = $tok;
$tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
$token_lengths_to_go[$ipad] += $pad_spaces;
$tokens_to_go[$ipad] = $tok;
foreach my $i ( $ipad .. $max_index_to_go ) {
$summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
}
return;
}
{ ## begin closure make_alignment_patterns
my %block_type_map;
my %keyword_map;
my %operator_map;
my %is_w_n_C;
BEGIN {
# map related block names into a common name to
# allow alignment
%block_type_map = (
'unless' => 'if',
'else' => 'if',
'elsif' => 'if',
'when' => 'if',
'default' => 'if',
'case' => 'if',
'sort' => 'map',
'grep' => 'map',
);
# map certain keywords to the same 'if' class to align
# long if/elsif sequences. [elsif.pl]
%keyword_map = (
'unless' => 'if',
'else' => 'if',
'elsif' => 'if',
'when' => 'given',
'default' => 'given',
'case' => 'switch',
# treat an 'undef' similar to numbers and quotes
'undef' => 'Q',
);
# map certain operators to the same class for pattern matching
%operator_map = (
'!~' => '=~',
'+=' => '+=',
'-=' => '+=',
'*=' => '+=',
'/=' => '+=',
);
%is_w_n_C = (
'w' => 1,
'n' => 1,
'C' => 1,
);
}
sub delete_needless_alignments {
my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_;
# Remove unwanted alignments. This routine is a place to remove
# alignments which might cause problems at later stages. There are
# currently two types of fixes:
# 1. Remove excess parens
# 2. Remove alignments within 'elsif' conditions
# Patch #1: Excess alignment of parens can prevent other good
# alignments. For example, note the parens in the first two rows of
# the following snippet. They would normally get marked for alignment
# and aligned as follows:
# my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
# my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
# my $img = new Gimp::Image( $w, $h, RGB );
# This causes unnecessary paren alignment and prevents the third equals
# from aligning. If we remove the unwanted alignments we get:
# my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
# my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
# my $img = new Gimp::Image( $w, $h, RGB );
# A rule for doing this which works well is to remove alignment of
# parens whose containers do not contain other aligning tokens, with
# the exception that we always keep alignment of the first opening
# paren on a line (for things like 'if' and 'elsif' statements).
# Setup needed constants
my $i_good_paren = -1;
my $imin_match = $iend + 1;
my $i_elsif_close = $ibeg - 1;
my $i_elsif_open = $iend + 1;
if ( $iend > $ibeg ) {
if ( $types_to_go[$ibeg] eq 'k' ) {
# Paren patch: mark a location of a paren we should keep, such
# as one following something like a leading 'if', 'elsif',..
$i_good_paren = $ibeg + 1;
if ( $types_to_go[$i_good_paren] eq 'b' ) {
$i_good_paren++;
}
# 'elsif' patch: remember the range of the parens of an elsif,
# and do not make alignments within them because this can cause
# loss of padding and overall brace alignment in the vertical
# aligner.
if ( $tokens_to_go[$ibeg] eq 'elsif'
&& $i_good_paren < $iend
&& $tokens_to_go[$i_good_paren] eq '(' )
{
$i_elsif_open = $i_good_paren;
$i_elsif_close = $mate_index_to_go[$i_good_paren];
}
}
}
# Loop to make the fixes on this line
my @imatch_list;
for my $i ( $ibeg .. $iend ) {
if ( $ralignment_type_to_go->[$i] ) {
# Patch #2: undo alignment within elsif parens
if ( $i > $i_elsif_open && $i < $i_elsif_close ) {
$ralignment_type_to_go->[$i] = '';
next;
}
push @imatch_list, $i;
}
if ( $tokens_to_go[$i] eq ')' ) {
# Patch #1: undo the corresponding opening paren if:
# - it is at the top of the stack
# - and not the first overall opening paren
# - does not follow a leading keyword on this line
my $imate = $mate_index_to_go[$i];
if ( @imatch_list
&& $imatch_list[-1] eq $imate
&& ( $ibeg > 1 || @imatch_list > 1 )
&& $imate > $i_good_paren )
{
$ralignment_type_to_go->[$imate] = '';
pop @imatch_list;
}
}
}
return;
}
sub make_alignment_patterns {
# Here we do some important preliminary work for the
# vertical aligner. We create three arrays for one
# output line. These arrays contain strings that can
# be tested by the vertical aligner to see if
# consecutive lines can be aligned vertically.
#
# The three arrays are indexed on the vertical
# alignment fields and are:
# @tokens - a list of any vertical alignment tokens for this line.
# These are tokens, such as '=' '&&' '#' etc which
# we want to might align vertically. These are
# decorated with various information such as
# nesting depth to prevent unwanted vertical
# alignment matches.
# @fields - the actual text of the line between the vertical alignment
# tokens.
# @patterns - a modified list of token types, one for each alignment
# field. These should normally each match before alignment is
# allowed, even when the alignment tokens match.
my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_;
my @tokens = ();
my @fields = ();
my @patterns = ();
my @field_lengths = ();
my $i_start = $ibeg;
my $depth = 0;
my %container_name = ( 0 => "" );
my $j = 0; # field index
$patterns[0] = "";
my %token_count;
for my $i ( $ibeg .. $iend ) {
# Keep track of containers balanced on this line only.
# These are used below to prevent unwanted cross-line alignments.
# Unbalanced containers already avoid aligning across
# container boundaries.
my $type = $types_to_go[$i];
my $token = $tokens_to_go[$i];
my $depth_last = $depth;
if ( $type_sequence_to_go[$i] ) {
if ( $is_opening_type{$token} ) {
# if container is balanced on this line...
my $i_mate = $mate_index_to_go[$i];
if ( $i_mate > $i && $i_mate <= $iend ) {
$depth++;
# Append the previous token name to make the container name
# more unique. This name will also be given to any commas
# within this container, and it helps avoid undesirable
# alignments of different types of containers.
# Containers beginning with { and [ are given those names
# for uniqueness. That way commas in different containers
# will not match. Here is an example of what this prevents:
# a => [ 1, 2, 3 ],
# b => { b1 => 4, b2 => 5 },
# Here is another example of what we avoid by labeling the
# commas properly:
# is_d( [ $a, $a ], [ $b, $c ] );
# is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } );
# is_d( [ \$a, \$a ], [ \$b, \$c ] );
my $name = $token;
if ( $token eq '(' ) {
$name = $self->make_paren_name($i);
}
$container_name{$depth} = "+" . $name;
# Make the container name even more unique if necessary.
# If we are not vertically aligning this opening paren,
# append a character count to avoid bad alignment because
# it usually looks bad to align commas within containers
# for which the opening parens do not align. Here
# is an example very BAD alignment of commas (because
# the atan2 functions are not all aligned):
# $XY =
# $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
# $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
# $X * atan2( $X, 1 ) -
# $Y * atan2( $Y, 1 );
#
# On the other hand, it is usually okay to align commas
# if opening parens align, such as:
# glVertex3d( $cx + $s * $xs, $cy, $z );
# glVertex3d( $cx, $cy + $s * $ys, $z );
# glVertex3d( $cx - $s * $xs, $cy, $z );
# glVertex3d( $cx, $cy - $s * $ys, $z );
#
# To distinguish between these situations, we will append
# the length of the line from the previous matching
# token, or beginning of line, to the function name.
# This will allow the vertical aligner to reject
# undesirable matches.
# if we are not aligning on this paren...
if ( !$ralignment_type_to_go->[$i] ) {
# Sum length from previous alignment
my $len = token_sequence_length( $i_start, $i - 1 );
# Minor patch: do not include the length of any '!'.
# Otherwise, commas in the following line will not
# match
# ok( 20, tapprox( ( pdl 2, 3 ), ( pdl 2, 3 ) ) );
# ok( 21, !tapprox( ( pdl 2, 3 ), ( pdl 2, 4 ) ) );
if ( grep { $_ eq '!' }
@types_to_go[ $i_start .. $i - 1 ] )
{
$len -= 1;
}
if ( $i_start == $ibeg ) {
# For first token, use distance from start of line
# but subtract off the indentation due to level.
# Otherwise, results could vary with indentation.
$len +=
leading_spaces_to_go($ibeg) -
$levels_to_go[$i_start] *
$rOpts_indent_columns;
if ( $len < 0 ) { $len = 0 }
}
# tack this length onto the container name to try
# to make a unique token name
$container_name{$depth} .= "-" . $len;
}
}
}
elsif ( $is_closing_type{$token} ) {
$depth-- if $depth > 0;
}
}
# if we find a new synchronization token, we are done with
# a field
if ( $i > $i_start && $ralignment_type_to_go->[$i] ) {
my $tok = my $raw_tok = $ralignment_type_to_go->[$i];
# map similar items
my $tok_map = $operator_map{$tok};
$tok = $tok_map if ($tok_map);
# make separators in different nesting depths unique
# by appending the nesting depth digit.
if ( $raw_tok ne '#' ) {
$tok .= "$nesting_depth_to_go[$i]";
}
# also decorate commas with any container name to avoid
# unwanted cross-line alignments.
if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
# If we are at an opening token which increased depth, we have
# to use the name from the previous depth.
my $depth_p =
( $depth_last < $depth ? $depth_last : $depth );
if ( $container_name{$depth_p} ) {
$tok .= $container_name{$depth_p};
}
}
# Patch to avoid aligning leading and trailing if, unless.
# Mark trailing if, unless statements with container names.
# This makes them different from leading if, unless which
# are not so marked at present. If we ever need to name
# them too, we could use ci to distinguish them.
# Example problem to avoid:
# return ( 2, "DBERROR" )
# if ( $retval == 2 );
# if ( scalar @_ ) {
# my ( $a, $b, $c, $d, $e, $f ) = @_;
# }
if ( $raw_tok eq '(' ) {
if ( $ci_levels_to_go[$ibeg]
&& $container_name{$depth} =~ /^\+(if|unless)/ )
{
$tok .= $container_name{$depth};
}
}
# Decorate block braces with block types to avoid
# unwanted alignments such as the following:
# foreach ( @{$routput_array} ) { $fh->print($_) }
# eval { $fh->close() };
if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
my $block_type = $block_type_to_go[$i];
# map certain related block types to allow
# else blocks to align
$block_type = $block_type_map{$block_type}
if ( defined( $block_type_map{$block_type} ) );
# remove sub names to allow one-line sub braces to align
# regardless of name
if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
# allow all control-type blocks to align
if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
$tok .= $block_type;
}
# Mark multiple copies of certain tokens with the copy number
# This will allow the aligner to decide if they are matched.
# For now, only do this for equals. For example, the two
# equals on the next line will be labeled '=0' and '=0.2'.
# Later, the '=0.2' will be ignored in alignment because it
# has no match.
# $| = $debug = 1 if $opt_d;
# $full_index = 1 if $opt_i;
if ( $raw_tok eq '=' || $raw_tok eq '=>' ) {
$token_count{$tok}++;
if ( $token_count{$tok} > 1 ) {
$tok .= '.' . $token_count{$tok};
}
}
# concatenate the text of the consecutive tokens to form
# the field
push( @fields,
join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
push @field_lengths,
$summed_lengths_to_go[$i] - $summed_lengths_to_go[$i_start];
# store the alignment token for this field
push( @tokens, $tok );
# get ready for the next batch
$i_start = $i;
$j++;
$patterns[$j] = "";
}
# continue accumulating tokens
# for keywords we have to use the actual text
if ( $type eq 'k' ) {
my $tok_fix = $tokens_to_go[$i];
# but map certain keywords to a common string to allow
# alignment.
$tok_fix = $keyword_map{$tok_fix}
if ( defined( $keyword_map{$tok_fix} ) );
$patterns[$j] .= $tok_fix;
}
elsif ( $type eq 'b' ) {
$patterns[$j] .= $type;
}
# handle non-keywords..
else {
my $type_fix = $type;
# Mark most things before arrows as a quote to
# get them to line up. Testfile: mixed.pl.
# $type =~ /^[wnC]$/
if ( $i < $iend - 1 && $is_w_n_C{$type} ) {
my $next_type = $types_to_go[ $i + 1 ];
my $i_next_nonblank =
( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
$type_fix = 'Q';
# Patch to ignore leading minus before words,
# by changing pattern 'mQ' into just 'Q',
# so that we can align things like this:
# Button => "Print letter \"~$_\"",
# -command => [ sub { print "$_[0]\n" }, $_ ],
if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
}
}
# Convert a bareword within braces into a quote for matching.
# This will allow alignment of expressions like this:
# local ( $SIG{'INT'} ) = IGNORE;
# local ( $SIG{ALRM} ) = 'POSTMAN';
if ( $type eq 'w'
&& $i > $ibeg
&& $i < $iend
&& $types_to_go[ $i - 1 ] eq 'L'
&& $types_to_go[ $i + 1 ] eq 'R' )
{
$type_fix = 'Q';
}
# patch to make numbers and quotes align
if ( $type eq 'n' ) { $type_fix = 'Q' }
# patch to ignore any ! in patterns
if ( $type eq '!' ) { $type_fix = '' }
$patterns[$j] .= $type_fix;
}
}
# done with this line .. join text of tokens to make the last field
push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
push @field_lengths,
$summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$i_start];
return ( \@tokens, \@fields, \@patterns, \@field_lengths );
}
} ## end closure make_alignment_patterns
sub make_paren_name {
my ( $self, $i ) = @_;
# The token at index $i is a '('.
# Create an alignment name for it to avoid incorrect alignments.
# Start with the name of the previous nonblank token...
my $name = "";
my $im = $i - 1;
return "" if ( $im < 0 );
if ( $types_to_go[$im] eq 'b' ) { $im--; }
return "" if ( $im < 0 );
$name = $tokens_to_go[$im];
# Prepend any sub name to an isolated -> to avoid unwanted alignments
# [test case is test8/penco.pl]
if ( $name eq '->' ) {
$im--;
if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
$name = $tokens_to_go[$im] . $name;
}
}
# Finally, remove any leading arrows
if ( substr( $name, 0, 2 ) eq '->' ) {
$name = substr( $name, 2 );
}
return $name;
}
{ ## begin closure set_adjusted_indentation
my ( $last_indentation_written, $last_unadjusted_indentation,
$last_leading_token );
sub initialize_adjusted_indentation {
$last_indentation_written = 0;
$last_unadjusted_indentation = 0;
$last_leading_token = "";
return;
}
sub set_adjusted_indentation {
# This routine has the final say regarding the actual indentation of
# a line. It starts with the basic indentation which has been
# defined for the leading token, and then takes into account any
# options that the user has set regarding special indenting and
# outdenting.
# This routine has to resolve a number of complex interacting issues,
# including:
# 1. The various -cti=n type flags, which contain the desired change in
# indentation for lines ending in commas and semicolons, should be
# followed,
# 2. qw quotes require special processing and do not fit perfectly
# with normal containers,
# 3. formatting with -wn can complicate things, especially with qw
# quotes,
# 4. formatting with the -lp option is complicated, and does not
# work well with qw quotes and with -wn formatting.
# 5. a number of special situations, such as 'cuddled' formatting.
# 6. This routine is mainly concerned with outdenting closing tokens
# but note that there is some overlap with the functions of sub
# undo_ci, which was processed earlier, so care has to be taken to
# keep them coordinated.
my (
$self, $ibeg,
$iend, $rfields,
$rpatterns, $ri_first,
$ri_last, $rindentation_list,
$level_jump, $starting_in_quote,
$is_static_block_comment,
) = @_;
my $rLL = $self->[_rLL_];
my $ris_bli_container = $self->[_ris_bli_container_];
my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
# we need to know the last token of this line
my ( $terminal_type, $i_terminal ) = terminal_type_i( $ibeg, $iend );
my $terminal_block_type = $block_type_to_go[$i_terminal];
my $is_outdented_line = 0;
my $type_beg = $types_to_go[$ibeg];
my $token_beg = $tokens_to_go[$ibeg];
my $K_beg = $K_to_go[$ibeg];
my $ibeg_weld_fix = $ibeg;
my $seqno_beg = $type_sequence_to_go[$ibeg];
my $is_bli_beg = $seqno_beg ? $ris_bli_container->{$seqno_beg} : 0;
# QW INDENTATION PATCH 3:
my $seqno_qw_closing;
if ( $type_beg eq 'q' && $ibeg == 0 ) {
my $KK = $K_to_go[$ibeg];
$seqno_qw_closing =
$self->[_rending_multiline_qw_seqno_by_K_]->{$KK};
}
my $is_semicolon_terminated = $terminal_type eq ';'
&& ( $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]
|| $seqno_qw_closing );
# NOTE: A future improvement would be to make it semicolon terminated
# even if it does not have a semicolon but is followed by a closing
# block brace. This would undo ci even for something like the
# following, in which the final paren does not have a semicolon because
# it is a possible weld location:
# if ($BOLD_MATH) {
# (
# $labels, $comment,
# join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
# )
# }
#
# MOJO: Set a flag if this lines begins with ')->'
my $leading_paren_arrow = (
$types_to_go[$ibeg] eq '}'
&& $tokens_to_go[$ibeg] eq ')'
&& (
( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
|| ( $ibeg < $i_terminal - 1
&& $types_to_go[ $ibeg + 1 ] eq 'b'
&& $types_to_go[ $ibeg + 2 ] eq '->' )
)
);
##########################################################
# Section 1: set a flag and a default indentation
#
# Most lines are indented according to the initial token.
# But it is common to outdent to the level just after the
# terminal token in certain cases...
# adjust_indentation flag:
# 0 - do not adjust
# 1 - outdent
# 2 - vertically align with opening token
# 3 - indent
##########################################################
my $adjust_indentation = 0;
my $default_adjust_indentation = $adjust_indentation;
my (
$opening_indentation, $opening_offset,
$is_leading, $opening_exists
);
# Update the $is_bli flag as we go. It is initially 1.
# We note seeing a leading opening brace by setting it to 2.
# If we get to the closing brace without seeing the opening then we
# turn it off. This occurs if the opening brace did not get output
# at the start of a line, so we will then indent the closing brace
# in the default way.
if ( $is_bli_beg && $is_bli_beg == 1 ) {
my $K_opening_container = $self->[_K_opening_container_];
my $K_opening = $K_opening_container->{$seqno_beg};
if ( $K_beg eq $K_opening ) {
$ris_bli_container->{$seqno_beg} = $is_bli_beg = 2;
}
else { $is_bli_beg = 0 }
}
# QW PATCH for the combination -lp -wn
# For -lp formatting use $ibeg_weld_fix to get around the problem
# that with -lp type formatting the opening and closing tokens to not
# have sequence numbers.
if ($seqno_qw_closing) {
my $K_next_nonblank = $self->K_next_code($K_beg);
if ( defined($K_next_nonblank) ) {
my $type_sequence = $rLL->[$K_next_nonblank]->[_TYPE_SEQUENCE_];
my $token = $rLL->[$K_next_nonblank]->[_TOKEN_];
my $welded = $self->weld_len_left( $type_sequence, $token );
if ($welded) {
my $itest = $ibeg + ( $K_next_nonblank - $K_beg );
if ( $itest <= $max_index_to_go ) {
$ibeg_weld_fix = $itest;
}
}
}
}
# if we are at a closing token of some type..
if ( $is_closing_type{$type_beg} || $seqno_qw_closing ) {
# get the indentation of the line containing the corresponding
# opening token
(
$opening_indentation, $opening_offset,
$is_leading, $opening_exists
)
= $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
$ri_last, $rindentation_list, $seqno_qw_closing );
# First set the default behavior:
if (
# default behavior is to outdent closing lines
# of the form: "); }; ]; )->xxx;"
$is_semicolon_terminated
# and 'cuddled parens' of the form: ")->pack("
# Bug fix for RT #123749]: the types here were
# incorrectly '(' and ')'. Corrected to be '{' and '}'
|| (
$terminal_type eq '{'
&& $type_beg eq '}'
&& ( $nesting_depth_to_go[$iend] + 1 ==
$nesting_depth_to_go[$ibeg] )
)
# remove continuation indentation for any line like
# } ... {
# or without ending '{' and unbalanced, such as
# such as '}->{$operator}'
|| (
$type_beg eq '}'
&& ( $types_to_go[$iend] eq '{'
|| $levels_to_go[$iend] < $levels_to_go[$ibeg] )
)
# and when the next line is at a lower indentation level...
# PATCH #1: and only if the style allows undoing continuation
# for all closing token types. We should really wait until
# the indentation of the next line is known and then make
# a decision, but that would require another pass.
# PATCH #2: and not if this token is under -xci control
|| ( $level_jump < 0
&& !$some_closing_token_indentation
&& !$rseqno_controlling_my_ci->{$K_beg} )
# Patch for -wn=2, multiple welded closing tokens
|| ( $i_terminal > $ibeg
&& $is_closing_type{ $types_to_go[$iend] } )
)
{
$adjust_indentation = 1;
}
# outdent something like '),'
if (
$terminal_type eq ','
# Removed this constraint for -wn
# OLD: allow just one character before the comma
# && $i_terminal == $ibeg + 1
# require LIST environment; otherwise, we may outdent too much -
# this can happen in calls without parentheses (overload.t);
&& $container_environment_to_go[$i_terminal] eq 'LIST'
)
{
$adjust_indentation = 1;
}
# undo continuation indentation of a terminal closing token if
# it is the last token before a level decrease. This will allow
# a closing token to line up with its opening counterpart, and
# avoids an indentation jump larger than 1 level.
if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
&& $i_terminal == $ibeg
&& defined($K_beg) )
{
my $K_next_nonblank = $self->K_next_code($K_beg);
if ( !$is_bli_beg && defined($K_next_nonblank) ) {
my $lev = $rLL->[$K_beg]->[_LEVEL_];
my $level_next = $rLL->[$K_next_nonblank]->[_LEVEL_];
# and do not undo ci if it was set by the -xci option
$adjust_indentation = 1
if ( $level_next < $lev
&& !$rseqno_controlling_my_ci->{$K_beg} );
}
# Patch for RT #96101, in which closing brace of anonymous subs
# was not outdented. We should look ahead and see if there is
# a level decrease at the next token (i.e., a closing token),
# but right now we do not have that information. For now
# we see if we are in a list, and this works well.
# See test files 'sub*.t' for good test cases.
if ( $block_type_to_go[$ibeg] =~ /$ASUB_PATTERN/
&& $container_environment_to_go[$i_terminal] eq 'LIST'
&& !$rOpts->{'indent-closing-brace'} )
{
(
$opening_indentation, $opening_offset,
$is_leading, $opening_exists
)
= $self->get_opening_indentation( $ibeg, $ri_first,
$ri_last, $rindentation_list );
my $indentation = $leading_spaces_to_go[$ibeg];
if ( defined($opening_indentation)
&& get_spaces($indentation) >
get_spaces($opening_indentation) )
{
$adjust_indentation = 1;
}
}
}
# YVES patch 1 of 2:
# Undo ci of line with leading closing eval brace,
# but not beyond the indention of the line with
# the opening brace.
if ( $block_type_to_go[$ibeg] eq 'eval'
&& !$rOpts->{'line-up-parentheses'}
&& !$rOpts->{'indent-closing-brace'} )
{
(
$opening_indentation, $opening_offset,
$is_leading, $opening_exists
)
= $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
$rindentation_list );
my $indentation = $leading_spaces_to_go[$ibeg];
if ( defined($opening_indentation)
&& get_spaces($indentation) >
get_spaces($opening_indentation) )
{
$adjust_indentation = 1;
}
}
# patch for issue git #40: -bli setting has priority
$adjust_indentation = 0 if ($is_bli_beg);
$default_adjust_indentation = $adjust_indentation;
# Now modify default behavior according to user request:
# handle option to indent non-blocks of the form ); }; ];
# But don't do special indentation to something like ')->pack('
if ( !$block_type_to_go[$ibeg] ) {
# Note that logical padding has already been applied, so we may
# need to remove some spaces to get a valid hash key.
my $tok = $tokens_to_go[$ibeg];
my $cti = $closing_token_indentation{$tok};
# Fix the value of 'cti' for an isloated non-welded closing qw
# delimiter.
if ( $seqno_qw_closing && $ibeg_weld_fix == $ibeg ) {
# A quote delimiter which is not a container will not have
# a cti value defined. In this case use the style of a
# paren. For example
# my @fars = (
# qw<
# far
# farfar
# farfars-far
# >,
# );
if ( !defined($cti) && length($tok) == 1 ) {
# something other than ')', '}', ']' ; use flag for ')'
$cti = $closing_token_indentation{')'};
# But for now, do not outdent non-container qw
# delimiters because it would would change existing
# formatting.
if ( $tok ne '>' ) { $cti = 3 }
}
# A non-welded closing qw cannot currently use -cti=1
# because that option requires a sequence number to find
# the opening indentation, and qw quote delimiters are not
# sequenced items.
if ( defined($cti) && $cti == 1 ) { $cti = 0 }
}
if ( !defined($cti) ) {
# $cti may not be defined for several reasons.
# -padding may have been applied so the character
# has a length > 1
# - we may have welded to a closing quote token.
# Here is an example (perltidy -wn):
# __PACKAGE__->load_components( qw(
# > Core
# >
# > ) );
$adjust_indentation = 0;
}
elsif ( $cti == 1 ) {
if ( $i_terminal <= $ibeg + 1
|| $is_semicolon_terminated )
{
$adjust_indentation = 2;
}
else {
$adjust_indentation = 0;
}
}
elsif ( $cti == 2 ) {
if ($is_semicolon_terminated) {
$adjust_indentation = 3;
}
else {
$adjust_indentation = 0;
}
}
elsif ( $cti == 3 ) {
$adjust_indentation = 3;
}
}
# handle option to indent blocks
else {
if (
$rOpts->{'indent-closing-brace'}
&& (
$i_terminal == $ibeg # isolated terminal '}'
|| $is_semicolon_terminated
)
) # } xxxx ;
{
$adjust_indentation = 3;
}
}
}
# if at ');', '};', '>;', and '];' of a terminal qw quote
elsif ($rpatterns->[0] =~ /^qb*;$/
&& $rfields->[0] =~ /^([\)\}\]\>]);$/ )
{
if ( $closing_token_indentation{$1} == 0 ) {
$adjust_indentation = 1;
}
else {
$adjust_indentation = 3;
}
}
# if line begins with a ':', align it with any
# previous line leading with corresponding ?
elsif ( $types_to_go[$ibeg] eq ':' ) {
(
$opening_indentation, $opening_offset,
$is_leading, $opening_exists
)
= $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
$rindentation_list );
if ($is_leading) { $adjust_indentation = 2; }
}
##########################################################
# Section 2: set indentation according to flag set above
#
# Select the indentation object to define leading
# whitespace. If we are outdenting something like '} } );'
# then we want to use one level below the last token
# ($i_terminal) in order to get it to fully outdent through
# all levels.
##########################################################
my $indentation;
my $lev;
my $level_end = $levels_to_go[$iend];
if ( $adjust_indentation == 0 ) {
$indentation = $leading_spaces_to_go[$ibeg];
$lev = $levels_to_go[$ibeg];
}
elsif ( $adjust_indentation == 1 ) {
# Change the indentation to be that of a different token on the line
# Previously, the indentation of the terminal token was used:
# OLD CODING:
# $indentation = $reduced_spaces_to_go[$i_terminal];
# $lev = $levels_to_go[$i_terminal];
# Generalization for MOJO:
# Use the lowest level indentation of the tokens on the line.
# For example, here we can use the indentation of the ending ';':
# } until ($selection > 0 and $selection < 10); # ok to use ';'
# But this will not outdent if we use the terminal indentation:
# )->then( sub { # use indentation of the ->, not the {
# Warning: reduced_spaces_to_go[] may be a reference, do not
# do numerical checks with it
my $i_ind = $ibeg;
$indentation = $reduced_spaces_to_go[$i_ind];
$lev = $levels_to_go[$i_ind];
while ( $i_ind < $i_terminal ) {
$i_ind++;
if ( $levels_to_go[$i_ind] < $lev ) {
$indentation = $reduced_spaces_to_go[$i_ind];
$lev = $levels_to_go[$i_ind];
}
}
}
# handle indented closing token which aligns with opening token
elsif ( $adjust_indentation == 2 ) {
# handle option to align closing token with opening token
$lev = $levels_to_go[$ibeg];
# calculate spaces needed to align with opening token
my $space_count =
get_spaces($opening_indentation) + $opening_offset;
# Indent less than the previous line.
#
# Problem: For -lp we don't exactly know what it was if there
# were recoverable spaces sent to the aligner. A good solution
# would be to force a flush of the vertical alignment buffer, so
# that we would know. For now, this rule is used for -lp:
#
# When the last line did not start with a closing token we will
# be optimistic that the aligner will recover everything wanted.
#
# This rule will prevent us from breaking a hierarchy of closing
# tokens, and in a worst case will leave a closing paren too far
# indented, but this is better than frequently leaving it not
# indented enough.
my $last_spaces = get_spaces($last_indentation_written);
if ( !$is_closing_token{$last_leading_token} ) {
$last_spaces +=
get_recoverable_spaces($last_indentation_written);
}
# reset the indentation to the new space count if it works
# only options are all or none: nothing in-between looks good
$lev = $levels_to_go[$ibeg];
if ( $space_count < $last_spaces ) {
if ($rOpts_line_up_parentheses) {
my $lev = $levels_to_go[$ibeg];
$indentation =
new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
}
else {
$indentation = $space_count;
}
}
# revert to default if it doesn't work
else {
$space_count = leading_spaces_to_go($ibeg);
if ( $default_adjust_indentation == 0 ) {
$indentation = $leading_spaces_to_go[$ibeg];
}
elsif ( $default_adjust_indentation == 1 ) {
$indentation = $reduced_spaces_to_go[$i_terminal];
$lev = $levels_to_go[$i_terminal];
}
}
}
# Full indentaion of closing tokens (-icb and -icp or -cti=2)
else {
# handle -icb (indented closing code block braces)
# Updated method for indented block braces: indent one full level if
# there is no continuation indentation. This will occur for major
# structures such as sub, if, else, but not for things like map
# blocks.
#
# Note: only code blocks without continuation indentation are
# handled here (if, else, unless, ..). In the following snippet,
# the terminal brace of the sort block will have continuation
# indentation as shown so it will not be handled by the coding
# here. We would have to undo the continuation indentation to do
# this, but it probably looks ok as is. This is a possible future
# update for semicolon terminated lines.
#
# if ($sortby eq 'date' or $sortby eq 'size') {
# @files = sort {
# $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
# or $a cmp $b
# } @files;
# }
#
if ( $block_type_to_go[$ibeg]
&& $ci_levels_to_go[$i_terminal] == 0 )
{
my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
$indentation = $spaces + $rOpts_indent_columns;
# NOTE: for -lp we could create a new indentation object, but
# there is probably no need to do it
}
# handle -icp and any -icb block braces which fall through above
# test such as the 'sort' block mentioned above.
else {
# There are currently two ways to handle -icp...
# One way is to use the indentation of the previous line:
# $indentation = $last_indentation_written;
# The other way is to use the indentation that the previous line
# would have had if it hadn't been adjusted:
$indentation = $last_unadjusted_indentation;
# Current method: use the minimum of the two. This avoids
# inconsistent indentation.
if ( get_spaces($last_indentation_written) <
get_spaces($indentation) )
{
$indentation = $last_indentation_written;
}
}
# use previous indentation but use own level
# to cause list to be flushed properly
$lev = $levels_to_go[$ibeg];
}
# remember indentation except for multi-line quotes, which get
# no indentation
unless ( $ibeg == 0 && $starting_in_quote ) {
$last_indentation_written = $indentation;
$last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
$last_leading_token = $tokens_to_go[$ibeg];
# Patch to make a line which is the end of a qw quote work with the
# -lp option. Make $token_beg look like a closing token as some
# type even if it is not. This veriable will become
# $last_leading_token at the end of this loop. Then, if the -lp
# style is selected, and the next line is also a
# closing token, it will not get more indentation than this line.
# We need to do this because qw quotes (at present) only get
# continuation indentation, not one level of indentation, so we
# need to turn off the -lp indentation.
# ... a picture is worth a thousand words:
# perltidy -wn -gnu (Without this patch):
# ok(defined(
# $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
# 2981014)])
# ));
# perltidy -wn -gnu (With this patch):
# ok(defined(
# $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
# 2981014)])
# ));
## if ($seqno_qw_closing) { $last_leading_token = ')' }
if ( $seqno_qw_closing
&& ( length($token_beg) > 1 || $token_beg eq '>' ) )
{
$last_leading_token = ')';
}
}
# be sure lines with leading closing tokens are not outdented more
# than the line which contained the corresponding opening token.
#############################################################
# updated per bug report in alex_bug.pl: we must not
# mess with the indentation of closing logical braces so
# we must treat something like '} else {' as if it were
# an isolated brace
#############################################################
my $is_isolated_block_brace = $block_type_to_go[$ibeg]
&& ( $i_terminal == $ibeg
|| $is_if_elsif_else_unless_while_until_for_foreach{
$block_type_to_go[$ibeg] } );
# only do this for a ':; which is aligned with its leading '?'
my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
if (
defined($opening_indentation)
&& !$leading_paren_arrow # MOJO
&& !$is_isolated_block_brace
&& !$is_unaligned_colon
)
{
if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
$indentation = $opening_indentation;
}
}
# remember the indentation of each line of this batch
push @{$rindentation_list}, $indentation;
# outdent lines with certain leading tokens...
if (
# must be first word of this batch
$ibeg == 0
# and ...
&& (
# certain leading keywords if requested
(
$rOpts->{'outdent-keywords'}
&& $types_to_go[$ibeg] eq 'k'
&& $outdent_keyword{ $tokens_to_go[$ibeg] }
)
# or labels if requested
|| ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
# or static block comments if requested
|| ( $types_to_go[$ibeg] eq '#'
&& $rOpts->{'outdent-static-block-comments'}
&& $is_static_block_comment )
)
)
{
my $space_count = leading_spaces_to_go($ibeg);
if ( $space_count > 0 ) {
$space_count -= $rOpts_continuation_indentation;
$is_outdented_line = 1;
if ( $space_count < 0 ) { $space_count = 0 }
# do not promote a spaced static block comment to non-spaced;
# this is not normally necessary but could be for some
# unusual user inputs (such as -ci = -i)
if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
$space_count = 1;
}
if ($rOpts_line_up_parentheses) {
$indentation =
new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
}
else {
$indentation = $space_count;
}
}
}
return ( $indentation, $lev, $level_end, $terminal_type,
$terminal_block_type, $is_semicolon_terminated,
$is_outdented_line );
}
} ## end closure set_adjusted_indentation
sub get_opening_indentation {
# get the indentation of the line which output the opening token
# corresponding to a given closing token in the current output batch.
#
# given:
# $i_closing - index in this line of a closing token ')' '}' or ']'
#
# $ri_first - reference to list of the first index $i for each output
# line in this batch
# $ri_last - reference to list of the last index $i for each output line
# in this batch
# $rindentation_list - reference to a list containing the indentation
# used for each line.
# $qw_seqno - optional sequence number to use if normal seqno not defined
# (TODO: would be more general to just look this up from index i)
#
# return:
# -the indentation of the line which contained the opening token
# which matches the token at index $i_opening
# -and its offset (number of columns) from the start of the line
#
my ( $self, $i_closing, $ri_first, $ri_last, $rindentation_list, $qw_seqno )
= @_;
# first, see if the opening token is in the current batch
my $i_opening = $mate_index_to_go[$i_closing];
my ( $indent, $offset, $is_leading, $exists );
$exists = 1;
if ( defined($i_opening) && $i_opening >= 0 ) {
# it is..look up the indentation
( $indent, $offset, $is_leading ) =
lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
$rindentation_list );
}
# if not, it should have been stored in the hash by a previous batch
else {
my $seqno = $type_sequence_to_go[$i_closing];
$seqno = $qw_seqno unless ($seqno);
( $indent, $offset, $is_leading, $exists ) =
get_saved_opening_indentation($seqno);
}
return ( $indent, $offset, $is_leading, $exists );
}
sub set_vertical_tightness_flags {
my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last,
$ending_in_quote, $closing_side_comment )
= @_;
# Define vertical tightness controls for the nth line of a batch.
# We create an array of parameters which tell the vertical aligner
# if we should combine this line with the next line to achieve the
# desired vertical tightness. The array of parameters contains:
#
# [0] type: 1=opening non-block 2=closing non-block
# 3=opening block brace 4=closing block brace
#
# [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
# if closing: spaces of padding to use
# [2] sequence number of container
# [3] valid flag: do not append if this flag is false. Will be
# true if appropriate -vt flag is set. Otherwise, Will be
# made true only for 2 line container in parens with -lp
#
# These flags are used by sub set_leading_whitespace in
# the vertical aligner
my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
# Uses these parameters:
# $rOpts_block_brace_tightness
# $rOpts_block_brace_vertical_tightness
# $rOpts_stack_closing_block_brace
# %opening_vertical_tightness
# %closing_vertical_tightness
# %opening_token_right
# %stack_closing_token
# %stack_opening_token
#--------------------------------------------------------------
# Vertical Tightness Flags Section 1:
# Handle Lines 1 .. n-1 but not the last line
# For non-BLOCK tokens, we will need to examine the next line
# too, so we won't consider the last line.
#--------------------------------------------------------------
if ( $n < $n_last_line ) {
#--------------------------------------------------------------
# Vertical Tightness Flags Section 1a:
# Look for Type 1, last token of this line is a non-block opening token
#--------------------------------------------------------------
my $ibeg_next = $ri_first->[ $n + 1 ];
my $token_end = $tokens_to_go[$iend];
my $iend_next = $ri_last->[ $n + 1 ];
if (
$type_sequence_to_go[$iend]
&& !$block_type_to_go[$iend]
&& $is_opening_token{$token_end}
&& (
$opening_vertical_tightness{$token_end} > 0
# allow 2-line method call to be closed up
|| ( $rOpts_line_up_parentheses
&& $token_end eq '('
&& $iend > $ibeg
&& $types_to_go[ $iend - 1 ] ne 'b' )
)
)
{
# avoid multiple jumps in nesting depth in one line if
# requested
my $ovt = $opening_vertical_tightness{$token_end};
my $iend_next = $ri_last->[ $n + 1 ];
unless (
$ovt < 2
&& ( $nesting_depth_to_go[ $iend_next + 1 ] !=
$nesting_depth_to_go[$ibeg_next] )
)
{
# If -vt flag has not been set, mark this as invalid
# and aligner will validate it if it sees the closing paren
# within 2 lines.
my $valid_flag = $ovt;
@{$rvertical_tightness_flags} =
( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
}
}
#--------------------------------------------------------------
# Vertical Tightness Flags Section 1b:
# Look for Type 2, first token of next line is a non-block closing
# token .. and be sure this line does not have a side comment
#--------------------------------------------------------------
my $token_next = $tokens_to_go[$ibeg_next];
if ( $type_sequence_to_go[$ibeg_next]
&& !$block_type_to_go[$ibeg_next]
&& $is_closing_token{$token_next}
&& $types_to_go[$iend] ne '#' ) # for safety, shouldn't happen!
{
my $ovt = $opening_vertical_tightness{$token_next};
my $cvt = $closing_vertical_tightness{$token_next};
if (
# Never append a trailing line like ')->pack(' because it
# will throw off later alignment. So this line must start at a
# deeper level than the next line (fix1 for welding, git #45).
(
$nesting_depth_to_go[$ibeg_next] >=
$nesting_depth_to_go[ $iend_next + 1 ] + 1
)
&& (
$cvt == 2
|| (
$container_environment_to_go[$ibeg_next] ne 'LIST'
&& (
$cvt == 1
# allow closing up 2-line method calls
|| ( $rOpts_line_up_parentheses
&& $token_next eq ')' )
)
)
)
)
{
# decide which trailing closing tokens to append..
my $ok = 0;
if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
else {
my $str = join( '',
@types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
# append closing token if followed by comment or ';'
# or another closing token (fix2 for welding, git #45)
if ( $str =~ /^b?[\)\]\}R#;]/ ) { $ok = 1 }
}
if ($ok) {
my $valid_flag = $cvt;
@{$rvertical_tightness_flags} = (
2,
$tightness{$token_next} == 2 ? 0 : 1,
$type_sequence_to_go[$ibeg_next], $valid_flag,
);
}
}
}
#--------------------------------------------------------------
# Vertical Tightness Flags Section 1c:
# Implement the Opening Token Right flag (Type 2)..
# If requested, move an isolated trailing opening token to the end of
# the previous line which ended in a comma. We could do this
# in sub recombine_breakpoints but that would cause problems
# with -lp formatting. The problem is that indentation will
# quickly move far to the right in nested expressions. By
# doing it after indentation has been set, we avoid changes
# to the indentation. Actual movement of the token takes place
# in sub valign_output_step_B.
#--------------------------------------------------------------
if (
$opening_token_right{ $tokens_to_go[$ibeg_next] }
# previous line is not opening
# (use -sot to combine with it)
&& !$is_opening_token{$token_end}
# previous line ended in one of these
# (add other cases if necessary; '=>' and '.' are not necessary
&& !$block_type_to_go[$ibeg_next]
# this is a line with just an opening token
&& ( $iend_next == $ibeg_next
|| $iend_next == $ibeg_next + 2
&& $types_to_go[$iend_next] eq '#' )
# looks bad if we align vertically with the wrong container
&& $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
)
{
my $valid_flag = 1;
my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
@{$rvertical_tightness_flags} =
( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
}
#--------------------------------------------------------------
# Vertical Tightness Flags Section 1d:
# Stacking of opening and closing tokens (Type 2)
#--------------------------------------------------------------
my $stackable;
my $token_beg_next = $tokens_to_go[$ibeg_next];
# patch to make something like 'qw(' behave like an opening paren
# (aran.t)
if ( $types_to_go[$ibeg_next] eq 'q' ) {
if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
$token_beg_next = $1;
}
}
if ( $is_closing_token{$token_end}
&& $is_closing_token{$token_beg_next} )
{
$stackable = $stack_closing_token{$token_beg_next}
unless ( $block_type_to_go[$ibeg_next] )
; # shouldn't happen; just checking
}
elsif ($is_opening_token{$token_end}
&& $is_opening_token{$token_beg_next} )
{
$stackable = $stack_opening_token{$token_beg_next}
unless ( $block_type_to_go[$ibeg_next] )
; # shouldn't happen; just checking
}
if ($stackable) {
my $is_semicolon_terminated;
if ( $n + 1 == $n_last_line ) {
my ( $terminal_type, $i_terminal ) =
terminal_type_i( $ibeg_next, $iend_next );
$is_semicolon_terminated = $terminal_type eq ';'
&& $nesting_depth_to_go[$iend_next] <
$nesting_depth_to_go[$ibeg_next];
}
# this must be a line with just an opening token
# or end in a semicolon
if (
$is_semicolon_terminated
|| ( $iend_next == $ibeg_next
|| $iend_next == $ibeg_next + 2
&& $types_to_go[$iend_next] eq '#' )
)
{
my $valid_flag = 1;
my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
@{$rvertical_tightness_flags} =
( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
);
}
}
}
#--------------------------------------------------------------
# Vertical Tightness Flags Section 2:
# Handle type 3, opening block braces on last line of the batch
# Check for a last line with isolated opening BLOCK curly
#--------------------------------------------------------------
elsif ($rOpts_block_brace_vertical_tightness
&& $ibeg eq $iend
&& $types_to_go[$iend] eq '{'
&& $block_type_to_go[$iend] =~
/$block_brace_vertical_tightness_pattern/ )
{
@{$rvertical_tightness_flags} =
( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
}
#--------------------------------------------------------------
# Vertical Tightness Flags Section 3:
# Handle type 4, a closing block brace on the last line of the batch Check
# for a last line with isolated closing BLOCK curly
# Patch: added a check for any new closing side comment which the
# -csc option may generate. If it exists, there will be a side comment
# so we cannot combine with a brace on the next line. This issue
# occurs for the combination -scbb and -csc is used.
#--------------------------------------------------------------
elsif ($rOpts_stack_closing_block_brace
&& $ibeg eq $iend
&& $block_type_to_go[$iend]
&& $types_to_go[$iend] eq '}'
&& ( !$closing_side_comment || $n < $n_last_line ) )
{
my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
@{$rvertical_tightness_flags} =
( 4, $spaces, $type_sequence_to_go[$iend], 1 );
}
# pack in the sequence numbers of the ends of this line
my $seqno_beg = $type_sequence_to_go[$ibeg];
if ( !$seqno_beg && $types_to_go[$ibeg] eq 'q' ) {
$seqno_beg = $self->get_seqno( $ibeg, $ending_in_quote );
}
my $seqno_end = $type_sequence_to_go[$iend];
if ( !$seqno_end && $types_to_go[$iend] eq 'q' ) {
$seqno_end = $self->get_seqno( $iend, $ending_in_quote );
}
$rvertical_tightness_flags->[4] = $seqno_beg;
$rvertical_tightness_flags->[5] = $seqno_end;
return $rvertical_tightness_flags;
}
##########################################################
# CODE SECTION 14: Code for creating closing side comments
##########################################################
{ ## begin closure accumulate_csc_text
# These routines are called once per batch when the --closing-side-comments flag
# has been set.
my %block_leading_text;
my %block_opening_line_number;
my $csc_new_statement_ok;
my $csc_last_label;
my %csc_block_label;
my $accumulating_text_for_block;
my $leading_block_text;
my $rleading_block_if_elsif_text;
my $leading_block_text_level;
my $leading_block_text_length_exceeded;
my $leading_block_text_line_length;
my $leading_block_text_line_number;
sub initialize_csc_vars {
%block_leading_text = ();
%block_opening_line_number = ();
$csc_new_statement_ok = 1;
$csc_last_label = "";
%csc_block_label = ();
$rleading_block_if_elsif_text = [];
$accumulating_text_for_block = "";
reset_block_text_accumulator();
return;
}
sub reset_block_text_accumulator {
# save text after 'if' and 'elsif' to append after 'else'
if ($accumulating_text_for_block) {
if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
push @{$rleading_block_if_elsif_text}, $leading_block_text;
}
}
$accumulating_text_for_block = "";
$leading_block_text = "";
$leading_block_text_level = 0;
$leading_block_text_length_exceeded = 0;
$leading_block_text_line_number = 0;
$leading_block_text_line_length = 0;
return;
}
sub set_block_text_accumulator {
my ( $self, $i ) = @_;
$accumulating_text_for_block = $tokens_to_go[$i];
if ( $accumulating_text_for_block !~ /^els/ ) {
$rleading_block_if_elsif_text = [];
}
$leading_block_text = "";
$leading_block_text_level = $levels_to_go[$i];
$leading_block_text_line_number = $self->get_output_line_number();
$leading_block_text_length_exceeded = 0;
# this will contain the column number of the last character
# of the closing side comment
$leading_block_text_line_length =
length($csc_last_label) +
length($accumulating_text_for_block) +
length( $rOpts->{'closing-side-comment-prefix'} ) +
$leading_block_text_level * $rOpts_indent_columns + 3;
return;
}
sub accumulate_block_text {
my ( $self, $i ) = @_;
# accumulate leading text for -csc, ignoring any side comments
if ( $accumulating_text_for_block
&& !$leading_block_text_length_exceeded
&& $types_to_go[$i] ne '#' )
{
my $added_length = $token_lengths_to_go[$i];
$added_length += 1 if $i == 0;
my $new_line_length =
$leading_block_text_line_length + $added_length;
# we can add this text if we don't exceed some limits..
if (
# we must not have already exceeded the text length limit
length($leading_block_text) <
$rOpts_closing_side_comment_maximum_text
# and either:
# the new total line length must be below the line length limit
# or the new length must be below the text length limit
# (ie, we may allow one token to exceed the text length limit)
&& (
$new_line_length <
$maximum_line_length[$leading_block_text_level]
|| length($leading_block_text) + $added_length <
$rOpts_closing_side_comment_maximum_text
)
# UNLESS: we are adding a closing paren before the brace we seek.
# This is an attempt to avoid situations where the ... to be
# added are longer than the omitted right paren, as in:
# foreach my $item (@a_rather_long_variable_name_here) {
# &whatever;
# } ## end foreach my $item (@a_rather_long_variable_name_here...
|| (
$tokens_to_go[$i] eq ')'
&& (
(
$i + 1 <= $max_index_to_go
&& $block_type_to_go[ $i + 1 ] eq
$accumulating_text_for_block
)
|| ( $i + 2 <= $max_index_to_go
&& $block_type_to_go[ $i + 2 ] eq
$accumulating_text_for_block )
)
)
)
{
# add an extra space at each newline
if ( $i == 0 ) { $leading_block_text .= ' ' }
# add the token text
$leading_block_text .= $tokens_to_go[$i];
$leading_block_text_line_length = $new_line_length;
}
# show that text was truncated if necessary
elsif ( $types_to_go[$i] ne 'b' ) {
$leading_block_text_length_exceeded = 1;
$leading_block_text .= '...';
}
}
return;
}
sub accumulate_csc_text {
my ($self) = @_;
# called once per output buffer when -csc is used. Accumulates
# the text placed after certain closing block braces.
# Defines and returns the following for this buffer:
my $block_leading_text = ""; # the leading text of the last '}'
my $rblock_leading_if_elsif_text;
my $i_block_leading_text =
-1; # index of token owning block_leading_text
my $block_line_count = 100; # how many lines the block spans
my $terminal_type = 'b'; # type of last nonblank token
my $i_terminal = 0; # index of last nonblank token
my $terminal_block_type = "";
# update most recent statement label
$csc_last_label = "" unless ($csc_last_label);
if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
my $block_label = $csc_last_label;
# Loop over all tokens of this batch
for my $i ( 0 .. $max_index_to_go ) {
my $type = $types_to_go[$i];
my $block_type = $block_type_to_go[$i];
my $token = $tokens_to_go[$i];
# remember last nonblank token type
if ( $type ne '#' && $type ne 'b' ) {
$terminal_type = $type;
$terminal_block_type = $block_type;
$i_terminal = $i;
}
my $type_sequence = $type_sequence_to_go[$i];
if ( $block_type && $type_sequence ) {
if ( $token eq '}' ) {
# restore any leading text saved when we entered this block
if ( defined( $block_leading_text{$type_sequence} ) ) {
( $block_leading_text, $rblock_leading_if_elsif_text )
= @{ $block_leading_text{$type_sequence} };
$i_block_leading_text = $i;
delete $block_leading_text{$type_sequence};
$rleading_block_if_elsif_text =
$rblock_leading_if_elsif_text;
}
if ( defined( $csc_block_label{$type_sequence} ) ) {
$block_label = $csc_block_label{$type_sequence};
delete $csc_block_label{$type_sequence};
}
# if we run into a '}' then we probably started accumulating
# at something like a trailing 'if' clause..no harm done.
if ( $accumulating_text_for_block
&& $levels_to_go[$i] <= $leading_block_text_level )
{
my $lev = $levels_to_go[$i];
reset_block_text_accumulator();
}
if ( defined( $block_opening_line_number{$type_sequence} ) )
{
my $output_line_number =
$self->get_output_line_number();
$block_line_count =
$output_line_number -
$block_opening_line_number{$type_sequence} + 1;
delete $block_opening_line_number{$type_sequence};
}
else {
# Error: block opening line undefined for this line..
# This shouldn't be possible, but it is not a
# significant problem.
}
}
elsif ( $token eq '{' ) {
my $line_number = $self->get_output_line_number();
$block_opening_line_number{$type_sequence} = $line_number;
# set a label for this block, except for
# a bare block which already has the label
# A label can only be used on the next {
if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
$csc_block_label{$type_sequence} = $csc_last_label;
$csc_last_label = "";
if ( $accumulating_text_for_block
&& $levels_to_go[$i] == $leading_block_text_level )
{
if ( $accumulating_text_for_block eq $block_type ) {
# save any leading text before we enter this block
$block_leading_text{$type_sequence} = [
$leading_block_text,
$rleading_block_if_elsif_text
];
$block_opening_line_number{$type_sequence} =
$leading_block_text_line_number;
reset_block_text_accumulator();
}
else {
# shouldn't happen, but not a serious error.
# We were accumulating -csc text for block type
# $accumulating_text_for_block and unexpectedly
# encountered a '{' for block type $block_type.
}
}
}
}
if ( $type eq 'k'
&& $csc_new_statement_ok
&& $is_if_elsif_else_unless_while_until_for_foreach{$token}
&& $token =~ /$closing_side_comment_list_pattern/ )
{
$self->set_block_text_accumulator($i);
}
else {
# note: ignoring type 'q' because of tricks being played
# with 'q' for hanging side comments
if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
$csc_new_statement_ok =
( $block_type || $type eq 'J' || $type eq ';' );
}
if ( $type eq ';'
&& $accumulating_text_for_block
&& $levels_to_go[$i] == $leading_block_text_level )
{
reset_block_text_accumulator();
}
else {
$self->accumulate_block_text($i);
}
}
}
# Treat an 'else' block specially by adding preceding 'if' and
# 'elsif' text. Otherwise, the 'end else' is not helpful,
# especially for cuddled-else formatting.
if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
$block_leading_text =
$self->make_else_csc_text( $i_terminal, $terminal_block_type,
$block_leading_text, $rblock_leading_if_elsif_text );
}
# if this line ends in a label then remember it for the next pass
$csc_last_label = "";
if ( $terminal_type eq 'J' ) {
$csc_last_label = $tokens_to_go[$i_terminal];
}
return ( $terminal_type, $i_terminal, $i_block_leading_text,
$block_leading_text, $block_line_count, $block_label );
}
sub make_else_csc_text {
# create additional -csc text for an 'else' and optionally 'elsif',
# depending on the value of switch
#
# = 0 add 'if' text to trailing else
# = 1 same as 0 plus:
# add 'if' to 'elsif's if can fit in line length
# add last 'elsif' to trailing else if can fit in one line
# = 2 same as 1 but do not check if exceed line length
#
# $rif_elsif_text = a reference to a list of all previous closing
# side comments created for this if block
#
my ( $self, $i_terminal, $block_type, $block_leading_text,
$rif_elsif_text )
= @_;
my $csc_text = $block_leading_text;
my $rOpts_closing_side_comment_else_flag =
$rOpts->{'closing-side-comment-else-flag'};
if ( $block_type eq 'elsif'
&& $rOpts_closing_side_comment_else_flag == 0 )
{
return $csc_text;
}
my $count = @{$rif_elsif_text};
return $csc_text unless ($count);
my $if_text = '[ if' . $rif_elsif_text->[0];
# always show the leading 'if' text on 'else'
if ( $block_type eq 'else' ) {
$csc_text .= $if_text;
}
# see if that's all
if ( $rOpts_closing_side_comment_else_flag == 0 ) {
return $csc_text;
}
my $last_elsif_text = "";
if ( $count > 1 ) {
$last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
}
# tentatively append one more item
my $saved_text = $csc_text;
if ( $block_type eq 'else' ) {
$csc_text .= $last_elsif_text;
}
else {
$csc_text .= ' ' . $if_text;
}
# all done if no length checks requested
if ( $rOpts_closing_side_comment_else_flag == 2 ) {
return $csc_text;
}
# undo it if line length exceeded
my $length =
length($csc_text) +
length($block_type) +
length( $rOpts->{'closing-side-comment-prefix'} ) +
$levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
if ( $length > $maximum_line_length[$leading_block_text_level] ) {
$csc_text = $saved_text;
}
return $csc_text;
}
} ## end closure accumulate_csc_text
{ ## begin closure balance_csc_text
# Some additional routines for handling the --closing-side-comments option
my %matching_char;
BEGIN {
%matching_char = (
'{' => '}',
'(' => ')',
'[' => ']',
'}' => '{',
')' => '(',
']' => '[',
);
}
sub balance_csc_text {
# Append characters to balance a closing side comment so that editors
# such as vim can correctly jump through code.
# Simple Example:
# input = ## end foreach my $foo ( sort { $b ...
# output = ## end foreach my $foo ( sort { $b ...})
# NOTE: This routine does not currently filter out structures within
# quoted text because the bounce algorithms in text editors do not
# necessarily do this either (a version of vim was checked and
# did not do this).
# Some complex examples which will cause trouble for some editors:
# while ( $mask_string =~ /\{[^{]*?\}/g ) {
# if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
# if ( $1 eq '{' ) {
# test file test1/braces.pl has many such examples.
my ($csc) = @_;
# loop to examine characters one-by-one, RIGHT to LEFT and
# build a balancing ending, LEFT to RIGHT.
for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
my $char = substr( $csc, $pos, 1 );
# ignore everything except structural characters
next unless ( $matching_char{$char} );
# pop most recently appended character
my $top = chop($csc);
# push it back plus the mate to the newest character
# unless they balance each other.
$csc = $csc . $top . $matching_char{$char} unless $top eq $char;
}
# return the balanced string
return $csc;
}
} ## end closure balance_csc_text
sub add_closing_side_comment {
my $self = shift;
my $rLL = $self->[_rLL_];
# add closing side comments after closing block braces if -csc used
my ( $closing_side_comment, $cscw_block_comment );
#---------------------------------------------------------------
# Step 1: loop through all tokens of this line to accumulate
# the text needed to create the closing side comments. Also see
# how the line ends.
#---------------------------------------------------------------
my ( $terminal_type, $i_terminal, $i_block_leading_text,
$block_leading_text, $block_line_count, $block_label )
= $self->accumulate_csc_text();
#---------------------------------------------------------------
# Step 2: make the closing side comment if this ends a block
#---------------------------------------------------------------
my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
# if this line might end in a block closure..
if (
$terminal_type eq '}'
# ..and either
&& (
# the block is long enough
( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
# or there is an existing comment to check
|| ( $have_side_comment
&& $rOpts->{'closing-side-comment-warnings'} )
)
# .. and if this is one of the types of interest
&& $block_type_to_go[$i_terminal] =~
/$closing_side_comment_list_pattern/
# .. but not an anonymous sub
# These are not normally of interest, and their closing braces are
# often followed by commas or semicolons anyway. This also avoids
# possible erratic output due to line numbering inconsistencies
# in the cases where their closing braces terminate a line.
&& $block_type_to_go[$i_terminal] ne 'sub'
# ..and the corresponding opening brace must is not in this batch
# (because we do not need to tag one-line blocks, although this
# should also be caught with a positive -csci value)
&& $mate_index_to_go[$i_terminal] < 0
# ..and either
&& (
# this is the last token (line doesn't have a side comment)
!$have_side_comment
# or the old side comment is a closing side comment
|| $tokens_to_go[$max_index_to_go] =~
/$closing_side_comment_prefix_pattern/
)
)
{
# then make the closing side comment text
if ($block_label) { $block_label .= " " }
my $token =
"$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
# append any extra descriptive text collected above
if ( $i_block_leading_text == $i_terminal ) {
$token .= $block_leading_text;
}
$token = balance_csc_text($token)
if $rOpts->{'closing-side-comments-balanced'};
$token =~ s/\s*$//; # trim any trailing whitespace
# handle case of existing closing side comment
if ($have_side_comment) {
# warn if requested and tokens differ significantly
if ( $rOpts->{'closing-side-comment-warnings'} ) {
my $old_csc = $tokens_to_go[$max_index_to_go];
my $new_csc = $token;
$new_csc =~ s/\s+//g; # trim all whitespace
$old_csc =~ s/\s+//g; # trim all whitespace
$new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
$old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
$new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
my $new_trailing_dots = $1;
$old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
# Patch to handle multiple closing side comments at
# else and elsif's. These have become too complicated
# to check, so if we see an indication of
# '[ if' or '[ # elsif', then assume they were made
# by perltidy.
if ( $block_type_to_go[$i_terminal] eq 'else' ) {
if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
}
elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
}
# if old comment is contained in new comment,
# only compare the common part.
if ( length($new_csc) > length($old_csc) ) {
$new_csc = substr( $new_csc, 0, length($old_csc) );
}
# if the new comment is shorter and has been limited,
# only compare the common part.
if ( length($new_csc) < length($old_csc)
&& $new_trailing_dots )
{
$old_csc = substr( $old_csc, 0, length($new_csc) );
}
# any remaining difference?
if ( $new_csc ne $old_csc ) {
# just leave the old comment if we are below the threshold
# for creating side comments
if ( $block_line_count <
$rOpts->{'closing-side-comment-interval'} )
{
$token = undef;
}
# otherwise we'll make a note of it
else {
warning(
"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
);
# save the old side comment in a new trailing block
# comment
my $timestamp = "";
if ( $rOpts->{'timestamp'} ) {
my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
$year += 1900;
$month += 1;
$timestamp = "$year-$month-$day";
}
$cscw_block_comment =
"## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]";
## "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
}
}
else {
# No differences.. we can safely delete old comment if we
# are below the threshold
if ( $block_line_count <
$rOpts->{'closing-side-comment-interval'} )
{
$token = undef;
$self->unstore_token_to_go()
if ( $types_to_go[$max_index_to_go] eq '#' );
$self->unstore_token_to_go()
if ( $types_to_go[$max_index_to_go] eq 'b' );
}
}
}
# switch to the new csc (unless we deleted it!)
if ($token) {
$tokens_to_go[$max_index_to_go] = $token;
my $K = $K_to_go[$max_index_to_go];
$rLL->[$K]->[_TOKEN_] = $token;
$rLL->[$K]->[_TOKEN_LENGTH_] =
length($token); # NOTE: length no longer important
}
}
# handle case of NO existing closing side comment
else {
# To avoid inserting a new token in the token arrays, we
# will just return the new side comment so that it can be
# inserted just before it is needed in the call to the
# vertical aligner.
$closing_side_comment = $token;
}
}
return ( $closing_side_comment, $cscw_block_comment );
}
############################
# CODE SECTION 15: Summarize
############################
sub wrapup {
# This is the last routine called when a file is formatted.
# Flush buffer and write any informative messages
my $self = shift;
$self->flush();
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->decrement_output_line_number()
; # fix up line number since it was incremented
we_are_at_the_last_line();
my $added_semicolon_count = $self->[_added_semicolon_count_];
my $first_added_semicolon_at = $self->[_first_added_semicolon_at_];
my $last_added_semicolon_at = $self->[_last_added_semicolon_at_];
if ( $added_semicolon_count > 0 ) {
my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
my $what =
( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
write_logfile_entry("$added_semicolon_count $what added:\n");
write_logfile_entry(
" $first at input line $first_added_semicolon_at\n");
if ( $added_semicolon_count > 1 ) {
write_logfile_entry(
" Last at input line $last_added_semicolon_at\n");
}
write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
write_logfile_entry("\n");
}
my $deleted_semicolon_count = $self->[_deleted_semicolon_count_];
my $first_deleted_semicolon_at = $self->[_first_deleted_semicolon_at_];
my $last_deleted_semicolon_at = $self->[_last_deleted_semicolon_at_];
if ( $deleted_semicolon_count > 0 ) {
my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
my $what =
( $deleted_semicolon_count > 1 )
? "semicolons were"
: "semicolon was";
write_logfile_entry(
"$deleted_semicolon_count unnecessary $what deleted:\n");
write_logfile_entry(
" $first at input line $first_deleted_semicolon_at\n");
if ( $deleted_semicolon_count > 1 ) {
write_logfile_entry(
" Last at input line $last_deleted_semicolon_at\n");
}
write_logfile_entry(" (Use -ndsm to prevent semicolon deletion)\n");
write_logfile_entry("\n");
}
my $embedded_tab_count = $self->[_embedded_tab_count_];
my $first_embedded_tab_at = $self->[_first_embedded_tab_at_];
my $last_embedded_tab_at = $self->[_last_embedded_tab_at_];
if ( $embedded_tab_count > 0 ) {
my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
my $what =
( $embedded_tab_count > 1 )
? "quotes or patterns"
: "quote or pattern";
write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
write_logfile_entry(
"This means the display of this script could vary with device or software\n"
);
write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
if ( $embedded_tab_count > 1 ) {
write_logfile_entry(
" Last at input line $last_embedded_tab_at\n");
}
write_logfile_entry("\n");
}
my $first_tabbing_disagreement = $self->[_first_tabbing_disagreement_];
my $last_tabbing_disagreement = $self->[_last_tabbing_disagreement_];
my $tabbing_disagreement_count = $self->[_tabbing_disagreement_count_];
my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
if ($first_tabbing_disagreement) {
write_logfile_entry(
"First indentation disagreement seen at input line $first_tabbing_disagreement\n"
);
}
my $first_btd = $self->[_first_brace_tabbing_disagreement_];
if ($first_btd) {
my $msg =
"First closing brace indentation disagreement started at input line $first_btd\n";
write_logfile_entry($msg);
# leave a hint in the .ERR file if there was a brace error
if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
}
my $in_btd = $self->[_in_brace_tabbing_disagreement_];
if ($in_btd) {
my $msg =
"Ending with brace indentation disagreement which started at input line $in_btd\n";
write_logfile_entry($msg);
# leave a hint in the .ERR file if there was a brace error
if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
}
if ($in_tabbing_disagreement) {
my $msg =
"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n";
write_logfile_entry($msg);
}
else {
if ($last_tabbing_disagreement) {
write_logfile_entry(
"Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
);
}
else {
write_logfile_entry("No indentation disagreement seen\n");
}
}
if ($first_tabbing_disagreement) {
write_logfile_entry(
"Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
);
}
write_logfile_entry("\n");
my $vao = $self->[_vertical_aligner_object_];
$vao->report_anything_unusual();
$file_writer_object->report_line_length_errors();
$self->[_converged_] = $file_writer_object->get_convergence_check()
|| $rOpts->{'indent-only'};
return;
}
} ## end package Perl::Tidy::Formatter
1;
|