// Early register allocation pass.
// Copyright (C) 2023-2024 Free Software Foundation, Inc.
//
// This file is part of GCC.
//
// GCC is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3, or (at your option) any later
// version.
//
// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// This pass implements a simple form of early register allocation.
// It is restricted to FP/SIMD registers, and it only allocates
// a region of FP/SIMD usage if it can do so without any spilling.
// It punts on anything too complicated, leaving it to the real
// register allocator.
//
// There are two main purposes:
//
// (1) The pass runs before scheduling. It therefore has a chance to
// bag a spill-free allocation, if there is one, before scheduling
// moves things around.
//
// (2) The pass can make use of strided register operations, such as the
// strided forms of LD1 and ST1 in SME2.
//
// The allocator works at the level of individual FPRs, rather than whole
// pseudo registers. It is mostly intended to help optimize ACLE code.
//
// The pass is very simplistic. There are many things that could be improved.
#define IN_TARGET_CODE 1
#define INCLUDE_ALGORITHM
#define INCLUDE_FUNCTIONAL
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "df.h"
#include "rtl-ssa.h"
#include "tree-pass.h"
#include "target.h"
#include "expr.h"
#include "cfgrtl.h"
#include "print-rtl.h"
#include "insn-attr.h"
#include "insn-opinit.h"
#include "reload.h"
template<typename T>
class simple_iterator : public wrapper_iterator<T>
{
public:
using wrapper_iterator<T>::wrapper_iterator;
simple_iterator &operator-- () { --this->m_contents; return *this; }
simple_iterator operator-- (int) { return this->m_contents--; }
simple_iterator &operator++ () { ++this->m_contents; return *this; }
simple_iterator operator++ (int) { return this->m_contents++; }
};
using namespace rtl_ssa;
namespace {
const pass_data pass_data_early_ra =
{
RTL_PASS, // type
"early_ra", // name
OPTGROUP_NONE, // optinfo_flags
TV_NONE, // tv_id
0, // properties_required
0, // properties_provided
0, // properties_destroyed
0, // todo_flags_start
TODO_df_finish, // todo_flags_finish
};
using allocno_iterator = simple_iterator;
// Class that represents one run of the pass.
class early_ra
{
public:
early_ra (function *fn);
~early_ra ();
void execute ();
private:
// Whether to test only things that are required for correctness,
// or whether to take optimization heuristics into account as well.
enum test_strictness { CORRECTNESS_ONLY, ALL_REASONS };
static_assert (MAX_RECOG_OPERANDS <= 32, "Operand mask is 32 bits");
using operand_mask = uint32_t;
// Points in the function are represented using "program points".
// The program points are allocated in reverse order, with smaller
// numbers indicating later points. These special values indicate
// the start and end of a region.
static constexpr unsigned int START_OF_REGION = ~0U;
static constexpr unsigned int END_OF_REGION = 0U;
// An invalid allocno index, used to represent no allocno.
static constexpr unsigned int INVALID_ALLOCNO = ~0U;
// Enumerates the single FPR sizes that matter for register allocation.
// Anything smaller than 64 bits is treated as FPR_D.
enum fpr_size_info
{
FPR_D,
FPR_Q,
FPR_Z
};
// A live range for an FPR, containing program points [START_POINT,
// END_POINT]. If ALLOCNO is not INVALID_ALLOCNO, the FPR is known
// to be equal to ALLOCNO for the duration of the live range.
struct fpr_range_info
{
unsigned int start_point;
unsigned int end_point;
unsigned int allocno;
};
// Flags used in pseudo_reg_info.
//
// Whether the pseudo register occurs in one instruction alternative that
// matches (respectively) V0-V7, V0-V15, V0-V31 or a non-FP register.
static constexpr unsigned int ALLOWS_FPR8 = 1U << 0;
static constexpr unsigned int ALLOWS_FPR16 = 1U << 1;
static constexpr unsigned int ALLOWS_FPR32 = 1U << 2;
static constexpr unsigned int ALLOWS_NONFPR = 1U << 3;
//
// Likewise whether the register occurs in an instruction that requires
// the associated register type.
static constexpr unsigned int NEEDS_FPR8 = 1U << 4;
static constexpr unsigned int NEEDS_FPR16 = 1U << 5;
static constexpr unsigned int NEEDS_FPR32 = 1U << 6;
static constexpr unsigned int NEEDS_NONFPR = 1U << 7;
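//
// For example, a pseudo register whose only uses are in operands that
// require V0-V7 gets all three ALLOWS_FPR* flags (V0-V7 being a subset
// of the wider FPR classes) and, since no alternative accepts a non-FP
// register, all three NEEDS_FPR* flags as well.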
//
// Whether the pseudo register is copied to or from a hard FP register.
static constexpr unsigned int HAS_FPR_COPY = 1U << 8;
//
// Whether the pseudo register is copied to or from a hard non-FP register.
static constexpr unsigned int HAS_NONFPR_COPY = 1U << 9;
//
// Whether the pseudo register is used as a multi-register vector operand
// to an instruction that supports strided accesses, and whether it is used
// as a multi-register vector operand in some other non-move instruction.
static constexpr unsigned int HAS_FLEXIBLE_STRIDE = 1U << 10;
static constexpr unsigned int HAS_FIXED_STRIDE = 1U << 11;
// Flags that should be propagated across moves between pseudo registers.
static constexpr unsigned int PSEUDO_COPY_FLAGS = ~(HAS_FLEXIBLE_STRIDE
| HAS_FIXED_STRIDE);
// Information about a copy between two registers.
struct reg_copy_info
{
// The two registers, in order.
unsigned int regnos[2];
// Index I gives the index of the next reg_copy_info involving REGNOS[I],
// or 0 if none.
unsigned int next_copies[2];
};
// Information about a pseudo register.
struct pseudo_reg_info
{
// Flags describing how the register is used, defined above.
unsigned int flags : 16;
// The mode of the pseudo register, cached for convenience.
machine_mode mode : 16;
// The index of the first copy, or 0 if none.
unsigned int first_copy;
};
// Information about a group of allocnos that have a fixed offset
// relative to each other. The allocnos in each group must be allocated
// together.
//
// Allocnos that can share the same hard register are eventually
// chained together. These chains represent edges on a graph of
// allocnos, such that two allocnos joined by an edge use the same FPR.
// These chains are formed between individual allocnos rather than
// whole groups, although the system is required to be self-consistent.
// Each clique in the graph has at least one "full-width" allocno group
// that has one allocno for every FPR that needs to be allocated to
// the clique.
//
// One group of allocnos is chosen as the "color representative" of
// each clique in the graph. This group will be a full-width group.
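//
// For example, if a two-allocno group is chained to part of a four-allocno
// group, both groups belong to the same clique, which then needs four FPRs.
// The four-allocno group is full-width and so can act as the clique's
// color representative.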
struct allocno_info;
struct allocno_group_info
{
array_slice<unsigned int> chain_heads ();
array_slice<allocno_info> allocnos ();
allocno_group_info *color_rep ();
allocno_info *allocno (unsigned int);
// The color representative of the containing clique.
allocno_group_info *m_color_rep;
// The pseudo register associated with this group, or INVALID_REGNUM
// if none.
unsigned int regno;
// The offset of the first allocno (and thus this group) from the start
// of color_rep.
unsigned int color_rep_offset : 8;
// The number of allocnos in the group, and thus the number of FPRs
// that need to be allocated.
unsigned int size : 8;
// The gap between FPRs in the group. This is normally 1, but can be
// higher if we've decided to use strided multi-register accesses.
unsigned int stride : 4;
// Used temporarily while deciding which allocnos should have non-unit
// strides; see find_strided_accesses for details.
int consecutive_pref : 4;
int strided_polarity : 2;
// The largest size of FPR needed by references to the allocno group.
fpr_size_info fpr_size : 2;
// True if all non-move accesses can be converted to strided form.
unsigned int has_flexible_stride : 1;
// True if we've assigned a color index to this group.
unsigned int has_color : 1;
// The mask of FPRs that would make valid choices for the first allocno,
// taking the requirements of all the allocnos in the group into account.
unsigned int fpr_candidates;
// The index of the color that has been assigned to the containing clique.
unsigned int color;
};
// Represents a single FPR-sized quantity that needs to be allocated.
// Each allocno is identified by index (for compactness).
//
// Quantities that span multiple FPRs are assigned groups of consecutive
// allocnos. Quantities that occupy a single FPR are assigned their own
// group.
struct allocno_info
{
allocno_group_info *group ();
bool is_shared ();
bool is_equiv_to (unsigned int);
// The allocno's unique identifier.
unsigned int id;
// The offset of this allocno into the containing group.
unsigned int offset : 8;
// The number of allocnos in the containing group.
unsigned int group_size : 8;
// If the allocno has an affinity with at least one hard register
// (so that choosing that hard register would avoid a copy), this is
// the number of one such hard register, otherwise it is
// FIRST_PSEUDO_REGISTER.
unsigned int hard_regno : 8;
// Set to 1 if the allocno has a single definition or 2 if it has more.
unsigned int num_defs : 2;
// True if, at START_POINT, another allocno is copied to this one.
// See callers of record_copy for what counts as a copy.
unsigned int is_copy_dest : 1;
// True if, at START_POINT, another allocno is copied to this one,
// and if the allocnos at both ends of the copy chain have an affinity
// with the same hard register.
unsigned int is_strong_copy_dest : 1;
// True if, at END_POINT, this allocno is copied to another one,
// and both allocnos have an affinity with the same hard register.
unsigned int is_strong_copy_src : 1;
// True if the allocno is subject to an earlyclobber at END_POINT,
// so that it cannot be tied to the destination of the instruction.
unsigned int is_earlyclobbered : 1;
// True if this allocno is known to be equivalent to related_allocno
// for the whole of this allocno's lifetime.
unsigned int is_equiv : 1;
// The inclusive range of program points spanned by the allocno.
// START_POINT >= END_POINT.
unsigned int start_point;
unsigned int end_point;
// If, at END_POINT, this allocno is copied to another allocno, this
// is the index of that allocno, otherwise it is INVALID_ALLOCNO.
// See callers of record_copy for what counts as a copy.
unsigned int copy_dest;
// If this field is not INVALID_ALLOCNO, it indicates one of two things:
//
// - if is_equiv, this allocno is equivalent to related_allocno for
// the whole of this allocno's lifetime.
//
// - if !is_equiv, this allocno's live range is a subrange of
// related_allocno's and we have committed to making this allocno
// share whatever register related_allocno uses.
unsigned int related_allocno;
union
{
// The program point at which the allocno was last defined,
// or START_OF_REGION if none. This is only used temporarily
// while recording allocnos; after that, chain_next below is
// used instead.
unsigned int last_def_point;
// The next chained allocno in program order (i.e. at lower program
// points), or INVALID_ALLOCNO if none.
unsigned int chain_next;
};
union
{
// The program point before start_point at which the allocno was
// last used, or END_OF_REGION if none. This is only used temporarily
// while recording allocnos; after that, chain_prev below is used
// instead.
unsigned int last_use_point;
// The previous chained allocno in program order (i.e. at higher
// program points), or INVALID_ALLOCNO if none.
unsigned int chain_prev;
};
};
// Information about a full allocno group or a subgroup of it.
// The subgroup can be empty to indicate "none".
struct allocno_subgroup
{
array_slice<allocno_info> allocnos ();
allocno_info *allocno (unsigned int);
// True if a subgroup is present.
operator bool () const { return count; }
// The containing group.
allocno_group_info *group;
// The offset of the subgroup from the start of GROUP.
unsigned int start;
// The number of allocnos in the subgroup.
unsigned int count;
};
// Represents information about a copy between an allocno and an FPR.
// This establishes an affinity between the allocno and the FPR.
struct allocno_copy_info
{
// The allocno involved in the copy.
unsigned int allocno;
// The FPR involved in the copy, relative to V0_REGNUM.
unsigned int fpr : 16;
// A measure of how strong the affinity between the allocno and FPR is.
unsigned int weight : 16;
};
// Information about a possible allocno chain.
struct chain_candidate_info
{
// The candidate target allocno.
allocno_info *allocno;
// A rating of the candidate (higher is better).
int score;
};
// Information about an allocno color.
struct color_info
{
// The color's unique identifier.
int id;
// The allocated hard register, when known.
unsigned int hard_regno;
// The clique's representative group.
allocno_group_info *group;
// The number of FPR preferences recorded in fpr_preferences.
unsigned int num_fpr_preferences;
// Weights in favor of choosing each FPR as the first register for GROUP.
int8_t fpr_preferences[32];
};
template<typename T, typename... Ts>
T *region_allocate (Ts...);
allocno_info *chain_prev (allocno_info *);
allocno_info *chain_next (allocno_info *);
void dump_pseudo_regs ();
void dump_fpr_ranges ();
void dump_copies ();
void dump_allocnos ();
void dump_colors ();
iterator_range get_group_allocnos (unsigned int);
void preprocess_move (rtx, rtx);
void process_pseudo_reg_constraints (rtx_insn *);
void preprocess_insns ();
int fpr_preference (unsigned int);
void propagate_pseudo_reg_info ();
void choose_fpr_pseudos ();
void start_new_region ();
allocno_group_info *create_allocno_group (unsigned int, unsigned int);
allocno_subgroup get_allocno_subgroup (rtx);
void record_fpr_use (unsigned int);
void record_fpr_def (unsigned int);
void record_allocno_use (allocno_info *);
void record_allocno_def (allocno_info *);
allocno_info *find_related_start (allocno_info *, allocno_info *, bool);
void accumulate_defs (allocno_info *, allocno_info *);
void record_copy (rtx, rtx, bool = false);
void record_constraints (rtx_insn *);
void record_artificial_refs (unsigned int);
void record_insn_refs (rtx_insn *);
bool consider_strong_copy_src_chain (allocno_info *);
int strided_polarity_pref (allocno_info *, allocno_info *);
void find_strided_accesses ();
template<typename T>
static int cmp_increasing (const void *, const void *);
bool is_chain_candidate (allocno_info *, allocno_info *, test_strictness);
int rate_chain (allocno_info *, allocno_info *);
static int cmp_chain_candidates (const void *, const void *);
void chain_allocnos (unsigned int &, unsigned int &);
void merge_fpr_info (allocno_group_info *, allocno_group_info *,
unsigned int);
void set_single_color_rep (allocno_info *, allocno_group_info *,
unsigned int);
void set_color_rep (allocno_group_info *, allocno_group_info *,
unsigned int);
bool try_to_chain_allocnos (allocno_info *, allocno_info *);
void create_color (allocno_group_info *);
void form_chains ();
bool fpr_conflicts_with_allocno_p (unsigned int, allocno_info *);
bool call_in_range_p (unsigned int, unsigned int, unsigned int);
unsigned int partial_fpr_clobbers (unsigned int, fpr_size_info);
void process_copies ();
static int cmp_allocation_order (const void *, const void *);
void allocate_colors ();
allocno_info *find_independent_subchain (allocno_info *);
color_info *find_oldest_color (unsigned int, unsigned int);
void broaden_colors ();
void finalize_allocation ();
bool replace_regs (rtx_insn *, df_ref);
int try_enforce_constraints (rtx_insn *, vec<std::pair<int, rtx>> &);
void enforce_constraints (rtx_insn *);
bool maybe_convert_to_strided_access (rtx_insn *);
void apply_allocation ();
void process_region ();
bool is_dead_insn (rtx_insn *);
void process_block (basic_block, bool);
void process_blocks ();
// ----------------------------------------------------------------------
// The function we're operating on.
function *m_fn;
// Information about each pseudo register, indexed by REGNO.
auto_vec<pseudo_reg_info> m_pseudo_regs;
// All recorded register copies.
auto_vec<reg_copy_info> m_pseudo_reg_copies;
// The set of pseudos that we've decided to allocate an FPR to.
auto_bitmap m_fpr_pseudos;
// ----------------------------------------------------------------------
// An obstack for allocating information that is referenced by the member
// variables below.
obstack m_region_obstack;
void *m_region_alloc_start;
// ----------------------------------------------------------------------
// The basic block that we're currently processing.
basic_block m_current_bb;
// The lowest-numbered program point in the current basic block.
unsigned int m_current_bb_point;
// The program point that we're currently processing (described above).
unsigned int m_current_point;
// The set of allocnos that are currently live.
auto_bitmap m_live_allocnos;
// The set of FPRs that are currently live.
unsigned int m_live_fprs;
// A unique one-based identifier for the current region.
unsigned int m_current_region;
// The region in which each FPR was last used, or 0 if none.
unsigned int m_fpr_recency[32];
// ----------------------------------------------------------------------
// A mask of the FPRs that have already been allocated.
unsigned int m_allocated_fprs;
// A mask of the FPRs that must be at least partially preserved by the
// current function.
unsigned int m_call_preserved_fprs;
// True if we haven't yet failed to allocate the current region.
bool m_allocation_successful;
// A map from pseudo registers to the first allocno in their associated
// allocno groups.
hash_map<int_hash<unsigned int, INVALID_REGNUM>,
allocno_group_info *> m_regno_to_group;
// All recorded copies between allocnos and FPRs.
auto_vec<allocno_copy_info> m_allocno_copies;
// All allocnos, by index.
auto_vec<allocno_info *> m_allocnos;
// All allocnos, by increasing START_POINT.
auto_vec<allocno_info *> m_sorted_allocnos;
// Allocnos for which is_shared is true.
auto_vec<allocno_info *> m_shared_allocnos;
// All colors, by index.
auto_vec<color_info *> m_colors;
// The instruction ranges that make up the current region,
// as half-open ranges [LAST, FIRST).
auto_vec<std::pair<rtx_insn *, rtx_insn *>> m_insn_ranges;
// The live ranges of each FPR, in order of increasing program point.
auto_vec<fpr_range_info> m_fpr_ranges[32];
// For each function call id, a list of program points at which a call
// to such a function is made. Each list is in order of increasing
// program point.
auto_vec<unsigned int> m_call_points[NUM_ABI_IDS];
// A list of instructions that can be removed if allocation succeeds.
auto_vec<rtx_insn *> m_dead_insns;
};
// True if PAT is something that would typically be treated as a move.
static inline bool
is_move_set (rtx pat)
{
if (GET_CODE (pat) != SET)
return false;
rtx dest = SET_DEST (pat);
if (SUBREG_P (dest))
dest = SUBREG_REG (dest);
if (!OBJECT_P (dest))
return false;
rtx src = SET_SRC (pat);
if (SUBREG_P (src))
src = SUBREG_REG (src);
if (!OBJECT_P (src) && !CONSTANT_P (src))
return false;
return true;
}
// Return true if operand OP is likely to match OP_ALT after register
// allocation.
static bool
likely_operand_match_p (const operand_alternative &op_alt, rtx op)
{
// Empty constraints match everything.
const char *constraint = op_alt.constraint;
if (constraint[0] == 0 || constraint[0] == ',')
return true;
for (;;)
{
char c = *constraint;
int len = CONSTRAINT_LEN (c, constraint);
if (c == 0 || c == ',')
break;
if (c == 'X')
return true;
auto cn = lookup_constraint (constraint);
switch (get_constraint_type (cn))
{
case CT_REGISTER:
if (REG_P (op) || SUBREG_P (op))
return true;
break;
case CT_MEMORY:
case CT_SPECIAL_MEMORY:
case CT_RELAXED_MEMORY:
if (MEM_P (op))
return true;
break;
case CT_CONST_INT:
case CT_ADDRESS:
case CT_FIXED_FORM:
if (constraint_satisfied_p (op, cn))
return true;
break;
}
constraint += len;
}
if (op_alt.matches >= 0)
{
rtx other = recog_data.operand[op_alt.matches];
if ((REG_P (other) || SUBREG_P (other))
&& (REG_P (op) || SUBREG_P (op)))
return true;
}
return false;
}
// Return true if the operands of the current instruction are likely to
// match OP_ALT.
static bool
likely_alternative_match_p (const operand_alternative *op_alt)
{
for (int i = 0; i < recog_data.n_operands; ++i)
if (!likely_operand_match_p (op_alt[i], recog_data.operand[i]))
return false;
return true;
}
// Return the sum of how disparaged OP_ALT is.
static int
count_rejects (const operand_alternative *op_alt)
{
int reject = 0;
for (int opno = 0; opno < recog_data.n_operands; ++opno)
reject += op_alt[opno].reject;
return reject;
}
// Allocate a T from the region obstack.
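// The memory is released en masse by start_new_region or by the early_ra
// destructor, so no destructors are run (hence the static_assert below).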
template<typename T, typename... Ts>
inline T *
early_ra::region_allocate (Ts... args)
{
static_assert (std::is_trivially_destructible<T>::value,
"destructor won't be called");
void *addr = obstack_alloc (&m_region_obstack, sizeof (T));
return new (addr) T (std::forward<Ts> (args)...);
}
early_ra::early_ra (function *fn) : m_fn (fn), m_live_fprs (0)
{
gcc_obstack_init (&m_region_obstack);
m_region_alloc_start = obstack_alloc (&m_region_obstack, 0);
bitmap_tree_view (m_live_allocnos);
}
early_ra::~early_ra ()
{
obstack_free (&m_region_obstack, nullptr);
}
// Return an array that, for each allocno A in the group, contains the index
// of the allocno at the head of A's chain (that is, the one with the highest
// START_POINT). The index is INVALID_ALLOCNO if the chain is empty.
inline array_slice<unsigned int>
early_ra::allocno_group_info::chain_heads ()
{
auto *start = reinterpret_cast<unsigned int *> (this + 1);
return { start, size };
}
// Return the array of allocnos in the group.
inline array_slice<early_ra::allocno_info>
early_ra::allocno_group_info::allocnos ()
{
gcc_checking_assert (regno != INVALID_REGNUM);
auto *chain_end = reinterpret_cast<unsigned int *> (this + 1) + size;
auto *allocno_start = reinterpret_cast<allocno_info *> (chain_end);
return { allocno_start, size };
}
// Return the group's color representative.
inline early_ra::allocno_group_info *
early_ra::allocno_group_info::color_rep ()
{
gcc_checking_assert (m_color_rep->m_color_rep == m_color_rep);
return m_color_rep;
}
// Return the group that contains the allocno.
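// The group is laid out in memory as an allocno_group_info, followed by
// the array of chain heads, followed by the allocnos themselves (see
// create_allocno_group), so the group header can be found by stepping
// back over GROUP_SIZE chain heads from the group's first allocno.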
inline early_ra::allocno_group_info *
early_ra::allocno_info::group ()
{
auto *chain_end = reinterpret_cast<unsigned int *> (this - offset);
return reinterpret_cast<allocno_group_info *> (chain_end - group_size) - 1;
}
// Return true if this allocno's live range is a subrange of related_allocno's
// and if we have committed to making this allocno share whatever register
// related_allocno uses.
inline bool
early_ra::allocno_info::is_shared ()
{
return related_allocno != INVALID_ALLOCNO && !is_equiv;
}
// Return true if this allocno is known to be equivalent to ALLOCNO.
inline bool
early_ra::allocno_info::is_equiv_to (unsigned int allocno)
{
return is_equiv && related_allocno == allocno;
}
// Return the allocnos in the subgroup.
inline array_slice<early_ra::allocno_info>
early_ra::allocno_subgroup::allocnos ()
{
if (!count)
return {};
return { &group->allocnos ()[start], count };
}
// Return allocno I in the subgroup, with 0 being the first.
inline early_ra::allocno_info *
early_ra::allocno_subgroup::allocno (unsigned int i)
{
return &group->allocnos ()[start + i];
}
// Return the previous (earlier) allocno in ALLOCNO's chain, or null if none.
inline early_ra::allocno_info *
early_ra::chain_prev (allocno_info *allocno)
{
if (allocno->chain_prev != INVALID_ALLOCNO)
return m_allocnos[allocno->chain_prev];
return nullptr;
}
// Return the next (later) allocno in ALLOCNO's chain, or null if none.
inline early_ra::allocno_info *
early_ra::chain_next (allocno_info *allocno)
{
if (allocno->chain_next != INVALID_ALLOCNO)
return m_allocnos[allocno->chain_next];
return nullptr;
}
// Dump the information in m_pseudo_regs.
void
early_ra::dump_pseudo_regs ()
{
fprintf (dump_file, "\nPseudos:\n");
fprintf (dump_file, " %6s %6s %6s %6s %6s %6s %8s %s\n",
"Id", "FPR8", "FPR16", "FPR32", "NONFPR", "Stride",
"FPRness", "Copies");
pseudo_reg_info unused_reg = {};
for (unsigned int regno = FIRST_PSEUDO_REGISTER;
regno < m_pseudo_regs.length (); ++regno)
{
const auto &reg = m_pseudo_regs[regno];
if (memcmp (®, &unused_reg, sizeof (reg)) == 0)
continue;
fprintf (dump_file, " %6d %6s %6s %6s %6s %6s %8d", regno,
reg.flags & NEEDS_FPR8 ? "Req"
: reg.flags & ALLOWS_FPR8 ? "OK" : "-",
reg.flags & NEEDS_FPR16 ? "Req"
: reg.flags & ALLOWS_FPR16 ? "OK" : "-",
reg.flags & NEEDS_FPR32 ? "Req"
: reg.flags & ALLOWS_FPR32 ? "OK" : "-",
reg.flags & NEEDS_NONFPR ? "Req"
: reg.flags & ALLOWS_NONFPR ? "OK" : "-",
~reg.flags & HAS_FLEXIBLE_STRIDE ? "-"
: reg.flags & HAS_FIXED_STRIDE ? "Some" : "All",
fpr_preference (regno));
if (reg.flags & HAS_FPR_COPY)
fprintf (dump_file, " FPR");
if (reg.flags & HAS_NONFPR_COPY)
fprintf (dump_file, " Non-FPR");
unsigned int copyi = reg.first_copy;
while (copyi)
{
const auto &copy = m_pseudo_reg_copies[copyi];
if (copy.regnos[0] == regno)
{
fprintf (dump_file, " r%d", copy.regnos[1]);
copyi = copy.next_copies[0];
}
else
{
fprintf (dump_file, " r%d", copy.regnos[0]);
copyi = copy.next_copies[1];
}
}
fprintf (dump_file, "\n");
}
}
// Dump the information in m_fpr_ranges.
void
early_ra::dump_fpr_ranges ()
{
fprintf (dump_file, "\nFPR live ranges:\n");
for (unsigned int fpr = 0; fpr < 32; ++fpr)
{
auto &intervals = m_fpr_ranges[fpr];
if (intervals.is_empty ())
continue;
fprintf (dump_file, " %2d", fpr);
for (unsigned int i = 0; i < intervals.length (); ++i)
{
auto &interval = intervals[i];
if (i && (i % 4) == 0)
fprintf (dump_file, "\n ");
fprintf (dump_file, " [ %6d %6d ]", interval.start_point,
interval.end_point);
}
fprintf (dump_file, "\n");
}
}
// Dump the information in m_allocno_copies.
void
early_ra::dump_copies ()
{
fprintf (dump_file, "\nCopies:\n");
fprintf (dump_file, " %8s %3s %6s\n",
"Allocno", "FPR", "Weight");
for (const auto &copy : m_allocno_copies)
fprintf (dump_file, " %8d %3d %6d\n", copy.allocno,
copy.fpr, copy.weight);
}
// Dump the information in m_allocnos.
void
early_ra::dump_allocnos ()
{
char buffer[sizeof ("r[:]") + 3 * 3 * sizeof (int) + 1];
fprintf (dump_file, "\nAllocno groups:\n");
fprintf (dump_file,
" %12s %12s %4s %6s %8s %s\n",
"Ids", "Regno", "Size", "Stride", "Cands", "Heads");
for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
{
auto *allocno = m_allocnos[ai];
if (allocno->offset != 0)
continue;
auto *group = allocno->group ();
snprintf (buffer, sizeof (buffer), "[%d:%d]", allocno->id,
allocno->id + group->size - 1);
fprintf (dump_file, " %12s", buffer);
snprintf (buffer, sizeof (buffer), "r%d[0:%d]", group->regno,
group->size - 1);
fprintf (dump_file, " %12s %4s %6d %08x", buffer,
group->fpr_size == FPR_D ? "D"
: group->fpr_size == FPR_Q ? "Q" : "Z",
group->stride,
group->fpr_candidates);
for (auto head : group->chain_heads ())
if (head == INVALID_ALLOCNO)
fprintf (dump_file, " -");
else
fprintf (dump_file, " %d", head);
fprintf (dump_file, "\n");
}
fprintf (dump_file, "\nAllocno chains:\n");
fprintf (dump_file, " %5s %12s %12s %6s %5s %5s %6s %5s\n",
"Id", "Regno", "Range ", "Src", "Dest", "Equiv", "Shared", "FPR");
for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
{
auto *allocno = m_allocnos[ai];
if (allocno->chain_prev != INVALID_ALLOCNO)
continue;
const char *prefix = "=>";
for (;;)
{
auto *group = allocno->group ();
fprintf (dump_file, " %2s", prefix);
fprintf (dump_file, " %5d", allocno->id);
snprintf (buffer, sizeof (buffer), "r%d[%d]", group->regno,
allocno->offset);
fprintf (dump_file, " %12s", buffer);
snprintf (buffer, sizeof (buffer), "[%d,%d]",
allocno->start_point, allocno->end_point);
fprintf (dump_file, " %11s%s %6s", buffer,
allocno->is_earlyclobbered ? "*" : " ",
allocno->is_strong_copy_dest ? "Strong"
: allocno->is_copy_dest ? "Yes" : "-");
if (allocno->copy_dest == INVALID_ALLOCNO)
fprintf (dump_file, " %5s", "-");
else
fprintf (dump_file, " %5d", allocno->copy_dest);
if (allocno->is_equiv)
fprintf (dump_file, " %5d", allocno->related_allocno);
else
fprintf (dump_file, " %5s", "-");
if (allocno->is_shared ())
fprintf (dump_file, " %6d", allocno->related_allocno);
else
fprintf (dump_file, " %6s", "-");
if (allocno->hard_regno == FIRST_PSEUDO_REGISTER)
fprintf (dump_file, " %5s", "-");
else
fprintf (dump_file, " %5s", reg_names[allocno->hard_regno]);
fprintf (dump_file, "\n");
if (allocno->chain_next == INVALID_ALLOCNO)
break;
allocno = m_allocnos[allocno->chain_next];
prefix = "";
}
}
}
// Dump the information in m_colors.
void
early_ra::dump_colors ()
{
fprintf (dump_file, "\nColors:\n");
for (unsigned int i = 0; i < m_colors.length (); ++i)
{
auto *color = m_colors[i];
if (!color->group)
continue;
fprintf (dump_file, " color %d:\n", i);
fprintf (dump_file, " chains:\n");
auto heads = color->group->chain_heads ();
for (unsigned int i = 0; i < color->group->size; ++i)
{
fprintf (dump_file, " %2d:", i);
auto ai = heads[i];
while (ai != INVALID_ALLOCNO)
{
auto *allocno = m_allocnos[ai];
fprintf (dump_file, " r%d[%d]", allocno->group ()->regno,
allocno->offset);
ai = allocno->chain_next;
}
fprintf (dump_file, "\n");
}
fprintf (dump_file, " FPR candidates:");
for (unsigned int fpr = 0; fpr < 32; ++fpr)
fprintf (dump_file, "%s%c", fpr % 8 ? "" : " ",
color->group->fpr_candidates & (1U << fpr) ? 'Y' : '-');
fprintf (dump_file, "\n");
fprintf (dump_file, " FPR preferences:");
for (unsigned int fpr = 0; fpr < 32; ++fpr)
if (color->fpr_preferences[fpr])
fprintf (dump_file, " %d(%d)", fpr, color->fpr_preferences[fpr]);
fprintf (dump_file, "\n");
}
}
// Record any necessary information about a move from SRC to DEST.
void
early_ra::preprocess_move (rtx dest, rtx src)
{
if (SUBREG_P (dest))
dest = SUBREG_REG (dest);
if (!REG_P (dest))
return;
if (SUBREG_P (src))
src = SUBREG_REG (src);
if (!REG_P (src))
return;
// Sort the registers by increasing REGNO.
rtx regs[] = { dest, src };
if (REGNO (dest) > REGNO (src))
std::swap (regs[0], regs[1]);
unsigned int regno0 = REGNO (regs[0]);
unsigned int regno1 = REGNO (regs[1]);
// Ignore moves between hard registers.
if (HARD_REGISTER_NUM_P (regno1))
return;
// For moves between hard registers and pseudos, just record the type
// of hard register involved.
auto &reg1 = m_pseudo_regs[regno1];
reg1.mode = GET_MODE (regs[1]);
if (HARD_REGISTER_NUM_P (regno0))
{
reg1.flags |= (FP_REGNUM_P (regno0) ? HAS_FPR_COPY : HAS_NONFPR_COPY);
return;
}
// Record a move between two pseudo registers.
auto &reg0 = m_pseudo_regs[regno0];
reg0.mode = GET_MODE (regs[0]);
reg_copy_info copy;
copy.regnos[0] = regno0;
copy.regnos[1] = regno1;
copy.next_copies[0] = reg0.first_copy;
copy.next_copies[1] = reg1.first_copy;
reg0.first_copy = reg1.first_copy = m_pseudo_reg_copies.length ();
m_pseudo_reg_copies.safe_push (copy);
}
// Return true if INSN has a multi-vector operand and if that operand
// could be converted to strided form.
static bool
is_stride_candidate (rtx_insn *insn)
{
if (recog_memoized (insn) < 0)
return false;
auto stride_type = get_attr_stride_type (insn);
return (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE
|| stride_type == STRIDE_TYPE_ST1_CONSECUTIVE);
}
// Go through the constraints of INSN, which has already been extracted,
// and record any relevant information about pseudo registers.
void
early_ra::process_pseudo_reg_constraints (rtx_insn *insn)
{
extract_insn (insn);
preprocess_constraints (insn);
// Flags that describe any multi-register vector operands.
unsigned int insn_flags = (is_stride_candidate (insn)
? HAS_FLEXIBLE_STRIDE
: HAS_FIXED_STRIDE);
auto alts = get_preferred_alternatives (insn);
int operand_matches[MAX_RECOG_OPERANDS];
unsigned int operand_flags[MAX_RECOG_OPERANDS];
for (int i = 0; i < recog_data.n_operands; ++i)
{
operand_matches[i] = -1;
operand_flags[i] = 0;
}
// Extract information from the constraints, considering all plausible
// alternatives.
for (int altno = 0; altno < recog_data.n_alternatives; ++altno)
{
if (!(alts & ALTERNATIVE_BIT (altno)))
continue;
auto *op_alt = &recog_op_alt[altno * recog_data.n_operands];
if (!likely_alternative_match_p (op_alt))
continue;
// Use SRC_OPNO's constraints to derive information about DEST_OPNO.
auto record_operand = [&](int src_opno, int dest_opno)
{
int matches = op_alt[src_opno].matches;
if (matches >= 0)
operand_matches[dest_opno] = matches;
auto cl = alternative_class (op_alt, src_opno);
if (cl != NO_REGS)
{
if (reg_class_subset_p (cl, FP_REGS))
operand_flags[dest_opno] |= ALLOWS_FPR32;
if (reg_class_subset_p (cl, FP_LO_REGS))
operand_flags[dest_opno] |= ALLOWS_FPR16;
if (reg_class_subset_p (cl, FP_LO8_REGS))
operand_flags[dest_opno] |= ALLOWS_FPR8;
if (!reg_classes_intersect_p (cl, FP_REGS))
operand_flags[dest_opno] |= ALLOWS_NONFPR;
}
};
for (int i = 0; i < recog_data.n_operands; ++i)
{
record_operand (i, i);
if (recog_data.constraints[i][0] == '%')
{
record_operand (i, i + 1);
record_operand (i + 1, i);
}
}
}
// Process the information we collected above.
for (int i = 0; i < recog_data.n_operands; ++i)
{
rtx op = recog_data.operand[i];
machine_mode orig_mode = GET_MODE (op);
if (SUBREG_P (op))
op = SUBREG_REG (op);
// Record the accumulated information in m_pseudo_regs.
if (REG_P (op) && !HARD_REGISTER_P (op))
{
// The flags so far just describe what at least one alternative
// would accept. Calculate the associated NEEDS_* information.
auto flags = operand_flags[i];
if (!(flags & ALLOWS_FPR32) && (flags & ALLOWS_NONFPR))
flags |= NEEDS_NONFPR;
else if ((flags & ALLOWS_FPR32) && !(flags & ALLOWS_NONFPR))
{
if (flags & ALLOWS_FPR8)
flags |= NEEDS_FPR8;
if (flags & ALLOWS_FPR16)
flags |= NEEDS_FPR16;
flags |= NEEDS_FPR32;
}
// Look for multi-register vector operands.
if (VECTOR_MODE_P (orig_mode)
&& targetm.hard_regno_mode_ok (V0_REGNUM, orig_mode)
&& hard_regno_nregs (V0_REGNUM, orig_mode) > 1)
flags |= insn_flags;
m_pseudo_regs[REGNO (op)].flags |= flags;
m_pseudo_regs[REGNO (op)].mode = GET_MODE (op);
}
// Treat matching constraints as equivalent to moves.
if (operand_matches[i] >= 0)
preprocess_move (recog_data.operand[operand_matches[i]], op);
}
}
// Make one pass through the instructions, collecting information that
// will be needed later.
void
early_ra::preprocess_insns ()
{
m_pseudo_regs.safe_grow_cleared (max_reg_num ());
m_pseudo_reg_copies.safe_push (reg_copy_info ());
for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
if (!NONDEBUG_INSN_P (insn))
continue;
// Mark all registers that occur in addresses as needing a GPR.
vec_rtx_properties properties;
properties.add_insn (insn, true);
for (rtx_obj_reference ref : properties.refs ())
if (ref.is_reg ()
&& ref.in_address ()
&& !HARD_REGISTER_NUM_P (ref.regno))
m_pseudo_regs[ref.regno].flags |= ALLOWS_NONFPR | NEEDS_NONFPR;
if (GET_CODE (PATTERN (insn)) == USE
|| GET_CODE (PATTERN (insn)) == CLOBBER)
continue;
rtx set = single_set (insn);
if (set && is_move_set (set))
preprocess_move (SET_DEST (set), SET_SRC (set));
else
process_pseudo_reg_constraints (insn);
}
}
// Return a signed integer that says (roughly) how strong an affinity
// pseudo register REGNO has with FPRs. A positive value indicates
// that we should try to allocate an FPR, a negative value indicates
// that we shouldn't, and 0 indicates neutrality.
int
early_ra::fpr_preference (unsigned int regno)
{
auto mode = m_pseudo_regs[regno].mode;
auto flags = m_pseudo_regs[regno].flags;
if (mode == VOIDmode || !targetm.hard_regno_mode_ok (V0_REGNUM, mode))
return -3;
else if (flags & HAS_FLEXIBLE_STRIDE)
return 3;
else if (flags & NEEDS_FPR32)
return 2;
else if (!(flags & ALLOWS_FPR32) && (flags & ALLOWS_NONFPR))
return -2;
else if ((flags & HAS_FPR_COPY) && !(flags & HAS_NONFPR_COPY))
return 1;
else if ((flags & HAS_NONFPR_COPY) && !(flags & HAS_FPR_COPY))
return -1;
else
return 0;
}
// Propagate information about pseudo-registers along copy edges,
// where doing so doesn't create conflicting FPR preferences.
void
early_ra::propagate_pseudo_reg_info ()
{
struct stack_entry { unsigned int regno, copyi; };
auto_vec<stack_entry, 32> stack;
for (unsigned int i = FIRST_PSEUDO_REGISTER;
i < m_pseudo_regs.length (); ++i)
{
auto start = m_pseudo_regs[i].first_copy;
if (!start)
continue;
stack.quick_push ({ i, start });
while (!stack.is_empty ())
{
auto entry = stack.pop ();
auto &copy = m_pseudo_reg_copies[entry.copyi];
auto src_regno = entry.regno;
auto dest_regno = (src_regno == copy.regnos[1]
? copy.regnos[0]
: copy.regnos[1]);
auto next_copyi = (src_regno == copy.regnos[1]
? copy.next_copies[1]
: copy.next_copies[0]);
if (next_copyi)
stack.safe_push ({ src_regno, next_copyi });
auto &src_reg = m_pseudo_regs[src_regno];
auto &dest_reg = m_pseudo_regs[dest_regno];
if (src_reg.flags & ~dest_reg.flags & PSEUDO_COPY_FLAGS)
{
auto src_preference = fpr_preference (src_regno);
auto dest_preference = fpr_preference (dest_regno);
if ((src_preference >= 0 && dest_preference >= 0)
|| (src_preference <= 0 && dest_preference <= 0))
{
dest_reg.flags |= (src_reg.flags & PSEUDO_COPY_FLAGS);
stack.safe_push ({ dest_regno, dest_reg.first_copy });
}
}
}
}
}
// Decide which pseudos should be allocated an FPR, setting m_fpr_pseudos
// accordingly.
void
early_ra::choose_fpr_pseudos ()
{
for (unsigned int i = FIRST_PSEUDO_REGISTER;
i < m_pseudo_regs.length (); ++i)
if (fpr_preference (i) > 0)
bitmap_set_bit (m_fpr_pseudos, i);
}
// Clear out information about the previous CFG region (if any)
// and set up the data for a new region.
void
early_ra::start_new_region ()
{
obstack_free (&m_region_obstack, m_region_alloc_start);
m_regno_to_group.empty ();
m_allocno_copies.truncate (0);
m_allocnos.truncate (0);
m_sorted_allocnos.truncate (0);
m_shared_allocnos.truncate (0);
m_colors.truncate (0);
m_insn_ranges.truncate (0);
for (auto &fpr_ranges : m_fpr_ranges)
fpr_ranges.truncate (0);
for (auto &call_points : m_call_points)
call_points.truncate (0);
gcc_assert (bitmap_empty_p (m_live_allocnos) && m_live_fprs == 0);
m_dead_insns.truncate (0);
m_allocated_fprs = 0;
m_call_preserved_fprs = 0;
m_allocation_successful = true;
m_current_region += 1;
}
// Create and return an allocno group of size SIZE for register REGNO.
// REGNO can be INVALID_REGNUM if the group just exists to allow
// other groups to be chained together, and does not have any new
// allocnos of its own.
early_ra::allocno_group_info *
early_ra::create_allocno_group (unsigned int regno, unsigned int size)
{
static_assert (alignof (unsigned int) == alignof (allocno_info),
"allocno_info alignment");
unsigned int num_allocnos = (regno != INVALID_REGNUM ? size : 0);
// Allocate an allocno_group_info, followed by an array of chain heads,
// followed by the allocnos themselves.
size_t alloc_size = (sizeof (allocno_group_info)
+ size * sizeof (unsigned int)
+ num_allocnos * sizeof (allocno_info));
void *data = obstack_alloc (&m_region_obstack, alloc_size);
// Initialize the group.
auto *group = reinterpret_cast<allocno_group_info *> (data);
memset (group, 0, sizeof (*group));
group->m_color_rep = group;
group->regno = regno;
group->size = size;
group->stride = 1;
group->fpr_size = FPR_D;
group->fpr_candidates = ~0U;
// Initialize the chain heads.
auto heads = group->chain_heads ();
for (unsigned int i = 0; i < heads.size (); ++i)
heads[i] = (i < num_allocnos ? m_allocnos.length () + i : INVALID_ALLOCNO);
// Initialize the allocnos.
if (num_allocnos > 0)
{
auto allocnos = group->allocnos ();
memset (allocnos.begin (), 0, num_allocnos * sizeof (allocno_info));
for (unsigned int i = 0; i < num_allocnos; ++i)
{
auto *allocno = &allocnos[i];
allocno->id = m_allocnos.length ();
allocno->offset = i;
allocno->group_size = size;
allocno->hard_regno = FIRST_PSEUDO_REGISTER;
allocno->start_point = END_OF_REGION;
allocno->end_point = START_OF_REGION;
allocno->copy_dest = INVALID_ALLOCNO;
allocno->related_allocno = INVALID_ALLOCNO;
allocno->chain_next = INVALID_ALLOCNO;
allocno->chain_prev = INVALID_ALLOCNO;
m_allocnos.safe_push (allocno);
}
}
return group;
}
// If REG refers to a pseudo register that might be allocated to FPRs,
// return the associated range of allocnos, creating new ones if necessary.
// Return an empty range otherwise.
early_ra::allocno_subgroup
early_ra::get_allocno_subgroup (rtx reg)
{
if (GET_CODE (reg) == SUBREG)
{
allocno_subgroup inner = get_allocno_subgroup (SUBREG_REG (reg));
if (!inner)
return {};
if (!targetm.modes_tieable_p (GET_MODE (SUBREG_REG (reg)),
GET_MODE (reg)))
{
m_allocation_successful = false;
return {};
}
subreg_info info;
subreg_get_info (V0_REGNUM, GET_MODE (SUBREG_REG (reg)),
SUBREG_BYTE (reg), GET_MODE (reg), &info);
if (!info.representable_p)
{
m_allocation_successful = false;
return {};
}
inner.start += info.offset;
inner.count = info.nregs;
return inner;
}
if (!REG_P (reg) || HARD_REGISTER_P (reg))
return {};
unsigned int regno = REGNO (reg);
if (fpr_preference (regno) <= 0)
return {};
unsigned int count = hard_regno_nregs (V0_REGNUM, GET_MODE (reg));
bool existed;
auto &entry = m_regno_to_group.get_or_insert (regno, &existed);
if (!existed)
{
auto *group = create_allocno_group (regno, count);
if (dump_file && (dump_flags & TDF_DETAILS))
{
auto allocnos = group->allocnos ();
fprintf (dump_file, "Creating allocnos [%d:%d] for r%d\n",
allocnos.front ().id, allocnos.back ().id, regno);
}
auto reg_bits = GET_MODE_BITSIZE (GET_MODE (reg));
auto fpr_bits = exact_div (reg_bits, count);
auto flags = m_pseudo_regs[regno].flags;
// Punt for now if there is a choice to be made between using an
// FPR and a non-FPR.
if ((flags & NEEDS_NONFPR)
|| ((flags & ALLOWS_NONFPR)
&& !FLOAT_MODE_P (GET_MODE (reg))
&& !VECTOR_MODE_P (GET_MODE (reg))))
m_allocation_successful = false;
if (flags & ALLOWS_FPR8)
group->fpr_candidates &= 0xff;
else if (flags & ALLOWS_FPR16)
group->fpr_candidates &= 0xffff;
group->fpr_candidates &= ~0U >> (count - 1);
group->has_flexible_stride = ((flags & HAS_FLEXIBLE_STRIDE) != 0
&& (flags & HAS_FIXED_STRIDE) == 0);
group->fpr_size = (maybe_gt (fpr_bits, 128) ? FPR_Z
: maybe_gt (fpr_bits, 64) ? FPR_Q : FPR_D);
entry = group;
}
return { entry, 0, count };
}
// Record a use of FPR REGNO at the current program point, as part of
// a backwards walk over a block.
void
early_ra::record_fpr_use (unsigned int regno)
{
gcc_assert (IN_RANGE (regno, V0_REGNUM, V31_REGNUM));
unsigned int offset = regno - V0_REGNUM;
if (!(m_live_fprs & (1U << offset)))
{
m_fpr_ranges[offset].safe_push ({ START_OF_REGION, m_current_point,
INVALID_ALLOCNO });
m_live_fprs |= 1U << offset;
}
}
// Record a definition of FPR REGNO at the current program point, as part of
// a backwards walk over a block.
void
early_ra::record_fpr_def (unsigned int regno)
{
gcc_assert (IN_RANGE (regno, V0_REGNUM, V31_REGNUM));
unsigned int offset = regno - V0_REGNUM;
// The definition completes the current live range. If the result
// of the definition is used, the live range extends to the last use.
// Otherwise the live range is just a momentary blip at the current point.
auto &ranges = m_fpr_ranges[offset];
if (m_live_fprs & (1U << offset))
{
ranges.last ().start_point = m_current_point;
m_live_fprs &= ~(1U << offset);
}
else
ranges.safe_push ({ m_current_point, m_current_point, INVALID_ALLOCNO });
}
// Record a use of allocno ALLOCNO at the current program point, as part
// of a backwards walk over a block.
void
early_ra::record_allocno_use (allocno_info *allocno)
{
if (allocno->start_point == m_current_point)
return;
gcc_checking_assert (!allocno->is_shared ());
bitmap_set_bit (m_live_allocnos, allocno->id);
if (allocno->end_point > m_current_point)
{
allocno->end_point = m_current_point;
allocno->last_def_point = START_OF_REGION;
allocno->last_use_point = END_OF_REGION;
}
else
allocno->last_use_point = allocno->start_point;
allocno->start_point = m_current_point;
allocno->is_copy_dest = false;
allocno->is_strong_copy_src = false;
allocno->related_allocno = INVALID_ALLOCNO;
allocno->is_equiv = false;
}
// Record a definition of the allocno with index AI at the current program
// point, as part of a backwards walk over a block. The allocno is known
// to be live.
void
early_ra::record_allocno_def (allocno_info *allocno)
{
gcc_checking_assert (!allocno->is_shared ());
allocno->last_use_point = allocno->start_point;
allocno->last_def_point = m_current_point;
allocno->start_point = m_current_point;
allocno->num_defs = MIN (allocno->num_defs + 1, 2);
gcc_checking_assert (!allocno->is_copy_dest
&& !allocno->is_strong_copy_src);
if (!bitmap_clear_bit (m_live_allocnos, allocno->id))
gcc_unreachable ();
}
// SRC_ALLOCNO is copied or tied to DEST_ALLOCNO; IS_EQUIV is true if the
// two allocnos are known to be equal. See whether we can mark a chain of
// allocnos ending at DEST_ALLOCNO as related to SRC_ALLOCNO. Return the
// start of the chain if so, otherwise return null.
//
// If IS_EQUIV, a chain that contains just DEST_ALLOCNO should be treated
// as an equivalence. Otherwise the chain should be shared with SRC_ALLOCNO.
//
// Sharing chains are a rather hacky workaround for the fact that we
// don't collect segmented live ranges, and that in the end we want to do
// simple interval graph coloring.
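//
// For example, if allocno A is copied to allocno B and B's live range is
// nested inside A's, B can simply reuse whatever register is chosen for A
// rather than being allocated separately.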
early_ra::allocno_info *
early_ra::find_related_start (allocno_info *dest_allocno,
allocno_info *src_allocno, bool is_equiv)
{
allocno_info *res = nullptr;
for (;;)
{
if (src_allocno->end_point > dest_allocno->end_point)
// The src allocno dies first.
return res;
if (src_allocno->num_defs != 0)
{
if (dest_allocno->end_point < m_current_bb_point)
// We don't currently track enough information to handle multiple
// definitions across basic block boundaries.
return res;
if (src_allocno->last_def_point >= dest_allocno->end_point)
// There is another definition during the destination's live range.
return res;
}
if (is_equiv)
{
if (dest_allocno->num_defs == 1)
// dest_allocno is equivalent to src_allocno for dest_allocno's
// entire live range. Fall back to that if we can't establish
// a sharing chain.
res = dest_allocno;
}
else
{
if (src_allocno->last_use_point >= dest_allocno->end_point)
// src_allocno is live during dest_allocno's live range,
// and the two allocnos do not necessarily have the same value.
return res;
}
if (dest_allocno->group_size != 1
// Account for definitions by shared registers.
|| dest_allocno->num_defs > 1
|| DF_REG_DEF_COUNT (dest_allocno->group ()->regno) != 1)
// Currently only single allocnos that are defined once can
// share registers with non-equivalent allocnos. This could be
// relaxed, but at the time of writing, aggregates are not valid
// SSA names and so generally only use a single pseudo throughout
// their lifetime.
return res;
if (dest_allocno->copy_dest == src_allocno->id)
// We've found a complete and valid sharing chain.
return dest_allocno;
if (dest_allocno->copy_dest == INVALID_ALLOCNO)
return res;
auto *next_allocno = m_allocnos[dest_allocno->copy_dest];
if (!is_chain_candidate (dest_allocno, next_allocno, ALL_REASONS))
return res;
dest_allocno = next_allocno;
is_equiv = false;
}
}
// Add FROM_ALLOCNO's definition information to TO_ALLOCNO's.
void
early_ra::accumulate_defs (allocno_info *to_allocno,
allocno_info *from_allocno)
{
if (from_allocno->num_defs > 0)
{
to_allocno->num_defs = MIN (from_allocno->num_defs
+ to_allocno->num_defs, 2);
to_allocno->last_def_point = MAX (to_allocno->last_def_point,
from_allocno->last_def_point);
}
}
// Record any relevant allocno-related information for an actual or imagined
// copy from SRC to DEST. FROM_MOVE_P is true if the copy was an explicit
// move instruction, false if it represents one way of satisfying the previous
// instruction's constraints.
void
early_ra::record_copy (rtx dest, rtx src, bool from_move_p)
{
auto dest_range = get_allocno_subgroup (dest);
auto src_range = get_allocno_subgroup (src);
if (from_move_p
&& dest_range
&& REG_P (src)
&& FP_REGNUM_P (REGNO (src)))
{
// A copy from an FPR to an allocno group.
unsigned int fpr = REGNO (src) - V0_REGNUM;
m_allocno_copies.safe_push ({ dest_range.allocno (0)->id, fpr,
dest_range.count });
// If the allocno at the other end of the chain of copies from DEST
// has a copy to the same FPR, record that all intervening copy chains
// could become "strong" ones. This indicates that picking the FPR
// avoids a copy at both ends.
unsigned int hard_regno = REGNO (src);
for (auto &dest_allocno : dest_range.allocnos ())
if (dest_allocno.hard_regno == hard_regno++)
dest_allocno.is_strong_copy_src = true;
}
else if (from_move_p
&& src_range
&& REG_P (dest)
&& FP_REGNUM_P (REGNO (dest)))
{
// A copy from an allocno group to an FPR.
unsigned int fpr = REGNO (dest) - V0_REGNUM;
m_allocno_copies.safe_push ({ src_range.allocno (0)->id, fpr,
src_range.count });
for (auto &src_allocno : src_range.allocnos ())
{
// If the copy comes from a move, see whether the destination
// FPR is known to be equal to the source allocno for the FPR's
// last live range.
if (from_move_p && src_allocno.num_defs == 0)
{
auto &last_range = m_fpr_ranges[fpr].last ();
if (last_range.end_point >= src_allocno.end_point)
last_range.allocno = src_allocno.id;
}
src_allocno.hard_regno = V0_REGNUM + fpr;
fpr += 1;
}
}
else if (src_range && dest_range)
{
// A copy between two allocno groups. We can only have a mismatched
// number of FPRs for imaginary, non-move copies. In that case
// the matching happens on the common lowparts.
gcc_assert (!from_move_p || src_range.count == dest_range.count);
unsigned int count = std::min (src_range.count, dest_range.count);
if (WORDS_BIG_ENDIAN)
{
src_range.start += src_range.count - count;
dest_range.start += dest_range.count - count;
}
src_range.count = count;
dest_range.count = count;
// Ignore (imaginary non-move) copies if the destination is still live.
for (auto &dest_allocno : dest_range.allocnos ())
if (bitmap_bit_p (m_live_allocnos, dest_allocno.id))
return;
for (unsigned int i = 0; i < src_range.count; ++i)
{
auto *dest_allocno = dest_range.allocno (i);
auto *src_allocno = src_range.allocno (i);
if (src_allocno->end_point > dest_allocno->start_point)
{
gcc_assert (src_allocno->copy_dest == INVALID_ALLOCNO
|| src_allocno->copy_dest == dest_allocno->id);
src_allocno->copy_dest = dest_allocno->id;
src_allocno->hard_regno = dest_allocno->hard_regno;
dest_allocno->is_copy_dest = 1;
}
else if (auto *start_allocno = find_related_start (dest_allocno,
src_allocno,
from_move_p))
{
auto *next_allocno = dest_allocno;
for (;;)
{
next_allocno->related_allocno = src_allocno->id;
next_allocno->is_equiv = (start_allocno == dest_allocno
&& from_move_p);
// If we're sharing two allocnos that are not equivalent,
// carry across the definition information. This is needed
// to prevent multiple incompatible attempts to share with
// the same register.
if (next_allocno->is_shared ())
accumulate_defs (src_allocno, next_allocno);
src_allocno->last_use_point
= MAX (src_allocno->last_use_point,
next_allocno->last_use_point);
if (next_allocno == start_allocno)
break;
next_allocno = m_allocnos[next_allocno->copy_dest];
}
}
}
}
}
// Record any relevant allocno-related information about the constraints
// on INSN, which has already been extracted.
void
early_ra::record_constraints (rtx_insn *insn)
{
preprocess_constraints (insn);
int operand_matches[MAX_RECOG_OPERANDS];
for (int i = 0; i < recog_data.n_operands; ++i)
operand_matches[i] = -1;
auto alts = get_preferred_alternatives (insn);
bool any_ok = recog_data.n_alternatives == 0;
// The set of output operands that are earlyclobber in at least one
// alternative.
operand_mask earlyclobber_operands = 0;
// The set of output operands that are matched to inputs in at least
// one alternative.
operand_mask matched_operands = 0;
// The set of output operands that are not matched to inputs in at least
// one alternative.
operand_mask unmatched_operands = 0;
// The set of input operands that are matched to outputs in at least one
// alternative, or that overlap with such an input if the output is not
// earlyclobber. The latter part of the condition copes with things
// like y = x * x, where the first x is tied to the destination, and where
// y is not earlyclobber.
operand_mask matches_operands = 0;
for (int altno = 0; altno < recog_data.n_alternatives; ++altno)
{
if (!(alts & ALTERNATIVE_BIT (altno)))
continue;
auto *op_alt = &recog_op_alt[altno * recog_data.n_operands];
if (!likely_alternative_match_p (op_alt))
continue;
any_ok = true;
// Update the information for operand DEST_OPNO based on the constraint
// information for operand SRC_OPNO. The numbers can be different for
// swapped commutative operands.
auto record_operand = [&](int src_opno, int dest_opno)
{
int matches = op_alt[src_opno].matches;
// A matched earlyclobber cannot be used if the same operand value
// occurs in an unmatched operand. E.g. for y = x * x, a matched
// earlyclobber on the first input does not cover the second input.
if (matches >= 0)
{
rtx op = recog_data.operand[dest_opno];
operand_mask overlaps = 0;
for (int i = 0; i < recog_data.n_operands; ++i)
if (i != dest_opno
&& !recog_data.is_operator[i]
&& recog_data.operand_type[i] != OP_OUT
&& reg_overlap_mentioned_p (op, recog_data.operand[i]))
overlaps |= 1U << i;
if (!op_alt[matches].earlyclobber || overlaps == 0)
{
operand_matches[dest_opno] = matches;
matches_operands |= (1U << dest_opno) | overlaps;
}
}
};
auto reject = count_rejects (op_alt);
for (int opno = 0; opno < recog_data.n_operands; ++opno)
{
operand_mask op_mask = operand_mask (1) << opno;
if (recog_data.operand_type[opno] != OP_IN)
{
if (reject == 0 && op_alt[opno].matched >= 0)
matched_operands |= op_mask;
else
unmatched_operands |= op_mask;
}
if (op_alt[opno].earlyclobber)
earlyclobber_operands |= op_mask;
// Punt for now on scratches. If we wanted to handle them,
// we'd need to create allocnos for them, like IRA does.
rtx op = recog_data.operand[opno];
if (GET_CODE (op) == SCRATCH
&& reg_classes_intersect_p (op_alt[opno].cl, FP_REGS))
m_allocation_successful = false;
// Record filter information, which applies to the first register
// in the operand.
if (auto filters = alternative_register_filters (op_alt, opno))
if (auto range = get_allocno_subgroup (recog_data.operand[opno]))
for (unsigned int fpr = range.start; fpr < 32; ++fpr)
if (!test_register_filters (filters, fpr))
range.group->fpr_candidates &= ~(1U << (fpr - range.start));
if (reject == 0)
{
// Record possible matched operands.
record_operand (opno, opno);
if (recog_data.constraints[opno][0] == '%')
{
record_operand (opno, opno + 1);
record_operand (opno + 1, opno);
}
}
}
}
if (!any_ok)
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, " -- no match\n");
m_allocation_successful = false;
}
// Record if there is an output operand that is never earlyclobber and never
// matched to an input. See the comment below for how this is used.
rtx dest_op = NULL_RTX;
for (int opno = 0; opno < recog_data.n_operands; ++opno)
{
auto op_mask = operand_mask (1) << opno;
if (recog_data.operand_type[opno] == OP_OUT
&& (earlyclobber_operands & op_mask) == 0
&& (matched_operands & op_mask) == 0)
{
dest_op = recog_data.operand[opno];
break;
}
}
for (int opno = 0; opno < recog_data.n_operands; ++opno)
{
auto op_mask = operand_mask (1) << opno;
rtx op = recog_data.operand[opno];
int matches = operand_matches[opno];
// Punt for now on operands that already have a fixed choice of
// register, since we don't have IRA's ability to find an alternative.
// It's better if earlier passes don't create this kind of situation.
if (REG_P (op) && FP_REGNUM_P (REGNO (op)))
m_allocation_successful = false;
// Treat input operands as being earlyclobbered if an output is
// sometimes earlyclobber and if the input never matches an output.
// Do the same if there is an output that is always matched to an
// input, and if this operand doesn't match that input. In both
// cases, tying the input and the output would lead to an impossible
// combination (or at least one that is difficult to reload).
if (recog_data.operand_type[opno] != OP_OUT
&& ((earlyclobber_operands && matches < 0)
|| ((matched_operands & ~unmatched_operands)
&& !(matches_operands & op_mask))))
for (auto &allocno : get_allocno_subgroup (op).allocnos ())
if (allocno.end_point + 1 == m_current_point)
allocno.is_earlyclobbered = true;
// Create copies between operands that can be tied. This (deliberately)
// might add several copies to the same destination register; later code
// can then choose between them based on other criteria.
//
// If there is an output operand that is never matched or earlyclobber,
// and an input operand that never matches an output operand, create
// a tentative copy between them. This allows hard register preferences
// to be transmitted along the copy chains.
if (matches >= 0)
record_copy (recog_data.operand[matches], op);
else if (dest_op && recog_data.operand_type[opno] == OP_IN)
record_copy (dest_op, op);
}
}
// If FLAGS is DF_REF_AT_TOP, model the artificial uses and defs at the
// start of the current basic block, otherwise model the artificial uses
// and defs at the end of the basic block. This is done as part of a
// backwards walk, so defs should be processed before uses.
void
early_ra::record_artificial_refs (unsigned int flags)
{
df_ref ref;
FOR_EACH_ARTIFICIAL_DEF (ref, m_current_bb->index)
if ((DF_REF_FLAGS (ref) & DF_REF_AT_TOP) == flags
&& IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
record_fpr_def (DF_REF_REGNO (ref));
m_current_point += 1;
FOR_EACH_ARTIFICIAL_USE (ref, m_current_bb->index)
if ((DF_REF_FLAGS (ref) & DF_REF_AT_TOP) == flags
&& IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
record_fpr_use (DF_REF_REGNO (ref));
m_current_point += 1;
}
// Model the register references in INSN as part of a backwards walk.
void
early_ra::record_insn_refs (rtx_insn *insn)
{
df_ref ref;
// Record all definitions, excluding partial call clobbers.
FOR_EACH_INSN_DEF (ref, insn)
if (IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
record_fpr_def (DF_REF_REGNO (ref));
else
{
auto range = get_allocno_subgroup (DF_REF_REG (ref));
for (auto &allocno : range.allocnos ())
{
// If the destination is unused, record a momentary blip
// in its live range.
if (!bitmap_bit_p (m_live_allocnos, allocno.id))
record_allocno_use (&allocno);
record_allocno_def (&allocno);
}
}
m_current_point += 1;
// Model the call made by a call insn as a separate phase in the
// evaluation of the insn. Any partial call clobbers happen at that
// point, rather than in the definition or use phase of the insn.
if (auto *call_insn = dyn_cast<rtx_call_insn *> (insn))
{
function_abi abi = insn_callee_abi (call_insn);
m_call_points[abi.id ()].safe_push (m_current_point);
m_current_point += 1;
}
// Record all uses. We can ignore READ_MODIFY_WRITE uses of plain subregs,
// since we track the FPR-sized parts of them individually.
FOR_EACH_INSN_USE (ref, insn)
if (IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
record_fpr_use (DF_REF_REGNO (ref));
else if (!DF_REF_FLAGS_IS_SET (ref, DF_REF_READ_WRITE)
|| DF_REF_FLAGS_IS_SET (ref, DF_REF_STRICT_LOW_PART)
|| DF_REF_FLAGS_IS_SET (ref, DF_REF_ZERO_EXTRACT))
{
auto range = get_allocno_subgroup (DF_REF_REG (ref));
for (auto &allocno : range.allocnos ())
record_allocno_use (&allocno);
}
m_current_point += 1;
}
// ALLOCNO->is_strong_copy_src is true. See whether ALLOCNO heads a
// natural chain that has an affinity with the same hard register at
// both ends.
bool
early_ra::consider_strong_copy_src_chain (allocno_info *allocno)
{
auto *src_allocno = allocno;
while (src_allocno->copy_dest != INVALID_ALLOCNO)
{
auto *dest_allocno = m_allocnos[src_allocno->copy_dest];
if (dest_allocno->start_point > src_allocno->end_point
|| dest_allocno->hard_regno != src_allocno->hard_regno)
return false;
gcc_checking_assert (dest_allocno->is_copy_dest);
src_allocno = dest_allocno;
}
while (allocno->copy_dest != INVALID_ALLOCNO)
{
allocno->is_strong_copy_src = 1;
allocno = m_allocnos[allocno->copy_dest];
allocno->is_strong_copy_dest = 1;
}
return true;
}
// ALLOCNO1 and ALLOCNO2 are linked in some way, and might end up being
// chained together. See whether chaining them requires the containing
// groups to have the same stride, or whether it requires them to have
// different strides. Return 1 if they should have the same stride,
// -1 if they should have different strides, or 0 if it doesn't matter.
int
early_ra::strided_polarity_pref (allocno_info *allocno1,
allocno_info *allocno2)
{
if (allocno1->offset + 1 < allocno1->group_size
&& allocno2->offset + 1 < allocno2->group_size)
{
if (is_chain_candidate (allocno1 + 1, allocno2 + 1, ALL_REASONS))
return 1;
else
return -1;
}
if (allocno1->offset > 0 && allocno2->offset > 0)
{
if (is_chain_candidate (allocno1 - 1, allocno2 - 1, ALL_REASONS))
return 1;
else
return -1;
}
return 0;
}
// Decide which groups should be strided. Also complete "strong" copy chains.
void
early_ra::find_strided_accesses ()
{
// This function forms a graph of allocnos, linked by equivalences and
// natural copy chains. It temporarily uses chain_next to record the
// reverse of equivalence edges (related_allocno) and chain_prev to record
// the reverse of copy edges (copy_dest).
unsigned int allocno_info::*links[] = {
&allocno_info::chain_next,
&allocno_info::chain_prev,
&allocno_info::copy_dest,
&allocno_info::related_allocno
};
// Set up the temporary reverse edges. Check for strong copy chains.
for (unsigned int i = m_allocnos.length (); i-- > 0; )
{
auto *allocno1 = m_allocnos[i];
if (allocno1->copy_dest != INVALID_ALLOCNO)
m_allocnos[allocno1->copy_dest]->chain_prev = allocno1->id;
if (allocno1->related_allocno != INVALID_ALLOCNO)
m_allocnos[allocno1->related_allocno]->chain_next = allocno1->id;
if (allocno1->is_strong_copy_src
&& !allocno1->is_copy_dest
&& !consider_strong_copy_src_chain (allocno1))
allocno1->is_strong_copy_src = false;
}
// Partition the graph into cliques based on edges that have the following
// properties:
//
// - the edge joins two allocnos whose groups have a free choice between
// consecutive and strided allocations.
//
// - the two groups have a relative strided polarity preference (that is
// they should make the same choice between consecutive and strided,
// or they should make opposite choices).
//
// Assign relative polarities to each group connected in this way.
//
// The aim is to discover natural move-free striding choices, which will
// often exist in carefully written ACLE code.
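//
// For example, if the first edge processed joins groups G1 and G2 and
// prefers opposite strides, G1 is assigned polarity 1 and G2 polarity -1.
// A later edge between G2 and a third group G3 that prefers the same
// stride then assigns polarity -1 to G3 as well.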
unsigned int num_edges = m_allocnos.length () * ARRAY_SIZE (links);
auto_sbitmap visited_edges (num_edges);
bitmap_clear (visited_edges);
auto_vec<unsigned int, 32> worklist;
for (unsigned int i = 0; i < num_edges; ++i)
{
if (!bitmap_set_bit (visited_edges, i))
continue;
worklist.quick_push (i);
while (!worklist.is_empty ())
{
auto ei = worklist.pop ();
auto *allocno1 = m_allocnos[ei / ARRAY_SIZE (links)];
auto ai2 = allocno1->*links[ei % ARRAY_SIZE (links)];
if (ai2 == INVALID_ALLOCNO)
continue;
auto *allocno2 = m_allocnos[ai2];
auto *group1 = allocno1->group ();
auto *group2 = allocno2->group ();
if (!group1->has_flexible_stride || !group2->has_flexible_stride)
continue;
int pref = strided_polarity_pref (allocno1, allocno2);
if (pref == 0)
continue;
for (auto *group : { group1, group2 })
for (auto &allocno : group->allocnos ())
for (unsigned int j = 0; j < ARRAY_SIZE (links); ++j)
if (bitmap_set_bit (visited_edges, allocno.id * 4 + j))
worklist.safe_push (allocno.id * 4 + j);
if (group1->strided_polarity)
group2->strided_polarity = group1->strided_polarity * pref;
else if (group2->strided_polarity)
group1->strided_polarity = group2->strided_polarity * pref;
else
{
group1->strided_polarity = 1;
group2->strided_polarity = pref;
}
}
}
// Now look for edges between allocnos in multi-register groups where:
//
// - the two groups have a relative strided polarity preference (as above).
//
// - one group (G1) has a free choice between consecutive and strided
// allocations.
//
// - the other group (G2) must use consecutive allocations.
//
// Update G1's individual preference for strided or consecutive allocations
// based on G2. If the previous loop chose a polarity for G1, work out
// whether it is better for polarity 1 or -1 to correspond to consecutive
// allocation.
int consecutive_pref = 0;
for (unsigned int i = m_allocnos.length (); i-- > 0; )
{
auto *allocno1 = m_allocnos[i];
for (auto link : links)
{
auto ai2 = allocno1->*link;
if (ai2 == INVALID_ALLOCNO)
continue;
auto *allocno2 = m_allocnos[ai2];
auto *group1 = allocno1->group ();
auto *group2 = allocno2->group ();
if (group1->has_flexible_stride == group2->has_flexible_stride)
continue;
int pref = strided_polarity_pref (allocno1, allocno2);
if (pref == 0)
continue;
auto *group = (group1->has_flexible_stride ? group1 : group2);
consecutive_pref += group->strided_polarity * pref;
group->consecutive_pref += pref;
}
}
// If it doesn't matter whether polarity 1 or -1 corresponds to consecutive
// allocation, arbitrarily pick 1.
if (consecutive_pref == 0)
consecutive_pref = 1;
// Record which multi-register groups should use strided allocations.
// Clear out the temporary edges.
for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
{
auto *allocno = m_allocnos[ai];
allocno->chain_prev = INVALID_ALLOCNO;
allocno->chain_next = INVALID_ALLOCNO;
if (allocno->offset != 0)
continue;
auto *group = allocno->group ();
if (!group->has_flexible_stride)
continue;
bool make_strided = (group->strided_polarity
? (consecutive_pref * group->strided_polarity) < 0
: group->consecutive_pref < 0);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Allocno [%d:%d]: strided polarity %d,"
" consecutive pref %d, %s\n",
allocno->id, allocno->id + group->size - 1,
group->strided_polarity, group->consecutive_pref,
make_strided ? "making strided" : "keeping consecutive");
if (!make_strided)
continue;
// 2-register groups have a stride of 8 FPRs and must start in
// registers matching the mask 0x17. 4-register groups have a stride
// of 4 FPRs and must start in registers matching the mask 0x13.
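// In other words, a 2-register group can start at V0-V7 or V16-V23
// (mask 0xff00ff), and a 4-register group can start at V0-V3 or
// V16-V19 (mask 0xf000f).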
group->stride = group->size == 2 ? 8 : 4;
gcc_checking_assert (group->fpr_candidates
== (group->size == 2 ? 0x55555555 : 0x11111111));
group->fpr_candidates = (group->size == 2 ? 0xff00ff : 0xf000f);
}
}
// Compare the allocnos at *ALLOCNO1_PTR and *ALLOCNO2_PTR and return a <=>
// result that puts allocnos in order of increasing FIELD.
template<unsigned int early_ra::allocno_info::*field>
int
early_ra::cmp_increasing (const void *allocno1_ptr, const void *allocno2_ptr)
{
auto *allocno1 = *(allocno_info *const *) allocno1_ptr;
auto *allocno2 = *(allocno_info *const *) allocno2_ptr;
if (allocno1->*field != allocno2->*field)
return allocno1->*field < allocno2->*field ? -1 : 1;
return (allocno1->id < allocno2->id ? -1
: allocno1->id == allocno2->id ? 0 : 1);
}
// Return true if we should consider chaining ALLOCNO1 onto the head
// of ALLOCNO2. STRICTNESS says whether we should take copy-elision
// heuristics into account, or whether we should just consider things
// that matter for correctness.
//
// This is just a local test of the two allocnos; it doesn't guarantee
// that chaining them would give a self-consistent system.
bool
early_ra::is_chain_candidate (allocno_info *allocno1, allocno_info *allocno2,
test_strictness strictness)
{
if (allocno2->is_shared ())
return false;
while (allocno1->is_equiv)
allocno1 = m_allocnos[allocno1->related_allocno];
if (allocno2->start_point >= allocno1->end_point
&& !allocno2->is_equiv_to (allocno1->id))
return false;
if (allocno1->is_earlyclobbered
&& allocno1->end_point == allocno2->start_point + 1)
return false;
if (strictness == ALL_REASONS && allocno2->is_copy_dest)
{
if (allocno1->copy_dest != allocno2->id)
return false;
if (allocno2->is_strong_copy_dest && !allocno1->is_strong_copy_src)
return false;
}
return true;
}
// We're trying to chain allocno ALLOCNO1 to a later allocno.
// Rate how good a choice ALLOCNO2 would be, with higher being better.
int
early_ra::rate_chain (allocno_info *allocno1, allocno_info *allocno2)
{
int score = 0;
if (allocno2->is_strong_copy_dest)
score += 256;
else if (allocno2->is_copy_dest)
score += 128;
// Prefer well-aligned matches.
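// For example, when chaining allocnos from two stride-1 groups whose
// color representatives are both 2 FPRs in size, matching offsets score
// +2 and mismatched offsets score -2.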
auto *group1 = allocno1->group ();
auto *group2 = allocno2->group ();
if (group1->stride == 1 && group2->stride == 1)
{
unsigned int min_size = std::min (group1->color_rep ()->size,
group2->color_rep ()->size);
if ((group1->color_rep_offset + allocno1->offset) % min_size
== (group2->color_rep_offset + allocno2->offset) % min_size)
score += min_size;
else
score -= min_size;
}
return score;
}
// Sort the chain_candidate_infos at ARG1 and ARG2 in order of decreasing
// score.
int
early_ra::cmp_chain_candidates (const void *arg1, const void *arg2)
{
auto &candidate1 = *(const chain_candidate_info *) arg1;
auto &candidate2 = *(const chain_candidate_info *) arg2;
if (candidate1.score != candidate2.score)
return candidate1.score > candidate2.score ? -1 : 1;
// Prefer to increase the gap between uses of the allocated register,
// to give the scheduler more freedom.
auto *allocno1 = candidate1.allocno;
auto *allocno2 = candidate2.allocno;
if (allocno1->start_point != allocno2->start_point)
return allocno1->start_point < allocno2->start_point ? -1 : 1;
if (allocno1 != allocno2)
return allocno1->id < allocno2->id ? -1 : 1;
return 0;
}
// Join the chains of allocnos that start at HEADI1 and HEADI2.
// HEADI1 is either empty or a single allocno.
void
early_ra::chain_allocnos (unsigned int &headi1, unsigned int &headi2)
{
if (headi1 == INVALID_ALLOCNO)
headi1 = headi2;
else if (headi2 == INVALID_ALLOCNO)
headi2 = headi1;
else
{
auto *head1 = m_allocnos[headi1];
auto *head2 = m_allocnos[headi2];
gcc_checking_assert (head1->chain_next == INVALID_ALLOCNO
&& head1->chain_prev == INVALID_ALLOCNO
&& head2->chain_prev == INVALID_ALLOCNO);
if (head1->is_equiv
&& m_allocnos[head1->related_allocno]->copy_dest == headi2)
{
head1->is_copy_dest = head2->is_copy_dest;
head1->is_strong_copy_dest = head2->is_strong_copy_dest;
m_allocnos[head1->related_allocno]->copy_dest = headi1;
}
head1->chain_next = headi2;
head2->chain_prev = headi1;
headi2 = headi1;
}
}
// Add GROUP2's FPR information to GROUP1's, given that GROUP2 starts
// OFFSET allocnos into GROUP1.
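//
// For example, with stride-1 groups and OFFSET 2, GROUP1 can start at
// FPR N only if GROUP2 can start at FPR N + 2, which is what the right
// shift below checks.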
void
early_ra::merge_fpr_info (allocno_group_info *group1,
allocno_group_info *group2,
unsigned int offset)
{
group1->fpr_size = std::max (group1->fpr_size, group2->fpr_size);
group1->fpr_candidates &= (group2->fpr_candidates
>> (offset * group1->stride));
}
// Set the color representative of ALLOCNO's group to REP, such that ALLOCNO
// ends up being at allocno offset REP_OFFSET from the start of REP.
void
early_ra::set_single_color_rep (allocno_info *allocno, allocno_group_info *rep,
unsigned int rep_offset)
{
auto *group = allocno->group ();
if (group->m_color_rep == rep)
return;
group->m_color_rep = rep;
gcc_checking_assert (multiple_p (group->stride, rep->stride));
unsigned int factor = group->stride / rep->stride;
gcc_checking_assert (rep_offset >= allocno->offset * factor);
group->color_rep_offset = rep_offset - allocno->offset * factor;
merge_fpr_info (rep, group, group->color_rep_offset);
}
// REP1 and REP2 are color representatives. Change REP1's color representative
// to REP2, with REP1 starting at allocno offset REP2_OFFSET into REP2.
void
early_ra::set_color_rep (allocno_group_info *rep1, allocno_group_info *rep2,
unsigned int rep2_offset)
{
gcc_checking_assert (rep1 != rep2
&& rep2->m_color_rep == rep2
&& multiple_p (rep1->stride, rep2->stride));
auto heads1 = rep1->chain_heads ();
auto heads2 = rep2->chain_heads ();
for (unsigned int i1 = 0; i1 < heads1.size (); ++i1)
if (heads1[i1] != INVALID_ALLOCNO)
{
unsigned int i2 = rep2_offset + i1 * rep1->stride / rep2->stride;
if (heads2[i2] == INVALID_ALLOCNO)
heads2[i2] = heads1[i1];
else
gcc_checking_assert (heads2[i2] == heads1[i1]);
set_single_color_rep (m_allocnos[heads1[i1]], rep2, i2);
}
}
// Try to chain ALLOCNO1 to the head of the chain starting at ALLOCNO2.
// Return true on success.
bool
early_ra::try_to_chain_allocnos (allocno_info *allocno1,
allocno_info *allocno2)
{
auto *group1 = allocno1->group ()->color_rep ();
auto *group2 = allocno2->group ()->color_rep ();
// Avoid trying to tie different subgroups of the same group. This can
// happen if the parts of a register are defined and used piecemeal.
if (group1 == group2)
return false;
// The stride (in FPRs) between allocnos of each color representative.
auto fpr_stride1 = group1->stride;
auto fpr_stride2 = group2->stride;
// The offset (in FPRs) of each allocno group from its color representative.
auto fpr_offset1 = allocno1->group ()->color_rep_offset * fpr_stride1;
auto fpr_offset2 = allocno2->group ()->color_rep_offset * fpr_stride2;
// The offset (in FPRs) of each allocno from its color representative.
fpr_offset1 += allocno1->offset * allocno1->group ()->stride;
fpr_offset2 += allocno2->offset * allocno2->group ()->stride;
// The FPR overlap is in multiples of the larger stride.
auto max_fpr_stride = std::max (fpr_stride1, fpr_stride2);
auto min_fpr_offset = std::min (fpr_offset1, fpr_offset2);
auto fpr_overlap_offset = ROUND_DOWN (min_fpr_offset, max_fpr_stride);
// The offset (in FPRs) of the start of the overlapping region from
// each color representative.
fpr_offset1 -= fpr_overlap_offset;
fpr_offset2 -= fpr_overlap_offset;
// The number of FPRs in each color representative after the start
// of the overlapping region.
auto fpr_after1 = (group1->size - 1) * fpr_stride1 - fpr_offset1;
auto fpr_after2 = (group2->size - 1) * fpr_stride2 - fpr_offset2;
auto min_fpr_after = std::min (fpr_after1, fpr_after2);
// The number of overlapping allocnos.
auto allocno_overlap_size = min_fpr_after / max_fpr_stride + 1;
// The offset (in allocnos) of the overlapping region from the start
// of each color representative.
auto allocno_offset1 = fpr_offset1 / fpr_stride1;
auto allocno_offset2 = fpr_offset2 / fpr_stride2;
// The stride (in allocnos) between overlapping allocnos.
auto allocno_stride1 = max_fpr_stride / fpr_stride1;
auto allocno_stride2 = max_fpr_stride / fpr_stride2;
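// For example, suppose ALLOCNO1 has offset 2 within a group that is its
// own color representative, with size 4 and stride 1, and ALLOCNO2 has
// offset 0 within a 2-allocno, stride-1 representative. Then
// fpr_offset1 == 2, fpr_offset2 == 0 and allocno_overlap_size == 2, so
// the loops below consider chaining allocnos 2 and 3 of the first
// representative to allocnos 0 and 1 of the second.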
// Reject combinations that are impossible to allocate.
auto fprs1 = group1->fpr_candidates;
auto fprs2 = group2->fpr_candidates;
if (fpr_offset1 > fpr_offset2)
fprs2 >>= (fpr_offset1 - fpr_offset2);
else
fprs1 >>= (fpr_offset2 - fpr_offset1);
if ((fprs1 & fprs2) == 0)
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, " - cannot chain %d->%d, no FPRs in common"
" (%08x@%d and %08x@%d)\n", allocno1->id, allocno2->id,
group1->fpr_candidates, fpr_offset1,
group2->fpr_candidates, fpr_offset2);
return false;
}
// Check whether the chain can be formed.
auto heads1 = group1->chain_heads ();
auto heads2 = group2->chain_heads ();
for (unsigned int i = 0; i < allocno_overlap_size; ++i)
{
auto headi1 = heads1[allocno_offset1 + i * allocno_stride1];
auto headi2 = heads2[allocno_offset2 + i * allocno_stride2];
if (headi1 != INVALID_ALLOCNO && headi2 != INVALID_ALLOCNO)
{
auto *head1 = m_allocnos[headi1];
auto *head2 = m_allocnos[headi2];
if (head1->chain_next != INVALID_ALLOCNO)
return false;
if (!is_chain_candidate (head1, head2, CORRECTNESS_ONLY))
return false;
}
}
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, " - chaining allocnos [");
for (unsigned int i = 0; i < allocno_overlap_size; ++i)
fprintf (dump_file, "%s%d", i ? "," : "",
heads1[allocno_offset1 + i * allocno_stride1]);
fprintf (dump_file, "] and [");
for (unsigned int i = 0; i < allocno_overlap_size; ++i)
fprintf (dump_file, "%s%d", i ? "," : "",
heads2[allocno_offset2 + i * allocno_stride2]);
fprintf (dump_file, "]\n");
}
// Chain the allocnos, updating the chain heads.
for (unsigned int i = 0; i < allocno_overlap_size; ++i)
chain_allocnos (heads1[allocno_offset1 + i * allocno_stride1],
heads2[allocno_offset2 + i * allocno_stride2]);
// Pick a color representative for the merged groups.
allocno_group_info *new_rep;
if (allocno_offset1 == 0
&& group1->size == allocno_overlap_size * allocno_stride1
&& multiple_p (fpr_stride1, fpr_stride2))
{
// The first group fits within the second.
set_color_rep (group1, group2, allocno_offset2);
new_rep = group2;
}
else if (allocno_offset2 == 0
&& group2->size == allocno_overlap_size * allocno_stride2
&& multiple_p (fpr_stride2, fpr_stride1))
{
// The second group fits within the first.
set_color_rep (group2, group1, allocno_offset1);
new_rep = group1;
}
else
{
// We need a new group that is big enough to span both groups.
// The new group always has an FPR stride of 1.
auto max_fpr_offset = std::max (fpr_offset1, fpr_offset2);
auto max_fpr_after = std::max (fpr_after1, fpr_after2);
auto new_size = max_fpr_offset + max_fpr_after + 1;
new_rep = create_allocno_group (INVALID_REGNUM, new_size);
set_color_rep (group1, new_rep, max_fpr_offset - fpr_offset1);
set_color_rep (group2, new_rep, max_fpr_offset - fpr_offset2);
}
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, " - new frontier [");
auto new_heads = new_rep->chain_heads ();
for (unsigned int i = 0; i < new_heads.size (); ++i)
{
if (i)
fprintf (dump_file, ",");
if (new_heads[i] == INVALID_ALLOCNO)
fprintf (dump_file, "-");
else
fprintf (dump_file, "%d", new_heads[i]);
}
fprintf (dump_file, "]\n");
}
return true;
}
// Create a color_info for color representative GROUP.
void
early_ra::create_color (allocno_group_info *group)
{
auto *color = region_allocate<color_info> ();
color->id = m_colors.length ();
color->hard_regno = FIRST_PSEUDO_REGISTER;
color->group = group;
gcc_checking_assert (group->m_color_rep == group);
group->has_color = true;
group->color = m_colors.length ();
m_colors.safe_push (color);
}
// Form allocnos into chains. Create colors for each resulting clique.
void
early_ra::form_chains ()
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "\nChaining allocnos:\n");
// Perform (modified) interval graph coloring. First sort by
// increasing start point.
m_sorted_allocnos.reserve (m_allocnos.length ());
m_sorted_allocnos.splice (m_allocnos);
m_sorted_allocnos.qsort (cmp_increasing<&allocno_info::start_point>);
// During this phase, color representatives are only correct for
// unprocessed allocno groups (where the color representative is
// the group itself) and for groups that contain a current chain head.
unsigned int ti = 0;
auto_vec<chain_candidate_info> candidates;
for (unsigned int hi = 0; hi < m_sorted_allocnos.length (); ++hi)
{
auto *allocno1 = m_sorted_allocnos[hi];
if (allocno1->chain_next != INVALID_ALLOCNO)
continue;
// Record conflicts with direct uses for FPR hard registers.
auto *group1 = allocno1->group ();
for (unsigned int fpr = allocno1->offset; fpr < 32; ++fpr)
if (fpr_conflicts_with_allocno_p (fpr, allocno1))
group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset));
// Record conflicts due to partially call-clobbered registers.
// (Full clobbers are handled by the previous loop.)
for (unsigned int abi_id = 0; abi_id < NUM_ABI_IDS; ++abi_id)
if (call_in_range_p (abi_id, allocno1->start_point,
allocno1->end_point))
{
auto fprs = partial_fpr_clobbers (abi_id, group1->fpr_size);
group1->fpr_candidates &= ~fprs >> allocno1->offset;
}
if (allocno1->is_shared ())
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, " Allocno %d shares the same hard register"
" as allocno %d\n", allocno1->id,
allocno1->related_allocno);
auto *allocno2 = m_allocnos[allocno1->related_allocno];
merge_fpr_info (allocno2->group (), group1, allocno2->offset);
m_shared_allocnos.safe_push (allocno1);
continue;
}
// Find earlier allocnos (in processing order) that could be chained
// to this one.
candidates.truncate (0);
for (unsigned int sci = ti; sci < hi; ++sci)
{
auto *allocno2 = m_sorted_allocnos[sci];
if (allocno2->chain_prev == INVALID_ALLOCNO)
{
if (!is_chain_candidate (allocno1, allocno2, ALL_REASONS))
continue;
chain_candidate_info candidate;
candidate.allocno = allocno2;
candidate.score = rate_chain (allocno1, allocno2);
candidates.safe_push (candidate);
}
else if (sci == ti)
++ti;
}
// Sort the candidates by decreasing score.
candidates.qsort (cmp_chain_candidates);
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, " Chain candidates for %d:", allocno1->id);
for (auto &candidate : candidates)
fprintf (dump_file, " %d(%d)", candidate.allocno->id,
candidate.score);
fprintf (dump_file, "\n");
}
// Pick the first candidate that works.
for (auto &candidate : candidates)
if (try_to_chain_allocnos (allocno1, candidate.allocno))
break;
}
// Create color_infos for each group. Make sure that each group's
// color representative is up to date.
for (unsigned int hi = m_sorted_allocnos.length (); hi-- > 0; )
{
auto *allocno = m_sorted_allocnos[hi];
if (allocno->is_shared ())
continue;
auto *rep = allocno->group ()->color_rep ();
if (rep->has_color)
continue;
create_color (rep);
auto heads = rep->chain_heads ();
for (unsigned int i = 0; i < heads.size (); ++i)
{
unsigned int ai = heads[i];
while (ai != INVALID_ALLOCNO)
{
allocno = m_allocnos[ai];
set_single_color_rep (allocno, rep, i * rep->stride);
ai = allocno->chain_next;
}
}
}
}
// Return true if the given FPR (starting at 0) conflicts with allocno
// ALLOCNO.
bool
early_ra::fpr_conflicts_with_allocno_p (unsigned int fpr,
allocno_info *allocno)
{
auto &ranges = m_fpr_ranges[fpr];
unsigned int start_i = 0;
unsigned int end_i = ranges.length ();
while (start_i < end_i)
{
unsigned int mid_i = (start_i + end_i) / 2;
auto &range = ranges[mid_i];
if (allocno->end_point > range.start_point)
start_i = mid_i + 1;
else if (allocno->start_point < range.end_point)
end_i = mid_i;
else
{
if (range.allocno != allocno->id)
return true;
// The FPR is equivalent to ALLOCNO for this particular range.
// See whether ALLOCNO conflicts with a neighboring range.
if (mid_i > 0
&& ranges[mid_i - 1].start_point >= allocno->end_point)
return true;
if (mid_i + 1 < ranges.length ()
&& ranges[mid_i + 1].end_point <= allocno->start_point)
return true;
return false;
}
}
return false;
}
// Return true if there is a call with ABI identifier ABI_ID in the inclusive
// program point range [START_POINT, END_POINT].
bool
early_ra::call_in_range_p (unsigned int abi_id, unsigned int start_point,
unsigned int end_point)
{
auto &points = m_call_points[abi_id];
unsigned int start_i = 0;
unsigned int end_i = points.length ();
while (start_i < end_i)
{
unsigned int mid_i = (start_i + end_i) / 2;
auto point = points[mid_i];
if (end_point > point)
start_i = mid_i + 1;
else if (start_point < point)
end_i = mid_i;
else
return true;
}
return false;
}
// Return the set of FPRs for which a value of size SIZE will be clobbered
// by a call to a function with ABI identifier ABI_ID, but would not be
// for some smaller size. The set therefore excludes FPRs that are
// fully-clobbered, like V0 in the base ABI.
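// For example, the base ABI preserves only the low 64 bits of V8-V15
// across calls, so for FPR_Q and the SVE vector size the result is
// 0xff00, whereas for FPR_D it is zero.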
unsigned int
early_ra::partial_fpr_clobbers (unsigned int abi_id, fpr_size_info size)
{
auto &abi = function_abis[abi_id];
unsigned int clobbers = 0;
machine_mode mode = (size == FPR_D ? V8QImode
: size == FPR_Q ? V16QImode : VNx16QImode);
for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno)
if (!abi.clobbers_full_reg_p (regno)
&& abi.clobbers_reg_p (mode, regno))
clobbers |= 1U << (regno - V0_REGNUM);
return clobbers;
}
// Process copies between pseudo registers and hard registers and update
// the FPR preferences for the associated colors.
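//
// For example, if a copy links an allocno to V10 and the allocno is
// 2 FPRs from the start of its stride-1 color representative, the
// preference is recorded against starting register V8, since allocating
// the representative to start at V8 would place the allocno in V10.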
void
early_ra::process_copies ()
{
for (auto &copy : m_allocno_copies)
{
auto *allocno = m_allocnos[copy.allocno];
auto *group = allocno->group ();
auto offset = group->color_rep_offset + allocno->offset;
if (offset > copy.fpr)
continue;
unsigned int fpr = copy.fpr - offset;
auto *color = m_colors[group->color_rep ()->color];
color->fpr_preferences[fpr] = MIN (color->fpr_preferences[fpr]
+ copy.weight, 127);
color->num_fpr_preferences += copy.weight;
}
}
// Compare the colors at *COLOR1_PTR and *COLOR2_PTR and return a <=>
// result that puts colors in allocation order.
int
early_ra::cmp_allocation_order (const void *color1_ptr, const void *color2_ptr)
{
auto *color1 = *(color_info *const *) color1_ptr;
auto *color2 = *(color_info *const *) color2_ptr;
// Allocate bigger groups before smaller groups.
if (color1->group->size != color2->group->size)
return color1->group->size > color2->group->size ? -1 : 1;
// Allocate groups with stronger FPR preferences before groups with weaker
// FPR preferences.
if (color1->num_fpr_preferences != color2->num_fpr_preferences)
return color1->num_fpr_preferences > color2->num_fpr_preferences ? -1 : 1;
return (color1->id < color2->id ? -1
: color1->id == color2->id ? 0 : 1);
}
// Allocate a register to each color. If we run out of registers,
// give up on doing a full allocation of the FPR-based pseudos in the
// region.
void
early_ra::allocate_colors ()
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "\nAllocating registers:\n");
auto_vec<color_info *> sorted_colors;
sorted_colors.safe_splice (m_colors);
sorted_colors.qsort (cmp_allocation_order);
for (unsigned int i = 0; i < 32; ++i)
if (!crtl->abi->clobbers_full_reg_p (V0_REGNUM + i))
m_call_preserved_fprs |= 1U << i;
for (auto *color : sorted_colors)
{
unsigned int candidates = color->group->fpr_candidates;
for (unsigned int i = 0; i < color->group->size; ++i)
candidates &= ~(m_allocated_fprs >> i);
unsigned int best = INVALID_REGNUM;
int best_weight = 0;
unsigned int best_recency = 0;
for (unsigned int fpr = 0; fpr <= 32U - color->group->size; ++fpr)
{
if ((candidates & (1U << fpr)) == 0)
continue;
int weight = color->fpr_preferences[fpr];
unsigned int recency = 0;
// Account for registers that the current function must preserve.
for (unsigned int i = 0; i < color->group->size; ++i)
{
if (m_call_preserved_fprs & (1U << (fpr + i)))
weight -= 1;
recency = MAX (recency, m_fpr_recency[fpr + i]);
}
// Prefer higher-numbered registers in the event of a tie.
// This should tend to keep lower-numbered registers free
// for allocnos that require V0-V7 or V0-V15.
if (best == INVALID_REGNUM
|| best_weight < weight
|| (best_weight == weight && recency <= best_recency))
{
best = fpr;
best_weight = weight;
best_recency = recency;
}
}
if (best == INVALID_REGNUM)
{
m_allocation_successful = false;
return;
}
color->hard_regno = best + V0_REGNUM;
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, " Allocating [v%d:v%d] to color %d\n",
best, best + color->group->size - 1, color->id);
m_allocated_fprs |= ((1U << color->group->size) - 1) << best;
}
}
// See if ALLOCNO ends a subchain of single registers that can be split
// off without affecting the rest of the chain, and without introducing
// any moves. Return the start of the chain if so (which might be ALLOCNO
// itself), otherwise return null.
early_ra::allocno_info *
early_ra::find_independent_subchain (allocno_info *allocno)
{
// Make sure ALLOCNO ends a natural subchain.
if (auto *next_allocno = chain_next (allocno))
if (next_allocno->start_point + 1 >= allocno->end_point)
return nullptr;
// Check the allocnos in the purported subchain and find the other end.
for (;;)
{
auto *group = allocno->group ();
if (group->m_color_rep == group)
return nullptr;
if (group->size != 1)
return nullptr;
auto *prev_allocno = chain_prev (allocno);
if (!prev_allocno || allocno->start_point + 1 < prev_allocno->end_point)
return allocno;
allocno = prev_allocno;
}
}
// Search the colors starting at index FIRST_COLOR whose FPRs do not belong
// to FPR_CONFLICTS. Return the first such color that has no group. If all
// such colors have groups, instead return the color with the latest
// (smallest) start point.
early_ra::color_info *
early_ra::find_oldest_color (unsigned int first_color,
unsigned int fpr_conflicts)
{
color_info *best = nullptr;
unsigned int best_start_point = ~0U;
unsigned int best_recency = 0;
for (unsigned int ci = first_color; ci < m_colors.length (); ++ci)
{
auto *color = m_colors[ci];
unsigned int fpr = color->hard_regno - V0_REGNUM;
if (fpr_conflicts & (1U << fpr))
continue;
unsigned int start_point = 0;
if (color->group)
{
auto chain_head = color->group->chain_heads ()[0];
start_point = m_allocnos[chain_head]->start_point;
}
unsigned int recency = m_fpr_recency[fpr];
if (!best
|| best_start_point > start_point
|| (best_start_point == start_point && recency < best_recency))
{
best = color;
best_start_point = start_point;
best_recency = recency;
}
}
return best;
}
// If there are some spare FPRs that can be reused without introducing saves,
// restores, or moves, use them to "broaden" the allocation, in order to give
// the scheduler more freedom. This is particularly useful for forming LDPs
// and STPs.
void
early_ra::broaden_colors ()
{
// Create dummy colors for every leftover FPR that can be used cheaply.
unsigned int first_color = m_colors.length ();
for (unsigned int fpr = 0; fpr < 32; ++fpr)
if (((m_allocated_fprs | m_call_preserved_fprs) & (1U << fpr)) == 0)
{
auto *color = region_allocate<color_info> ();
color->id = m_colors.length ();
color->hard_regno = V0_REGNUM + fpr;
color->group = nullptr;
m_colors.safe_push (color);
}
// Exit early if there are no spare FPRs.
if (first_color == m_colors.length ())
return;
// Go through the allocnos in order, seeing if there is a subchain of
// single-FPR allocnos that can be split off from the containing clique.
// Allocate such subchains to the new colors on an oldest-first basis.
for (auto *allocno : m_sorted_allocnos)
if (auto *start_allocno = find_independent_subchain (allocno))
{
unsigned int fpr_conflicts = 0;
auto *member = allocno;
for (;;)
{
fpr_conflicts |= ~member->group ()->fpr_candidates;
if (member == start_allocno)
break;
member = m_allocnos[member->chain_prev];
}
auto *color = find_oldest_color (first_color, fpr_conflicts);
if (!color)
continue;
if (!color->group)
{
auto *group = allocno->group ();
color->group = group;
group->color = color->id;
group->chain_heads ()[0] = INVALID_ALLOCNO;
}
else
{
auto chain_head = color->group->chain_heads ()[0];
auto start_point = m_allocnos[chain_head]->start_point;
if (start_point >= allocno->end_point)
// Allocating to COLOR isn't viable, and it was the best
// option available.
continue;
auto *next_allocno = chain_next (allocno);
if (!next_allocno || next_allocno->start_point <= start_point)
// The current allocation gives at least as much scheduling
// freedom as COLOR would.
continue;
}
// Unlink the chain.
if (auto *next_allocno = chain_next (allocno))
next_allocno->chain_prev = start_allocno->chain_prev;
if (auto *prev_allocno = chain_prev (start_allocno))
prev_allocno->chain_next = allocno->chain_next;
// Make the subchain use COLOR.
allocno->chain_next = color->group->chain_heads ()[0];
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Moving to optional color %d (register %s):",
color->id, reg_names[color->hard_regno]);
for (;;)
{
auto *group = allocno->group ();
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, " r%d", group->regno);
group->m_color_rep = color->group;
group->color_rep_offset = 0;
if (allocno == start_allocno)
break;
allocno = m_allocnos[allocno->chain_prev];
}
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "\n");
color->group->chain_heads ()[0] = start_allocno->id;
}
}
// Record the final choice of hard register for each allocno.
void
early_ra::finalize_allocation ()
{
for (auto *color : m_colors)
if (color->group)
{
unsigned int fpr = color->hard_regno - V0_REGNUM;
for (unsigned int i = 0; i < color->group->size; ++i)
m_fpr_recency[fpr + i] = m_current_region;
}
for (auto *allocno : m_allocnos)
{
if (allocno->is_shared ())
continue;
auto *group = allocno->group ();
auto *rep = group->color_rep ();
auto rep_regno = m_colors[rep->color]->hard_regno;
auto group_regno = rep_regno + group->color_rep_offset;
allocno->hard_regno = group_regno + allocno->offset * group->stride;
}
for (auto *allocno : m_shared_allocnos)
allocno->hard_regno = m_allocnos[allocno->related_allocno]->hard_regno;
}
// Replace any allocno references in REFS with the allocated register.
// INSN is the instruction that contains REFS.
bool
early_ra::replace_regs (rtx_insn *insn, df_ref refs)
{
bool changed = false;
for (df_ref ref = refs; ref; ref = DF_REF_NEXT_LOC (ref))
{
auto range = get_allocno_subgroup (DF_REF_REG (ref));
if (!range)
continue;
auto new_regno = range.allocno (0)->hard_regno;
if (new_regno == FIRST_PSEUDO_REGISTER)
{
// Reset a debug instruction if, after DCE, the only remaining
// references to a register are in such instructions.
gcc_assert (DEBUG_INSN_P (insn));
INSN_VAR_LOCATION_LOC (insn) = gen_rtx_UNKNOWN_VAR_LOC ();
return true;
}
*DF_REF_LOC (ref) = gen_rtx_REG (GET_MODE (DF_REF_REG (ref)), new_regno);
changed = true;
}
return changed;
}
// Try to make INSN match its FPR-related constraints. If this needs
// a source operand (SRC) to be copied to a destination operand (DEST)
// before INSN, add the associated (DEST, SRC) pairs to MOVES.
//
// Return -1 on failure, otherwise return a ?/!-style reject count.
// The reject count doesn't model the moves, just the internal alternative
// preferences.
int
early_ra::try_enforce_constraints (rtx_insn *insn,
vec<std::pair<int, int>> &moves)
{
if (!constrain_operands (0, get_preferred_alternatives (insn)))
return -1;
// Pick the alternative with the lowest cost.
int best = -1;
auto alts = get_preferred_alternatives (insn);
for (int altno = 0; altno < recog_data.n_alternatives; ++altno)
{
if (!(alts & ALTERNATIVE_BIT (altno)))
continue;
auto *op_alt = &recog_op_alt[altno * recog_data.n_operands];
if (!likely_alternative_match_p (op_alt))
continue;
auto_vec<std::pair<int, int>, 4> new_moves;
for (int opno = 0; opno < recog_data.n_operands; ++opno)
{
rtx op = recog_data.operand[opno];
if (REG_P (op)
&& FP_REGNUM_P (REGNO (op))
&& op_alt[opno].matched >= 0)
{
rtx old_src = recog_data.operand[op_alt[opno].matched];
if (!operands_match_p (op, old_src))
{
for (int i = 0; i < recog_data.n_operands; ++i)
if (i != opno)
{
rtx other = recog_data.operand[i];
if (reg_overlap_mentioned_p (op, other))
{
old_src = NULL_RTX;
break;
}
}
if (!old_src)
continue;
new_moves.safe_push ({ opno, op_alt[opno].matched });
}
}
}
int cost = count_rejects (op_alt) + new_moves.length () * 7;
if (best < 0 || cost < best)
{
best = cost;
moves.truncate (0);
moves.safe_splice (new_moves);
}
}
return best;
}
// Make INSN match its FPR-related constraints.
void
early_ra::enforce_constraints (rtx_insn *insn)
{
extract_insn (insn);
preprocess_constraints (insn);
// First try with the operands they are.
auto_vec<std::pair<int, int>, 4> moves;
int cost = try_enforce_constraints (insn, moves);
// Next try taking advantage of commutativity.
for (int opno = 0; opno < recog_data.n_operands - 1; ++opno)
if (recog_data.constraints[opno][0] == '%')
{
std::swap (*recog_data.operand_loc[opno],
*recog_data.operand_loc[opno + 1]);
std::swap (recog_data.operand[opno],
recog_data.operand[opno + 1]);
auto_vec<std::pair<int, int>, 4> swapped_moves;
int swapped_cost = try_enforce_constraints (insn, swapped_moves);
if (swapped_cost >= 0 && (cost < 0 || swapped_cost < cost))
{
cost = swapped_cost;
moves.truncate (0);
moves.safe_splice (swapped_moves);
}
else
{
std::swap (*recog_data.operand_loc[opno],
*recog_data.operand_loc[opno + 1]);
std::swap (recog_data.operand[opno],
recog_data.operand[opno + 1]);
}
}
// The allocation should ensure that there is at least one valid combination.
// It's too late to back out now if not.
gcc_assert (cost >= 0);
for (int i = 0; i < recog_data.n_dups; ++i)
{
int dup_of = recog_data.dup_num[i];
rtx new_op = *recog_data.operand_loc[dup_of];
if (new_op != recog_data.operand[dup_of])
*recog_data.dup_loc[i] = copy_rtx (new_op);
}
for (auto move : moves)
{
int dest_opno = move.first;
int src_opno = move.second;
rtx dest = recog_data.operand[dest_opno];
rtx old_src = recog_data.operand[src_opno];
rtx new_src = lowpart_subreg (GET_MODE (old_src), dest, GET_MODE (dest));
emit_insn_before (gen_move_insn (new_src, old_src), insn);
*recog_data.operand_loc[src_opno] = new_src;
}
}
// See whether INSN is an instruction that operates on multi-register vectors,
// and if we have decided to make it use strided rather than consecutive
// accesses. Update the pattern and return true if so.
bool
early_ra::maybe_convert_to_strided_access (rtx_insn *insn)
{
if (!NONJUMP_INSN_P (insn) || recog_memoized (insn) < 0)
return false;
auto stride_type = get_attr_stride_type (insn);
rtx pat = PATTERN (insn);
rtx op;
if (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE)
op = SET_DEST (pat);
else if (stride_type == STRIDE_TYPE_ST1_CONSECUTIVE)
op = XVECEXP (SET_SRC (pat), 0, 1);
else
return false;
auto range = get_allocno_subgroup (op);
if (!range || range.group->stride == 1)
return false;
gcc_assert (range.start == 0 && range.count == range.group->size);
auto elt_mode = GET_MODE_INNER (GET_MODE (op));
auto single_mode = aarch64_full_sve_mode (elt_mode).require ();
auto_vec regs;
for (unsigned int i = 0; i < range.count; ++i)
regs.quick_push (gen_rtx_REG (single_mode, range.allocno (i)->hard_regno));
extract_insn (insn);
if (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE)
{
auto unspec = XINT (SET_SRC (pat), 1);
if (range.count == 2)
pat = gen_aarch64_strided2 (unspec, GET_MODE (op), regs[0], regs[1],
recog_data.operand[1],
recog_data.operand[2]);
else
pat = gen_aarch64_strided4 (unspec, GET_MODE (op),
regs[0], regs[1], regs[2], regs[3],
recog_data.operand[1],
recog_data.operand[2]);
}
else if (stride_type == STRIDE_TYPE_ST1_CONSECUTIVE)
{
auto unspec = XINT (SET_SRC (pat), 1);
if (range.count == 2)
pat = gen_aarch64_strided2 (unspec, GET_MODE (op),
recog_data.operand[0],
recog_data.operand[2], regs[0], regs[1]);
else
pat = gen_aarch64_strided4 (unspec, GET_MODE (op),
recog_data.operand[0],
recog_data.operand[2],
regs[0], regs[1], regs[2], regs[3]);
// Ensure correct sharing for the source memory.
//
// ??? Why doesn't the generator get this right?
XVECEXP (SET_SRC (pat), 0, XVECLEN (SET_SRC (pat), 0) - 1)
= *recog_data.dup_loc[0];
}
else
gcc_unreachable ();
PATTERN (insn) = pat;
INSN_CODE (insn) = -1;
df_insn_rescan (insn);
return true;
}
// We've successfully allocated the current region. Apply the allocation
// to the instructions.
void
early_ra::apply_allocation ()
{
for (auto *insn : m_dead_insns)
set_insn_deleted (insn);
rtx_insn *prev;
for (auto insn_range : m_insn_ranges)
for (rtx_insn *insn = insn_range.first;
insn != insn_range.second;
insn = prev)
{
prev = PREV_INSN (insn);
if (!INSN_P (insn))
continue;
bool changed = maybe_convert_to_strided_access (insn);
changed |= replace_regs (insn, DF_INSN_DEFS (insn));
changed |= replace_regs (insn, DF_INSN_USES (insn));
if (changed && NONDEBUG_INSN_P (insn))
{
if (GET_CODE (PATTERN (insn)) != USE
&& GET_CODE (PATTERN (insn)) != CLOBBER
&& !is_move_set (PATTERN (insn)))
enforce_constraints (insn);
// A REG_EQUIV note establishes an equivalence throughout
// the function, but here we're reusing hard registers for
// multiple pseudo registers. We also no longer need REG_EQUIV
// notes that record potential spill locations, since we've
// allocated the pseudo register without spilling.
rtx *ptr = &REG_NOTES (insn);
while (*ptr)
if (REG_NOTE_KIND (*ptr) == REG_EQUIV)
*ptr = XEXP (*ptr, 1);
else
ptr = &XEXP (*ptr, 1);
}
changed |= replace_regs (insn, DF_INSN_EQ_USES (insn));
if (changed)
df_insn_rescan (insn);
}
for (auto *insn : m_dead_insns)
delete_insn (insn);
}
// Try to allocate the current region. Update the instructions if successful.
void
early_ra::process_region ()
{
for (auto *allocno : m_allocnos)
{
allocno->chain_next = INVALID_ALLOCNO;
allocno->chain_prev = INVALID_ALLOCNO;
}
if (dump_file && (dump_flags & TDF_DETAILS))
{
dump_fpr_ranges ();
dump_copies ();
dump_allocnos ();
}
find_strided_accesses ();
if (dump_file && (dump_flags & TDF_DETAILS))
dump_allocnos ();
form_chains ();
if (dump_file && (dump_flags & TDF_DETAILS))
dump_allocnos ();
process_copies ();
if (dump_file && (dump_flags & TDF_DETAILS))
dump_colors ();
allocate_colors ();
if (!m_allocation_successful)
return;
broaden_colors ();
finalize_allocation ();
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "\nAllocation successful\nFinal allocation:\n");
dump_allocnos ();
dump_colors ();
}
apply_allocation ();
}
// Return true if INSN would become dead if we successfully allocate the
// current region.
bool
early_ra::is_dead_insn (rtx_insn *insn)
{
rtx set = single_set (insn);
if (!set)
return false;
rtx dest = SET_DEST (set);
auto dest_range = get_allocno_subgroup (dest);
if (!dest_range)
return false;
for (auto &allocno : dest_range.allocnos ())
if (bitmap_bit_p (m_live_allocnos, allocno.id))
return false;
if (side_effects_p (set))
return false;
return true;
}
// Build up information about block BB. IS_ISOLATED is true if the
// block is not part of a larger region.
void
early_ra::process_block (basic_block bb, bool is_isolated)
{
m_current_bb = bb;
m_current_point += 1;
m_current_bb_point = m_current_point;
// Process live-out FPRs.
bitmap live_out = df_get_live_out (bb);
for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno)
if (bitmap_bit_p (live_out, regno))
record_fpr_use (regno);
// Process live-out allocnos. We don't track individual FPR liveness
// across block boundaries, so we have to assume that the whole pseudo
// register is live.
bitmap_iterator bi;
unsigned int regno;
EXECUTE_IF_AND_IN_BITMAP (df_get_live_out (bb), m_fpr_pseudos,
FIRST_PSEUDO_REGISTER, regno, bi)
{
auto range = get_allocno_subgroup (regno_reg_rtx[regno]);
for (auto &allocno : range.allocnos ())
record_allocno_use (&allocno);
}
m_current_point += 1;
record_artificial_refs (0);
bool is_first = true;
rtx_insn *start_insn = BB_END (bb);
rtx_insn *insn;
FOR_BB_INSNS_REVERSE (bb, insn)
{
if (!NONDEBUG_INSN_P (insn))
continue;
// CLOBBERs are used to prevent pseudos from being upwards exposed.
// We can ignore them if allocation is successful.
if (GET_CODE (PATTERN (insn)) == CLOBBER)
{
if (get_allocno_subgroup (XEXP (PATTERN (insn), 0)))
m_dead_insns.safe_push (insn);
continue;
}
if (dump_file && (dump_flags & TDF_DETAILS))
{
if (is_first)
fprintf (dump_file, "\nBlock %d:\n", bb->index);
fprintf (dump_file, "%6d:", m_current_point);
pretty_printer rtl_slim_pp;
rtl_slim_pp.buffer->stream = dump_file;
print_insn (&rtl_slim_pp, insn, 1);
pp_flush (&rtl_slim_pp);
fprintf (dump_file, "\n");
}
is_first = false;
if (is_dead_insn (insn))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "%14s -- dead\n", "");
m_dead_insns.safe_push (insn);
}
else
{
record_insn_refs (insn);
rtx pat = PATTERN (insn);
if (is_move_set (pat))
record_copy (SET_DEST (pat), SET_SRC (pat), true);
else
{
extract_insn (insn);
record_constraints (insn);
}
}
// See whether we have a complete region, with no remaining live
// allocnos.
if (is_isolated
&& bitmap_empty_p (m_live_allocnos)
&& m_live_fprs == 0
&& m_allocation_successful
&& !m_allocnos.is_empty ())
{
rtx_insn *prev_insn = PREV_INSN (insn);
m_insn_ranges.safe_push ({ start_insn, prev_insn });
process_region ();
start_new_region ();
is_first = true;
start_insn = prev_insn;
}
}
m_insn_ranges.safe_push ({ start_insn, BB_HEAD (bb) });
record_artificial_refs (DF_REF_AT_TOP);
// Process live-in FPRs.
bitmap live_in = df_get_live_in (bb);
for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno)
if (bitmap_bit_p (live_in, regno)
&& (m_live_fprs & (1U << (regno - V0_REGNUM))))
record_fpr_def (regno);
// Process live-in allocnos.
EXECUTE_IF_AND_IN_BITMAP (live_in, m_fpr_pseudos,
FIRST_PSEUDO_REGISTER, regno, bi)
{
auto range = get_allocno_subgroup (regno_reg_rtx[regno]);
for (auto &allocno : range.allocnos ())
if (bitmap_bit_p (m_live_allocnos, allocno.id))
record_allocno_def (&allocno);
}
m_current_point += 1;
bitmap_clear (m_live_allocnos);
m_live_fprs = 0;
}
// Divide the function into regions, such that no edges into or out of
// a region have live "FPR pseudos".
void
early_ra::process_blocks ()
{
auto_sbitmap visited (last_basic_block_for_fn (m_fn));
auto_sbitmap fpr_pseudos_live_out (last_basic_block_for_fn (m_fn));
auto_sbitmap fpr_pseudos_live_in (last_basic_block_for_fn (m_fn));
bitmap_clear (visited);
bitmap_clear (fpr_pseudos_live_out);
bitmap_clear (fpr_pseudos_live_in);
// Record which blocks have live FPR pseudos on entry and exit.
basic_block bb;
FOR_EACH_BB_FN (bb, m_fn)
{
if (bitmap_intersect_p (df_get_live_out (bb), m_fpr_pseudos))
bitmap_set_bit (fpr_pseudos_live_out, bb->index);
if (bitmap_intersect_p (df_get_live_in (bb), m_fpr_pseudos))
bitmap_set_bit (fpr_pseudos_live_in, bb->index);
}
// This is incremented by 1 at the start of each region.
m_current_region = 0;
memset (m_fpr_recency, 0, sizeof (m_fpr_recency));
struct stack_node { edge_iterator ei; basic_block bb; };
auto_vec<stack_node, 32> stack;
auto_vec<basic_block> region;
// Go through the function in reverse postorder and process the region
// containing each block.
unsigned int n_blocks = df_get_n_blocks (DF_FORWARD);
int *order = df_get_postorder (DF_FORWARD);
for (unsigned int bbi = 0; bbi < n_blocks; ++bbi)
{
basic_block bb = BASIC_BLOCK_FOR_FN (m_fn, order[bbi]);
if (bb->index < NUM_FIXED_BLOCKS)
continue;
if (!bitmap_set_bit (visited, bb->index))
continue;
// Process forward edges before backward edges (so push backward
// edges first). Build the region in an approximation of reverse
// program order.
if (bitmap_bit_p (fpr_pseudos_live_in, bb->index))
stack.quick_push ({ ei_start (bb->preds), nullptr });
if (bitmap_bit_p (fpr_pseudos_live_out, bb->index))
stack.quick_push ({ ei_start (bb->succs), bb });
else
region.safe_push (bb);
while (!stack.is_empty ())
{
auto &node = stack.last ();
if (ei_end_p (node.ei))
{
if (node.bb)
region.safe_push (node.bb);
stack.pop ();
continue;
}
edge e = ei_edge (node.ei);
if (node.bb)
{
// A forward edge from a node that has not yet been added
// to region.
if (bitmap_bit_p (fpr_pseudos_live_in, e->dest->index)
&& bitmap_set_bit (visited, e->dest->index))
{
stack.safe_push ({ ei_start (e->dest->preds), nullptr });
if (bitmap_bit_p (fpr_pseudos_live_out, e->dest->index))
stack.safe_push ({ ei_start (e->dest->succs), e->dest });
else
region.safe_push (e->dest);
}
else
ei_next (&node.ei);
}
else
{
// A backward edge from a node that has already been added
// to the region.
if (bitmap_bit_p (fpr_pseudos_live_out, e->src->index)
&& bitmap_set_bit (visited, e->src->index))
{
if (bitmap_bit_p (fpr_pseudos_live_in, e->src->index))
stack.safe_push ({ ei_start (e->src->preds), nullptr });
stack.safe_push ({ ei_start (e->src->succs), e->src });
}
else
ei_next (&node.ei);
}
}
m_current_point = 2;
start_new_region ();
if (region.is_empty ())
process_block (bb, true);
else
{
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "\nRegion (from %d):", bb->index);
for (unsigned int j = 0; j < region.length (); ++j)
fprintf (dump_file, " %d", region[j]->index);
fprintf (dump_file, "\n");
}
for (unsigned int j = 0; j < region.length (); ++j)
{
basic_block bb = region[j];
bool is_isolated
= ((j == 0 && !bitmap_bit_p (fpr_pseudos_live_out, bb->index))
|| (j == region.length () - 1
&& !bitmap_bit_p (fpr_pseudos_live_in, bb->index)));
process_block (bb, is_isolated);
}
}
region.truncate (0);
if (!m_allocnos.is_empty () && m_allocation_successful)
process_region ();
}
}
// Run the pass on the current function.
void
early_ra::execute ()
{
df_analyze ();
preprocess_insns ();
propagate_pseudo_reg_info ();
choose_fpr_pseudos ();
if (bitmap_empty_p (m_fpr_pseudos))
return;
if (dump_file && (dump_flags & TDF_DETAILS))
dump_pseudo_regs ();
process_blocks ();
df_verify ();
}
class pass_early_ra : public rtl_opt_pass
{
public:
pass_early_ra (gcc::context *ctxt)
: rtl_opt_pass (pass_data_early_ra, ctxt)
{}
// opt_pass methods:
virtual bool gate (function *);
virtual unsigned int execute (function *);
};
bool
pass_early_ra::gate (function *)
{
// Require a vector ISA to be enabled.
if (!TARGET_SIMD && !TARGET_SVE)
return false;
if (aarch64_early_ra == AARCH64_EARLY_RA_NONE)
return false;
if (aarch64_early_ra == AARCH64_EARLY_RA_STRIDED
&& !TARGET_STREAMING_SME2)
return false;
return true;
}
unsigned int
pass_early_ra::execute (function *fn)
{
early_ra (fn).execute ();
return 0;
}
} // end namespace
// Create a new instance of the pass.
rtl_opt_pass *
make_pass_aarch64_early_ra (gcc::context *ctxt)
{
return new pass_early_ra (ctxt);
}