Fix AArch64 ABI to respect half-caller-save, half-callee-save vec regs.
This PR updates the AArch64 ABI implementation so that it properly respects that v8-v15 inclusive have callee-save lower halves and caller-save upper halves. It does so by conservatively approximating (to full registers) in the appropriate directions when generating prologue callee-saves and when informing the regalloc of clobbered regs across callsites. In order to prevent saving all of these vector registers in the prologue of every non-leaf function due to the above approximation, this also makes use of a new regalloc.rs feature to exclude call instructions' writes from the clobber set returned by register allocation. This is safe whenever the caller and callee have the same ABI (because anything the callee could clobber, the caller is allowed to clobber as well without saving it in the prologue). Fixes #2254.
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
|
||||
use crate::binemit::StackMap;
|
||||
use crate::ir::StackSlot;
|
||||
use crate::isa::CallConv;
|
||||
use crate::machinst::*;
|
||||
use crate::settings;
|
||||
|
||||
@@ -25,6 +26,9 @@ pub trait ABICallee {
|
||||
/// Get the settings controlling this function's compilation.
|
||||
fn flags(&self) -> &settings::Flags;
|
||||
|
||||
/// Get the calling convention implemented by this ABI object.
|
||||
fn call_conv(&self) -> CallConv;
|
||||
|
||||
/// Get the liveins of the function.
|
||||
fn liveins(&self) -> Set<RealReg>;
|
||||
|
||||
|
||||
@@ -347,6 +347,8 @@ pub trait ABIMachineSpec {
|
||||
loc: SourceLoc,
|
||||
opcode: ir::Opcode,
|
||||
tmp: Writable<Reg>,
|
||||
callee_conv: isa::CallConv,
|
||||
caller_conv: isa::CallConv,
|
||||
) -> SmallVec<[(InstIsSafepoint, Self::I); 2]>;
|
||||
|
||||
/// Get the number of spillslots required for the given register-class and
|
||||
@@ -359,8 +361,9 @@ pub trait ABIMachineSpec {
|
||||
/// Get the "nominal SP to FP" offset from an instruction-emission state.
|
||||
fn get_nominal_sp_to_fp(s: &<Self::I as MachInstEmit>::State) -> i64;
|
||||
|
||||
/// Get all caller-save registers.
|
||||
fn get_caller_saves(call_conv: isa::CallConv) -> Vec<Writable<Reg>>;
|
||||
/// Get all caller-save registers, that is, registers that we expect
|
||||
/// not to be saved across a call to a callee with the given ABI.
|
||||
fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec<Writable<Reg>>;
|
||||
}
|
||||
|
||||
/// ABI information shared between body (callee) and caller.
|
||||
@@ -682,6 +685,10 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
&self.flags
|
||||
}
|
||||
|
||||
fn call_conv(&self) -> isa::CallConv {
|
||||
self.sig.call_conv
|
||||
}
|
||||
|
||||
fn liveins(&self) -> Set<RealReg> {
|
||||
let mut set: Set<RealReg> = Set::empty();
|
||||
for &arg in &self.sig.args {
|
||||
@@ -1040,7 +1047,7 @@ fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Wr
|
||||
}
|
||||
|
||||
// Compute defs: all retval regs, and all caller-save (clobbered) regs.
|
||||
let mut defs = M::get_caller_saves(sig.call_conv);
|
||||
let mut defs = M::get_regs_clobbered_by_call(sig.call_conv);
|
||||
for ret in &sig.rets {
|
||||
match ret {
|
||||
&ABIArg::Reg(reg, ..) => defs.push(Writable::from_reg(reg.to_reg())),
|
||||
@@ -1063,8 +1070,10 @@ pub struct ABICallerImpl<M: ABIMachineSpec> {
|
||||
dest: CallDest,
|
||||
/// Location of callsite.
|
||||
loc: ir::SourceLoc,
|
||||
/// Actuall call opcode; used to distinguish various types of calls.
|
||||
/// Actual call opcode; used to distinguish various types of calls.
|
||||
opcode: ir::Opcode,
|
||||
/// Caller's calling convention.
|
||||
caller_conv: isa::CallConv,
|
||||
|
||||
_mach: PhantomData<M>,
|
||||
}
|
||||
@@ -1085,6 +1094,7 @@ impl<M: ABIMachineSpec> ABICallerImpl<M> {
|
||||
extname: &ir::ExternalName,
|
||||
dist: RelocDistance,
|
||||
loc: ir::SourceLoc,
|
||||
caller_conv: isa::CallConv,
|
||||
) -> CodegenResult<ABICallerImpl<M>> {
|
||||
let sig = ABISig::from_func_sig::<M>(sig)?;
|
||||
let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig);
|
||||
@@ -1095,6 +1105,7 @@ impl<M: ABIMachineSpec> ABICallerImpl<M> {
|
||||
dest: CallDest::ExtName(extname.clone(), dist),
|
||||
loc,
|
||||
opcode: ir::Opcode::Call,
|
||||
caller_conv,
|
||||
_mach: PhantomData,
|
||||
})
|
||||
}
|
||||
@@ -1106,6 +1117,7 @@ impl<M: ABIMachineSpec> ABICallerImpl<M> {
|
||||
ptr: Reg,
|
||||
loc: ir::SourceLoc,
|
||||
opcode: ir::Opcode,
|
||||
caller_conv: isa::CallConv,
|
||||
) -> CodegenResult<ABICallerImpl<M>> {
|
||||
let sig = ABISig::from_func_sig::<M>(sig)?;
|
||||
let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig);
|
||||
@@ -1116,6 +1128,7 @@ impl<M: ABIMachineSpec> ABICallerImpl<M> {
|
||||
dest: CallDest::Reg(ptr),
|
||||
loc,
|
||||
opcode,
|
||||
caller_conv,
|
||||
_mach: PhantomData,
|
||||
})
|
||||
}
|
||||
@@ -1255,8 +1268,17 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
|
||||
self.emit_copy_reg_to_arg(ctx, i, rd.to_reg());
|
||||
}
|
||||
let tmp = ctx.alloc_tmp(word_rc, word_type);
|
||||
for (is_safepoint, inst) in
|
||||
M::gen_call(&self.dest, uses, defs, self.loc, self.opcode, tmp).into_iter()
|
||||
for (is_safepoint, inst) in M::gen_call(
|
||||
&self.dest,
|
||||
uses,
|
||||
defs,
|
||||
self.loc,
|
||||
self.opcode,
|
||||
tmp,
|
||||
self.sig.call_conv,
|
||||
self.caller_conv,
|
||||
)
|
||||
.into_iter()
|
||||
{
|
||||
match is_safepoint {
|
||||
InstIsSafepoint::Yes => ctx.emit_safepoint(inst),
|
||||
|
||||
@@ -6,7 +6,7 @@ use crate::settings;
|
||||
use crate::timing;
|
||||
|
||||
use log::debug;
|
||||
use regalloc::{allocate_registers_with_opts, Algorithm, Options};
|
||||
use regalloc::{allocate_registers_with_opts, Algorithm, Options, PrettyPrint};
|
||||
|
||||
/// Compile the given function down to VCode with allocated registers, ready
|
||||
/// for binary emission.
|
||||
@@ -16,7 +16,7 @@ pub fn compile<B: LowerBackend + MachBackend>(
|
||||
abi: Box<dyn ABICallee<I = B::MInst>>,
|
||||
) -> CodegenResult<VCode<B::MInst>>
|
||||
where
|
||||
B::MInst: ShowWithRRU,
|
||||
B::MInst: PrettyPrint,
|
||||
{
|
||||
// Compute lowered block order.
|
||||
let block_order = BlockLoweringOrder::new(f);
|
||||
|
||||
@@ -125,8 +125,6 @@ pub mod abi;
|
||||
pub use abi::*;
|
||||
pub mod abi_impl;
|
||||
pub use abi_impl::*;
|
||||
pub mod pretty_print;
|
||||
pub use pretty_print::*;
|
||||
pub mod buffer;
|
||||
pub use buffer::*;
|
||||
pub mod adapter;
|
||||
@@ -156,6 +154,11 @@ pub trait MachInst: Clone + Debug {
|
||||
/// Returns true if the instruction is an epilogue placeholder.
|
||||
fn is_epilogue_placeholder(&self) -> bool;
|
||||
|
||||
/// Should this instruction be included in the clobber-set?
|
||||
fn is_included_in_clobbers(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
/// Generate a move.
|
||||
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self;
|
||||
|
||||
|
||||
@@ -1,66 +0,0 @@
|
||||
//! Pretty-printing for machine code (virtual-registerized or final).
|
||||
|
||||
use regalloc::{RealRegUniverse, Reg, Writable};
|
||||
|
||||
use std::fmt::Debug;
|
||||
use std::hash::Hash;
|
||||
use std::string::{String, ToString};
|
||||
|
||||
// FIXME: Should this go into regalloc.rs instead?
|
||||
|
||||
/// A trait for printing instruction bits and pieces, with the ability to
|
||||
/// take a contextualising RealRegUniverse that is used to give proper names to
|
||||
/// registers.
|
||||
pub trait ShowWithRRU {
|
||||
/// Return a string that shows the implementing object in context of the
|
||||
/// given `RealRegUniverse`, if provided.
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String;
|
||||
|
||||
/// The same as |show_rru|, but with an optional hint giving a size in
|
||||
/// bytes. Its interpretation is object-dependent, and it is intended to
|
||||
/// pass around enough information to facilitate printing sub-parts of
|
||||
/// real registers correctly. Objects may ignore size hints that are
|
||||
/// irrelevant to them.
|
||||
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, _size: u8) -> String {
|
||||
// Default implementation is to ignore the hint.
|
||||
self.show_rru(mb_rru)
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for Reg {
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
if self.is_real() {
|
||||
if let Some(rru) = mb_rru {
|
||||
let reg_ix = self.get_index();
|
||||
if reg_ix < rru.regs.len() {
|
||||
return rru.regs[reg_ix].1.to_string();
|
||||
} else {
|
||||
// We have a real reg which isn't listed in the universe.
|
||||
// Per the regalloc.rs interface requirements, this is
|
||||
// Totally Not Allowed. Print it generically anyway, so
|
||||
// we have something to debug.
|
||||
return format!("!!{:?}!!", self);
|
||||
}
|
||||
}
|
||||
}
|
||||
// The reg is virtual, or we have no universe. Be generic.
|
||||
format!("%{:?}", self)
|
||||
}
|
||||
|
||||
fn show_rru_sized(&self, _mb_rru: Option<&RealRegUniverse>, _size: u8) -> String {
|
||||
// For the specific case of Reg, we demand not to have a size hint,
|
||||
// since interpretation of the size is target specific, but this code
|
||||
// is used by all targets.
|
||||
panic!("Reg::show_rru_sized: impossible to implement");
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: ShowWithRRU + Copy + Ord + Hash + Eq + Debug> ShowWithRRU for Writable<R> {
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
self.to_reg().show_rru(mb_rru)
|
||||
}
|
||||
|
||||
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
|
||||
self.to_reg().show_rru_sized(mb_rru, size)
|
||||
}
|
||||
}
|
||||
@@ -25,8 +25,8 @@ use crate::timing;
|
||||
use regalloc::Function as RegallocFunction;
|
||||
use regalloc::Set as RegallocSet;
|
||||
use regalloc::{
|
||||
BlockIx, InstIx, Range, RegAllocResult, RegClass, RegUsageCollector, RegUsageMapper, SpillSlot,
|
||||
StackmapRequestInfo,
|
||||
BlockIx, InstIx, PrettyPrint, Range, RegAllocResult, RegClass, RegUsageCollector,
|
||||
RegUsageMapper, SpillSlot, StackmapRequestInfo,
|
||||
};
|
||||
|
||||
use alloc::boxed::Box;
|
||||
@@ -543,6 +543,10 @@ impl<I: VCodeInst> RegallocFunction for VCode<I> {
|
||||
}
|
||||
}
|
||||
|
||||
fn is_included_in_clobbers(&self, insn: &I) -> bool {
|
||||
insn.is_included_in_clobbers()
|
||||
}
|
||||
|
||||
fn get_regs(insn: &I, collector: &mut RegUsageCollector) {
|
||||
insn.get_regs(collector)
|
||||
}
|
||||
@@ -624,7 +628,7 @@ impl<I: VCodeInst> fmt::Debug for VCode<I> {
|
||||
}
|
||||
|
||||
/// Pretty-printing with `RealRegUniverse` context.
|
||||
impl<I: VCodeInst> ShowWithRRU for VCode<I> {
|
||||
impl<I: VCodeInst> PrettyPrint for VCode<I> {
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
use std::fmt::Write;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user