Fix AArch64 ABI to respect half-caller-save, half-callee-save vec regs.

This PR updates the AArch64 ABI implementation so that it properly
respects that v8-v15 inclusive have callee-save lower halves and
caller-save upper halves, by conservatively approximating (to full
registers) in the appropriate directions when generating prologue
callee-saves and when informing the regalloc of clobbered regs across
callsites.

In order to prevent saving all of these vector registers in the prologue
of every non-leaf function due to the above approximation, this also
makes use of a new regalloc.rs feature to exclude call instructions'
writes from the clobber set returned by register allocation. This is
safe whenever the caller and callee have the same ABI (because anything
the callee could clobber, the caller is allowed to clobber as well
without saving it in the prologue).

Fixes #2254.
This commit is contained in:
Chris Fallin
2020-10-05 18:43:26 -07:00
parent e22e2c3722
commit 71768bb6cf
29 changed files with 325 additions and 206 deletions

View File

@@ -2,6 +2,7 @@
use crate::binemit::StackMap;
use crate::ir::StackSlot;
use crate::isa::CallConv;
use crate::machinst::*;
use crate::settings;
@@ -25,6 +26,9 @@ pub trait ABICallee {
/// Get the settings controlling this function's compilation.
fn flags(&self) -> &settings::Flags;
/// Get the calling convention implemented by this ABI object.
fn call_conv(&self) -> CallConv;
/// Get the liveins of the function.
fn liveins(&self) -> Set<RealReg>;

View File

@@ -347,6 +347,8 @@ pub trait ABIMachineSpec {
loc: SourceLoc,
opcode: ir::Opcode,
tmp: Writable<Reg>,
// Calling convention of the function being called.
callee_conv: isa::CallConv,
// Calling convention of the function that contains the callsite.
caller_conv: isa::CallConv,
) -> SmallVec<[(InstIsSafepoint, Self::I); 2]>;
/// Get the number of spillslots required for the given register-class and
@@ -359,8 +361,9 @@ pub trait ABIMachineSpec {
/// Get the "nominal SP to FP" offset from an instruction-emission state.
fn get_nominal_sp_to_fp(s: &<Self::I as MachInstEmit>::State) -> i64;
/// Get all caller-save registers.
fn get_caller_saves(call_conv: isa::CallConv) -> Vec<Writable<Reg>>;
/// Get all caller-save registers, that is, registers that we expect
/// not to be saved across a call to a callee with the given ABI.
fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec<Writable<Reg>>;
}
/// ABI information shared between body (callee) and caller.
@@ -682,6 +685,10 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
&self.flags
}
/// Return the calling convention stored in this ABI object's signature.
fn call_conv(&self) -> isa::CallConv {
self.sig.call_conv
}
fn liveins(&self) -> Set<RealReg> {
let mut set: Set<RealReg> = Set::empty();
for &arg in &self.sig.args {
@@ -1040,7 +1047,7 @@ fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Wr
}
// Compute defs: all retval regs, and all caller-save (clobbered) regs.
let mut defs = M::get_caller_saves(sig.call_conv);
let mut defs = M::get_regs_clobbered_by_call(sig.call_conv);
for ret in &sig.rets {
match ret {
&ABIArg::Reg(reg, ..) => defs.push(Writable::from_reg(reg.to_reg())),
@@ -1063,8 +1070,10 @@ pub struct ABICallerImpl<M: ABIMachineSpec> {
dest: CallDest,
/// Location of callsite.
loc: ir::SourceLoc,
/// Actuall call opcode; used to distinguish various types of calls.
/// Actual call opcode; used to distinguish various types of calls.
opcode: ir::Opcode,
/// Caller's calling convention.
caller_conv: isa::CallConv,
_mach: PhantomData<M>,
}
@@ -1085,6 +1094,7 @@ impl<M: ABIMachineSpec> ABICallerImpl<M> {
extname: &ir::ExternalName,
dist: RelocDistance,
loc: ir::SourceLoc,
caller_conv: isa::CallConv,
) -> CodegenResult<ABICallerImpl<M>> {
let sig = ABISig::from_func_sig::<M>(sig)?;
let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig);
@@ -1095,6 +1105,7 @@ impl<M: ABIMachineSpec> ABICallerImpl<M> {
dest: CallDest::ExtName(extname.clone(), dist),
loc,
opcode: ir::Opcode::Call,
caller_conv,
_mach: PhantomData,
})
}
@@ -1106,6 +1117,7 @@ impl<M: ABIMachineSpec> ABICallerImpl<M> {
ptr: Reg,
loc: ir::SourceLoc,
opcode: ir::Opcode,
caller_conv: isa::CallConv,
) -> CodegenResult<ABICallerImpl<M>> {
let sig = ABISig::from_func_sig::<M>(sig)?;
let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig);
@@ -1116,6 +1128,7 @@ impl<M: ABIMachineSpec> ABICallerImpl<M> {
dest: CallDest::Reg(ptr),
loc,
opcode,
caller_conv,
_mach: PhantomData,
})
}
@@ -1255,8 +1268,17 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
self.emit_copy_reg_to_arg(ctx, i, rd.to_reg());
}
let tmp = ctx.alloc_tmp(word_rc, word_type);
for (is_safepoint, inst) in
M::gen_call(&self.dest, uses, defs, self.loc, self.opcode, tmp).into_iter()
for (is_safepoint, inst) in M::gen_call(
&self.dest,
uses,
defs,
self.loc,
self.opcode,
tmp,
self.sig.call_conv,
self.caller_conv,
)
.into_iter()
{
match is_safepoint {
InstIsSafepoint::Yes => ctx.emit_safepoint(inst),

View File

@@ -6,7 +6,7 @@ use crate::settings;
use crate::timing;
use log::debug;
use regalloc::{allocate_registers_with_opts, Algorithm, Options};
use regalloc::{allocate_registers_with_opts, Algorithm, Options, PrettyPrint};
/// Compile the given function down to VCode with allocated registers, ready
/// for binary emission.
@@ -16,7 +16,7 @@ pub fn compile<B: LowerBackend + MachBackend>(
abi: Box<dyn ABICallee<I = B::MInst>>,
) -> CodegenResult<VCode<B::MInst>>
where
B::MInst: ShowWithRRU,
B::MInst: PrettyPrint,
{
// Compute lowered block order.
let block_order = BlockLoweringOrder::new(f);

View File

@@ -125,8 +125,6 @@ pub mod abi;
pub use abi::*;
pub mod abi_impl;
pub use abi_impl::*;
pub mod pretty_print;
pub use pretty_print::*;
pub mod buffer;
pub use buffer::*;
pub mod adapter;
@@ -156,6 +154,11 @@ pub trait MachInst: Clone + Debug {
/// Returns true if the instruction is an epilogue placeholder.
fn is_epilogue_placeholder(&self) -> bool;
/// Should this instruction be included in the clobber-set?
///
/// The default implementation returns `true`. An instruction type may
/// override it to keep selected instructions' writes out of the clobber set.
fn is_included_in_clobbers(&self) -> bool {
true
}
/// Generate a move.
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self;

View File

@@ -1,66 +0,0 @@
//! Pretty-printing for machine code (virtual-registerized or final).
use regalloc::{RealRegUniverse, Reg, Writable};
use std::fmt::Debug;
use std::hash::Hash;
use std::string::{String, ToString};
// FIXME: Should this go into regalloc.rs instead?
/// A trait for printing instruction bits and pieces, with the ability to
/// take a contextualising `RealRegUniverse` that is used to give proper names
/// to registers.
pub trait ShowWithRRU {
/// Return a string that shows the implementing object in context of the
/// given `RealRegUniverse`, if provided.
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String;
/// The same as `show_rru`, but with an additional hint giving a size in
/// bytes. Its interpretation is object-dependent, and it is intended to
/// pass around enough information to facilitate printing sub-parts of
/// real registers correctly. Objects may ignore size hints that are
/// irrelevant to them.
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, _size: u8) -> String {
// Default implementation is to ignore the hint and defer to `show_rru`.
self.show_rru(mb_rru)
}
}
impl ShowWithRRU for Reg {
    /// Render this register as text.
    ///
    /// A real register is looked up by index in the supplied universe and its
    /// recorded name is returned. A real register whose index falls outside
    /// the universe is printed as `!!<debug>!!`. Virtual registers, and any
    /// register when no universe is supplied, are printed as `%<debug>`.
    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
        match mb_rru {
            Some(universe) if self.is_real() => {
                let index = self.get_index();
                if index >= universe.regs.len() {
                    // A real reg that the universe does not list violates the
                    // regalloc.rs interface requirements. Still print
                    // something recognisable so the problem can be debugged.
                    format!("!!{:?}!!", self)
                } else {
                    universe.regs[index].1.to_string()
                }
            }
            // Either the reg is virtual or there is no universe: stay generic.
            _ => format!("%{:?}", self),
        }
    }

    /// Always panics: the meaning of a size hint is target specific, while
    /// this implementation is shared by every target.
    fn show_rru_sized(&self, _mb_rru: Option<&RealRegUniverse>, _size: u8) -> String {
        panic!("Reg::show_rru_sized: impossible to implement")
    }
}
impl<R: ShowWithRRU + Copy + Ord + Hash + Eq + Debug> ShowWithRRU for Writable<R> {
    /// Print the wrapped register exactly as it would print on its own.
    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
        let inner = self.to_reg();
        inner.show_rru(mb_rru)
    }

    /// Print the wrapped register with a size hint, forwarding the hint
    /// unchanged to the wrapped register's implementation.
    fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
        let inner = self.to_reg();
        inner.show_rru_sized(mb_rru, size)
    }
}

View File

@@ -25,8 +25,8 @@ use crate::timing;
use regalloc::Function as RegallocFunction;
use regalloc::Set as RegallocSet;
use regalloc::{
BlockIx, InstIx, Range, RegAllocResult, RegClass, RegUsageCollector, RegUsageMapper, SpillSlot,
StackmapRequestInfo,
BlockIx, InstIx, PrettyPrint, Range, RegAllocResult, RegClass, RegUsageCollector,
RegUsageMapper, SpillSlot, StackmapRequestInfo,
};
use alloc::boxed::Box;
@@ -543,6 +543,10 @@ impl<I: VCodeInst> RegallocFunction for VCode<I> {
}
}
/// Report whether `insn` should be included in the clobber set by deferring
/// to the instruction's own `is_included_in_clobbers`.
fn is_included_in_clobbers(&self, insn: &I) -> bool {
insn.is_included_in_clobbers()
}
/// Forward register-usage collection for `insn` to the instruction's own
/// `get_regs`, passing `collector` through unchanged.
fn get_regs(insn: &I, collector: &mut RegUsageCollector) {
insn.get_regs(collector)
}
@@ -624,7 +628,7 @@ impl<I: VCodeInst> fmt::Debug for VCode<I> {
}
/// Pretty-printing with `RealRegUniverse` context.
impl<I: VCodeInst> ShowWithRRU for VCode<I> {
impl<I: VCodeInst> PrettyPrint for VCode<I> {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
use std::fmt::Write;