Cranelift: Simplify leaf functions that do not use the stack (#2960)

* Cranelift AArch64: Simplify leaf functions that do not use the stack

Leaf functions that do not use the stack (e.g. do not clobber any
callee-saved registers) do not need a frame record.

Copyright (c) 2021, Arm Limited.
This commit is contained in:
Anton Kirilov
2021-08-27 11:12:37 +01:00
committed by GitHub
parent 12515e6646
commit 7b98be1bee
34 changed files with 650 additions and 1385 deletions

View File

@@ -693,20 +693,30 @@ impl ABIMachineSpec for AArch64MachineDeps {
// nominal SP offset; abi_impl generic code will do that.
fn gen_clobber_save(
call_conv: isa::CallConv,
setup_frame: bool,
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
clobbered_callee_saves: &Vec<Writable<RealReg>>,
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> (u64, SmallVec<[Inst; 16]>) {
let mut insts = SmallVec::new();
let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers);
let mut clobbered_int = vec![];
let mut clobbered_vec = vec![];
for &reg in clobbered_callee_saves.iter() {
match reg.to_reg().get_class() {
RegClass::I64 => clobbered_int.push(reg),
RegClass::V128 => clobbered_vec.push(reg),
class => panic!("Unexpected RegClass: {:?}", class),
}
}
let (int_save_bytes, vec_save_bytes) =
saved_reg_stack_size(call_conv, &clobbered_int, &clobbered_vec);
let total_save_bytes = int_save_bytes + vec_save_bytes;
let clobber_size = total_save_bytes as i32;
let mut insts = SmallVec::new();
if flags.unwind_info() {
if flags.unwind_info() && setup_frame {
// The *unwind* frame (but not the actual frame) starts at the
// clobbers, just below the saved FP/LR pair.
insts.push(Inst::Unwind {
@@ -916,7 +926,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
_outgoing_args_size: u32,
) -> SmallVec<[Inst; 16]> {
let mut insts = SmallVec::new();
let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers);
let (clobbered_int, clobbered_vec) = get_regs_restored_in_epilogue(call_conv, clobbers);
// Free the fixed frame if necessary.
if fixed_frame_storage_size > 0 {
@@ -1180,6 +1190,36 @@ impl ABIMachineSpec for AArch64MachineDeps {
ir::ArgumentExtension::None
}
}
fn get_clobbered_callee_saves(
call_conv: isa::CallConv,
regs: &Set<Writable<RealReg>>,
) -> Vec<Writable<RealReg>> {
let mut regs: Vec<Writable<RealReg>> = regs
.iter()
.cloned()
.filter(|r| is_reg_saved_in_prologue(call_conv, r.to_reg()))
.collect();
// Sort registers for deterministic code output. We can do an unstable
// sort because the registers will be unique (there are no dups).
regs.sort_unstable_by_key(|r| r.to_reg().get_index());
regs
}
fn is_frame_setup_needed(
is_leaf: bool,
stack_args_size: u32,
num_clobbered_callee_saves: usize,
fixed_frame_storage_size: u32,
) -> bool {
!is_leaf
// The function arguments that are passed on the stack are addressed
// relative to the Frame Pointer.
|| stack_args_size > 0
|| num_clobbered_callee_saves > 0
|| fixed_frame_storage_size > 0
}
}
/// Is this type supposed to be seen on this machine? E.g. references of the
@@ -1224,7 +1264,7 @@ fn is_reg_saved_in_prologue(call_conv: isa::CallConv, r: RealReg) -> bool {
/// Return the set of all integer and vector registers that must be saved in the
/// prologue and restored in the epilogue, given the set of all registers
/// written by the function's body.
fn get_regs_saved_in_prologue(
fn get_regs_restored_in_epilogue(
call_conv: isa::CallConv,
regs: &Set<Writable<RealReg>>,
) -> (Vec<Writable<RealReg>>, Vec<Writable<RealReg>>) {

View File

@@ -66,3 +66,107 @@ impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
Some(8)
}
}
#[cfg(test)]
mod tests {
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::{
types, AbiParam, ExternalName, Function, InstBuilder, Signature, StackSlotData,
StackSlotKind,
};
use crate::isa::{lookup, CallConv};
use crate::settings::{builder, Flags};
use crate::Context;
use gimli::write::Address;
use std::str::FromStr;
use target_lexicon::triple;
#[test]
fn test_simple_func() {
let isa = lookup(triple!("aarch64"))
.expect("expect aarch64 ISA")
.finish(Flags::new(builder()));
let mut context = Context::for_function(create_function(
CallConv::SystemV,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
));
context.compile(&*isa).expect("expected compilation");
let fde = match context
.create_unwind_info(isa.as_ref())
.expect("can create unwind info")
{
Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
info.to_fde(Address::Constant(1234))
}
_ => panic!("expected unwind information"),
};
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 24, lsda: None, instructions: [(0, ValExpression(Register(34), Expression { operations: [Simple(DwOp(48))] })), (4, CfaOffset(16)), (4, Offset(Register(29), -16)), (4, Offset(Register(30), -8)), (8, CfaRegister(Register(29)))] }");
}
fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
let mut func =
Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
let block0 = func.dfg.make_block();
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(block0);
pos.ins().return_(&[]);
if let Some(stack_slot) = stack_slot {
func.stack_slots.push(stack_slot);
}
func
}
#[test]
fn test_multi_return_func() {
let isa = lookup(triple!("aarch64"))
.expect("expect aarch64 ISA")
.finish(Flags::new(builder()));
let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV));
context.compile(&*isa).expect("expected compilation");
let fde = match context
.create_unwind_info(isa.as_ref())
.expect("can create unwind info")
{
Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
info.to_fde(Address::Constant(4321))
}
_ => panic!("expected unwind information"),
};
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [] }");
}
fn create_multi_return_function(call_conv: CallConv) -> Function {
let mut sig = Signature::new(call_conv);
sig.params.push(AbiParam::new(types::I32));
let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);
let block0 = func.dfg.make_block();
let v0 = func.dfg.append_block_param(block0, types::I32);
let block1 = func.dfg.make_block();
let block2 = func.dfg.make_block();
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(block0);
pos.ins().brnz(v0, block2, &[]);
pos.ins().jump(block1, &[]);
pos.insert_block(block1);
pos.ins().return_(&[]);
pos.insert_block(block2);
pos.ins().return_(&[]);
func
}
}

View File

@@ -218,15 +218,11 @@ mod test {
let buffer = backend.compile_function(&mut func, false).unwrap().buffer;
let code = &buffer.data[..];
// stp x29, x30, [sp, #-16]!
// mov x29, sp
// mov x1, #0x1234
// add w0, w0, w1
// ldp x29, x30, [sp], #16
// ret
let golden = vec![
0xfd, 0x7b, 0xbf, 0xa9, 0xfd, 0x03, 0x00, 0x91, 0x81, 0x46, 0x82, 0xd2, 0x00, 0x00,
0x01, 0x0b, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6,
0x81, 0x46, 0x82, 0xd2, 0x00, 0x00, 0x01, 0x0b, 0xc0, 0x03, 0x5f, 0xd6,
];
assert_eq!(code, &golden[..]);
@@ -277,8 +273,6 @@ mod test {
.unwrap();
let code = &result.buffer.data[..];
// stp x29, x30, [sp, #-16]!
// mov x29, sp
// mov x1, #0x1234 // #4660
// add w0, w0, w1
// mov w1, w0
@@ -291,13 +285,11 @@ mod test {
// cbnz x1, 0x18
// mov x1, #0x1234 // #4660
// sub w0, w0, w1
// ldp x29, x30, [sp], #16
// ret
let golden = vec![
253, 123, 191, 169, 253, 3, 0, 145, 129, 70, 130, 210, 0, 0, 1, 11, 225, 3, 0, 42, 161,
0, 0, 181, 129, 70, 130, 210, 1, 0, 1, 11, 225, 3, 1, 42, 161, 255, 255, 181, 225, 3,
0, 42, 97, 255, 255, 181, 129, 70, 130, 210, 0, 0, 1, 75, 253, 123, 193, 168, 192, 3,
95, 214,
129, 70, 130, 210, 0, 0, 1, 11, 225, 3, 0, 42, 161, 0, 0, 181, 129, 70, 130, 210, 1, 0,
1, 11, 225, 3, 1, 42, 161, 255, 255, 181, 225, 3, 0, 42, 97, 255, 255, 181, 129, 70,
130, 210, 0, 0, 1, 75, 192, 3, 95, 214,
];
assert_eq!(code, &golden[..]);

View File

@@ -316,8 +316,9 @@ impl ABIMachineSpec for Arm32MachineDeps {
/// nominal SP offset; caller will do that.
fn gen_clobber_save(
_call_conv: isa::CallConv,
_setup_frame: bool,
_flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
clobbered_callee_saves: &Vec<Writable<RealReg>>,
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> (u64, SmallVec<[Inst; 16]>) {
@@ -325,8 +326,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
if fixed_frame_storage_size > 0 {
insts.extend(Self::gen_sp_reg_adjust(-(fixed_frame_storage_size as i32)).into_iter());
}
let clobbered_vec = get_callee_saves(clobbers);
let mut clobbered_vec: Vec<_> = clobbered_vec
let mut clobbered_vec: Vec<_> = clobbered_callee_saves
.into_iter()
.map(|r| r.to_reg().to_reg())
.collect();
@@ -345,14 +345,14 @@ impl ABIMachineSpec for Arm32MachineDeps {
}
fn gen_clobber_restore(
_call_conv: isa::CallConv,
call_conv: isa::CallConv,
_flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
_fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> SmallVec<[Inst; 16]> {
let mut insts = SmallVec::new();
let clobbered_vec = get_callee_saves(clobbers);
let clobbered_vec = Self::get_clobbered_callee_saves(call_conv, clobbers);
let mut clobbered_vec: Vec<_> = clobbered_vec
.into_iter()
.map(|r| Writable::from_reg(r.to_reg().to_reg()))
@@ -468,6 +468,31 @@ impl ABIMachineSpec for Arm32MachineDeps {
) -> ir::ArgumentExtension {
specified
}
fn get_clobbered_callee_saves(
_call_conv: isa::CallConv,
regs: &Set<Writable<RealReg>>,
) -> Vec<Writable<RealReg>> {
let mut ret = Vec::new();
for &reg in regs.iter() {
if is_callee_save(reg.to_reg()) {
ret.push(reg);
}
}
// Sort registers for deterministic code output.
ret.sort_by_key(|r| r.to_reg().get_index());
ret
}
fn is_frame_setup_needed(
_is_leaf: bool,
_stack_args_size: u32,
_num_clobbered_callee_saves: usize,
_fixed_frame_storage_size: u32,
) -> bool {
true
}
}
fn is_callee_save(r: RealReg) -> bool {
@@ -475,19 +500,6 @@ fn is_callee_save(r: RealReg) -> bool {
4 <= enc && enc <= 10
}
fn get_callee_saves(regs: &Set<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
let mut ret = Vec::new();
for &reg in regs.iter() {
if is_callee_save(reg.to_reg()) {
ret.push(reg);
}
}
// Sort registers for deterministic code output.
ret.sort_by_key(|r| r.to_reg().get_index());
ret
}
fn is_reg_clobbered_by_call(r: RealReg) -> bool {
let enc = r.get_hw_encoding();
enc <= 3

View File

@@ -459,16 +459,25 @@ impl ABIMachineSpec for S390xMachineDeps {
// Returns stack bytes used as well as instructions. Does not adjust
// nominal SP offset; abi_impl generic code will do that.
fn gen_clobber_save(
call_conv: isa::CallConv,
_call_conv: isa::CallConv,
_setup_frame: bool,
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
clobbered_callee_saves: &Vec<Writable<RealReg>>,
fixed_frame_storage_size: u32,
outgoing_args_size: u32,
) -> (u64, SmallVec<[Inst; 16]>) {
let mut insts = SmallVec::new();
let mut clobbered_fpr = vec![];
let mut clobbered_gpr = vec![];
for &reg in clobbered_callee_saves.iter() {
match reg.to_reg().get_class() {
RegClass::I64 => clobbered_gpr.push(reg),
RegClass::F64 => clobbered_fpr.push(reg),
class => panic!("Unexpected RegClass: {:?}", class),
}
}
// Collect clobbered registers.
let (clobbered_gpr, clobbered_fpr) = get_regs_saved_in_prologue(call_conv, clobbers);
let mut first_clobbered_gpr = 16;
for reg in clobbered_gpr {
let enc = reg.to_reg().get_hw_encoding();
@@ -718,6 +727,32 @@ impl ABIMachineSpec for S390xMachineDeps {
) -> ir::ArgumentExtension {
specified
}
fn get_clobbered_callee_saves(
call_conv: isa::CallConv,
regs: &Set<Writable<RealReg>>,
) -> Vec<Writable<RealReg>> {
let mut regs: Vec<Writable<RealReg>> = regs
.iter()
.cloned()
.filter(|r| is_reg_saved_in_prologue(call_conv, r.to_reg()))
.collect();
// Sort registers for deterministic code output. We can do an unstable
// sort because the registers will be unique (there are no dups).
regs.sort_unstable_by_key(|r| r.to_reg().get_index());
regs
}
fn is_frame_setup_needed(
_is_leaf: bool,
_stack_args_size: u32,
_num_clobbered_callee_saves: usize,
_fixed_frame_storage_size: u32,
) -> bool {
// The call frame set-up is handled by gen_clobber_save().
false
}
}
fn is_reg_saved_in_prologue(_call_conv: isa::CallConv, r: RealReg) -> bool {

View File

@@ -496,18 +496,17 @@ impl ABIMachineSpec for X64ABIMachineSpec {
}
fn gen_clobber_save(
call_conv: isa::CallConv,
_call_conv: isa::CallConv,
setup_frame: bool,
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
clobbered_callee_saves: &Vec<Writable<RealReg>>,
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> (u64, SmallVec<[Self::I; 16]>) {
let mut insts = SmallVec::new();
// Find all clobbered registers that are callee-save.
let clobbered = get_callee_saves(&call_conv, clobbers);
let clobbered_size = compute_clobber_size(&clobbered);
let clobbered_size = compute_clobber_size(&clobbered_callee_saves);
if flags.unwind_info() {
if flags.unwind_info() && setup_frame {
// Emit unwind info: start the frame. The frame (from unwind
// consumers' point of view) starts at clobbbers, just below
// the FP and return address. Spill slots and stack slots are
@@ -534,7 +533,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
// Store each clobbered register in order at offsets from RSP,
// placing them above the fixed frame slots.
let mut cur_offset = fixed_frame_storage_size;
for reg in &clobbered {
for reg in clobbered_callee_saves {
let r_reg = reg.to_reg();
let off = cur_offset;
match r_reg.get_class() {
@@ -579,14 +578,14 @@ impl ABIMachineSpec for X64ABIMachineSpec {
) -> SmallVec<[Self::I; 16]> {
let mut insts = SmallVec::new();
let clobbered = get_callee_saves(&call_conv, clobbers);
let stack_size = fixed_frame_storage_size + compute_clobber_size(&clobbered);
let clobbered_callee_saves = Self::get_clobbered_callee_saves(call_conv, clobbers);
let stack_size = fixed_frame_storage_size + compute_clobber_size(&clobbered_callee_saves);
// Restore regs by loading from offsets of RSP. RSP will be
// returned to nominal-RSP at this point, so we can use the
// same offsets that we used when saving clobbers above.
let mut cur_offset = fixed_frame_storage_size;
for reg in &clobbered {
for reg in &clobbered_callee_saves {
let rreg = reg.to_reg();
match rreg.get_class() {
RegClass::I64 => {
@@ -797,6 +796,47 @@ impl ABIMachineSpec for X64ABIMachineSpec {
ir::ArgumentExtension::None
}
}
fn get_clobbered_callee_saves(
call_conv: CallConv,
regs: &Set<Writable<RealReg>>,
) -> Vec<Writable<RealReg>> {
let mut regs: Vec<Writable<RealReg>> = match call_conv {
CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => regs
.iter()
.cloned()
.filter(|r| is_callee_save_baldrdash(r.to_reg()))
.collect(),
CallConv::BaldrdashWindows => {
todo!("baldrdash windows");
}
CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::WasmtimeSystemV => regs
.iter()
.cloned()
.filter(|r| is_callee_save_systemv(r.to_reg()))
.collect(),
CallConv::WindowsFastcall | CallConv::WasmtimeFastcall => regs
.iter()
.cloned()
.filter(|r| is_callee_save_fastcall(r.to_reg()))
.collect(),
CallConv::Probestack => todo!("probestack?"),
CallConv::AppleAarch64 | CallConv::WasmtimeAppleAarch64 => unreachable!(),
};
// Sort registers for deterministic code output. We can do an unstable sort because the
// registers will be unique (there are no dups).
regs.sort_unstable_by_key(|r| r.to_reg().get_index());
regs
}
fn is_frame_setup_needed(
_is_leaf: bool,
_stack_args_size: u32,
_num_clobbered_callee_saves: usize,
_fixed_frame_storage_size: u32,
) -> bool {
true
}
}
impl From<StackAMode> for SyntheticAmode {
@@ -984,35 +1024,6 @@ fn is_callee_save_fastcall(r: RealReg) -> bool {
}
}
fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
let mut regs: Vec<Writable<RealReg>> = match call_conv {
CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => regs
.iter()
.cloned()
.filter(|r| is_callee_save_baldrdash(r.to_reg()))
.collect(),
CallConv::BaldrdashWindows => {
todo!("baldrdash windows");
}
CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::WasmtimeSystemV => regs
.iter()
.cloned()
.filter(|r| is_callee_save_systemv(r.to_reg()))
.collect(),
CallConv::WindowsFastcall | CallConv::WasmtimeFastcall => regs
.iter()
.cloned()
.filter(|r| is_callee_save_fastcall(r.to_reg()))
.collect(),
CallConv::Probestack => todo!("probestack?"),
CallConv::AppleAarch64 | CallConv::WasmtimeAppleAarch64 => unreachable!(),
};
// Sort registers for deterministic code output. We can do an unstable sort because the
// registers will be unique (there are no dups).
regs.sort_unstable_by_key(|r| r.to_reg().get_index());
regs
}
fn compute_clobber_size(clobbers: &Vec<Writable<RealReg>>) -> u32 {
let mut clobbered_size = 0;
for reg in clobbers {