From e8f772c1ac08bf77a9b0b33eec2fb07bba354db2 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 18 Aug 2020 15:54:21 -0700 Subject: [PATCH] x64 new backend: port ABI implementation to shared infrastructure with AArch64. Previously, in #2128, we factored out a common "vanilla 64-bit ABI" implementation from the AArch64 ABI code, with the idea that this should be largely compatible with x64. This PR alters the new x64 backend to make use of the shared infrastructure, removing the duplication that existed previously. The generated code is nearly (not exactly) the same; the only difference relates to how the clobber-save region is padded in the prologue. This also changes some register allocations in the aarch64 code because call support in the shared ABI infra now passes a temp vreg in, rather than requiring use of a fixed, non-allocable temp; tests have been updated, and the runtime behavior is unchanged. --- cranelift/codegen/src/isa/aarch64/abi.rs | 59 +- .../codegen/src/isa/aarch64/inst/emit.rs | 2 +- .../codegen/src/isa/aarch64/lower_inst.rs | 4 +- cranelift/codegen/src/isa/aarch64/mod.rs | 2 +- cranelift/codegen/src/isa/x64/abi.rs | 1677 ++++++----------- cranelift/codegen/src/isa/x64/inst/mod.rs | 2 +- cranelift/codegen/src/isa/x64/lower.rs | 6 +- cranelift/codegen/src/isa/x64/mod.rs | 2 +- cranelift/codegen/src/machinst/abi.rs | 12 +- cranelift/codegen/src/machinst/abi_impl.rs | 123 +- cranelift/codegen/src/machinst/compile.rs | 2 +- cranelift/codegen/src/machinst/lower.rs | 10 +- cranelift/codegen/src/machinst/mod.rs | 2 +- cranelift/codegen/src/machinst/vcode.rs | 8 +- .../filetests/vcode/aarch64/call.clif | 16 +- .../filetests/vcode/aarch64/reftypes.clif | 4 +- .../filetests/vcode/aarch64/stack-limit.clif | 8 +- 17 files changed, 747 insertions(+), 1192 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 802269d5c8..c9e88f655b 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -13,16 +13,15 @@ use alloc::boxed::Box; use alloc::vec::Vec; use regalloc::{RealReg, Reg, RegClass, Set, Writable}; use smallvec::SmallVec; -use std::convert::TryFrom; // We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because // these ABIs are very similar. /// Support for the AArch64 ABI from the callee side (within a function body). -pub type AArch64ABIBody = ABIBodyImpl; +pub(crate) type AArch64ABICallee = ABICalleeImpl; /// Support for the AArch64 ABI from the caller side (at a callsite). -pub type AArch64ABICall = ABICallImpl; +pub(crate) type AArch64ABICaller = ABICallerImpl; // Spidermonkey specific ABI convention. @@ -105,9 +104,9 @@ impl Into for StackAMode { /// AArch64-specific ABI behavior. This struct just serves as an implementation /// point for the trait; it is never actually instantiated. 
-pub struct AArch64MachineImpl; +pub(crate) struct AArch64MachineDeps; -impl ABIMachineImpl for AArch64MachineImpl { +impl ABIMachineSpec for AArch64MachineDeps { type I = Inst; fn compute_arg_locs( @@ -285,7 +284,8 @@ impl ABIMachineImpl for AArch64MachineImpl { Inst::Ret } - fn gen_add_imm(into_reg: Writable, from_reg: Reg, imm: u64) -> SmallVec<[Inst; 4]> { + fn gen_add_imm(into_reg: Writable, from_reg: Reg, imm: u32) -> SmallVec<[Inst; 4]> { + let imm = imm as u64; let mut insts = SmallVec::new(); if let Some(imm12) = Imm12::maybe_from_u64(imm) { insts.push(Inst::AluRRImm12 { @@ -296,6 +296,7 @@ impl ABIMachineImpl for AArch64MachineImpl { }); } else { let scratch2 = writable_tmp2_reg(); + assert_ne!(scratch2.to_reg(), from_reg); insts.extend(Inst::load_constant(scratch2, imm.into())); insts.push(Inst::AluRRRExtend { alu_op: ALUOp::Add64, @@ -334,29 +335,29 @@ impl ABIMachineImpl for AArch64MachineImpl { Inst::LoadAddr { rd: into_reg, mem } } - fn get_fixed_tmp_reg() -> Reg { + fn get_stacklimit_reg() -> Reg { spilltmp_reg() } - fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i64, ty: Type) -> Inst { - let mem = AMode::RegOffset(base, offset, ty); + fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i32, ty: Type) -> Inst { + let mem = AMode::RegOffset(base, offset as i64, ty); Inst::gen_load(into_reg, mem, ty) } - fn gen_store_base_offset(base: Reg, offset: i64, from_reg: Reg, ty: Type) -> Inst { - let mem = AMode::RegOffset(base, offset, ty); + fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst { + let mem = AMode::RegOffset(base, offset as i64, ty); Inst::gen_store(mem, from_reg, ty) } - fn gen_sp_reg_adjust(amount: i64) -> SmallVec<[Inst; 2]> { + fn gen_sp_reg_adjust(amount: i32) -> SmallVec<[Inst; 2]> { if amount == 0 { return SmallVec::new(); } let (amount, is_sub) = if amount > 0 { - (u64::try_from(amount).unwrap(), false) + (amount as u64, false) } else { - (u64::try_from(-amount).unwrap(), true) + (-amount as u64, true) }; let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 }; @@ -389,8 +390,10 @@ impl ABIMachineImpl for AArch64MachineImpl { ret } - fn gen_nominal_sp_adj(offset: i64) -> Inst { - Inst::VirtualSPOffsetAdj { offset } + fn gen_nominal_sp_adj(offset: i32) -> Inst { + Inst::VirtualSPOffsetAdj { + offset: offset as i64, + } } fn gen_prologue_frame_setup() -> SmallVec<[Inst; 2]> { @@ -553,11 +556,12 @@ impl ABIMachineImpl for AArch64MachineImpl { defs: Vec>, loc: SourceLoc, opcode: ir::Opcode, - ) -> SmallVec<[(/* is_safepoint = */ bool, Inst); 2]> { + tmp: Writable, + ) -> SmallVec<[(InstIsSafepoint, Inst); 2]> { let mut insts = SmallVec::new(); match &dest { &CallDest::ExtName(ref name, RelocDistance::Near) => insts.push(( - true, + InstIsSafepoint::Yes, Inst::Call { info: Box::new(CallInfo { dest: name.clone(), @@ -570,19 +574,19 @@ impl ABIMachineImpl for AArch64MachineImpl { )), &CallDest::ExtName(ref name, RelocDistance::Far) => { insts.push(( - false, + InstIsSafepoint::No, Inst::LoadExtName { - rd: writable_spilltmp_reg(), + rd: tmp, name: Box::new(name.clone()), offset: 0, srcloc: loc, }, )); insts.push(( - true, + InstIsSafepoint::Yes, Inst::CallInd { info: Box::new(CallIndInfo { - rn: spilltmp_reg(), + rn: tmp.to_reg(), uses, defs, loc, @@ -592,7 +596,7 @@ impl ABIMachineImpl for AArch64MachineImpl { )); } &CallDest::Reg(reg) => insts.push(( - true, + InstIsSafepoint::Yes, Inst::CallInd { info: Box::new(CallIndInfo { rn: *reg, @@ -608,7 +612,7 @@ impl ABIMachineImpl for AArch64MachineImpl { 
insts } - fn get_spillslot_size(rc: RegClass, ty: Type) -> u32 { + fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 { // We allocate in terms of 8-byte slots. match (rc, ty) { (RegClass::I64, _) => 1, @@ -698,9 +702,10 @@ fn get_callee_saves( } } } - // Sort registers for deterministic code output. - int_saves.sort_by_key(|r| r.to_reg().get_index()); - vec_saves.sort_by_key(|r| r.to_reg().get_index()); + // Sort registers for deterministic code output. We can do an unstable sort because the + // registers will be unique (there are no dups). + int_saves.sort_unstable_by_key(|r| r.to_reg().get_index()); + vec_saves.sort_unstable_by_key(|r| r.to_reg().get_index()); (int_saves, vec_saves) } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index ef05a600dc..d88bb56ec7 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -440,7 +440,7 @@ pub struct EmitState { } impl MachInstEmitState for EmitState { - fn new(abi: &dyn ABIBody) -> Self { + fn new(abi: &dyn ABICallee) -> Self { EmitState { virtual_sp_offset: 0, nominal_sp_to_fp: abi.frame_size() as i64, diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 7917f9c374..8888bbcacd 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1837,7 +1837,7 @@ pub(crate) fn lower_insn_to_regs>( assert!(inputs.len() == sig.params.len()); assert!(outputs.len() == sig.returns.len()); ( - AArch64ABICall::from_func(sig, &extname, dist, loc)?, + AArch64ABICaller::from_func(sig, &extname, dist, loc)?, &inputs[..], ) } @@ -1846,7 +1846,7 @@ pub(crate) fn lower_insn_to_regs>( let sig = ctx.call_sig(insn).unwrap(); assert!(inputs.len() - 1 == sig.params.len()); assert!(outputs.len() == sig.returns.len()); - (AArch64ABICall::from_ptr(sig, ptr, loc, op)?, &inputs[1..]) + (AArch64ABICaller::from_ptr(sig, ptr, loc, op)?, &inputs[1..]) } _ => unreachable!(), }; diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs index 99239c9469..bd34a58cf5 100644 --- a/cranelift/codegen/src/isa/aarch64/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/mod.rs @@ -47,7 +47,7 @@ impl AArch64Backend { func: &Function, flags: settings::Flags, ) -> CodegenResult> { - let abi = Box::new(abi::AArch64ABIBody::new(func, flags)?); + let abi = Box::new(abi::AArch64ABICallee::new(func, flags)?); compile::compile::(func, self, abi) } } diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index b74cb39cfc..8742ddaf6e 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -1,73 +1,612 @@ //! Implementation of the standard x64 ABI. 
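+//!
+//! Most of the callee/caller machinery now lives in the shared
+//! `machinst::abi_impl` module; this file only supplies the x64-specific
+//! `ABIMachineSpec` hooks (argument-location computation, prologue/epilogue
+//! pieces, clobber save/restore, and call emission).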
-use crate::binemit::StackMap; -use crate::ir::{self, types, ArgumentExtension, StackSlot, Type}; +use crate::ir::types::*; +use crate::ir::{self, types, SourceLoc, TrapCode, Type}; +use crate::isa; use crate::isa::{x64::inst::*, CallConv}; +use crate::machinst::abi_impl::*; use crate::machinst::*; use crate::settings; use crate::{CodegenError, CodegenResult}; use alloc::boxed::Box; use alloc::vec::Vec; use args::*; -use log::trace; -use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable}; -use std::mem; +use regalloc::{RealReg, Reg, RegClass, Set, Writable}; +use smallvec::{smallvec, SmallVec}; +use std::convert::TryFrom; /// This is the limit for the size of argument and return-value areas on the /// stack. We place a reasonable limit here to avoid integer overflow issues /// with 32-bit arithmetic: for now, 128 MB. static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024; -#[derive(Clone, Debug)] -enum ABIArg { - Reg(RealReg, ir::Type, ir::ArgumentExtension), - Stack(i64, ir::Type, ir::ArgumentExtension), +/// Try to fill a Baldrdash register, returning it if it was found. +fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option { + if call_conv.extends_baldrdash() { + match ¶m.purpose { + &ir::ArgumentPurpose::VMContext => { + // This is SpiderMonkey's `WasmTlsReg`. + Some(ABIArg::Reg( + regs::r14().to_real_reg(), + types::I64, + param.extension, + )) + } + &ir::ArgumentPurpose::SignatureId => { + // This is SpiderMonkey's `WasmTableCallSigReg`. + Some(ABIArg::Reg( + regs::r10().to_real_reg(), + types::I64, + param.extension, + )) + } + _ => None, + } + } else { + None + } } -/// X64 ABI information shared between body (callee) and caller. -struct ABISig { - /// Argument locations (regs or stack slots). Stack offsets are relative to - /// SP on entry to function. - args: Vec, - /// Return-value locations. Stack offsets are relative to the return-area - /// pointer. - rets: Vec, - /// Space on stack used to store arguments. - stack_arg_space: i64, - /// Space on stack used to store return values. - stack_ret_space: i64, - /// Index in `args` of the stack-return-value-area argument. - stack_ret_arg: Option, - /// Calling convention used. - call_conv: CallConv, +/// Support for the x64 ABI from the callee side (within a function body). +pub(crate) type X64ABICallee = ABICalleeImpl; + +/// Support for the x64 ABI from the caller side (at a callsite). +pub(crate) type X64ABICaller = ABICallerImpl; + +/// Implementation of ABI primitives for x64. +pub(crate) struct X64ABIMachineSpec; + +impl ABIMachineSpec for X64ABIMachineSpec { + type I = Inst; + + fn compute_arg_locs( + call_conv: isa::CallConv, + params: &[ir::AbiParam], + args_or_rets: ArgsOrRets, + add_ret_area_ptr: bool, + ) -> CodegenResult<(Vec, i64, Option)> { + let is_baldrdash = call_conv.extends_baldrdash(); + + let mut next_gpr = 0; + let mut next_vreg = 0; + let mut next_stack: u64 = 0; + let mut ret = vec![]; + + for i in 0..params.len() { + // Process returns backward, according to the SpiderMonkey ABI (which we + // adopt internally if `is_baldrdash` is set). + let param = match (args_or_rets, is_baldrdash) { + (ArgsOrRets::Args, _) => ¶ms[i], + (ArgsOrRets::Rets, false) => ¶ms[i], + (ArgsOrRets::Rets, true) => ¶ms[params.len() - 1 - i], + }; + + // Validate "purpose". 
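+            // Only the purposes matched below are supported; any other
+            // purpose in a signature is a lowering error and panics.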
+ match ¶m.purpose { + &ir::ArgumentPurpose::VMContext + | &ir::ArgumentPurpose::Normal + | &ir::ArgumentPurpose::StackLimit + | &ir::ArgumentPurpose::SignatureId => {} + _ => panic!( + "Unsupported argument purpose {:?} in signature: {:?}", + param.purpose, params + ), + } + + let intreg = in_int_reg(param.value_type); + let vecreg = in_vec_reg(param.value_type); + debug_assert!(intreg || vecreg); + debug_assert!(!(intreg && vecreg)); + + let (next_reg, candidate) = if intreg { + let candidate = match args_or_rets { + ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr), + ArgsOrRets::Rets => get_intreg_for_retval_systemv(&call_conv, next_gpr, i), + }; + debug_assert!(candidate + .map(|r| r.get_class() == RegClass::I64) + .unwrap_or(true)); + (&mut next_gpr, candidate) + } else { + let candidate = match args_or_rets { + ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg), + ArgsOrRets::Rets => get_fltreg_for_retval_systemv(&call_conv, next_vreg, i), + }; + debug_assert!(candidate + .map(|r| r.get_class() == RegClass::V128) + .unwrap_or(true)); + (&mut next_vreg, candidate) + }; + + if let Some(param) = try_fill_baldrdash_reg(call_conv, param) { + assert!(intreg); + ret.push(param); + } else if let Some(reg) = candidate { + ret.push(ABIArg::Reg( + reg.to_real_reg(), + param.value_type, + param.extension, + )); + *next_reg += 1; + } else { + // Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte + // stack alignment happens separately after all args.) + let size = (param.value_type.bits() / 8) as u64; + let size = std::cmp::max(size, 8); + // Align. + debug_assert!(size.is_power_of_two()); + next_stack = (next_stack + size - 1) & !(size - 1); + ret.push(ABIArg::Stack( + next_stack as i64, + param.value_type, + param.extension, + )); + next_stack += size; + } + } + + if args_or_rets == ArgsOrRets::Rets && is_baldrdash { + ret.reverse(); + } + + let extra_arg = if add_ret_area_ptr { + debug_assert!(args_or_rets == ArgsOrRets::Args); + if let Some(reg) = get_intreg_for_arg_systemv(&call_conv, next_gpr) { + ret.push(ABIArg::Reg( + reg.to_real_reg(), + types::I64, + ir::ArgumentExtension::None, + )); + } else { + ret.push(ABIArg::Stack( + next_stack as i64, + types::I64, + ir::ArgumentExtension::None, + )); + next_stack += 8; + } + Some(ret.len() - 1) + } else { + None + }; + + next_stack = (next_stack + 15) & !15; + + // To avoid overflow issues, limit the arg/return size to something reasonable. + if next_stack > STACK_ARG_RET_SIZE_LIMIT { + return Err(CodegenError::ImplLimitExceeded); + } + + Ok((ret, next_stack as i64, extra_arg)) + } + + fn fp_to_arg_offset(call_conv: isa::CallConv, flags: &settings::Flags) -> i64 { + if call_conv.extends_baldrdash() { + let num_words = flags.baldrdash_prologue_words() as i64; + debug_assert!(num_words > 0, "baldrdash must set baldrdash_prologue_words"); + num_words * 8 + } else { + 16 // frame pointer + return address. 
+ } + } + + fn gen_load_stack(mem: StackAMode, into_reg: Writable, ty: Type) -> Self::I { + let (is_int, ext_mode) = match ty { + types::B1 | types::B8 | types::I8 => (true, Some(ExtMode::BQ)), + types::B16 | types::I16 => (true, Some(ExtMode::WQ)), + types::B32 | types::I32 => (true, Some(ExtMode::LQ)), + types::B64 | types::I64 | types::R64 => (true, None), + types::F32 | types::F64 => (false, None), + _ => panic!("load_stack({})", ty), + }; + + let mem = SyntheticAmode::from(mem); + + if is_int { + match ext_mode { + Some(ext_mode) => Inst::movsx_rm_r( + ext_mode, + RegMem::mem(mem), + into_reg, + /* infallible load */ None, + ), + None => Inst::mov64_m_r(mem, into_reg, None /* infallible */), + } + } else { + let sse_op = match ty { + types::F32 => SseOpcode::Movss, + types::F64 => SseOpcode::Movsd, + _ => unreachable!(), + }; + Inst::xmm_mov( + sse_op, + RegMem::mem(mem), + into_reg, + None, /* infallible */ + ) + } + } + + fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I { + let (is_int, size) = match ty { + types::B1 | types::B8 | types::I8 => (true, 1), + types::B16 | types::I16 => (true, 2), + types::B32 | types::I32 => (true, 4), + types::B64 | types::I64 | types::R64 => (true, 8), + types::F32 => (false, 4), + types::F64 => (false, 8), + _ => unimplemented!("store_stack({})", ty), + }; + + let mem = SyntheticAmode::from(mem); + + if is_int { + Inst::mov_r_m(size, from_reg, mem, /* infallible store */ None) + } else { + let sse_op = match size { + 4 => SseOpcode::Movss, + 8 => SseOpcode::Movsd, + _ => unreachable!(), + }; + Inst::xmm_mov_r_m(sse_op, from_reg, mem, /* infallible store */ None) + } + } + + fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Self::I { + Inst::gen_move(to_reg, from_reg, ty) + } + + /// Generate an integer-extend operation. + fn gen_extend( + to_reg: Writable, + from_reg: Reg, + is_signed: bool, + from_bits: u8, + to_bits: u8, + ) -> Self::I { + let ext_mode = match from_bits { + 1 | 8 => ExtMode::BQ, + 16 => ExtMode::WQ, + 32 => ExtMode::LQ, + _ => panic!("Bad extension: {} bits to {} bits", from_bits, to_bits), + }; + if is_signed { + Inst::movsx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg, None) + } else { + Inst::movzx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg, None) + } + } + + fn gen_ret() -> Self::I { + Inst::Ret + } + + fn gen_epilogue_placeholder() -> Self::I { + Inst::EpiloguePlaceholder + } + + fn gen_add_imm(into_reg: Writable, from_reg: Reg, imm: u32) -> SmallVec<[Self::I; 4]> { + let mut ret = SmallVec::new(); + if from_reg != into_reg.to_reg() { + ret.push(Inst::gen_move(into_reg, from_reg, I64)); + } + ret.push(Inst::alu_rmi_r( + true, + AluRmiROpcode::Add, + RegMemImm::imm(imm), + into_reg, + )); + ret + } + + fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallVec<[Self::I; 2]> { + smallvec![ + Inst::cmp_rmi_r(/* bytes = */ 8, RegMemImm::reg(regs::rsp()), limit_reg), + Inst::TrapIf { + // NBE == "> unsigned"; args above are reversed; this tests limit_reg > rsp. + cc: CC::NBE, + srcloc: SourceLoc::default(), + trap_code: TrapCode::StackOverflow, + }, + ] + } + + fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable, _ty: Type) -> Self::I { + let mem: SyntheticAmode = mem.into(); + Inst::lea(mem, into_reg) + } + + fn get_stacklimit_reg() -> Reg { + debug_assert!( + !is_callee_save_systemv(regs::r10().to_real_reg()) + && !is_callee_save_baldrdash(regs::r10().to_real_reg()) + ); + + // As per comment on trait definition, we must return a caller-save + // register here. 
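+        // r10 is caller-save under SysV and is not in the Baldrdash
+        // callee-save set either, as the debug_assert above checks.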
+ regs::r10() + } + + fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i32, ty: Type) -> Self::I { + assert_eq!(ty, I64); // only ever used for I64s. + let simm32 = offset as u32; + let mem = Amode::imm_reg(simm32, base); + Inst::mov64_m_r(mem, into_reg, None) + } + + fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I { + assert_eq!(ty, I64); // only ever used for I64s. + let simm32 = offset as u32; + let mem = Amode::imm_reg(simm32, base); + Inst::mov_r_m(/* bytes = */ 8, from_reg, mem, None) + } + + fn gen_sp_reg_adjust(amount: i32) -> SmallVec<[Self::I; 2]> { + let (alu_op, amount) = if amount >= 0 { + (AluRmiROpcode::Add, amount) + } else { + (AluRmiROpcode::Sub, -amount) + }; + + let amount = amount as u32; + + smallvec![Inst::alu_rmi_r( + true, + alu_op, + RegMemImm::imm(amount), + Writable::from_reg(regs::rsp()), + )] + } + + fn gen_nominal_sp_adj(offset: i32) -> Self::I { + Inst::VirtualSPOffsetAdj { + offset: offset as i64, + } + } + + fn gen_prologue_frame_setup() -> SmallVec<[Self::I; 2]> { + let r_rsp = regs::rsp(); + let r_rbp = regs::rbp(); + let w_rbp = Writable::from_reg(r_rbp); + let mut insts = SmallVec::new(); + // RSP before the call will be 0 % 16. So here, it is 8 % 16. + insts.push(Inst::push64(RegMemImm::reg(r_rbp))); + // RSP is now 0 % 16 + insts.push(Inst::mov_r_r(true, r_rsp, w_rbp)); + insts + } + + fn gen_epilogue_frame_restore() -> SmallVec<[Self::I; 2]> { + let mut insts = SmallVec::new(); + insts.push(Inst::mov_r_r( + true, + regs::rbp(), + Writable::from_reg(regs::rsp()), + )); + insts.push(Inst::pop64(Writable::from_reg(regs::rbp()))); + insts + } + + fn gen_clobber_save( + call_conv: isa::CallConv, + clobbers: &Set>, + ) -> (u64, SmallVec<[Self::I; 16]>) { + let mut insts = SmallVec::new(); + // Find all clobbered registers that are callee-save. These are only I64 + // registers (all XMM registers are caller-save) so we can compute the + // total size of the needed stack space easily. + let clobbered = get_callee_saves(&call_conv, clobbers); + let stack_size = 8 * clobbered.len() as u32; + // Align to 16 bytes. + let stack_size = (stack_size + 15) & !15; + // Adjust the stack pointer downward with one `sub rsp, IMM` + // instruction. + if stack_size > 0 { + insts.push(Inst::alu_rmi_r( + true, + AluRmiROpcode::Sub, + RegMemImm::imm(stack_size), + Writable::from_reg(regs::rsp()), + )); + } + // Store each clobbered register in order at offsets from RSP. + let mut cur_offset = 0; + for reg in &clobbered { + let r_reg = reg.to_reg(); + match r_reg.get_class() { + RegClass::I64 => { + insts.push(Inst::mov_r_m( + /* bytes = */ 8, + r_reg.to_reg(), + Amode::imm_reg(cur_offset, regs::rsp()), + None, + )); + cur_offset += 8; + } + // No XMM regs are callee-save, so we do not need to implement + // this. + _ => unimplemented!(), + } + } + + (stack_size as u64, insts) + } + + fn gen_clobber_restore( + call_conv: isa::CallConv, + clobbers: &Set>, + ) -> SmallVec<[Self::I; 16]> { + let mut insts = SmallVec::new(); + + let clobbered = get_callee_saves(&call_conv, clobbers); + let stack_size = 8 * clobbered.len() as u32; + let stack_size = (stack_size + 15) & !15; + + // Restore regs by loading from offsets of RSP. 
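+        // The same sorted clobber list was used in `gen_clobber_save`, so each
+        // register is reloaded from exactly the slot it was stored to.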
+ let mut cur_offset = 0; + for reg in &clobbered { + let rreg = reg.to_reg(); + match rreg.get_class() { + RegClass::I64 => { + insts.push(Inst::mov64_m_r( + Amode::imm_reg(cur_offset, regs::rsp()), + Writable::from_reg(rreg.to_reg()), + None, + )); + cur_offset += 8; + } + _ => unimplemented!(), + } + } + // Adjust RSP back upward. + if stack_size > 0 { + insts.push(Inst::alu_rmi_r( + true, + AluRmiROpcode::Add, + RegMemImm::imm(stack_size), + Writable::from_reg(regs::rsp()), + )); + } + + insts + } + + /// Generate a call instruction/sequence. + fn gen_call( + dest: &CallDest, + uses: Vec, + defs: Vec>, + loc: SourceLoc, + opcode: ir::Opcode, + tmp: Writable, + ) -> SmallVec<[(InstIsSafepoint, Self::I); 2]> { + let mut insts = SmallVec::new(); + match dest { + &CallDest::ExtName(ref name, RelocDistance::Near) => { + insts.push(( + InstIsSafepoint::Yes, + Inst::call_known(name.clone(), uses, defs, loc, opcode), + )); + } + &CallDest::ExtName(ref name, RelocDistance::Far) => { + insts.push(( + InstIsSafepoint::No, + Inst::LoadExtName { + dst: tmp, + name: Box::new(name.clone()), + offset: 0, + srcloc: loc, + }, + )); + insts.push(( + InstIsSafepoint::Yes, + Inst::call_unknown(RegMem::reg(tmp.to_reg()), uses, defs, loc, opcode), + )); + } + &CallDest::Reg(reg) => { + insts.push(( + InstIsSafepoint::Yes, + Inst::call_unknown(RegMem::reg(reg), uses, defs, loc, opcode), + )); + } + } + insts + } + + fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 { + // We allocate in terms of 8-byte slots. + match (rc, ty) { + (RegClass::I64, _) => 1, + (RegClass::V128, types::F32) | (RegClass::V128, types::F64) => 1, + (RegClass::V128, _) => 2, + _ => panic!("Unexpected register class!"), + } + } + + fn get_virtual_sp_offset_from_state(s: &::State) -> i64 { + s.virtual_sp_offset + } + + fn get_nominal_sp_to_fp(s: &::State) -> i64 { + s.nominal_sp_to_fp + } + + fn get_caller_saves(call_conv: isa::CallConv) -> Vec> { + let mut caller_saved = vec![ + // Systemv calling convention: + // - GPR: all except RBX, RBP, R12 to R15 (which are callee-saved). + Writable::from_reg(regs::rsi()), + Writable::from_reg(regs::rdi()), + Writable::from_reg(regs::rax()), + Writable::from_reg(regs::rcx()), + Writable::from_reg(regs::rdx()), + Writable::from_reg(regs::r8()), + Writable::from_reg(regs::r9()), + Writable::from_reg(regs::r10()), + Writable::from_reg(regs::r11()), + // - XMM: all the registers! + Writable::from_reg(regs::xmm0()), + Writable::from_reg(regs::xmm1()), + Writable::from_reg(regs::xmm2()), + Writable::from_reg(regs::xmm3()), + Writable::from_reg(regs::xmm4()), + Writable::from_reg(regs::xmm5()), + Writable::from_reg(regs::xmm6()), + Writable::from_reg(regs::xmm7()), + Writable::from_reg(regs::xmm8()), + Writable::from_reg(regs::xmm9()), + Writable::from_reg(regs::xmm10()), + Writable::from_reg(regs::xmm11()), + Writable::from_reg(regs::xmm12()), + Writable::from_reg(regs::xmm13()), + Writable::from_reg(regs::xmm14()), + Writable::from_reg(regs::xmm15()), + ]; + + if call_conv.extends_baldrdash() { + caller_saved.push(Writable::from_reg(regs::r12())); + caller_saved.push(Writable::from_reg(regs::r13())); + // Not r14; implicitly preserved in the entry. + caller_saved.push(Writable::from_reg(regs::r15())); + caller_saved.push(Writable::from_reg(regs::rbx())); + } + + caller_saved + } } -pub(crate) struct X64ABIBody { - sig: ABISig, - - /// Offsets to each stack slot. - stack_slots: Vec, - - /// Total stack size of all the stack slots. 
- stack_slots_size: usize, - - /// The register holding the return-area pointer, if needed. - ret_area_ptr: Option>, - - /// Clobbered registers, as indicated by regalloc. - clobbered: Set>, - - /// Total number of spill slots, as indicated by regalloc. - num_spill_slots: Option, - - /// Calculated while creating the prologue, and used when creating the epilogue. Amount by - /// which RSP is adjusted downwards to allocate the spill area. - frame_size_bytes: Option, - - call_conv: CallConv, - - /// The settings controlling this function's compilation. - flags: settings::Flags, +impl From for SyntheticAmode { + fn from(amode: StackAMode) -> Self { + // We enforce a 128 MB stack-frame size limit above, so these + // `expect()`s should never fail. + match amode { + StackAMode::FPOffset(off, _ty) => { + let off = i32::try_from(off) + .expect("Offset in FPOffset is greater than 2GB; should hit impl limit first"); + let simm32 = off as u32; + SyntheticAmode::Real(Amode::ImmReg { + simm32, + base: regs::rbp(), + }) + } + StackAMode::NominalSPOffset(off, _ty) => { + let off = i32::try_from(off).expect( + "Offset in NominalSPOffset is greater than 2GB; should hit impl limit first", + ); + let simm32 = off as u32; + SyntheticAmode::nominal_sp_offset(simm32) + } + StackAMode::SPOffset(off, _ty) => { + let off = i32::try_from(off) + .expect("Offset in SPOffset is greater than 2GB; should hit impl limit first"); + let simm32 = off as u32; + SyntheticAmode::Real(Amode::ImmReg { + simm32, + base: regs::rsp(), + }) + } + } + } } fn in_int_reg(ty: types::Type) -> bool { @@ -202,1042 +741,26 @@ fn is_callee_save_baldrdash(r: RealReg) -> bool { } } -fn get_callee_saves(call_conv: &CallConv, regs: Vec>) -> Vec> { - match call_conv { +fn get_callee_saves(call_conv: &CallConv, regs: &Set>) -> Vec> { + let mut regs: Vec> = match call_conv { CallConv::BaldrdashSystemV => regs - .into_iter() + .iter() + .cloned() .filter(|r| is_callee_save_baldrdash(r.to_reg())) .collect(), CallConv::BaldrdashWindows => { todo!("baldrdash windows"); } CallConv::Fast | CallConv::Cold | CallConv::SystemV => regs - .into_iter() + .iter() + .cloned() .filter(|r| is_callee_save_systemv(r.to_reg())) .collect(), CallConv::WindowsFastcall => todo!("windows fastcall"), CallConv::Probestack => todo!("probestack?"), - } -} - -impl X64ABIBody { - /// Create a new body ABI instance. - pub(crate) fn new(f: &ir::Function, flags: settings::Flags) -> CodegenResult { - let sig = ABISig::from_func_sig(&f.signature)?; - - let call_conv = f.signature.call_conv; - debug_assert!( - call_conv == CallConv::SystemV || call_conv.extends_baldrdash(), - "unsupported or unimplemented calling convention {}", - call_conv - ); - - // Compute stackslot locations and total stackslot size. - let mut stack_offset: usize = 0; - let mut stack_slots = vec![]; - for (stackslot, data) in f.stack_slots.iter() { - let off = stack_offset; - stack_offset += data.size as usize; - stack_offset = (stack_offset + 7) & !7; - debug_assert_eq!(stackslot.as_u32() as usize, stack_slots.len()); - stack_slots.push(off); - } - - Ok(Self { - sig, - stack_slots, - stack_slots_size: stack_offset, - ret_area_ptr: None, - clobbered: Set::empty(), - num_spill_slots: None, - frame_size_bytes: None, - call_conv: f.signature.call_conv.clone(), - flags, - }) - } - - /// Returns the offset from FP to the argument area, i.e., jumping over the saved FP, return - /// address, and maybe other standard elements depending on ABI (e.g. Wasm TLS reg). 
- fn fp_to_arg_offset(&self) -> i64 { - if self.call_conv.extends_baldrdash() { - let num_words = self.flags.baldrdash_prologue_words() as i64; - debug_assert!(num_words > 0, "baldrdash must set baldrdash_prologue_words"); - num_words * 8 - } else { - 16 // frame pointer + return address. - } - } -} - -impl ABIBody for X64ABIBody { - type I = Inst; - - fn temp_needed(&self) -> bool { - self.sig.stack_ret_arg.is_some() - } - - fn init(&mut self, maybe_tmp: Option>) { - if self.sig.stack_ret_arg.is_some() { - assert!(maybe_tmp.is_some()); - self.ret_area_ptr = maybe_tmp; - } - } - - fn flags(&self) -> &settings::Flags { - &self.flags - } - - fn num_args(&self) -> usize { - self.sig.args.len() - } - fn num_retvals(&self) -> usize { - self.sig.rets.len() - } - fn num_stackslots(&self) -> usize { - self.stack_slots.len() - } - - fn liveins(&self) -> Set { - let mut set: Set = Set::empty(); - for arg in &self.sig.args { - if let &ABIArg::Reg(r, ..) = arg { - set.insert(r); - } - } - set - } - - fn liveouts(&self) -> Set { - let mut set: Set = Set::empty(); - for ret in &self.sig.rets { - if let &ABIArg::Reg(r, ..) = ret { - set.insert(r); - } - } - set - } - - fn gen_copy_arg_to_reg(&self, idx: usize, to_reg: Writable) -> Inst { - match &self.sig.args[idx] { - ABIArg::Reg(from_reg, ty, _) => Inst::gen_move(to_reg, from_reg.to_reg(), *ty), - &ABIArg::Stack(off, ty, _) => { - assert!( - self.fp_to_arg_offset() + off <= u32::max_value() as i64, - "large offset nyi" - ); - let from_addr = Amode::imm_reg((self.fp_to_arg_offset() + off) as u32, regs::rbp()); - Inst::load( - ty, - from_addr, - to_reg, - ExtKind::ZeroExtend, - /* infallible load */ None, - ) - } - } - } - - fn gen_retval_area_setup(&self) -> Option { - if let Some(i) = self.sig.stack_ret_arg { - let inst = self.gen_copy_arg_to_reg(i, self.ret_area_ptr.unwrap()); - trace!( - "gen_retval_area_setup: inst {:?}; ptr reg is {:?}", - inst, - self.ret_area_ptr.unwrap().to_reg() - ); - Some(inst) - } else { - trace!("gen_retval_area_setup: not needed"); - None - } - } - - fn gen_copy_reg_to_retval(&self, idx: usize, from_reg: Writable) -> Vec { - let mut ret = Vec::new(); - match &self.sig.rets[idx] { - &ABIArg::Reg(r, ty, ext) => { - let from_bits = ty.bits() as u8; - let ext_mode = match from_bits { - 1 | 8 => Some(ExtMode::BQ), - 16 => Some(ExtMode::WQ), - 32 => Some(ExtMode::LQ), - 64 | 128 => None, - _ => unreachable!(), - }; - - let dest_reg = Writable::from_reg(r.to_reg()); - match (ext, ext_mode) { - (ArgumentExtension::Uext, Some(ext_mode)) => { - ret.push(Inst::movzx_rm_r( - ext_mode, - RegMem::reg(from_reg.to_reg()), - dest_reg, - /* infallible load */ None, - )); - } - (ArgumentExtension::Sext, Some(ext_mode)) => { - ret.push(Inst::movsx_rm_r( - ext_mode, - RegMem::reg(from_reg.to_reg()), - dest_reg, - /* infallible load */ None, - )); - } - _ => ret.push(Inst::gen_move(dest_reg, from_reg.to_reg(), ty)), - }; - } - - &ABIArg::Stack(off, ty, ext) => { - let from_bits = ty.bits() as u8; - let ext_mode = match from_bits { - 1 | 8 => Some(ExtMode::BQ), - 16 => Some(ExtMode::WQ), - 32 => Some(ExtMode::LQ), - 64 => None, - _ => unreachable!(), - }; - - // Trash the from_reg; it should be its last use. 
- match (ext, ext_mode) { - (ArgumentExtension::Uext, Some(ext_mode)) => { - ret.push(Inst::movzx_rm_r( - ext_mode, - RegMem::reg(from_reg.to_reg()), - from_reg, - /* infallible load */ None, - )); - } - (ArgumentExtension::Sext, Some(ext_mode)) => { - ret.push(Inst::movsx_rm_r( - ext_mode, - RegMem::reg(from_reg.to_reg()), - from_reg, - /* infallible load */ None, - )); - } - _ => {} - }; - - assert!( - off < u32::max_value() as i64, - "large stack return offset nyi" - ); - - let from_reg = from_reg.to_reg(); - let to_mem = Amode::imm_reg(off as u32, self.ret_area_ptr.unwrap().to_reg()); - let store = Inst::store(ty, from_reg, to_mem, /* infallible store */ None); - ret.push(store) - } - } - - ret - } - - fn gen_ret(&self) -> Inst { - Inst::ret() - } - - fn gen_epilogue_placeholder(&self) -> Inst { - Inst::epilogue_placeholder() - } - - fn set_num_spillslots(&mut self, slots: usize) { - self.num_spill_slots = Some(slots); - } - - fn set_clobbered(&mut self, clobbered: Set>) { - self.clobbered = clobbered; - } - - fn stackslot_addr(&self, slot: StackSlot, offset: u32, dst: Writable) -> Inst { - let stack_off = self.stack_slots[slot.as_u32() as usize] as i64; - let sp_off: i64 = stack_off + (offset as i64); - Inst::lea(SyntheticAmode::nominal_sp_offset(sp_off as u32), dst) - } - - fn load_stackslot( - &self, - _slot: StackSlot, - _offset: u32, - _ty: Type, - _into_reg: Writable, - ) -> Inst { - unimplemented!("load_stackslot") - } - - fn store_stackslot(&self, _slot: StackSlot, _offset: u32, _ty: Type, _from_reg: Reg) -> Inst { - unimplemented!("store_stackslot") - } - - fn load_spillslot(&self, slot: SpillSlot, ty: Type, to_reg: Writable) -> Inst { - // Offset from beginning of spillslot area, which is at nominal-SP + stackslots_size. - let islot = slot.get() as i64; - let spill_off = islot * 8; - let sp_off = self.stack_slots_size as i64 + spill_off; - debug_assert!(sp_off <= u32::max_value() as i64, "large spill offsets NYI"); - trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off); - let from_addr = SyntheticAmode::nominal_sp_offset(sp_off as u32); - Inst::load( - ty, - from_addr, - to_reg, - ExtKind::ZeroExtend, - /* infallible load */ None, - ) - } - - fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Inst { - // Offset from beginning of spillslot area, which is at nominal-SP + stackslots_size. 
- let islot = slot.get() as i64; - let spill_off = islot * 8; - let sp_off = self.stack_slots_size as i64 + spill_off; - debug_assert!(sp_off <= u32::max_value() as i64, "large spill offsets NYI"); - trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off); - let to_mem = SyntheticAmode::nominal_sp_offset(sp_off as u32); - Inst::store(ty, from_reg, to_mem, /* infallible store */ None) - } - - fn spillslots_to_stack_map(&self, slots: &[SpillSlot], state: &EmitState) -> StackMap { - assert!(state.virtual_sp_offset >= 0); - trace!( - "spillslots_to_stack_map: slots = {:?}, state = {:?}", - slots, - state - ); - let map_size = (state.virtual_sp_offset + state.nominal_sp_to_fp) as u32; - let map_words = (map_size + 7) / 8; - let mut bits = std::iter::repeat(false) - .take(map_words as usize) - .collect::>(); - - let first_spillslot_word = (self.stack_slots_size + state.virtual_sp_offset as usize) / 8; - for &slot in slots { - let slot = slot.get() as usize; - bits[first_spillslot_word + slot] = true; - } - - StackMap::from_slice(&bits[..]) - } - - fn gen_prologue(&mut self) -> Vec { - let r_rsp = regs::rsp(); - - let mut insts = vec![]; - - // Baldrdash generates its own prologue sequence, so we don't have to. - if !self.call_conv.extends_baldrdash() { - let r_rbp = regs::rbp(); - let w_rbp = Writable::from_reg(r_rbp); - - // The "traditional" pre-preamble - // RSP before the call will be 0 % 16. So here, it is 8 % 16. - insts.push(Inst::push64(RegMemImm::reg(r_rbp))); - // RSP is now 0 % 16 - insts.push(Inst::mov_r_r(true, r_rsp, w_rbp)); - } - - let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec()); - let callee_saved_used: usize = clobbered - .iter() - .map(|reg| match reg.to_reg().get_class() { - RegClass::I64 => 8, - _ => todo!(), - }) - .sum(); - - let mut total_stacksize = self.stack_slots_size + 8 * self.num_spill_slots.unwrap(); - if self.call_conv.extends_baldrdash() { - // Baldrdash expects the stack to take at least the number of words set in - // baldrdash_prologue_words; count them here. - debug_assert!( - !self.flags.enable_probestack(), - "baldrdash does not expect cranelift to emit stack probes" - ); - total_stacksize += self.flags.baldrdash_prologue_words() as usize * 8; - } - - // Now make sure the frame stack is aligned, so RSP == 0 % 16 in the function's body. - let padding = (16 - ((total_stacksize + callee_saved_used) % 16)) & 15; - let frame_size = total_stacksize + padding; - debug_assert!( - frame_size <= u32::max_value() as usize, - "gen_prologue(x86): total_stacksize >= 2G" - ); - debug_assert_eq!((frame_size + callee_saved_used) % 16, 0, "misaligned stack"); - - if !self.call_conv.extends_baldrdash() { - // Explicitly allocate the frame. - let w_rsp = Writable::from_reg(r_rsp); - if frame_size > 0 { - insts.push(Inst::alu_rmi_r( - true, - AluRmiROpcode::Sub, - RegMemImm::imm(frame_size as u32), - w_rsp, - )); - } - } - - // Save callee saved registers that we trash. Keep track of how much space we've used, so - // as to know what we have to do to get the base of the spill area 0 % 16. - let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec()); - for reg in clobbered { - let r_reg = reg.to_reg(); - match r_reg.get_class() { - RegClass::I64 => { - insts.push(Inst::push64(RegMemImm::reg(r_reg.to_reg()))); - } - _ => unimplemented!(), - } - } - - if callee_saved_used > 0 { - insts.push(Inst::VirtualSPOffsetAdj { - offset: callee_saved_used as i64, - }); - } - - // Stash this value. We'll need it for the epilogue. 
- debug_assert!(self.frame_size_bytes.is_none()); - self.frame_size_bytes = Some(frame_size); - - insts - } - - fn gen_epilogue(&self) -> Vec { - let mut insts = vec![]; - - // Undo what we did in the prologue. - - // Restore regs. - let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec()); - for wreg in clobbered.into_iter().rev() { - let rreg = wreg.to_reg(); - match rreg.get_class() { - RegClass::I64 => { - // TODO: make these conversion sequences less cumbersome. - insts.push(Inst::pop64(Writable::from_reg(rreg.to_reg()))); - } - _ => unimplemented!(), - } - } - - // No need to adjust the virtual sp offset here: - // - this would create issues when there's a return in the middle of a function, - // - and nothing in this sequence may try to access stack slots from the nominal SP. - - // Clear the spill area and the 16-alignment padding below it. - if !self.call_conv.extends_baldrdash() { - let frame_size = self.frame_size_bytes.unwrap(); - if frame_size > 0 { - let r_rsp = regs::rsp(); - let w_rsp = Writable::from_reg(r_rsp); - insts.push(Inst::alu_rmi_r( - true, - AluRmiROpcode::Add, - RegMemImm::imm(frame_size as u32), - w_rsp, - )); - } - } - - // Baldrdash generates its own preamble. - if !self.call_conv.extends_baldrdash() { - // Undo the "traditional" pre-preamble - // RSP before the call will be 0 % 16. So here, it is 8 % 16. - insts.push(Inst::pop64(Writable::from_reg(regs::rbp()))); - insts.push(Inst::ret()); - } - - insts - } - - fn frame_size(&self) -> u32 { - self.frame_size_bytes - .expect("frame size not computed before prologue generation") as u32 - } - - fn stack_args_size(&self) -> u32 { - unimplemented!("I need to be computed!") - } - - fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 { - // We allocate in terms of 8-byte slots. - match (rc, ty) { - (RegClass::I64, _) => 1, - (RegClass::V128, types::F32) | (RegClass::V128, types::F64) => 1, - (RegClass::V128, _) => 2, - _ => panic!("Unexpected register class!"), - } - } - - fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option) -> Inst { - let ty = ty_from_ty_hint_or_reg_class(from_reg.to_reg(), ty); - self.store_spillslot(to_slot, ty, from_reg.to_reg()) - } - - fn gen_reload( - &self, - to_reg: Writable, - from_slot: SpillSlot, - ty: Option, - ) -> Inst { - let ty = ty_from_ty_hint_or_reg_class(to_reg.to_reg().to_reg(), ty); - self.load_spillslot(from_slot, ty, to_reg.map(|r| r.to_reg())) - } -} - -/// Return a type either from an optional type hint, or if not, from the default -/// type associated with the given register's class. This is used to generate -/// loads/spills appropriately given the type of value loaded/stored (which may -/// be narrower than the spillslot). We usually have the type because the -/// regalloc usually provides the vreg being spilled/reloaded, and we know every -/// vreg's type. However, the regalloc *can* request a spill/reload without an -/// associated vreg when needed to satisfy a safepoint (which requires all -/// ref-typed values, even those in real registers in the original vcode, to be -/// in spillslots). -fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option) -> Type { - match (ty, r.get_class()) { - // If the type is provided - (Some(t), _) => t, - // If no type is provided, this should be a register spill for a - // safepoint, so we only expect I64 (integer) registers. 
- (None, RegClass::I64) => types::I64, - _ => panic!("Unexpected register class!"), - } -} - -fn get_caller_saves(call_conv: CallConv) -> Vec> { - let mut caller_saved = Vec::new(); - - // Systemv calling convention: - // - GPR: all except RBX, RBP, R12 to R15 (which are callee-saved). - caller_saved.push(Writable::from_reg(regs::rsi())); - caller_saved.push(Writable::from_reg(regs::rdi())); - caller_saved.push(Writable::from_reg(regs::rax())); - caller_saved.push(Writable::from_reg(regs::rcx())); - caller_saved.push(Writable::from_reg(regs::rdx())); - caller_saved.push(Writable::from_reg(regs::r8())); - caller_saved.push(Writable::from_reg(regs::r9())); - caller_saved.push(Writable::from_reg(regs::r10())); - caller_saved.push(Writable::from_reg(regs::r11())); - - if call_conv.extends_baldrdash() { - caller_saved.push(Writable::from_reg(regs::r12())); - caller_saved.push(Writable::from_reg(regs::r13())); - // Not r14; implicitly preserved in the entry. - caller_saved.push(Writable::from_reg(regs::r15())); - caller_saved.push(Writable::from_reg(regs::rbx())); - } - - // - XMM: all the registers! - caller_saved.push(Writable::from_reg(regs::xmm0())); - caller_saved.push(Writable::from_reg(regs::xmm1())); - caller_saved.push(Writable::from_reg(regs::xmm2())); - caller_saved.push(Writable::from_reg(regs::xmm3())); - caller_saved.push(Writable::from_reg(regs::xmm4())); - caller_saved.push(Writable::from_reg(regs::xmm5())); - caller_saved.push(Writable::from_reg(regs::xmm6())); - caller_saved.push(Writable::from_reg(regs::xmm7())); - caller_saved.push(Writable::from_reg(regs::xmm8())); - caller_saved.push(Writable::from_reg(regs::xmm9())); - caller_saved.push(Writable::from_reg(regs::xmm10())); - caller_saved.push(Writable::from_reg(regs::xmm11())); - caller_saved.push(Writable::from_reg(regs::xmm12())); - caller_saved.push(Writable::from_reg(regs::xmm13())); - caller_saved.push(Writable::from_reg(regs::xmm14())); - caller_saved.push(Writable::from_reg(regs::xmm15())); - - caller_saved -} - -fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec>) { - // Compute uses: all arg regs. - let mut uses = Vec::new(); - for arg in &sig.args { - match arg { - &ABIArg::Reg(reg, ..) => uses.push(reg.to_reg()), - _ => {} - } - } - - // Compute defs: all retval regs, and all caller-save (clobbered) regs. - let mut defs = get_caller_saves(sig.call_conv); - for ret in &sig.rets { - match ret { - &ABIArg::Reg(reg, ..) => defs.push(Writable::from_reg(reg.to_reg())), - _ => {} - } - } - - (uses, defs) -} - -/// Try to fill a Baldrdash register, returning it if it was found. -fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option { - if call_conv.extends_baldrdash() { - match ¶m.purpose { - &ir::ArgumentPurpose::VMContext => { - // This is SpiderMonkey's `WasmTlsReg`. - Some(ABIArg::Reg( - regs::r14().to_real_reg(), - types::I64, - param.extension, - )) - } - &ir::ArgumentPurpose::SignatureId => { - // This is SpiderMonkey's `WasmTableCallSigReg`. - Some(ABIArg::Reg( - regs::r10().to_real_reg(), - types::I64, - param.extension, - )) - } - _ => None, - } - } else { - None - } -} - -/// Are we computing information about arguments or return values? Much of the -/// handling is factored out into common routines; this enum allows us to -/// distinguish which case we're handling. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -enum ArgsOrRets { - Args, - Rets, -} - -/// Process a list of parameters or return values and allocate them to X-regs, -/// V-regs, and stack slots. 
-/// -/// Returns the list of argument locations, the stack-space used (rounded up -/// to a 16-byte-aligned boundary), and if `add_ret_area_ptr` was passed, the -/// index of the extra synthetic arg that was added. -fn compute_arg_locs( - call_conv: CallConv, - params: &[ir::AbiParam], - args_or_rets: ArgsOrRets, - add_ret_area_ptr: bool, -) -> CodegenResult<(Vec, i64, Option)> { - let is_baldrdash = call_conv.extends_baldrdash(); - - let mut next_gpr = 0; - let mut next_vreg = 0; - let mut next_stack: u64 = 0; - let mut ret = vec![]; - - for i in 0..params.len() { - // Process returns backward, according to the SpiderMonkey ABI (which we - // adopt internally if `is_baldrdash` is set). - let param = match (args_or_rets, is_baldrdash) { - (ArgsOrRets::Args, _) => ¶ms[i], - (ArgsOrRets::Rets, false) => ¶ms[i], - (ArgsOrRets::Rets, true) => ¶ms[params.len() - 1 - i], - }; - - // Validate "purpose". - match ¶m.purpose { - &ir::ArgumentPurpose::VMContext - | &ir::ArgumentPurpose::Normal - | &ir::ArgumentPurpose::StackLimit - | &ir::ArgumentPurpose::SignatureId => {} - _ => panic!( - "Unsupported argument purpose {:?} in signature: {:?}", - param.purpose, params - ), - } - - let intreg = in_int_reg(param.value_type); - let vecreg = in_vec_reg(param.value_type); - debug_assert!(intreg || vecreg); - debug_assert!(!(intreg && vecreg)); - - let (next_reg, candidate) = if intreg { - let candidate = match args_or_rets { - ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr), - ArgsOrRets::Rets => get_intreg_for_retval_systemv(&call_conv, next_gpr, i), - }; - debug_assert!(candidate - .map(|r| r.get_class() == RegClass::I64) - .unwrap_or(true)); - (&mut next_gpr, candidate) - } else { - let candidate = match args_or_rets { - ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg), - ArgsOrRets::Rets => get_fltreg_for_retval_systemv(&call_conv, next_vreg, i), - }; - debug_assert!(candidate - .map(|r| r.get_class() == RegClass::V128) - .unwrap_or(true)); - (&mut next_vreg, candidate) - }; - - if let Some(param) = try_fill_baldrdash_reg(call_conv, param) { - assert!(intreg); - ret.push(param); - } else if let Some(reg) = candidate { - ret.push(ABIArg::Reg( - reg.to_real_reg(), - param.value_type, - param.extension, - )); - *next_reg += 1; - } else { - // Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte - // stack alignment happens separately after all args.) - let size = (param.value_type.bits() / 8) as u64; - let size = std::cmp::max(size, 8); - // Align. - debug_assert!(size.is_power_of_two()); - next_stack = (next_stack + size - 1) & !(size - 1); - ret.push(ABIArg::Stack( - next_stack as i64, - param.value_type, - param.extension, - )); - next_stack += size; - } - } - - if args_or_rets == ArgsOrRets::Rets && is_baldrdash { - ret.reverse(); - } - - let extra_arg = if add_ret_area_ptr { - debug_assert!(args_or_rets == ArgsOrRets::Args); - if let Some(reg) = get_intreg_for_arg_systemv(&call_conv, next_gpr) { - ret.push(ABIArg::Reg( - reg.to_real_reg(), - types::I64, - ir::ArgumentExtension::None, - )); - } else { - ret.push(ABIArg::Stack( - next_stack as i64, - types::I64, - ir::ArgumentExtension::None, - )); - next_stack += 8; - } - Some(ret.len() - 1) - } else { - None }; - - next_stack = (next_stack + 15) & !15; - - // To avoid overflow issues, limit the arg/return size to something reasonable. 
- if next_stack > STACK_ARG_RET_SIZE_LIMIT { - return Err(CodegenError::ImplLimitExceeded); - } - - Ok((ret, next_stack as i64, extra_arg)) -} - -impl ABISig { - fn from_func_sig(sig: &ir::Signature) -> CodegenResult { - // Compute args and retvals from signature. Handle retvals first, - // because we may need to add a return-area arg to the args. - let (rets, stack_ret_space, _) = compute_arg_locs( - sig.call_conv, - &sig.returns, - ArgsOrRets::Rets, - /* extra ret-area ptr = */ false, - )?; - let need_stack_return_area = stack_ret_space > 0; - let (args, stack_arg_space, stack_ret_arg) = compute_arg_locs( - sig.call_conv, - &sig.params, - ArgsOrRets::Args, - need_stack_return_area, - )?; - - trace!( - "ABISig: sig {:?} => args = {:?} rets = {:?} arg stack = {} ret stack = {} stack_ret_arg = {:?}", - sig, - args, - rets, - stack_arg_space, - stack_ret_space, - stack_ret_arg - ); - - Ok(ABISig { - args, - rets, - stack_arg_space, - stack_ret_space, - stack_ret_arg, - call_conv: sig.call_conv, - }) - } -} - -enum CallDest { - ExtName(ir::ExternalName, RelocDistance), - Reg(Reg), -} - -fn adjust_stack>(ctx: &mut C, amount: u64, is_sub: bool) { - if amount == 0 { - return; - } - - let (alu_op, sp_adjustment) = if is_sub { - (AluRmiROpcode::Sub, amount as i64) - } else { - (AluRmiROpcode::Add, -(amount as i64)) - }; - - ctx.emit(Inst::VirtualSPOffsetAdj { - offset: sp_adjustment, - }); - - if amount <= u32::max_value() as u64 { - ctx.emit(Inst::alu_rmi_r( - true, - alu_op, - RegMemImm::imm(amount as u32), - Writable::from_reg(regs::rsp()), - )); - } else { - // TODO will require a scratch register. - unimplemented!("adjust stack with large offset"); - } -} - -/// X64 ABI object for a function call. -pub struct X64ABICall { - sig: ABISig, - uses: Vec, - defs: Vec>, - dest: CallDest, - loc: ir::SourceLoc, - opcode: ir::Opcode, -} - -impl X64ABICall { - /// Create a callsite ABI object for a call directly to the specified function. - pub fn from_func( - sig: &ir::Signature, - extname: &ir::ExternalName, - dist: RelocDistance, - loc: ir::SourceLoc, - ) -> CodegenResult { - let sig = ABISig::from_func_sig(sig)?; - let (uses, defs) = abisig_to_uses_and_defs(&sig); - Ok(Self { - sig, - uses, - defs, - dest: CallDest::ExtName(extname.clone(), dist), - loc, - opcode: ir::Opcode::Call, - }) - } - - /// Create a callsite ABI object for a call to a function pointer with the - /// given signature. 
- pub fn from_ptr( - sig: &ir::Signature, - ptr: Reg, - loc: ir::SourceLoc, - opcode: ir::Opcode, - ) -> CodegenResult { - let sig = ABISig::from_func_sig(sig)?; - let (uses, defs) = abisig_to_uses_and_defs(&sig); - Ok(Self { - sig, - uses, - defs, - dest: CallDest::Reg(ptr), - loc, - opcode, - }) - } -} - -impl ABICall for X64ABICall { - type I = Inst; - - fn num_args(&self) -> usize { - if self.sig.stack_ret_arg.is_some() { - self.sig.args.len() - 1 - } else { - self.sig.args.len() - } - } - - fn emit_stack_pre_adjust>(&self, ctx: &mut C) { - let off = self.sig.stack_arg_space + self.sig.stack_ret_space; - adjust_stack(ctx, off as u64, /* is_sub = */ true) - } - - fn emit_stack_post_adjust>(&self, ctx: &mut C) { - let off = self.sig.stack_arg_space + self.sig.stack_ret_space; - adjust_stack(ctx, off as u64, /* is_sub = */ false) - } - - fn emit_copy_reg_to_arg>( - &self, - ctx: &mut C, - idx: usize, - from_reg: Reg, - ) { - match &self.sig.args[idx] { - &ABIArg::Reg(reg, ty, ext) if ext != ir::ArgumentExtension::None && ty.bits() < 64 => { - assert_eq!(RegClass::I64, reg.get_class()); - let dest_reg = Writable::from_reg(reg.to_reg()); - let ext_mode = match ty.bits() { - 1 | 8 => ExtMode::BQ, - 16 => ExtMode::WQ, - 32 => ExtMode::LQ, - _ => unreachable!(), - }; - match ext { - ir::ArgumentExtension::Uext => { - ctx.emit(Inst::movzx_rm_r( - ext_mode, - RegMem::reg(from_reg), - dest_reg, - /* infallible load */ None, - )); - } - ir::ArgumentExtension::Sext => { - ctx.emit(Inst::movsx_rm_r( - ext_mode, - RegMem::reg(from_reg), - dest_reg, - /* infallible load */ None, - )); - } - _ => unreachable!(), - }; - } - &ABIArg::Reg(reg, ty, _) => ctx.emit(Inst::gen_move( - Writable::from_reg(reg.to_reg()), - from_reg, - ty, - )), - &ABIArg::Stack(off, ty, ext) => { - if ext != ir::ArgumentExtension::None && ty.bits() < 64 { - assert_eq!(RegClass::I64, from_reg.get_class()); - let dest_reg = Writable::from_reg(from_reg); - let ext_mode = match ty.bits() { - 1 | 8 => ExtMode::BQ, - 16 => ExtMode::WQ, - 32 => ExtMode::LQ, - _ => unreachable!(), - }; - // Extend in place in the source register. Our convention is to - // treat high bits as undefined for values in registers, so this - // is safe, even for an argument that is nominally read-only. 
- match ext { - ir::ArgumentExtension::Uext => { - ctx.emit(Inst::movzx_rm_r( - ext_mode, - RegMem::reg(from_reg), - dest_reg, - /* infallible load */ None, - )); - } - ir::ArgumentExtension::Sext => { - ctx.emit(Inst::movsx_rm_r( - ext_mode, - RegMem::reg(from_reg), - dest_reg, - /* infallible load */ None, - )); - } - _ => unreachable!(), - }; - } - - debug_assert!(off <= u32::max_value() as i64); - debug_assert!(off >= 0); - let to_mem = Amode::imm_reg(off as u32, regs::rsp()); - let store = Inst::store(ty, from_reg, to_mem, /* infallible store */ None); - ctx.emit(store) - } - } - } - - fn emit_copy_retval_to_reg>( - &self, - ctx: &mut C, - idx: usize, - to_reg: Writable, - ) { - match &self.sig.rets[idx] { - &ABIArg::Reg(reg, ty, _) => ctx.emit(Inst::gen_move(to_reg, reg.to_reg(), ty)), - &ABIArg::Stack(off, ty, _) => { - let ret_area_base = self.sig.stack_arg_space; - let sp_offset = off + ret_area_base; - // TODO handle offsets bigger than u32::max - debug_assert!(sp_offset >= 0); - debug_assert!(sp_offset <= u32::max_value() as i64); - let from_addr = Amode::imm_reg(sp_offset as u32, regs::rsp()); - let load = Inst::load( - ty, - from_addr, - to_reg, - ExtKind::ZeroExtend, - /* infallible load */ None, - ); - ctx.emit(load); - } - } - } - - fn emit_call>(&mut self, ctx: &mut C) { - let (uses, defs) = ( - mem::replace(&mut self.uses, Default::default()), - mem::replace(&mut self.defs, Default::default()), - ); - - if let Some(i) = self.sig.stack_ret_arg { - let dst = ctx.alloc_tmp(RegClass::I64, types::I64); - let ret_area_base = self.sig.stack_arg_space; - debug_assert!( - ret_area_base <= u32::max_value() as i64, - "large offset for ret area NYI" - ); - ctx.emit(Inst::lea( - Amode::imm_reg(ret_area_base as u32, regs::rsp()), - dst, - )); - self.emit_copy_reg_to_arg(ctx, i, dst.to_reg()); - } - - match &self.dest { - &CallDest::ExtName(ref name, RelocDistance::Near) => ctx.emit_safepoint( - Inst::call_known(name.clone(), uses, defs, self.loc, self.opcode), - ), - &CallDest::ExtName(ref name, RelocDistance::Far) => { - let tmp = ctx.alloc_tmp(RegClass::I64, types::I64); - ctx.emit(Inst::LoadExtName { - dst: tmp, - name: Box::new(name.clone()), - offset: 0, - srcloc: self.loc, - }); - ctx.emit_safepoint(Inst::call_unknown( - RegMem::reg(tmp.to_reg()), - uses, - defs, - self.loc, - self.opcode, - )); - } - &CallDest::Reg(reg) => ctx.emit_safepoint(Inst::call_unknown( - RegMem::reg(reg), - uses, - defs, - self.loc, - self.opcode, - )), - } - } + // Sort registers for deterministic code output. We can do an unstable sort because the + // registers will be unique (there are no dups). 
+ regs.sort_unstable_by_key(|r| r.to_reg().get_index()); + regs } diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 7c3a83e98f..32dadd6210 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -2498,7 +2498,7 @@ impl MachInstEmit for Inst { } impl MachInstEmitState for EmitState { - fn new(abi: &dyn ABIBody) -> Self { + fn new(abi: &dyn ABICallee) -> Self { EmitState { virtual_sp_offset: 0, nominal_sp_to_fp: abi.frame_size() as i64, diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 3e455a9688..c13057090a 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -396,7 +396,7 @@ fn emit_vm_call>( let sig = make_libcall_sig(ctx, insn, call_conv, types::I64); let loc = ctx.srcloc(insn); - let mut abi = X64ABICall::from_func(&sig, &extname, dist, loc)?; + let mut abi = X64ABICaller::from_func(&sig, &extname, dist, loc)?; abi.emit_stack_pre_adjust(ctx); @@ -1277,7 +1277,7 @@ fn lower_insn_to_regs>( assert_eq!(inputs.len(), sig.params.len()); assert_eq!(outputs.len(), sig.returns.len()); ( - X64ABICall::from_func(sig, &extname, dist, loc)?, + X64ABICaller::from_func(sig, &extname, dist, loc)?, &inputs[..], ) } @@ -1287,7 +1287,7 @@ fn lower_insn_to_regs>( let sig = ctx.call_sig(insn).unwrap(); assert_eq!(inputs.len() - 1, sig.params.len()); assert_eq!(outputs.len(), sig.returns.len()); - (X64ABICall::from_ptr(sig, ptr, loc, op)?, &inputs[1..]) + (X64ABICaller::from_ptr(sig, ptr, loc, op)?, &inputs[1..]) } _ => unreachable!(), diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs index c54cfe6ad6..5dfc078a75 100644 --- a/cranelift/codegen/src/isa/x64/mod.rs +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -41,7 +41,7 @@ impl X64Backend { fn compile_vcode(&self, func: &Function, flags: Flags) -> CodegenResult> { // This performs lowering to VCode, register-allocates the code, computes // block layout and finalizes branches. The result is ready for binary emission. - let abi = Box::new(abi::X64ABIBody::new(&func, flags)?); + let abi = Box::new(abi::X64ABICallee::new(&func, flags)?); compile::compile::(&func, self, abi) } } diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index 7f44941959..e7c79a4de7 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -9,7 +9,7 @@ use regalloc::{Reg, Set, SpillSlot, Writable}; /// Trait implemented by an object that tracks ABI-related state (e.g., stack /// layout) and can generate code while emitting the *body* of a function. -pub trait ABIBody { +pub trait ABICallee { /// The instruction type for the ISA associated with this ABI. type I: VCodeInst; @@ -17,7 +17,7 @@ pub trait ABIBody { /// as the `maybe_tmp` arg if so. fn temp_needed(&self) -> bool; - /// Initialize. This is called after the ABIBody is constructed because it + /// Initialize. This is called after the ABICallee is constructed because it /// may be provided with a temp vreg, which can only be allocated once the /// lowering context exists. fn init(&mut self, maybe_tmp: Option>); @@ -155,14 +155,14 @@ pub trait ABIBody { /// callsite. It will usually be computed from the called function's /// signature. 
/// -/// Unlike `ABIBody` above, methods on this trait are not invoked directly +/// Unlike `ABICallee` above, methods on this trait are not invoked directly /// by the machine-independent code. Rather, the machine-specific lowering -/// code will typically create an `ABICall` when creating machine instructions +/// code will typically create an `ABICaller` when creating machine instructions /// for an IR call instruction inside `lower()`, directly emit the arg and /// and retval copies, and attach the register use/def info to the call. /// /// This trait is thus provided for convenience to the backends. -pub trait ABICall { +pub trait ABICaller { /// The instruction type for the ISA associated with this ABI. type I: VCodeInst; @@ -203,6 +203,6 @@ pub trait ABICall { /// sense.) /// /// This function should only be called once, as it is allowed to re-use - /// parts of the ABICall object in emitting instructions. + /// parts of the ABICaller object in emitting instructions. fn emit_call>(&mut self, ctx: &mut C); } diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs index b90bc7916b..c332bfb1c8 100644 --- a/cranelift/codegen/src/machinst/abi_impl.rs +++ b/cranelift/codegen/src/machinst/abi_impl.rs @@ -119,6 +119,7 @@ use crate::{ir, isa}; use alloc::vec::Vec; use log::{debug, trace}; use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable}; +use std::convert::TryFrom; use std::marker::PhantomData; use std::mem; @@ -142,6 +143,16 @@ pub enum ArgsOrRets { Rets, } +/// Is an instruction returned by an ABI machine-specific backend a safepoint, +/// or not? +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum InstIsSafepoint { + /// The instruction is a safepoint. + Yes, + /// The instruction is not a safepoint. + No, +} + /// Abstract location for a machine-specific ABI impl to translate into the /// appropriate addressing mode. #[derive(Clone, Copy, Debug)] @@ -160,7 +171,7 @@ pub enum StackAMode { /// Trait implemented by machine-specific backend to provide information about /// register assignments and to allow generating the specific instructions for /// stack loads/saves, prologues/epilogues, etc. -pub trait ABIMachineImpl { +pub trait ABIMachineSpec { /// The instruction type. type I: VCodeInst; @@ -207,13 +218,15 @@ pub trait ABIMachineImpl { fn gen_epilogue_placeholder() -> Self::I; /// Generate an add-with-immediate. Note that even if this uses a scratch - /// register, the sequence must still be correct if the given source or dest - /// is the register returned by `get_fixed_tmp_reg()`; hence, for machines - /// that may need a scratch register to synthesize an arbitrary constant, - /// the machine backend should reserve *another* fixed temp register for - /// this purpose. (E.g., on AArch64, x16 is the ordinary fixed tmp, and x17 - /// is the secondary fixed tmp used to implement this.) - fn gen_add_imm(into_reg: Writable, from_reg: Reg, imm: u64) -> SmallVec<[Self::I; 4]>; + /// register, it must satisfy two requirements: + /// + /// - The add-imm sequence must only clobber caller-save registers, because + /// it will be placed in the prologue before the clobbered callee-save + /// registers are saved. + /// + /// - The add-imm sequence must work correctly when `from_reg` and/or + /// `into_reg` are the register returned by `get_stacklimit_reg()`. 
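The two requirements above are easiest to see against a concrete instance. Below is a hedged, x64-flavored sketch; the constructors it uses (`Inst::gen_move`, `Inst::alu_rmi_r`, `RegMemImm::imm`, `AluRmiROpcode`) are assumed from the new x64 backend and are not shown in this hunk, and the sketch assumes `imm` fits in a sign-extended 32-bit immediate (a real implementation would otherwise first load the constant into a caller-save scratch distinct from `get_stacklimit_reg()`).

    // Sketch only: add-with-immediate on x64 needs no extra scratch register, so
    // it clobbers nothing but `into_reg` (a caller-save choice is up to the
    // caller) and still works when `from_reg`/`into_reg` is the stack-limit temp.
    fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallVec<[Inst; 4]> {
        let mut insts = SmallVec::new();
        insts.push(Inst::gen_move(into_reg, from_reg, types::I64));
        insts.push(Inst::alu_rmi_r(
            /* is_64 = */ true,
            AluRmiROpcode::Add,
            RegMemImm::imm(imm),
            into_reg,
        ));
        insts
    }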
+ fn gen_add_imm(into_reg: Writable, from_reg: Reg, imm: u32) -> SmallVec<[Self::I; 4]>; /// Generate a sequence that traps with a `TrapCode::StackOverflow` code if /// the stack pointer is less than the given limit register (assuming the @@ -224,21 +237,30 @@ pub trait ABIMachineImpl { /// SP-based offset). fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable, ty: Type) -> Self::I; - /// Get a fixed (not used by regalloc) temp. This is needed for certain - /// sequences generated after the register allocator has already run. - fn get_fixed_tmp_reg() -> Reg; + /// Get a fixed register to use to compute a stack limit. This is needed for + /// certain sequences generated after the register allocator has already + /// run. This must satisfy two requirements: + /// + /// - It must be a caller-save register, because it will be clobbered in the + /// prologue before the clobbered callee-save registers are saved. + /// + /// - It must be safe to pass as an argument and/or destination to + /// `gen_add_imm()`. This is relevant when an addition with a large + /// immediate needs its own temporary; it cannot use the same fixed + /// temporary as this one. + fn get_stacklimit_reg() -> Reg; /// Generate a store to the given [base+offset] address. - fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i64, ty: Type) -> Self::I; + fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i32, ty: Type) -> Self::I; /// Generate a load from the given [base+offset] address. - fn gen_store_base_offset(base: Reg, offset: i64, from_reg: Reg, ty: Type) -> Self::I; + fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I; /// Adjust the stack pointer up or down. - fn gen_sp_reg_adjust(amount: i64) -> SmallVec<[Self::I; 2]>; + fn gen_sp_reg_adjust(amount: i32) -> SmallVec<[Self::I; 2]>; /// Generate a meta-instruction that adjusts the nominal SP offset. - fn gen_nominal_sp_adj(amount: i64) -> Self::I; + fn gen_nominal_sp_adj(amount: i32) -> Self::I; /// Generate the usual frame-setup sequence for this architecture: e.g., /// `push rbp / mov rbp, rsp` on x86-64, or `stp fp, lr, [sp, #-16]!` on @@ -272,18 +294,20 @@ pub trait ABIMachineImpl { clobbers: &Set>, ) -> SmallVec<[Self::I; 16]>; - /// Generate a call instruction/sequence. + /// Generate a call instruction/sequence. This method is provided one + /// temporary register to use to synthesize the called address, if needed. fn gen_call( dest: &CallDest, uses: Vec, defs: Vec>, loc: SourceLoc, opcode: ir::Opcode, - ) -> SmallVec<[(/* is_safepoint = */ bool, Self::I); 2]>; + tmp: Writable, + ) -> SmallVec<[(InstIsSafepoint, Self::I); 2]>; /// Get the number of spillslots required for the given register-class and /// type. - fn get_spillslot_size(rc: RegClass, ty: Type) -> u32; + fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32; /// Get the current virtual-SP offset from an instruction-emission state. fn get_virtual_sp_offset_from_state(s: &::State) -> i64; @@ -314,7 +338,7 @@ struct ABISig { } impl ABISig { - fn from_func_sig(sig: &ir::Signature) -> CodegenResult { + fn from_func_sig(sig: &ir::Signature) -> CodegenResult { // Compute args and retvals from signature. Handle retvals first, // because we may need to add a return-area arg to the args. let (rets, stack_ret_space, _) = M::compute_arg_locs( @@ -353,7 +377,7 @@ impl ABISig { } /// ABI object for a function body. -pub struct ABIBodyImpl { +pub struct ABICalleeImpl { /// Signature: arg and retval regs. 
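The `tmp` parameter added to `gen_call` above replaces the fixed temp that the far-call path previously relied on. As a hedged sketch of how the x64 side might use it, mirroring the caller-side x64 code removed earlier in this patch (the actual `isa/x64/abi.rs` implementation may differ in detail):

    fn gen_call(
        dest: &CallDest,
        uses: Vec<Reg>,
        defs: Vec<Writable<Reg>>,
        loc: SourceLoc,
        opcode: ir::Opcode,
        tmp: Writable<Reg>,
    ) -> SmallVec<[(InstIsSafepoint, Inst); 2]> {
        let mut insts = SmallVec::new();
        match dest {
            // A near external name can be called directly with a relocation.
            &CallDest::ExtName(ref name, RelocDistance::Near) => insts.push((
                InstIsSafepoint::Yes,
                Inst::call_known(name.clone(), uses, defs, loc, opcode),
            )),
            // A far external name is first materialized into the provided temp,
            // then called indirectly; only the call itself is a safepoint.
            &CallDest::ExtName(ref name, RelocDistance::Far) => {
                insts.push((
                    InstIsSafepoint::No,
                    Inst::LoadExtName {
                        dst: tmp,
                        name: Box::new(name.clone()),
                        offset: 0,
                        srcloc: loc,
                    },
                ));
                insts.push((
                    InstIsSafepoint::Yes,
                    Inst::call_unknown(RegMem::reg(tmp.to_reg()), uses, defs, loc, opcode),
                ));
            }
            // A register destination is simply an indirect call.
            &CallDest::Reg(reg) => insts.push((
                InstIsSafepoint::Yes,
                Inst::call_unknown(RegMem::reg(reg), uses, defs, loc, opcode),
            )),
        }
        insts
    }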
sig: ABISig, /// Offsets to each stackslot. @@ -405,7 +429,7 @@ fn get_special_purpose_param_register( } } -impl ABIBodyImpl { +impl ABICalleeImpl { /// Create a new body ABI instance. pub fn new(f: &ir::Function, flags: settings::Flags) -> CodegenResult { debug!("ABI: func signature {:?}", f.signature); @@ -506,8 +530,7 @@ impl ABIBodyImpl { // `scratch`. If our stack size doesn't fit into an immediate this // means we need a second scratch register for loading the stack size // into a register. - let scratch = Writable::from_reg(M::get_fixed_tmp_reg()); - let stack_size = u64::from(stack_size); + let scratch = Writable::from_reg(M::get_stacklimit_reg()); insts.extend(M::gen_add_imm(scratch, stack_limit, stack_size).into_iter()); insts.extend(M::gen_stack_lower_bound_trap(scratch.to_reg())); } @@ -532,7 +555,7 @@ impl ABIBodyImpl { /// temporary register to store values in if necessary. Currently after we write /// to this register there's guaranteed to be no spilled values between where /// it's used, because we're not participating in register allocation anyway! -fn gen_stack_limit( +fn gen_stack_limit( f: &ir::Function, abi: &ABISig, gv: ir::GlobalValue, @@ -542,7 +565,7 @@ fn gen_stack_limit( return (reg, insts); } -fn generate_gv( +fn generate_gv( f: &ir::Function, abi: &ABISig, gv: ir::GlobalValue, @@ -563,7 +586,7 @@ fn generate_gv( readonly: _, } => { let base = generate_gv::(f, abi, base, insts); - let into_reg = Writable::from_reg(M::get_fixed_tmp_reg()); + let into_reg = Writable::from_reg(M::get_stacklimit_reg()); insts.push(M::gen_load_base_offset(into_reg, base, offset.into(), I64)); return into_reg.to_reg(); } @@ -591,7 +614,7 @@ fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option) -> Type { } } -impl ABIBody for ABIBodyImpl { +impl ABICallee for ABICalleeImpl { type I = M::I; fn temp_needed(&self) -> bool { @@ -676,6 +699,11 @@ impl ABIBody for ABIBodyImpl { } &ABIArg::Stack(off, mut ty, ext) => { let from_bits = ty_bits(ty) as u8; + // A machine ABI implementation should ensure that stack frames + // have "reasonable" size. All current ABIs for machinst + // backends (aarch64 and x64) enforce a 128MB limit. + let off = i32::try_from(off) + .expect("Argument stack offset greater than 2GB; should hit impl limit first"); // Trash the from_reg; it should be its last use. match (ext, from_bits) { (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n) if n < 64 => { @@ -864,7 +892,7 @@ impl ABIBody for ABIBodyImpl { if total_sp_adjust > 0 { // sub sp, sp, #total_stacksize - let adj = total_sp_adjust as i64; + let adj = total_sp_adjust as i32; insts.extend(M::gen_sp_reg_adjust(-adj)); } @@ -873,7 +901,7 @@ impl ABIBody for ABIBodyImpl { insts.extend(clobber_insts); if clobber_size > 0 { - insts.push(M::gen_nominal_sp_adj(clobber_size as i64)); + insts.push(M::gen_nominal_sp_adj(clobber_size as i32)); } self.total_frame_size = Some(total_stacksize); @@ -911,7 +939,7 @@ impl ABIBody for ABIBodyImpl { } fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 { - M::get_spillslot_size(rc, ty) + M::get_number_of_spillslots_for_value(rc, ty) } fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option) -> Self::I { @@ -930,7 +958,7 @@ impl ABIBody for ABIBodyImpl { } } -fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec>) { +fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec>) { // Compute uses: all arg regs. 
let mut uses = Vec::new(); for arg in &sig.args { @@ -953,7 +981,7 @@ fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec { +pub struct ABICallerImpl { /// The called function's signature. sig: ABISig, /// All uses for the callsite, i.e., function args. @@ -979,17 +1007,17 @@ pub enum CallDest { Reg(Reg), } -impl ABICallImpl { +impl ABICallerImpl { /// Create a callsite ABI object for a call directly to the specified function. pub fn from_func( sig: &ir::Signature, extname: &ir::ExternalName, dist: RelocDistance, loc: ir::SourceLoc, - ) -> CodegenResult> { + ) -> CodegenResult> { let sig = ABISig::from_func_sig::(sig)?; let (uses, defs) = abisig_to_uses_and_defs::(&sig); - Ok(ABICallImpl { + Ok(ABICallerImpl { sig, uses, defs, @@ -1007,10 +1035,10 @@ impl ABICallImpl { ptr: Reg, loc: ir::SourceLoc, opcode: ir::Opcode, - ) -> CodegenResult> { + ) -> CodegenResult> { let sig = ABISig::from_func_sig::(sig)?; let (uses, defs) = abisig_to_uses_and_defs::(&sig); - Ok(ABICallImpl { + Ok(ABICallerImpl { sig, uses, defs, @@ -1022,15 +1050,14 @@ impl ABICallImpl { } } -fn adjust_stack_and_nominal_sp>( +fn adjust_stack_and_nominal_sp>( ctx: &mut C, - off: u64, + off: i32, is_sub: bool, ) { if off == 0 { return; } - let off = off as i64; let amt = if is_sub { -off } else { off }; for inst in M::gen_sp_reg_adjust(amt) { ctx.emit(inst); @@ -1038,7 +1065,7 @@ fn adjust_stack_and_nominal_sp>( ctx.emit(M::gen_nominal_sp_adj(-amt)); } -impl ABICall for ABICallImpl { +impl ABICaller for ABICallerImpl { type I = M::I; fn num_args(&self) -> usize { @@ -1051,12 +1078,12 @@ impl ABICall for ABICallImpl { fn emit_stack_pre_adjust>(&self, ctx: &mut C) { let off = self.sig.stack_arg_space + self.sig.stack_ret_space; - adjust_stack_and_nominal_sp::(ctx, off as u64, /* is_sub = */ true) + adjust_stack_and_nominal_sp::(ctx, off as i32, /* is_sub = */ true) } fn emit_stack_post_adjust>(&self, ctx: &mut C) { let off = self.sig.stack_arg_space + self.sig.stack_ret_space; - adjust_stack_and_nominal_sp::(ctx, off as u64, /* is_sub = */ false) + adjust_stack_and_nominal_sp::(ctx, off as i32, /* is_sub = */ false) } fn emit_copy_reg_to_arg>( @@ -1152,13 +1179,13 @@ impl ABICall for ABICallImpl { )); self.emit_copy_reg_to_arg(ctx, i, rd.to_reg()); } + let tmp = ctx.alloc_tmp(RegClass::I64, I64); for (is_safepoint, inst) in - M::gen_call(&self.dest, uses, defs, self.loc, self.opcode).into_iter() + M::gen_call(&self.dest, uses, defs, self.loc, self.opcode, tmp).into_iter() { - if is_safepoint { - ctx.emit_safepoint(inst); - } else { - ctx.emit(inst); + match is_safepoint { + InstIsSafepoint::Yes => ctx.emit_safepoint(inst), + InstIsSafepoint::No => ctx.emit(inst), } } } diff --git a/cranelift/codegen/src/machinst/compile.rs b/cranelift/codegen/src/machinst/compile.rs index 7d2d896b25..67db2dbafa 100644 --- a/cranelift/codegen/src/machinst/compile.rs +++ b/cranelift/codegen/src/machinst/compile.rs @@ -13,7 +13,7 @@ use regalloc::{allocate_registers_with_opts, Algorithm, Options}; pub fn compile( f: &Function, b: &B, - abi: Box>, + abi: Box>, ) -> CodegenResult> where B::MInst: ShowWithRRU, diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index 9ec313916e..b3765e409f 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -13,7 +13,7 @@ use crate::ir::{ ValueDef, }; use crate::machinst::{ - ABIBody, BlockIndex, BlockLoweringOrder, LoweredBlock, MachLabel, VCode, VCodeBuilder, + ABICallee, BlockIndex, BlockLoweringOrder, LoweredBlock, 
MachLabel, VCode, VCodeBuilder, VCodeInst, }; use crate::CodegenResult; @@ -61,8 +61,8 @@ pub trait LowerCtx { // Function-level queries: - /// Get the `ABIBody`. - fn abi(&mut self) -> &dyn ABIBody; + /// Get the `ABICallee`. + fn abi(&mut self) -> &dyn ABICallee; /// Get the (virtual) register that receives the return value. A return /// instruction should lower into a sequence that fills this register. (Why /// not allow the backend to specify its own result register for the return? @@ -312,7 +312,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> { /// Prepare a new lowering context for the given IR function. pub fn new( f: &'func Function, - abi: Box>, + abi: Box>, block_order: BlockLoweringOrder, ) -> CodegenResult> { let mut vcode = VCodeBuilder::new(abi, block_order); @@ -844,7 +844,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> { impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { type I = I; - fn abi(&mut self) -> &dyn ABIBody { + fn abi(&mut self) -> &dyn ABICallee { self.vcode.abi() } diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index 915764436e..72f351aa88 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -282,7 +282,7 @@ pub trait MachInstEmit: MachInst { /// emitting a function body. pub trait MachInstEmitState: Default + Clone + Debug { /// Create a new emission state given the ABI object. - fn new(abi: &dyn ABIBody) -> Self; + fn new(abi: &dyn ABICallee) -> Self; /// Update the emission state before emitting an instruction that is a /// safepoint. fn pre_safepoint(&mut self, _stack_map: StackMap) {} diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 9e8b1b21e0..6733e89da4 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -86,7 +86,7 @@ pub struct VCode { block_order: BlockLoweringOrder, /// ABI object. - abi: Box>, + abi: Box>, /// Safepoint instruction indices. Filled in post-regalloc. (Prior to /// regalloc, the safepoint instructions are listed in the separate @@ -132,7 +132,7 @@ pub struct VCodeBuilder { impl VCodeBuilder { /// Create a new VCodeBuilder. - pub fn new(abi: Box>, block_order: BlockLoweringOrder) -> VCodeBuilder { + pub fn new(abi: Box>, block_order: BlockLoweringOrder) -> VCodeBuilder { let reftype_class = I::ref_type_regclass(abi.flags()); let vcode = VCode::new(abi, block_order); let stack_map_info = StackmapRequestInfo { @@ -151,7 +151,7 @@ impl VCodeBuilder { } /// Access the ABI object. - pub fn abi(&mut self) -> &mut dyn ABIBody { + pub fn abi(&mut self) -> &mut dyn ABICallee { &mut *self.vcode.abi } @@ -263,7 +263,7 @@ fn is_reftype(ty: Type) -> bool { impl VCode { /// New empty VCode. - fn new(abi: Box>, block_order: BlockLoweringOrder) -> VCode { + fn new(abi: Box>, block_order: BlockLoweringOrder) -> VCode { VCode { liveins: abi.liveins(), liveouts: abi.liveouts(), diff --git a/cranelift/filetests/filetests/vcode/aarch64/call.clif b/cranelift/filetests/filetests/vcode/aarch64/call.clif index cded47dc5b..ad14eca992 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/call.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/call.clif @@ -11,8 +11,8 @@ block0(v0: i64): ; check: stp fp, lr, [sp, #-16]! 
; nextln: mov fp, sp -; nextln: ldr x16, 8 ; b 12 ; data -; nextln: blr x16 +; nextln: ldr x1, 8 ; b 12 ; data +; nextln: blr x1 ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret @@ -28,8 +28,8 @@ block0(v0: i32): ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp ; nextln: mov w0, w0 -; nextln: ldr x16, 8 ; b 12 ; data -; nextln: blr x16 +; nextln: ldr x1, 8 ; b 12 ; data +; nextln: blr x1 ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret @@ -57,8 +57,8 @@ block0(v0: i32): ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp ; nextln: sxtw x0, w0 -; nextln: ldr x16, 8 ; b 12 ; data -; nextln: blr x16 +; nextln: ldr x1, 8 ; b 12 ; data +; nextln: blr x1 ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret @@ -99,8 +99,8 @@ block0(v0: i8): ; nextln: movz x7, #42 ; nextln: sxtb x8, w8 ; nextln: stur x8, [sp] -; nextln: ldr x16, 8 ; b 12 ; data -; nextln: blr x16 +; nextln: ldr x8, 8 ; b 12 ; data +; nextln: blr x8 ; nextln: add sp, sp, #16 ; nextln: virtual_sp_offset_adjust -16 ; nextln: mov sp, fp diff --git a/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif b/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif index 2458516cfc..97234a7da0 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif @@ -83,12 +83,12 @@ block3(v7: r64, v8: r64): ; nextln: mov x19, x0 ; nextln: mov x20, x1 ; nextln: mov x0, x19 -; nextln: ldr x16, 8 ; b 12 ; data +; nextln: ldr x1, 8 ; b 12 ; data ; nextln: stur x0, [sp, #24] ; nextln: stur x19, [sp, #32] ; nextln: stur x20, [sp, #40] ; nextln: (safepoint: slots [S0, S1, S2] -; nextln: blr x16 +; nextln: blr x1 ; nextln: ldur x19, [sp, #32] ; nextln: ldur x20, [sp, #40] ; nextln: add x1, sp, #16 diff --git a/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif b/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif index f4f8bd78bf..98c49767d1 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif @@ -44,8 +44,8 @@ block0(v0: i64): ; nextln: mov fp, sp ; nextln: subs xzr, sp, x0 ; nextln: b.hs 8 ; udf -; nextln: ldr x16 -; nextln: blr x16 +; nextln: ldr x0 +; nextln: blr x0 ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret @@ -67,8 +67,8 @@ block0(v0: i64): ; nextln: ldur x16, [x16, #4] ; nextln: subs xzr, sp, x16 ; nextln: b.hs 8 ; udf -; nextln: ldr x16 -; nextln: blr x16 +; nextln: ldr x0 +; nextln: blr x0 ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret