diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 06fb9c5793..5f918dfe59 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -1,145 +1,28 @@ -//! Implementation of the standard AArch64 ABI. -//! -//! We implement the standard AArch64 ABI, as documented by ARM. This ABI -//! specifies how arguments are passed (in registers or on the stack, as -//! appropriate), which registers are caller- and callee-saved, and how a -//! particular part of the stack frame (the FP/LR pair) must be linked through -//! the active stack frames. -//! -//! Note, however, that the exact stack layout is up to us. We settled on the -//! below design based on several requirements. In particular, we need to be -//! able to generate instructions (or instruction sequences) to access -//! arguments, stack slots, and spill slots before we know how many spill slots -//! or clobber-saves there will be, because of our pass structure. We also -//! prefer positive offsets to negative offsets because of an asymmetry in -//! AArch64 addressing modes (positive offsets have a larger possible range -//! without a long-form sequence to synthesize an arbitrary offset). Finally, it -//! is not allowed to access memory below the current SP value. -//! -//! As a result, we keep the FP/LR pair just below stack args so that we can -//! access these args at known offsets from FP, and we access on-stack storage -//! using positive offsets from SP. In order to allow codegen for the latter -//! before knowing how many clobber-saves we have, and also allow it while SP is -//! being adjusted to set up a call, we implement a "nominal SP" tracking -//! feature by which a fixup (distance between actual SP and a "nominal" SP) is -//! known at each instruction. See the documentation for -//! `MemArg::NominalSPOffset` for more on this. -//! -//! The stack looks like: -//! -//! ```plain -//! (high address) -//! -//! +---------------------------+ -//! | ... | -//! | stack args | -//! | (accessed via FP) | -//! +---------------------------+ -//! SP at function entry -----> | LR (pushed by prologue) | -//! +---------------------------+ -//! FP after prologue --------> | FP (pushed by prologue) | -//! +---------------------------+ -//! | ... | -//! | spill slots | -//! | (accessed via nominal-SP) | -//! | ... | -//! | stack slots | -//! | (accessed via nominal-SP) | -//! nominal SP ---------------> | (alloc'd by prologue) | -//! +---------------------------+ -//! | ... | -//! | clobbered callee-saves | -//! SP at end of prologue ----> | (pushed by prologue) | -//! +---------------------------+ -//! | ... | -//! | args for call | -//! SP before making a call --> | (pushed at callsite) | -//! +---------------------------+ -//! -//! (low address) -//! ``` -//! -//! # Multi-value Returns -//! -//! Note that we support multi-value returns by adopting the SpiderMonkey Wasm -//! ABI internally. Because we do not support linking with externally-compiled -//! multi-value-returning functions (yet), this choice is arbitrary and we are -//! free to make it as we please. Wasmtime generates trampolines to enter -//! toplevel multi-value-returning functions, so this does not concern the -//! Wasmtime embedding. -//! -//! For details of the multi-value return ABI, see: -//! -//! https://searchfox.org/mozilla-central/rev/bc3600def806859c31b2c7ac06e3d69271052a89/js/src/wasm/WasmStubs.h#134 -//! -//! In brief: -//! - Return values are processed in *reverse* order. -//! 
- The first return value in this order (so the last return) goes into the -//! ordinary return register, X0. -//! - Any further returns go in a struct-return area, allocated upwards (in -//! address order) during the reverse traversal. -//! - This struct-return area is provided by the caller, and a pointer to its -//! start is passed as an invisible last (extra) argument. Normally the caller -//! will allocate this area on the stack. When we generate calls, we place it -//! just above the on-stack argument area. -//! - So, for example, a function returning 4 i64's (v0, v1, v2, v3), with no -//! formal arguments, would: -//! - Accept a pointer P to the struct return area in x0 on entry. -//! - Return v3 in x0. -//! - Return v2 in memory at `[P]`. -//! - Return v1 in memory at `[P+8]`. -//! - Return v0 in memory at `[P+16]`. +//! Implementation of a standard AArch64 ABI. -use crate::binemit::StackMap; use crate::ir; use crate::ir::types; use crate::ir::types::*; -use crate::ir::{ArgumentExtension, StackSlot}; +use crate::ir::SourceLoc; use crate::isa; -use crate::isa::aarch64::{inst::EmitState, inst::*, lower::ty_bits}; +use crate::isa::aarch64::{inst::EmitState, inst::*}; use crate::machinst::*; use crate::settings; use crate::{CodegenError, CodegenResult}; - use alloc::boxed::Box; use alloc::vec::Vec; +use regalloc::{RealReg, Reg, RegClass, Set, Writable}; +use smallvec::SmallVec; +use std::convert::TryFrom; -use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable}; +// We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because +// these ABIs are very similar. -use core::mem; -use log::{debug, trace}; +/// Support for the AArch64 ABI from the callee side (within a function body). +pub type AArch64ABIBody = ABIBodyImpl; -/// A location for an argument or return value. -#[derive(Clone, Copy, Debug)] -enum ABIArg { - /// In a real register. - Reg(RealReg, ir::Type, ir::ArgumentExtension), - /// Arguments only: on stack, at given offset from SP at entry. - Stack(i64, ir::Type, ir::ArgumentExtension), -} - -/// AArch64 ABI information shared between body (callee) and caller. -struct ABISig { - /// Argument locations (regs or stack slots). Stack offsets are relative to - /// SP on entry to function. - args: Vec, - /// Return-value locations. Stack offsets are relative to the return-area - /// pointer. - rets: Vec, - /// Space on stack used to store arguments. - stack_arg_space: i64, - /// Space on stack used to store return values. - stack_ret_space: i64, - /// Index in `args` of the stack-return-value-area argument. - stack_ret_arg: Option, - /// Calling convention used. - call_conv: isa::CallConv, -} - -/// This is the limit for the size of argument and return-value areas on the -/// stack. We place a reasonable limit here to avoid integer overflow issues -/// with 32-bit arithmetic: for now, 128 MB. -static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024; +/// Support for the AArch64 ABI from the caller side (at a callsite). +pub type AArch64ABICall = ABICallImpl; // Spidermonkey specific ABI convention. @@ -178,6 +61,11 @@ static BALDRDASH_JIT_CALLEE_SAVED_FPU: &[bool] = &[ /* 24 = */ false, false, false, false, false, false, false, true /* v31 / d31 */ ]; +/// This is the limit for the size of argument and return-value areas on the +/// stack. We place a reasonable limit here to avoid integer overflow issues +/// with 32-bit arithmetic: for now, 128 MB. 
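// A minimal standalone sketch of the multi-value return placement described
// in the (removed) module docs above: returns are processed in reverse, the
// first value in that order lands in x0, and the rest are assigned 8-byte
// slots upward in the caller-provided struct-return area. `RetLoc` and
// `assign_multi_rets` are illustrative names only, not part of this patch.
#[derive(Clone, Debug, PartialEq)]
enum RetLoc {
    Reg(&'static str),
    /// Byte offset into the struct-return area pointed to by the hidden
    /// extra argument.
    RetArea(i64),
}

fn assign_multi_rets(num_rets: usize) -> Vec<RetLoc> {
    let mut locs = vec![RetLoc::Reg("x0"); num_rets];
    let mut off = 0i64;
    // Walk the returns in reverse; the last return keeps x0, earlier returns
    // get successive 8-byte slots in the return area.
    for i in (0..num_rets).rev().skip(1) {
        locs[i] = RetLoc::RetArea(off);
        off += 8;
    }
    locs
}

fn main() {
    // Matches the worked example in the docs: v3 -> x0, v2 -> [P],
    // v1 -> [P+8], v0 -> [P+16].
    assert_eq!(
        assign_multi_rets(4),
        vec![
            RetLoc::RetArea(16),
            RetLoc::RetArea(8),
            RetLoc::RetArea(0),
            RetLoc::Reg("x0"),
        ]
    );
}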
+static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024; + /// Try to fill a Baldrdash register, returning it if it was found. fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Option { if call_conv.extends_baldrdash() { @@ -205,373 +93,148 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt } } -/// Are we computing information about arguments or return values? Much of the -/// handling is factored out into common routines; this enum allows us to -/// distinguish which case we're handling. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -enum ArgsOrRets { - Args, - Rets, +impl Into for StackAMode { + fn into(self) -> AMode { + match self { + StackAMode::FPOffset(off, ty) => AMode::FPOffset(off, ty), + StackAMode::NominalSPOffset(off, ty) => AMode::NominalSPOffset(off, ty), + StackAMode::SPOffset(off, ty) => AMode::SPOffset(off, ty), + } + } } -/// Process a list of parameters or return values and allocate them to X-regs, -/// V-regs, and stack slots. -/// -/// Returns the list of argument locations, the stack-space used (rounded up -/// to a 16-byte-aligned boundary), and if `add_ret_area_ptr` was passed, the -/// index of the extra synthetic arg that was added. -fn compute_arg_locs( - call_conv: isa::CallConv, - params: &[ir::AbiParam], - args_or_rets: ArgsOrRets, - add_ret_area_ptr: bool, -) -> CodegenResult<(Vec, i64, Option)> { - let is_baldrdash = call_conv.extends_baldrdash(); +/// AArch64-specific ABI behavior. This struct just serves as an implementation +/// point for the trait; it is never actually instantiated. +pub struct AArch64MachineImpl; - // See AArch64 ABI (https://c9x.me/compile/bib/abi-arm64.pdf), sections 5.4. - let mut next_xreg = 0; - let mut next_vreg = 0; - let mut next_stack: u64 = 0; - let mut ret = vec![]; +impl ABIMachineImpl for AArch64MachineImpl { + type I = Inst; - let max_reg_vals = match (args_or_rets, is_baldrdash) { - (ArgsOrRets::Args, _) => 8, // x0-x7, v0-v7 - (ArgsOrRets::Rets, false) => 8, // x0-x7, v0-v7 - (ArgsOrRets::Rets, true) => 1, // x0 or v0 - }; + fn compute_arg_locs( + call_conv: isa::CallConv, + params: &[ir::AbiParam], + args_or_rets: ArgsOrRets, + add_ret_area_ptr: bool, + ) -> CodegenResult<(Vec, i64, Option)> { + let is_baldrdash = call_conv.extends_baldrdash(); - for i in 0..params.len() { - // Process returns backward, according to the SpiderMonkey ABI (which we - // adopt internally if `is_baldrdash` is set). - let param = match (args_or_rets, is_baldrdash) { - (ArgsOrRets::Args, _) => ¶ms[i], - (ArgsOrRets::Rets, false) => ¶ms[i], - (ArgsOrRets::Rets, true) => ¶ms[params.len() - 1 - i], + // See AArch64 ABI (https://c9x.me/compile/bib/abi-arm64.pdf), sections 5.4. + let mut next_xreg = 0; + let mut next_vreg = 0; + let mut next_stack: u64 = 0; + let mut ret = vec![]; + + let max_reg_vals = match (args_or_rets, is_baldrdash) { + (ArgsOrRets::Args, _) => 8, // x0-x7, v0-v7 + (ArgsOrRets::Rets, false) => 8, // x0-x7, v0-v7 + (ArgsOrRets::Rets, true) => 1, // x0 or v0 }; - // Validate "purpose". 
- match ¶m.purpose { - &ir::ArgumentPurpose::VMContext - | &ir::ArgumentPurpose::Normal - | &ir::ArgumentPurpose::StackLimit - | &ir::ArgumentPurpose::SignatureId => {} - _ => panic!( - "Unsupported argument purpose {:?} in signature: {:?}", - param.purpose, params - ), - } - - let intreg = in_int_reg(param.value_type); - let vecreg = in_vec_reg(param.value_type); - debug_assert!(intreg || vecreg); - debug_assert!(!(intreg && vecreg)); - - let next_reg = if intreg { - &mut next_xreg - } else { - &mut next_vreg - }; - - if let Some(param) = try_fill_baldrdash_reg(call_conv, param) { - assert!(intreg); - ret.push(param); - } else if *next_reg < max_reg_vals { - let reg = if intreg { - xreg(*next_reg) - } else { - vreg(*next_reg) + for i in 0..params.len() { + // Process returns backward, according to the SpiderMonkey ABI (which we + // adopt internally if `is_baldrdash` is set). + let param = match (args_or_rets, is_baldrdash) { + (ArgsOrRets::Args, _) => ¶ms[i], + (ArgsOrRets::Rets, false) => ¶ms[i], + (ArgsOrRets::Rets, true) => ¶ms[params.len() - 1 - i], }; - ret.push(ABIArg::Reg( - reg.to_real_reg(), - param.value_type, - param.extension, - )); - *next_reg += 1; - } else { - // Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte - // stack alignment happens separately after all args.) - let size = (ty_bits(param.value_type) / 8) as u64; - let size = std::cmp::max(size, 8); - // Align. - debug_assert!(size.is_power_of_two()); - next_stack = (next_stack + size - 1) & !(size - 1); - ret.push(ABIArg::Stack( - next_stack as i64, - param.value_type, - param.extension, - )); - next_stack += size; - } - } - if args_or_rets == ArgsOrRets::Rets && is_baldrdash { - ret.reverse(); - } - - let extra_arg = if add_ret_area_ptr { - debug_assert!(args_or_rets == ArgsOrRets::Args); - if next_xreg < max_reg_vals { - ret.push(ABIArg::Reg( - xreg(next_xreg).to_real_reg(), - I64, - ir::ArgumentExtension::None, - )); - } else { - ret.push(ABIArg::Stack( - next_stack as i64, - I64, - ir::ArgumentExtension::None, - )); - next_stack += 8; - } - Some(ret.len() - 1) - } else { - None - }; - - next_stack = (next_stack + 15) & !15; - - // To avoid overflow issues, limit the arg/return size to something - // reasonable -- here, 128 MB. - if next_stack > STACK_ARG_RET_SIZE_LIMIT { - return Err(CodegenError::ImplLimitExceeded); - } - - Ok((ret, next_stack as i64, extra_arg)) -} - -impl ABISig { - fn from_func_sig(sig: &ir::Signature) -> CodegenResult { - // Compute args and retvals from signature. Handle retvals first, - // because we may need to add a return-area arg to the args. - let (rets, stack_ret_space, _) = compute_arg_locs( - sig.call_conv, - &sig.returns, - ArgsOrRets::Rets, - /* extra ret-area ptr = */ false, - )?; - let need_stack_return_area = stack_ret_space > 0; - let (args, stack_arg_space, stack_ret_arg) = compute_arg_locs( - sig.call_conv, - &sig.params, - ArgsOrRets::Args, - need_stack_return_area, - )?; - - trace!( - "ABISig: sig {:?} => args = {:?} rets = {:?} arg stack = {} ret stack = {} stack_ret_arg = {:?}", - sig, - args, - rets, - stack_arg_space, - stack_ret_space, - stack_ret_arg - ); - - Ok(ABISig { - args, - rets, - stack_arg_space, - stack_ret_space, - stack_ret_arg, - call_conv: sig.call_conv, - }) - } -} - -/// AArch64 ABI object for a function body. -pub struct AArch64ABIBody { - /// Signature: arg and retval regs. - sig: ABISig, - /// Offsets to each stackslot. - stackslots: Vec, - /// Total stack size of all stackslots. 
- stackslots_size: u32, - /// Clobbered registers, from regalloc. - clobbered: Set>, - /// Total number of spillslots, from regalloc. - spillslots: Option, - /// "Total frame size", as defined by "distance between FP and nominal-SP". - /// Some items are pushed below nominal SP, so the function may actually use - /// more stack than this would otherwise imply. It is simply the initial - /// frame/allocation size needed for stackslots and spillslots. - total_frame_size: Option, - /// The register holding the return-area pointer, if needed. - ret_area_ptr: Option>, - /// Calling convention this function expects. - call_conv: isa::CallConv, - /// The settings controlling this function's compilation. - flags: settings::Flags, - /// Whether or not this function is a "leaf", meaning it calls no other - /// functions - is_leaf: bool, - /// If this function has a stack limit specified, then `Reg` is where the - /// stack limit will be located after the instructions specified have been - /// executed. - /// - /// Note that this is intended for insertion into the prologue, if - /// present. Also note that because the instructions here execute in the - /// prologue this happens after legalization/register allocation/etc so we - /// need to be extremely careful with each instruction. The instructions are - /// manually register-allocated and carefully only use caller-saved - /// registers and keep nothing live after this sequence of instructions. - stack_limit: Option<(Reg, Vec)>, -} - -fn in_int_reg(ty: ir::Type) -> bool { - match ty { - types::I8 | types::I16 | types::I32 | types::I64 => true, - types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true, - types::R64 => true, - types::R32 => panic!("Unexpected 32-bit reference on a 64-bit platform!"), - _ => false, - } -} - -fn in_vec_reg(ty: ir::Type) -> bool { - match ty { - types::F32 | types::F64 => true, - types::B8X16 - | types::I8X16 - | types::B16X8 - | types::I16X8 - | types::B32X4 - | types::I32X4 - | types::B64X2 - | types::I64X2 => true, - _ => false, - } -} - -/// Generates the instructions necessary for the `gv` to be materialized into a -/// register. -/// -/// This function will return a register that will contain the result of -/// evaluating `gv`. It will also return any instructions necessary to calculate -/// the value of the register. -/// -/// Note that global values are typically lowered to instructions via the -/// standard legalization pass. Unfortunately though prologue generation happens -/// so late in the pipeline that we can't use these legalization passes to -/// generate the instructions for `gv`. As a result we duplicate some lowering -/// of `gv` here and support only some global values. This is similar to what -/// the x86 backend does for now, and hopefully this can be somewhat cleaned up -/// in the future too! -/// -/// Also note that this function will make use of `writable_spilltmp_reg()` as a -/// temporary register to store values in if necessary. Currently after we write -/// to this register there's guaranteed to be no spilled values between where -/// it's used, because we're not participating in register allocation anyway! 
-fn gen_stack_limit(f: &ir::Function, abi: &ABISig, gv: ir::GlobalValue) -> (Reg, Vec) { - let mut insts = Vec::new(); - let reg = generate_gv(f, abi, gv, &mut insts); - return (reg, insts); - - fn generate_gv( - f: &ir::Function, - abi: &ABISig, - gv: ir::GlobalValue, - insts: &mut Vec, - ) -> Reg { - match f.global_values[gv] { - // Return the direct register the vmcontext is in - ir::GlobalValueData::VMContext => { - get_special_purpose_param_register(f, abi, ir::ArgumentPurpose::VMContext) - .expect("no vmcontext parameter found") + // Validate "purpose". + match ¶m.purpose { + &ir::ArgumentPurpose::VMContext + | &ir::ArgumentPurpose::Normal + | &ir::ArgumentPurpose::StackLimit + | &ir::ArgumentPurpose::SignatureId => {} + _ => panic!( + "Unsupported argument purpose {:?} in signature: {:?}", + param.purpose, params + ), } - // Load our base value into a register, then load from that register - // in to a temporary register. - ir::GlobalValueData::Load { - base, - offset, - global_type: _, - readonly: _, - } => { - let base = generate_gv(f, abi, base, insts); - let into_reg = writable_spilltmp_reg(); - let mem = MemArg::RegOffset(base, offset.into(), I64); - insts.push(Inst::ULoad64 { - rd: into_reg, - mem, - srcloc: None, - }); - return into_reg.to_reg(); + + assert!( + legal_type_for_machine(param.value_type), + "Invalid type for AArch64: {:?}", + param.value_type + ); + let rc = Inst::rc_for_type(param.value_type).unwrap(); + + let next_reg = match rc { + RegClass::I64 => &mut next_xreg, + RegClass::V128 => &mut next_vreg, + _ => panic!("Invalid register class: {:?}", rc), + }; + + if let Some(param) = try_fill_baldrdash_reg(call_conv, param) { + assert!(rc == RegClass::I64); + ret.push(param); + } else if *next_reg < max_reg_vals { + let reg = match rc { + RegClass::I64 => xreg(*next_reg), + RegClass::V128 => vreg(*next_reg), + _ => unreachable!(), + }; + ret.push(ABIArg::Reg( + reg.to_real_reg(), + param.value_type, + param.extension, + )); + *next_reg += 1; + } else { + // Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte + // stack alignment happens separately after all args.) + let size = (ty_bits(param.value_type) / 8) as u64; + let size = std::cmp::max(size, 8); + // Align. + debug_assert!(size.is_power_of_two()); + next_stack = (next_stack + size - 1) & !(size - 1); + ret.push(ABIArg::Stack( + next_stack as i64, + param.value_type, + param.extension, + )); + next_stack += size; } - ref other => panic!("global value for stack limit not supported: {}", other), - } - } -} - -fn get_special_purpose_param_register( - f: &ir::Function, - abi: &ABISig, - purpose: ir::ArgumentPurpose, -) -> Option { - let idx = f.signature.special_param_index(purpose)?; - match abi.args[idx] { - ABIArg::Reg(reg, ..) => Some(reg.to_reg()), - ABIArg::Stack(..) => None, - } -} - -impl AArch64ABIBody { - /// Create a new body ABI instance. - pub fn new(f: &ir::Function, flags: settings::Flags) -> CodegenResult { - debug!("AArch64 ABI: func signature {:?}", f.signature); - - let sig = ABISig::from_func_sig(&f.signature)?; - - let call_conv = f.signature.call_conv; - // Only these calling conventions are supported. - debug_assert!( - call_conv == isa::CallConv::SystemV - || call_conv == isa::CallConv::Fast - || call_conv == isa::CallConv::Cold - || call_conv.extends_baldrdash(), - "Unsupported calling convention: {:?}", - call_conv - ); - - // Compute stackslot locations and total stackslot size. 
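// A minimal sketch of the stack-slot sizing rule used by `compute_arg_locs`
// above: every on-stack argument occupies at least an 8-byte slot, is
// aligned to its own (power-of-two) size, and the whole area is rounded up
// to 16 bytes once all arguments are placed. `alloc_stack_arg` and
// `finish_stack_args` are illustrative names only.
fn alloc_stack_arg(next_stack: u64, ty_bytes: u64) -> (u64 /* offset */, u64 /* next */) {
    let size = ty_bytes.max(8);
    debug_assert!(size.is_power_of_two());
    let offset = (next_stack + size - 1) & !(size - 1); // align up to `size`
    (offset, offset + size)
}

fn finish_stack_args(next_stack: u64) -> u64 {
    (next_stack + 15) & !15 // keep the whole argument area 16-byte aligned
}

fn main() {
    let (off_a, next) = alloc_stack_arg(0, 4); // an i32 still takes 8 bytes
    let (off_b, next) = alloc_stack_arg(next, 16); // a 128-bit value aligns to 16
    assert_eq!((off_a, off_b), (0, 16));
    assert_eq!(finish_stack_args(next), 32);
}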
- let mut stack_offset: u32 = 0; - let mut stackslots = vec![]; - for (stackslot, data) in f.stack_slots.iter() { - let off = stack_offset; - stack_offset += data.size; - stack_offset = (stack_offset + 7) & !7; - debug_assert_eq!(stackslot.as_u32() as usize, stackslots.len()); - stackslots.push(off); } - // Figure out what instructions, if any, will be needed to check the - // stack limit. This can either be specified as a special-purpose - // argument or as a global value which often calculates the stack limit - // from the arguments. - let stack_limit = - get_special_purpose_param_register(f, &sig, ir::ArgumentPurpose::StackLimit) - .map(|reg| (reg, Vec::new())) - .or_else(|| f.stack_limit.map(|gv| gen_stack_limit(f, &sig, gv))); + if args_or_rets == ArgsOrRets::Rets && is_baldrdash { + ret.reverse(); + } - Ok(Self { - sig, - stackslots, - stackslots_size: stack_offset, - clobbered: Set::empty(), - spillslots: None, - total_frame_size: None, - ret_area_ptr: None, - call_conv, - flags, - is_leaf: f.is_leaf(), - stack_limit, - }) + let extra_arg = if add_ret_area_ptr { + debug_assert!(args_or_rets == ArgsOrRets::Args); + if next_xreg < max_reg_vals { + ret.push(ABIArg::Reg( + xreg(next_xreg).to_real_reg(), + I64, + ir::ArgumentExtension::None, + )); + } else { + ret.push(ABIArg::Stack( + next_stack as i64, + I64, + ir::ArgumentExtension::None, + )); + next_stack += 8; + } + Some(ret.len() - 1) + } else { + None + }; + + next_stack = (next_stack + 15) & !15; + + // To avoid overflow issues, limit the arg/return size to something + // reasonable -- here, 128 MB. + if next_stack > STACK_ARG_RET_SIZE_LIMIT { + return Err(CodegenError::ImplLimitExceeded); + } + + Ok((ret, next_stack as i64, extra_arg)) } - /// Returns the offset from FP to the argument area, i.e., jumping over the saved FP, return - /// address, and maybe other standard elements depending on ABI (e.g. Wasm TLS reg). - fn fp_to_arg_offset(&self) -> i64 { - if self.call_conv.extends_baldrdash() { - let num_words = self.flags.baldrdash_prologue_words() as i64; + fn fp_to_arg_offset(call_conv: isa::CallConv, flags: &settings::Flags) -> i64 { + if call_conv.extends_baldrdash() { + let num_words = flags.baldrdash_prologue_words() as i64; debug_assert!(num_words > 0, "baldrdash must set baldrdash_prologue_words"); debug_assert_eq!(num_words % 2, 0, "stack must be 16-aligned"); num_words * 8 @@ -580,164 +243,410 @@ impl AArch64ABIBody { } } - /// Inserts instructions necessary for checking the stack limit into the - /// prologue. - /// - /// This function will generate instructions necessary for perform a stack - /// check at the header of a function. The stack check is intended to trap - /// if the stack pointer goes below a particular threshold, preventing stack - /// overflow in wasm or other code. The `stack_limit` argument here is the - /// register which holds the threshold below which we're supposed to trap. - /// This function is known to allocate `stack_size` bytes and we'll push - /// instructions onto `insts`. - /// - /// Note that the instructions generated here are special because this is - /// happening so late in the pipeline (e.g. after register allocation). This - /// means that we need to do manual register allocation here and also be - /// careful to not clobber any callee-saved or argument registers. For now - /// this routine makes do with the `spilltmp_reg` as one temporary - /// register, and a second register of `tmp2` which is caller-saved. 
This - /// should be fine for us since no spills should happen in this sequence of - /// instructions, so our register won't get accidentally clobbered. - /// - /// No values can be live after the prologue, but in this case that's ok - /// because we just need to perform a stack check before progressing with - /// the rest of the function. - fn insert_stack_check(&self, stack_limit: Reg, stack_size: u32, insts: &mut Vec) { - // With no explicit stack allocated we can just emit the simple check of - // the stack registers against the stack limit register, and trap if - // it's out of bounds. - if stack_size == 0 { - return push_check(stack_limit, insts); - } + fn gen_load_stack(mem: StackAMode, into_reg: Writable, ty: Type) -> Inst { + Inst::gen_load(into_reg, mem.into(), ty) + } - // Note that the 32k stack size here is pretty special. See the - // documentation in x86/abi.rs for why this is here. The general idea is - // that we're protecting against overflow in the addition that happens - // below. - if stack_size >= 32 * 1024 { - push_check(stack_limit, insts); - } + fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst { + Inst::gen_store(mem.into(), from_reg, ty) + } - // Add the `stack_size` to `stack_limit`, placing the result in - // `scratch`. - // - // Note though that `stack_limit`'s register may be the same as - // `scratch`. If our stack size doesn't fit into an immediate this - // means we need a second scratch register for loading the stack size - // into a register. - let scratch = writable_spilltmp_reg(); - let scratch2 = writable_tmp2_reg(); - let stack_size = u64::from(stack_size); - if let Some(imm12) = Imm12::maybe_from_u64(stack_size) { + fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Inst { + Inst::gen_move(to_reg, from_reg, ty) + } + + fn gen_extend( + to_reg: Writable, + from_reg: Reg, + signed: bool, + from_bits: u8, + to_bits: u8, + ) -> Inst { + assert!(from_bits < to_bits); + Inst::Extend { + rd: to_reg, + rn: from_reg, + signed, + from_bits, + to_bits, + } + } + + fn gen_ret() -> Inst { + Inst::Ret + } + + fn gen_add_imm(into_reg: Writable, from_reg: Reg, imm: u64) -> SmallVec<[Inst; 4]> { + let mut insts = SmallVec::new(); + if let Some(imm12) = Imm12::maybe_from_u64(imm) { insts.push(Inst::AluRRImm12 { alu_op: ALUOp::Add64, - rd: scratch, - rn: stack_limit, + rd: into_reg, + rn: from_reg, imm12, }); } else { - insts.extend(Inst::load_constant(scratch2, stack_size.into())); + let scratch2 = writable_tmp2_reg(); + insts.extend(Inst::load_constant(scratch2, imm.into())); insts.push(Inst::AluRRRExtend { alu_op: ALUOp::Add64, - rd: scratch, - rn: stack_limit, + rd: into_reg, + rn: from_reg, rm: scratch2.to_reg(), extendop: ExtendOp::UXTX, }); } - push_check(scratch.to_reg(), insts); + insts + } - fn push_check(stack_limit: Reg, insts: &mut Vec) { - insts.push(Inst::AluRRR { - alu_op: ALUOp::SubS64XR, - rd: writable_zero_reg(), + fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallVec<[Inst; 2]> { + let mut insts = SmallVec::new(); + insts.push(Inst::AluRRR { + alu_op: ALUOp::SubS64XR, + rd: writable_zero_reg(), + rn: stack_reg(), + rm: limit_reg, + }); + insts.push(Inst::TrapIf { + trap_info: (ir::SourceLoc::default(), ir::TrapCode::StackOverflow), + // Here `Lo` == "less than" when interpreting the two + // operands as unsigned integers. 
+ kind: CondBrKind::Cond(Cond::Lo), + }); + insts + } + + fn gen_epilogue_placeholder() -> Inst { + Inst::EpiloguePlaceholder + } + + fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable, _ty: Type) -> Inst { + let mem = mem.into(); + Inst::LoadAddr { rd: into_reg, mem } + } + + fn get_fixed_tmp_reg() -> Reg { + spilltmp_reg() + } + + fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i64, ty: Type) -> Inst { + let mem = AMode::RegOffset(base, offset, ty); + Inst::gen_load(into_reg, mem, ty) + } + + fn gen_store_base_offset(base: Reg, offset: i64, from_reg: Reg, ty: Type) -> Inst { + let mem = AMode::RegOffset(base, offset, ty); + Inst::gen_store(mem, from_reg, ty) + } + + fn gen_sp_reg_adjust(amount: i64) -> SmallVec<[Inst; 2]> { + if amount == 0 { + return SmallVec::new(); + } + + let (amount, is_sub) = if amount > 0 { + (u64::try_from(amount).unwrap(), false) + } else { + (u64::try_from(-amount).unwrap(), true) + }; + + let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 }; + + let mut ret = SmallVec::new(); + if let Some(imm12) = Imm12::maybe_from_u64(amount) { + let adj_inst = Inst::AluRRImm12 { + alu_op, + rd: writable_stack_reg(), rn: stack_reg(), - rm: stack_limit, + imm12, + }; + ret.push(adj_inst); + } else { + let tmp = writable_spilltmp_reg(); + let const_inst = Inst::LoadConst64 { + rd: tmp, + const_data: amount, + }; + let adj_inst = Inst::AluRRRExtend { + alu_op, + rd: writable_stack_reg(), + rn: stack_reg(), + rm: tmp.to_reg(), + extendop: ExtendOp::UXTX, + }; + ret.push(const_inst); + ret.push(adj_inst); + } + ret + } + + fn gen_nominal_sp_adj(offset: i64) -> Inst { + Inst::VirtualSPOffsetAdj { offset } + } + + fn gen_prologue_frame_setup() -> SmallVec<[Inst; 2]> { + let mut insts = SmallVec::new(); + // stp fp (x29), lr (x30), [sp, #-16]! + insts.push(Inst::StoreP64 { + rt: fp_reg(), + rt2: link_reg(), + mem: PairAMode::PreIndexed( + writable_stack_reg(), + SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(), + ), + }); + // mov fp (x29), sp. This uses the ADDI rd, rs, 0 form of `MOV` because + // the usual encoding (`ORR`) does not work with SP. + insts.push(Inst::AluRRImm12 { + alu_op: ALUOp::Add64, + rd: writable_fp_reg(), + rn: stack_reg(), + imm12: Imm12 { + bits: 0, + shift12: false, + }, + }); + insts + } + + fn gen_epilogue_frame_restore() -> SmallVec<[Inst; 2]> { + let mut insts = SmallVec::new(); + + // MOV (alias of ORR) interprets x31 as XZR, so use an ADD here. + // MOV to SP is an alias of ADD. + insts.push(Inst::AluRRImm12 { + alu_op: ALUOp::Add64, + rd: writable_stack_reg(), + rn: fp_reg(), + imm12: Imm12 { + bits: 0, + shift12: false, + }, + }); + insts.push(Inst::LoadP64 { + rt: writable_fp_reg(), + rt2: writable_link_reg(), + mem: PairAMode::PostIndexed( + writable_stack_reg(), + SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(), + ), + }); + + insts + } + + // Returns stack bytes used as well as instructions. Does not adjust + // nominal SP offset; abi_impl generic code will do that. 
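// A minimal sketch of the choice made in `gen_add_imm` / `gen_sp_reg_adjust`
// above: AArch64 add/sub-immediate takes a 12-bit value, optionally shifted
// left by 12, so larger SP adjustments must first load the constant into a
// scratch register. `fits_in_imm12` is a hypothetical stand-in for
// `Imm12::maybe_from_u64`; the exact encoding check lives there.
fn fits_in_imm12(value: u64) -> bool {
    value < 0x1000 || (value & 0xfff == 0 && value < 0x100_0000)
}

enum SpAdjust {
    /// A single `add`/`sub sp, sp, #imm`.
    Immediate(u64),
    /// Load the constant into a scratch register, then `add`/`sub` with it.
    ViaScratch(u64),
}

fn plan_sp_adjust(amount: u64) -> SpAdjust {
    if fits_in_imm12(amount) {
        SpAdjust::Immediate(amount)
    } else {
        SpAdjust::ViaScratch(amount)
    }
}

fn main() {
    assert!(matches!(plan_sp_adjust(64), SpAdjust::Immediate(_)));
    assert!(matches!(plan_sp_adjust(0x40_000), SpAdjust::Immediate(_))); // low 12 bits zero: fits shifted
    assert!(matches!(plan_sp_adjust(0x12_345), SpAdjust::ViaScratch(_)));
}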
+ fn gen_clobber_save( + call_conv: isa::CallConv, + clobbers: &Set>, + ) -> (u64, SmallVec<[Inst; 16]>) { + let mut insts = SmallVec::new(); + let (clobbered_int, clobbered_vec) = get_callee_saves(call_conv, clobbers); + let mut clobber_size = 0; + for reg_pair in clobbered_int.chunks(2) { + let (r1, r2) = if reg_pair.len() == 2 { + // .to_reg().to_reg(): Writable --> RealReg --> Reg + (reg_pair[0].to_reg().to_reg(), reg_pair[1].to_reg().to_reg()) + } else { + (reg_pair[0].to_reg().to_reg(), zero_reg()) + }; + + debug_assert!(r1.get_class() == RegClass::I64); + debug_assert!(r2.get_class() == RegClass::I64); + + // stp r1, r2, [sp, #-16]! + insts.push(Inst::StoreP64 { + rt: r1, + rt2: r2, + mem: PairAMode::PreIndexed( + writable_stack_reg(), + SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(), + ), }); - insts.push(Inst::TrapIf { - trap_info: (ir::SourceLoc::default(), ir::TrapCode::StackOverflow), - // Here `Lo` == "less than" when interpreting the two - // operands as unsigned integers. - kind: CondBrKind::Cond(Cond::Lo), + clobber_size += 16; + } + let vec_save_bytes = clobbered_vec.len() * 16; + if vec_save_bytes != 0 { + insts.push(Inst::AluRRImm12 { + alu_op: ALUOp::Sub64, + rd: writable_stack_reg(), + rn: stack_reg(), + imm12: Imm12::maybe_from_u64(vec_save_bytes as u64).unwrap(), + }); + clobber_size += vec_save_bytes; + } + for (i, reg) in clobbered_vec.iter().enumerate() { + insts.push(Inst::FpuStore128 { + rd: reg.to_reg().to_reg(), + mem: AMode::Unscaled(stack_reg(), SImm9::maybe_from_i64((i * 16) as i64).unwrap()), + srcloc: None, }); } + + (clobber_size as u64, insts) + } + + fn gen_clobber_restore( + call_conv: isa::CallConv, + clobbers: &Set>, + ) -> SmallVec<[Inst; 16]> { + let mut insts = SmallVec::new(); + let (clobbered_int, clobbered_vec) = get_callee_saves(call_conv, clobbers); + + for (i, reg) in clobbered_vec.iter().enumerate() { + insts.push(Inst::FpuLoad128 { + rd: Writable::from_reg(reg.to_reg().to_reg()), + mem: AMode::Unscaled(stack_reg(), SImm9::maybe_from_i64((i * 16) as i64).unwrap()), + srcloc: None, + }); + } + let vec_save_bytes = clobbered_vec.len() * 16; + if vec_save_bytes != 0 { + insts.push(Inst::AluRRImm12 { + alu_op: ALUOp::Add64, + rd: writable_stack_reg(), + rn: stack_reg(), + imm12: Imm12::maybe_from_u64(vec_save_bytes as u64).unwrap(), + }); + } + + for reg_pair in clobbered_int.chunks(2).rev() { + let (r1, r2) = if reg_pair.len() == 2 { + ( + reg_pair[0].map(|r| r.to_reg()), + reg_pair[1].map(|r| r.to_reg()), + ) + } else { + (reg_pair[0].map(|r| r.to_reg()), writable_zero_reg()) + }; + + debug_assert!(r1.to_reg().get_class() == RegClass::I64); + debug_assert!(r2.to_reg().get_class() == RegClass::I64); + + // ldp r1, r2, [sp], #16 + insts.push(Inst::LoadP64 { + rt: r1, + rt2: r2, + mem: PairAMode::PostIndexed( + writable_stack_reg(), + SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(), + ), + }); + } + + insts + } + + fn gen_call( + dest: &CallDest, + uses: Vec, + defs: Vec>, + loc: SourceLoc, + opcode: ir::Opcode, + ) -> SmallVec<[(/* is_safepoint = */ bool, Inst); 2]> { + let mut insts = SmallVec::new(); + match &dest { + &CallDest::ExtName(ref name, RelocDistance::Near) => insts.push(( + true, + Inst::Call { + info: Box::new(CallInfo { + dest: name.clone(), + uses, + defs, + loc, + opcode, + }), + }, + )), + &CallDest::ExtName(ref name, RelocDistance::Far) => { + insts.push(( + false, + Inst::LoadExtName { + rd: writable_spilltmp_reg(), + name: Box::new(name.clone()), + offset: 0, + srcloc: loc, + }, + )); + insts.push(( + true, + 
Inst::CallInd { + info: Box::new(CallIndInfo { + rn: spilltmp_reg(), + uses, + defs, + loc, + opcode, + }), + }, + )); + } + &CallDest::Reg(reg) => insts.push(( + true, + Inst::CallInd { + info: Box::new(CallIndInfo { + rn: *reg, + uses, + defs, + loc, + opcode, + }), + }, + )), + } + + insts + } + + fn get_spillslot_size(rc: RegClass, ty: Type) -> u32 { + // We allocate in terms of 8-byte slots. + match (rc, ty) { + (RegClass::I64, _) => 1, + (RegClass::V128, F32) | (RegClass::V128, F64) => 1, + (RegClass::V128, _) => 2, + _ => panic!("Unexpected register class!"), + } + } + + /// Get the current virtual-SP offset from an instruction-emission state. + fn get_virtual_sp_offset_from_state(s: &EmitState) -> i64 { + s.virtual_sp_offset + } + + /// Get the nominal-SP-to-FP offset from an instruction-emission state. + fn get_nominal_sp_to_fp(s: &EmitState) -> i64 { + s.nominal_sp_to_fp + } + + fn get_caller_saves(call_conv: isa::CallConv) -> Vec> { + let mut caller_saved = Vec::new(); + for i in 0..29 { + let x = writable_xreg(i); + if is_caller_save_reg(call_conv, x.to_reg().to_real_reg()) { + caller_saved.push(x); + } + } + for i in 0..32 { + let v = writable_vreg(i); + if is_caller_save_reg(call_conv, v.to_reg().to_real_reg()) { + caller_saved.push(v); + } + } + caller_saved } } -fn load_stack(mem: MemArg, into_reg: Writable, ty: Type) -> Inst { +/// Is this type supposed to be seen on this machine? E.g. references of the +/// wrong width are invalid. +fn legal_type_for_machine(ty: Type) -> bool { match ty { - types::B1 | types::B8 | types::I8 => Inst::ULoad8 { - rd: into_reg, - mem, - srcloc: None, - }, - types::B16 | types::I16 => Inst::ULoad16 { - rd: into_reg, - mem, - srcloc: None, - }, - types::B32 | types::I32 | types::R32 => Inst::ULoad32 { - rd: into_reg, - mem, - srcloc: None, - }, - types::B64 | types::I64 | types::R64 => Inst::ULoad64 { - rd: into_reg, - mem, - srcloc: None, - }, - types::F32 => Inst::FpuLoad32 { - rd: into_reg, - mem, - srcloc: None, - }, - types::F64 => Inst::FpuLoad64 { - rd: into_reg, - mem, - srcloc: None, - }, - _ => unimplemented!("load_stack({})", ty), + R32 => false, + _ => true, } } -fn store_stack(mem: MemArg, from_reg: Reg, ty: Type) -> Inst { - match ty { - types::B1 | types::B8 | types::I8 => Inst::Store8 { - rd: from_reg, - mem, - srcloc: None, - }, - types::B16 | types::I16 => Inst::Store16 { - rd: from_reg, - mem, - srcloc: None, - }, - types::B32 | types::I32 | types::R32 => Inst::Store32 { - rd: from_reg, - mem, - srcloc: None, - }, - types::B64 | types::I64 | types::R64 => Inst::Store64 { - rd: from_reg, - mem, - srcloc: None, - }, - types::F32 => Inst::FpuStore32 { - rd: from_reg, - mem, - srcloc: None, - }, - types::F64 => Inst::FpuStore64 { - rd: from_reg, - mem, - srcloc: None, - }, - _ => unimplemented!("store_stack({})", ty), - } -} - -fn is_callee_save(call_conv: isa::CallConv, r: RealReg) -> bool { +fn is_callee_save_reg(call_conv: isa::CallConv, r: RealReg) -> bool { if call_conv.extends_baldrdash() { match r.get_class() { RegClass::I64 => { @@ -767,12 +676,12 @@ fn is_callee_save(call_conv: isa::CallConv, r: RealReg) -> bool { fn get_callee_saves( call_conv: isa::CallConv, - regs: Vec>, + regs: &Set>, ) -> (Vec>, Vec>) { let mut int_saves = vec![]; let mut vec_saves = vec![]; - for reg in regs.into_iter() { - if is_callee_save(call_conv, reg.to_reg()) { + for ® in regs.iter() { + if is_callee_save_reg(call_conv, reg.to_reg()) { match reg.to_reg().get_class() { RegClass::I64 => int_saves.push(reg), RegClass::V128 => 
vec_saves.push(reg), @@ -780,10 +689,13 @@ fn get_callee_saves( } } } + // Sort registers for deterministic code output. + int_saves.sort_by_key(|r| r.to_reg().get_index()); + vec_saves.sort_by_key(|r| r.to_reg().get_index()); (int_saves, vec_saves) } -fn is_caller_save(call_conv: isa::CallConv, r: RealReg) -> bool { +fn is_caller_save_reg(call_conv: isa::CallConv, r: RealReg) -> bool { if call_conv.extends_baldrdash() { match r.get_class() { RegClass::I64 => { @@ -816,793 +728,3 @@ fn is_caller_save(call_conv: isa::CallConv, r: RealReg) -> bool { _ => panic!("Unexpected RegClass"), } } - -fn get_caller_saves(call_conv: isa::CallConv) -> Vec> { - let mut caller_saved = Vec::new(); - for i in 0..29 { - let x = writable_xreg(i); - if is_caller_save(call_conv, x.to_reg().to_real_reg()) { - caller_saved.push(x); - } - } - for i in 0..32 { - let v = writable_vreg(i); - if is_caller_save(call_conv, v.to_reg().to_real_reg()) { - caller_saved.push(v); - } - } - caller_saved -} - -fn gen_sp_adjust_insts(adj: u64, is_sub: bool, mut f: F) { - let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 }; - - if let Some(imm12) = Imm12::maybe_from_u64(adj) { - let adj_inst = Inst::AluRRImm12 { - alu_op, - rd: writable_stack_reg(), - rn: stack_reg(), - imm12, - }; - f(adj_inst); - } else { - let tmp = writable_spilltmp_reg(); - let const_inst = Inst::LoadConst64 { - rd: tmp, - const_data: adj, - }; - let adj_inst = Inst::AluRRRExtend { - alu_op, - rd: writable_stack_reg(), - rn: stack_reg(), - rm: tmp.to_reg(), - extendop: ExtendOp::UXTX, - }; - f(const_inst); - f(adj_inst); - } -} - -impl ABIBody for AArch64ABIBody { - type I = Inst; - - fn temp_needed(&self) -> bool { - self.sig.stack_ret_arg.is_some() - } - - fn init(&mut self, maybe_tmp: Option>) { - if self.sig.stack_ret_arg.is_some() { - assert!(maybe_tmp.is_some()); - self.ret_area_ptr = maybe_tmp; - } - } - - fn flags(&self) -> &settings::Flags { - &self.flags - } - - fn liveins(&self) -> Set { - let mut set: Set = Set::empty(); - for &arg in &self.sig.args { - if let ABIArg::Reg(r, ..) = arg { - set.insert(r); - } - } - set - } - - fn liveouts(&self) -> Set { - let mut set: Set = Set::empty(); - for &ret in &self.sig.rets { - if let ABIArg::Reg(r, ..) = ret { - set.insert(r); - } - } - set - } - - fn num_args(&self) -> usize { - self.sig.args.len() - } - - fn num_retvals(&self) -> usize { - self.sig.rets.len() - } - - fn num_stackslots(&self) -> usize { - self.stackslots.len() - } - - fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable) -> Inst { - match &self.sig.args[idx] { - // Extension mode doesn't matter (we're copying out, not in; we - // ignore high bits by convention). 
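// A minimal standalone sketch of how `gen_clobber_save` above pairs
// callee-saved integer registers for pre-indexed `stp r1, r2, [sp, #-16]!`
// stores: registers are sorted for deterministic output, saved two at a
// time, and an odd leftover is paired with the zero register so SP stays
// 16-byte aligned. Plain `u8` register numbers stand in for real registers
// here, with 31 as xzr; `pair_for_stp` is an illustrative name only.
fn pair_for_stp(mut saved: Vec<u8>) -> (Vec<(u8, u8)>, u64) {
    saved.sort(); // mirrors the sort_by_key on register index above
    let mut pairs = Vec::new();
    let mut clobber_size: u64 = 0;
    for chunk in saved.chunks(2) {
        let pair = if chunk.len() == 2 {
            (chunk[0], chunk[1])
        } else {
            (chunk[0], 31) // odd count: the second slot holds xzr
        };
        pairs.push(pair); // stp pair.0, pair.1, [sp, #-16]!
        clobber_size += 16;
    }
    (pairs, clobber_size)
}

fn main() {
    let (pairs, size) = pair_for_stp(vec![21, 19, 20]);
    assert_eq!(pairs, vec![(19, 20), (21, 31)]);
    assert_eq!(size, 32);
}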
- &ABIArg::Reg(r, ty, _) => Inst::gen_move(into_reg, r.to_reg(), ty), - &ABIArg::Stack(off, ty, _) => load_stack( - MemArg::FPOffset(self.fp_to_arg_offset() + off, ty), - into_reg, - ty, - ), - } - } - - fn gen_retval_area_setup(&self) -> Option { - if let Some(i) = self.sig.stack_ret_arg { - let inst = self.gen_copy_arg_to_reg(i, self.ret_area_ptr.unwrap()); - trace!( - "gen_retval_area_setup: inst {:?}; ptr reg is {:?}", - inst, - self.ret_area_ptr.unwrap().to_reg() - ); - Some(inst) - } else { - trace!("gen_retval_area_setup: not needed"); - None - } - } - - fn gen_copy_reg_to_retval(&self, idx: usize, from_reg: Writable) -> Vec { - let mut ret = Vec::new(); - match &self.sig.rets[idx] { - &ABIArg::Reg(r, ty, ext) => { - let from_bits = ty_bits(ty) as u8; - let dest_reg = Writable::from_reg(r.to_reg()); - match (ext, from_bits) { - (ArgumentExtension::Uext, n) if n < 64 => { - ret.push(Inst::Extend { - rd: dest_reg, - rn: from_reg.to_reg(), - signed: false, - from_bits, - to_bits: 64, - }); - } - (ArgumentExtension::Sext, n) if n < 64 => { - ret.push(Inst::Extend { - rd: dest_reg, - rn: from_reg.to_reg(), - signed: true, - from_bits, - to_bits: 64, - }); - } - _ => ret.push(Inst::gen_move(dest_reg, from_reg.to_reg(), ty)), - }; - } - &ABIArg::Stack(off, ty, ext) => { - let from_bits = ty_bits(ty) as u8; - // Trash the from_reg; it should be its last use. - match (ext, from_bits) { - (ArgumentExtension::Uext, n) if n < 64 => { - ret.push(Inst::Extend { - rd: from_reg, - rn: from_reg.to_reg(), - signed: false, - from_bits, - to_bits: 64, - }); - } - (ArgumentExtension::Sext, n) if n < 64 => { - ret.push(Inst::Extend { - rd: from_reg, - rn: from_reg.to_reg(), - signed: true, - from_bits, - to_bits: 64, - }); - } - _ => {} - }; - let mem = MemArg::RegOffset(self.ret_area_ptr.unwrap().to_reg(), off, ty); - ret.push(store_stack(mem, from_reg.to_reg(), ty)) - } - } - ret - } - - fn gen_ret(&self) -> Inst { - Inst::Ret {} - } - - fn gen_epilogue_placeholder(&self) -> Inst { - Inst::EpiloguePlaceholder {} - } - - fn set_num_spillslots(&mut self, slots: usize) { - self.spillslots = Some(slots); - } - - fn set_clobbered(&mut self, clobbered: Set>) { - self.clobbered = clobbered; - } - - /// Load from a stackslot. - fn load_stackslot( - &self, - slot: StackSlot, - offset: u32, - ty: Type, - into_reg: Writable, - ) -> Inst { - // Offset from beginning of stackslot area, which is at nominal-SP (see - // [MemArg::NominalSPOffset] for more details on nominal-SP tracking). - let stack_off = self.stackslots[slot.as_u32() as usize] as i64; - let sp_off: i64 = stack_off + (offset as i64); - trace!("load_stackslot: slot {} -> sp_off {}", slot, sp_off); - load_stack(MemArg::NominalSPOffset(sp_off, ty), into_reg, ty) - } - - /// Store to a stackslot. - fn store_stackslot(&self, slot: StackSlot, offset: u32, ty: Type, from_reg: Reg) -> Inst { - // Offset from beginning of stackslot area, which is at nominal-SP (see - // [MemArg::NominalSPOffset] for more details on nominal-SP tracking). - let stack_off = self.stackslots[slot.as_u32() as usize] as i64; - let sp_off: i64 = stack_off + (offset as i64); - trace!("store_stackslot: slot {} -> sp_off {}", slot, sp_off); - store_stack(MemArg::NominalSPOffset(sp_off, ty), from_reg, ty) - } - - /// Produce an instruction that computes a stackslot address. 
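// A minimal sketch of the nominal-SP offset arithmetic used by the stackslot
// accessors above and the spillslot accessors below: stack slots sit at the
// bottom of the nominal-SP region at their recorded offsets, and spill slots
// follow the stackslot area at 8 bytes per slot. Function names here are
// illustrative only.
fn stackslot_sp_off(stackslot_offsets: &[u32], slot: usize, offset: u32) -> i64 {
    stackslot_offsets[slot] as i64 + offset as i64
}

fn spillslot_sp_off(stackslots_size: u32, spillslot: usize) -> i64 {
    stackslots_size as i64 + (spillslot as i64) * 8
}

fn main() {
    // Two stack slots of 16 bytes each, so the spill area begins at 32.
    let offsets = [0u32, 16];
    assert_eq!(stackslot_sp_off(&offsets, 1, 4), 20);
    assert_eq!(spillslot_sp_off(32, 3), 56);
}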
- fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable) -> Inst { - // Offset from beginning of stackslot area, which is at nominal-SP (see - // [MemArg::NominalSPOffset] for more details on nominal-SP tracking). - let stack_off = self.stackslots[slot.as_u32() as usize] as i64; - let sp_off: i64 = stack_off + (offset as i64); - Inst::LoadAddr { - rd: into_reg, - mem: MemArg::NominalSPOffset(sp_off, I8), - } - } - - /// Load from a spillslot. - fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable) -> Inst { - // Offset from beginning of spillslot area, which is at nominal-SP + stackslots_size. - let islot = slot.get() as i64; - let spill_off = islot * 8; - let sp_off = self.stackslots_size as i64 + spill_off; - trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off); - load_stack(MemArg::NominalSPOffset(sp_off, ty), into_reg, ty) - } - - /// Store to a spillslot. - fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Inst { - // Offset from beginning of spillslot area, which is at nominal-SP + stackslots_size. - let islot = slot.get() as i64; - let spill_off = islot * 8; - let sp_off = self.stackslots_size as i64 + spill_off; - trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off); - store_stack(MemArg::NominalSPOffset(sp_off, ty), from_reg, ty) - } - - fn spillslots_to_stack_map(&self, slots: &[SpillSlot], state: &EmitState) -> StackMap { - assert!(state.virtual_sp_offset >= 0); - trace!( - "spillslots_to_stack_map: slots = {:?}, state = {:?}", - slots, - state - ); - let map_size = (state.virtual_sp_offset + state.nominal_sp_to_fp) as u32; - let map_words = (map_size + 7) / 8; - let mut bits = std::iter::repeat(false) - .take(map_words as usize) - .collect::>(); - - let first_spillslot_word = - ((self.stackslots_size + state.virtual_sp_offset as u32) / 8) as usize; - for &slot in slots { - let slot = slot.get() as usize; - bits[first_spillslot_word + slot] = true; - } - - StackMap::from_slice(&bits[..]) - } - - fn gen_prologue(&mut self) -> Vec { - let mut insts = vec![]; - if !self.call_conv.extends_baldrdash() { - // stp fp (x29), lr (x30), [sp, #-16]! - insts.push(Inst::StoreP64 { - rt: fp_reg(), - rt2: link_reg(), - mem: PairMemArg::PreIndexed( - writable_stack_reg(), - SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(), - ), - }); - // mov fp (x29), sp. This uses the ADDI rd, rs, 0 form of `MOV` because - // the usual encoding (`ORR`) does not work with SP. - insts.push(Inst::AluRRImm12 { - alu_op: ALUOp::Add64, - rd: writable_fp_reg(), - rn: stack_reg(), - imm12: Imm12 { - bits: 0, - shift12: false, - }, - }); - } - - let mut total_stacksize = self.stackslots_size + 8 * self.spillslots.unwrap() as u32; - if self.call_conv.extends_baldrdash() { - debug_assert!( - !self.flags.enable_probestack(), - "baldrdash does not expect cranelift to emit stack probes" - ); - total_stacksize += self.flags.baldrdash_prologue_words() as u32 * 8; - } - let total_stacksize = (total_stacksize + 15) & !15; // 16-align the stack. - - let mut total_sp_adjust = 0; - let mut nominal_sp_to_real_sp = 0; - - if !self.call_conv.extends_baldrdash() { - // Leaf functions with zero stack don't need a stack check if one's - // specified, otherwise always insert the stack check. 
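// A minimal sketch of the prologue decision shown just below: a stack check
// is emitted only when a stack-limit value was supplied, and a leaf function
// that allocates no stack may skip it. `needs_stack_check` is an
// illustrative name, not part of this patch.
fn needs_stack_check(has_stack_limit: bool, total_stacksize: u32, is_leaf: bool) -> bool {
    has_stack_limit && (total_stacksize > 0 || !is_leaf)
}

fn main() {
    assert!(!needs_stack_check(true, 0, true)); // leaf with zero stack: skip
    assert!(needs_stack_check(true, 0, false)); // non-leaf: always check
    assert!(needs_stack_check(true, 64, true)); // any allocation: check
    assert!(!needs_stack_check(false, 64, false)); // no limit given: nothing to check against
}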
- if total_stacksize > 0 || !self.is_leaf { - if let Some((reg, stack_limit_load)) = &self.stack_limit { - insts.extend_from_slice(stack_limit_load); - self.insert_stack_check(*reg, total_stacksize, &mut insts); - } - } - if total_stacksize > 0 { - total_sp_adjust += total_stacksize as u64; - } - } - - // N.B.: "nominal SP", which we use to refer to stackslots and - // spillslots, is defined to be equal to the stack pointer at this point - // in the prologue. - // - // If we push any clobbers below, we emit a virtual-SP adjustment - // meta-instruction so that the nominal-SP references behave as if SP - // were still at this point. See documentation for - // [crate::isa::aarch64::abi](this module) for more details on - // stackframe layout and nominal-SP maintenance. - - if total_sp_adjust > 0 { - // sub sp, sp, #total_stacksize - gen_sp_adjust_insts( - total_sp_adjust, - /* is_sub = */ true, - |inst| insts.push(inst), - ); - } - - // Save clobbered registers. - let (clobbered_int, clobbered_vec) = - get_callee_saves(self.call_conv, self.clobbered.to_vec()); - let mut clobber_size = 0; - for reg_pair in clobbered_int.chunks(2) { - let (r1, r2) = if reg_pair.len() == 2 { - // .to_reg().to_reg(): Writable --> RealReg --> Reg - (reg_pair[0].to_reg().to_reg(), reg_pair[1].to_reg().to_reg()) - } else { - (reg_pair[0].to_reg().to_reg(), zero_reg()) - }; - - debug_assert!(r1.get_class() == RegClass::I64); - debug_assert!(r2.get_class() == RegClass::I64); - - // stp r1, r2, [sp, #-16]! - insts.push(Inst::StoreP64 { - rt: r1, - rt2: r2, - mem: PairMemArg::PreIndexed( - writable_stack_reg(), - SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(), - ), - }); - clobber_size += 16; - } - let vec_save_bytes = clobbered_vec.len() * 16; - if vec_save_bytes != 0 { - insts.push(Inst::AluRRImm12 { - alu_op: ALUOp::Sub64, - rd: writable_stack_reg(), - rn: stack_reg(), - imm12: Imm12::maybe_from_u64(vec_save_bytes as u64).unwrap(), - }); - clobber_size += vec_save_bytes; - } - for (i, reg) in clobbered_vec.iter().enumerate() { - insts.push(Inst::FpuStore128 { - rd: reg.to_reg().to_reg(), - mem: MemArg::Unscaled(stack_reg(), SImm9::maybe_from_i64((i * 16) as i64).unwrap()), - srcloc: None, - }); - } - nominal_sp_to_real_sp += clobber_size as i64; - - if clobber_size > 0 { - insts.push(Inst::VirtualSPOffsetAdj { - offset: nominal_sp_to_real_sp, - }); - } - - self.total_frame_size = Some(total_stacksize); - insts - } - - fn gen_epilogue(&self) -> Vec { - let mut insts = vec![]; - - // Restore clobbered registers. 
- let (clobbered_int, clobbered_vec) = - get_callee_saves(self.call_conv, self.clobbered.to_vec()); - - for (i, reg) in clobbered_vec.iter().enumerate() { - insts.push(Inst::FpuLoad128 { - rd: Writable::from_reg(reg.to_reg().to_reg()), - mem: MemArg::Unscaled(stack_reg(), SImm9::maybe_from_i64((i * 16) as i64).unwrap()), - srcloc: None, - }); - } - let vec_save_bytes = clobbered_vec.len() * 16; - if vec_save_bytes != 0 { - insts.push(Inst::AluRRImm12 { - alu_op: ALUOp::Add64, - rd: writable_stack_reg(), - rn: stack_reg(), - imm12: Imm12::maybe_from_u64(vec_save_bytes as u64).unwrap(), - }); - } - - for reg_pair in clobbered_int.chunks(2).rev() { - let (r1, r2) = if reg_pair.len() == 2 { - ( - reg_pair[0].map(|r| r.to_reg()), - reg_pair[1].map(|r| r.to_reg()), - ) - } else { - (reg_pair[0].map(|r| r.to_reg()), writable_zero_reg()) - }; - - debug_assert!(r1.to_reg().get_class() == RegClass::I64); - debug_assert!(r2.to_reg().get_class() == RegClass::I64); - - // ldp r1, r2, [sp], #16 - insts.push(Inst::LoadP64 { - rt: r1, - rt2: r2, - mem: PairMemArg::PostIndexed( - writable_stack_reg(), - SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(), - ), - }); - } - - // N.B.: we do *not* emit a nominal-SP adjustment here, because (i) there will be no - // references to nominal-SP offsets before the return below, and (ii) the instruction - // emission tracks running SP offset linearly (in straight-line order), not according to - // the CFG, so early returns in the middle of function bodies would cause an incorrect - // offset for the rest of the body. - - if !self.call_conv.extends_baldrdash() { - // The MOV (alias of ORR) interprets x31 as XZR, so use an ADD here. - // MOV to SP is an alias of ADD. - insts.push(Inst::AluRRImm12 { - alu_op: ALUOp::Add64, - rd: writable_stack_reg(), - rn: fp_reg(), - imm12: Imm12 { - bits: 0, - shift12: false, - }, - }); - insts.push(Inst::LoadP64 { - rt: writable_fp_reg(), - rt2: writable_link_reg(), - mem: PairMemArg::PostIndexed( - writable_stack_reg(), - SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(), - ), - }); - insts.push(Inst::Ret {}); - } - - debug!("Epilogue: {:?}", insts); - insts - } - - fn frame_size(&self) -> u32 { - self.total_frame_size - .expect("frame size not computed before prologue generation") - } - - fn stack_args_size(&self) -> u32 { - self.sig.stack_arg_space as u32 - } - - fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 { - // We allocate in terms of 8-byte slots. - match (rc, ty) { - (RegClass::I64, _) => 1, - (RegClass::V128, F32) | (RegClass::V128, F64) => 1, - (RegClass::V128, _) => 2, - _ => panic!("Unexpected register class!"), - } - } - - fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option) -> Inst { - let ty = ty_from_ty_hint_or_reg_class(from_reg.to_reg(), ty); - self.store_spillslot(to_slot, ty, from_reg.to_reg()) - } - - fn gen_reload( - &self, - to_reg: Writable, - from_slot: SpillSlot, - ty: Option, - ) -> Inst { - let ty = ty_from_ty_hint_or_reg_class(to_reg.to_reg().to_reg(), ty); - self.load_spillslot(from_slot, ty, to_reg.map(|r| r.to_reg())) - } -} - -/// Return a type either from an optional type hint, or if not, from the default -/// type associated with the given register's class. This is used to generate -/// loads/spills appropriately given the type of value loaded/stored (which may -/// be narrower than the spillslot). We usually have the type because the -/// regalloc usually provides the vreg being spilled/reloaded, and we know every -/// vreg's type. 
However, the regalloc *can* request a spill/reload without an -/// associated vreg when needed to satisfy a safepoint (which requires all -/// ref-typed values, even those in real registers in the original vcode, to be -/// in spillslots). -fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option) -> Type { - match (ty, r.get_class()) { - // If the type is provided - (Some(t), _) => t, - // If no type is provided, this should be a register spill for a - // safepoint, so we only expect I64 (integer) registers. - (None, RegClass::I64) => I64, - _ => panic!("Unexpected register class!"), - } -} - -enum CallDest { - ExtName(ir::ExternalName, RelocDistance), - Reg(Reg), -} - -/// AArch64 ABI object for a function call. -pub struct AArch64ABICall { - sig: ABISig, - uses: Vec, - defs: Vec>, - dest: CallDest, - loc: ir::SourceLoc, - opcode: ir::Opcode, -} - -fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec>) { - // Compute uses: all arg regs. - let mut uses = Vec::new(); - for arg in &sig.args { - match arg { - &ABIArg::Reg(reg, ..) => uses.push(reg.to_reg()), - _ => {} - } - } - - // Compute defs: all retval regs, and all caller-save (clobbered) regs. - let mut defs = get_caller_saves(sig.call_conv); - for ret in &sig.rets { - match ret { - &ABIArg::Reg(reg, ..) => defs.push(Writable::from_reg(reg.to_reg())), - _ => {} - } - } - - (uses, defs) -} - -impl AArch64ABICall { - /// Create a callsite ABI object for a call directly to the specified function. - pub fn from_func( - sig: &ir::Signature, - extname: &ir::ExternalName, - dist: RelocDistance, - loc: ir::SourceLoc, - ) -> CodegenResult { - let sig = ABISig::from_func_sig(sig)?; - let (uses, defs) = abisig_to_uses_and_defs(&sig); - Ok(AArch64ABICall { - sig, - uses, - defs, - dest: CallDest::ExtName(extname.clone(), dist), - loc, - opcode: ir::Opcode::Call, - }) - } - - /// Create a callsite ABI object for a call to a function pointer with the - /// given signature. 
- pub fn from_ptr( - sig: &ir::Signature, - ptr: Reg, - loc: ir::SourceLoc, - opcode: ir::Opcode, - ) -> CodegenResult { - let sig = ABISig::from_func_sig(sig)?; - let (uses, defs) = abisig_to_uses_and_defs(&sig); - Ok(AArch64ABICall { - sig, - uses, - defs, - dest: CallDest::Reg(ptr), - loc, - opcode, - }) - } -} - -fn adjust_stack_and_nominal_sp>(ctx: &mut C, amount: u64, is_sub: bool) { - if amount == 0 { - return; - } - - let sp_adjustment = if is_sub { - amount as i64 - } else { - -(amount as i64) - }; - ctx.emit(Inst::VirtualSPOffsetAdj { - offset: sp_adjustment, - }); - - gen_sp_adjust_insts(amount, is_sub, |inst| { - ctx.emit(inst); - }); -} - -impl ABICall for AArch64ABICall { - type I = Inst; - - fn num_args(&self) -> usize { - if self.sig.stack_ret_arg.is_some() { - self.sig.args.len() - 1 - } else { - self.sig.args.len() - } - } - - fn emit_stack_pre_adjust>(&self, ctx: &mut C) { - let off = self.sig.stack_arg_space + self.sig.stack_ret_space; - adjust_stack_and_nominal_sp(ctx, off as u64, /* is_sub = */ true) - } - - fn emit_stack_post_adjust>(&self, ctx: &mut C) { - let off = self.sig.stack_arg_space + self.sig.stack_ret_space; - adjust_stack_and_nominal_sp(ctx, off as u64, /* is_sub = */ false) - } - - fn emit_copy_reg_to_arg>( - &self, - ctx: &mut C, - idx: usize, - from_reg: Reg, - ) { - match &self.sig.args[idx] { - &ABIArg::Reg(reg, ty, ext) - if ext != ir::ArgumentExtension::None && ty_bits(ty) < 64 => - { - assert_eq!(RegClass::I64, reg.get_class()); - let signed = match ext { - ir::ArgumentExtension::Uext => false, - ir::ArgumentExtension::Sext => true, - _ => unreachable!(), - }; - ctx.emit(Inst::Extend { - rd: Writable::from_reg(reg.to_reg()), - rn: from_reg, - signed, - from_bits: ty_bits(ty) as u8, - to_bits: 64, - }); - } - &ABIArg::Reg(reg, ty, _) => { - ctx.emit(Inst::gen_move( - Writable::from_reg(reg.to_reg()), - from_reg, - ty, - )); - } - &ABIArg::Stack(off, ty, ext) => { - if ext != ir::ArgumentExtension::None && ty_bits(ty) < 64 { - assert_eq!(RegClass::I64, from_reg.get_class()); - let signed = match ext { - ir::ArgumentExtension::Uext => false, - ir::ArgumentExtension::Sext => true, - _ => unreachable!(), - }; - // Extend in place in the source register. Our convention is to - // treat high bits as undefined for values in registers, so this - // is safe, even for an argument that is nominally read-only. - ctx.emit(Inst::Extend { - rd: Writable::from_reg(from_reg), - rn: from_reg, - signed, - from_bits: ty_bits(ty) as u8, - to_bits: 64, - }); - } - ctx.emit(store_stack(MemArg::SPOffset(off, ty), from_reg, ty)) - } - } - } - - fn emit_copy_retval_to_reg>( - &self, - ctx: &mut C, - idx: usize, - into_reg: Writable, - ) { - match &self.sig.rets[idx] { - // Extension mode doesn't matter because we're copying out, not in, - // and we ignore high bits in our own registers by convention. 
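// A minimal sketch of the extension rule applied in `emit_copy_reg_to_arg`
// above: narrow integer arguments marked Uext/Sext are widened to 64 bits
// before the call, while `None` leaves the high bits undefined by
// convention. `Extension` and `needs_extend` are illustrative names only.
#[derive(Clone, Copy, PartialEq)]
enum Extension {
    None,
    Uext,
    Sext,
}

/// Returns `Some(signed)` when an extend instruction is required.
fn needs_extend(ext: Extension, ty_bits: u8) -> Option<bool> {
    match ext {
        Extension::Uext if ty_bits < 64 => Some(false),
        Extension::Sext if ty_bits < 64 => Some(true),
        _ => None,
    }
}

fn main() {
    assert_eq!(needs_extend(Extension::Sext, 32), Some(true)); // sign-extend a 32-bit arg
    assert_eq!(needs_extend(Extension::Uext, 8), Some(false)); // zero-extend a byte
    assert_eq!(needs_extend(Extension::None, 32), None); // high bits left undefined
    assert_eq!(needs_extend(Extension::Sext, 64), None); // already full width
}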
- &ABIArg::Reg(reg, ty, _) => ctx.emit(Inst::gen_move(into_reg, reg.to_reg(), ty)), - &ABIArg::Stack(off, ty, _) => { - let ret_area_base = self.sig.stack_arg_space; - ctx.emit(load_stack( - MemArg::SPOffset(off + ret_area_base, ty), - into_reg, - ty, - )); - } - } - } - - fn emit_call>(&mut self, ctx: &mut C) { - let (uses, defs) = ( - mem::replace(&mut self.uses, Default::default()), - mem::replace(&mut self.defs, Default::default()), - ); - if let Some(i) = self.sig.stack_ret_arg { - let rd = ctx.alloc_tmp(RegClass::I64, I64); - let ret_area_base = self.sig.stack_arg_space; - ctx.emit(Inst::LoadAddr { - rd, - mem: MemArg::SPOffset(ret_area_base, I8), - }); - self.emit_copy_reg_to_arg(ctx, i, rd.to_reg()); - } - match &self.dest { - &CallDest::ExtName(ref name, RelocDistance::Near) => ctx.emit_safepoint(Inst::Call { - info: Box::new(CallInfo { - dest: name.clone(), - uses, - defs, - loc: self.loc, - opcode: self.opcode, - }), - }), - &CallDest::ExtName(ref name, RelocDistance::Far) => { - ctx.emit(Inst::LoadExtName { - rd: writable_spilltmp_reg(), - name: Box::new(name.clone()), - offset: 0, - srcloc: self.loc, - }); - ctx.emit_safepoint(Inst::CallInd { - info: Box::new(CallIndInfo { - rn: spilltmp_reg(), - uses, - defs, - loc: self.loc, - opcode: self.opcode, - }), - }); - } - &CallDest::Reg(reg) => ctx.emit_safepoint(Inst::CallInd { - info: Box::new(CallIndInfo { - rn: reg, - uses, - defs, - loc: self.loc, - opcode: self.opcode, - }), - }), - } - } -} diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs index 28fad2763d..060660fbd9 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/args.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs @@ -7,8 +7,7 @@ use crate::ir; use crate::ir::types::{F32X2, F32X4, F64X2, I16X4, I16X8, I32X2, I32X4, I64X2, I8X16, I8X8}; use crate::ir::Type; use crate::isa::aarch64::inst::*; -use crate::isa::aarch64::lower::ty_bits; -use crate::machinst::MachLabel; +use crate::machinst::{ty_bits, MachLabel}; use regalloc::{RealRegUniverse, Reg, Writable}; @@ -119,9 +118,9 @@ pub enum MemLabel { PCRel(i32), } -/// A memory argument to load/store, encapsulating the possible addressing modes. +/// An addressing mode specified for a load/store operation. #[derive(Clone, Debug)] -pub enum MemArg { +pub enum AMode { // // Real ARM64 addressing modes: // @@ -183,39 +182,39 @@ pub enum MemArg { NominalSPOffset(i64, Type), } -impl MemArg { +impl AMode { /// Memory reference using an address in a register. - pub fn reg(reg: Reg) -> MemArg { + pub fn reg(reg: Reg) -> AMode { // Use UnsignedOffset rather than Unscaled to use ldr rather than ldur. // This also does not use PostIndexed / PreIndexed as they update the register. - MemArg::UnsignedOffset(reg, UImm12Scaled::zero(I64)) + AMode::UnsignedOffset(reg, UImm12Scaled::zero(I64)) } /// Memory reference using the sum of two registers as an address. - pub fn reg_plus_reg(reg1: Reg, reg2: Reg) -> MemArg { - MemArg::RegReg(reg1, reg2) + pub fn reg_plus_reg(reg1: Reg, reg2: Reg) -> AMode { + AMode::RegReg(reg1, reg2) } /// Memory reference using `reg1 + sizeof(ty) * reg2` as an address. - pub fn reg_plus_reg_scaled(reg1: Reg, reg2: Reg, ty: Type) -> MemArg { - MemArg::RegScaled(reg1, reg2, ty) + pub fn reg_plus_reg_scaled(reg1: Reg, reg2: Reg, ty: Type) -> AMode { + AMode::RegScaled(reg1, reg2, ty) } /// Memory reference using `reg1 + sizeof(ty) * reg2` as an address, with `reg2` sign- or /// zero-extended as per `op`. 
- pub fn reg_plus_reg_scaled_extended(reg1: Reg, reg2: Reg, ty: Type, op: ExtendOp) -> MemArg { - MemArg::RegScaledExtended(reg1, reg2, ty, op) + pub fn reg_plus_reg_scaled_extended(reg1: Reg, reg2: Reg, ty: Type, op: ExtendOp) -> AMode { + AMode::RegScaledExtended(reg1, reg2, ty, op) } /// Memory reference to a label: a global function or value, or data in the constant pool. - pub fn label(label: MemLabel) -> MemArg { - MemArg::Label(label) + pub fn label(label: MemLabel) -> AMode { + AMode::Label(label) } } /// A memory argument to a load/store-pair. #[derive(Clone, Debug)] -pub enum PairMemArg { +pub enum PairAMode { SignedOffset(Reg, SImm7Scaled), PreIndexed(Writable, SImm7Scaled), PostIndexed(Writable, SImm7Scaled), @@ -381,27 +380,27 @@ fn shift_for_type(ty: Type) -> usize { } } -impl ShowWithRRU for MemArg { +impl ShowWithRRU for AMode { fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { match self { - &MemArg::Unscaled(reg, simm9) => { + &AMode::Unscaled(reg, simm9) => { if simm9.value != 0 { format!("[{}, {}]", reg.show_rru(mb_rru), simm9.show_rru(mb_rru)) } else { format!("[{}]", reg.show_rru(mb_rru)) } } - &MemArg::UnsignedOffset(reg, uimm12) => { + &AMode::UnsignedOffset(reg, uimm12) => { if uimm12.value != 0 { format!("[{}, {}]", reg.show_rru(mb_rru), uimm12.show_rru(mb_rru)) } else { format!("[{}]", reg.show_rru(mb_rru)) } } - &MemArg::RegReg(r1, r2) => { + &AMode::RegReg(r1, r2) => { format!("[{}, {}]", r1.show_rru(mb_rru), r2.show_rru(mb_rru),) } - &MemArg::RegScaled(r1, r2, ty) => { + &AMode::RegScaled(r1, r2, ty) => { let shift = shift_for_type(ty); format!( "[{}, {}, LSL #{}]", @@ -410,7 +409,7 @@ impl ShowWithRRU for MemArg { shift, ) } - &MemArg::RegScaledExtended(r1, r2, ty, op) => { + &AMode::RegScaledExtended(r1, r2, ty, op) => { let shift = shift_for_type(ty); let size = match op { ExtendOp::SXTW | ExtendOp::UXTW => OperandSize::Size32, @@ -425,7 +424,7 @@ impl ShowWithRRU for MemArg { shift ) } - &MemArg::RegExtended(r1, r2, op) => { + &AMode::RegExtended(r1, r2, op) => { let size = match op { ExtendOp::SXTW | ExtendOp::UXTW => OperandSize::Size32, _ => OperandSize::Size64, @@ -438,44 +437,44 @@ impl ShowWithRRU for MemArg { op, ) } - &MemArg::Label(ref label) => label.show_rru(mb_rru), - &MemArg::PreIndexed(r, simm9) => format!( + &AMode::Label(ref label) => label.show_rru(mb_rru), + &AMode::PreIndexed(r, simm9) => format!( "[{}, {}]!", r.to_reg().show_rru(mb_rru), simm9.show_rru(mb_rru) ), - &MemArg::PostIndexed(r, simm9) => format!( + &AMode::PostIndexed(r, simm9) => format!( "[{}], {}", r.to_reg().show_rru(mb_rru), simm9.show_rru(mb_rru) ), // Eliminated by `mem_finalize()`. - &MemArg::SPOffset(..) - | &MemArg::FPOffset(..) - | &MemArg::NominalSPOffset(..) - | &MemArg::RegOffset(..) => { + &AMode::SPOffset(..) + | &AMode::FPOffset(..) + | &AMode::NominalSPOffset(..) + | &AMode::RegOffset(..) 
=> { panic!("Unexpected pseudo mem-arg mode (stack-offset or generic reg-offset)!") } } } } -impl ShowWithRRU for PairMemArg { +impl ShowWithRRU for PairAMode { fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { match self { - &PairMemArg::SignedOffset(reg, simm7) => { + &PairAMode::SignedOffset(reg, simm7) => { if simm7.value != 0 { format!("[{}, {}]", reg.show_rru(mb_rru), simm7.show_rru(mb_rru)) } else { format!("[{}]", reg.show_rru(mb_rru)) } } - &PairMemArg::PreIndexed(reg, simm7) => format!( + &PairAMode::PreIndexed(reg, simm7) => format!( "[{}, {}]!", reg.to_reg().show_rru(mb_rru), simm7.show_rru(mb_rru) ), - &PairMemArg::PostIndexed(reg, simm7) => format!( + &PairAMode::PostIndexed(reg, simm7) => format!( "[{}], {}", reg.to_reg().show_rru(mb_rru), simm7.show_rru(mb_rru) diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 9349f63bec..6dcfb56249 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -5,7 +5,7 @@ use crate::ir::constant::ConstantData; use crate::ir::types::*; use crate::ir::TrapCode; use crate::isa::aarch64::inst::*; -use crate::isa::aarch64::lower::ty_bits; +use crate::machinst::ty_bits; use regalloc::{Reg, RegClass, Writable}; @@ -26,22 +26,22 @@ pub fn memlabel_finalize(_insn_off: CodeOffset, label: &MemLabel) -> i32 { /// of this amode. pub fn mem_finalize( insn_off: CodeOffset, - mem: &MemArg, + mem: &AMode, state: &EmitState, -) -> (SmallVec<[Inst; 4]>, MemArg) { +) -> (SmallVec<[Inst; 4]>, AMode) { match mem { - &MemArg::RegOffset(_, off, ty) - | &MemArg::SPOffset(off, ty) - | &MemArg::FPOffset(off, ty) - | &MemArg::NominalSPOffset(off, ty) => { + &AMode::RegOffset(_, off, ty) + | &AMode::SPOffset(off, ty) + | &AMode::FPOffset(off, ty) + | &AMode::NominalSPOffset(off, ty) => { let basereg = match mem { - &MemArg::RegOffset(reg, _, _) => reg, - &MemArg::SPOffset(..) | &MemArg::NominalSPOffset(..) => stack_reg(), - &MemArg::FPOffset(..) => fp_reg(), + &AMode::RegOffset(reg, _, _) => reg, + &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => stack_reg(), + &AMode::FPOffset(..) => fp_reg(), _ => unreachable!(), }; let adj = match mem { - &MemArg::NominalSPOffset(..) => { + &AMode::NominalSPOffset(..) 
=> { debug!( "mem_finalize: nominal SP offset {} + adj {} -> {}", off, @@ -55,10 +55,10 @@ pub fn mem_finalize( let off = off + adj; if let Some(simm9) = SImm9::maybe_from_i64(off) { - let mem = MemArg::Unscaled(basereg, simm9); + let mem = AMode::Unscaled(basereg, simm9); (smallvec![], mem) } else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(off, ty) { - let mem = MemArg::UnsignedOffset(basereg, uimm12s); + let mem = AMode::UnsignedOffset(basereg, uimm12s); (smallvec![], mem) } else { let tmp = writable_spilltmp_reg(); @@ -75,13 +75,13 @@ pub fn mem_finalize( extendop: ExtendOp::UXTX, }; const_insts.push(add_inst); - (const_insts, MemArg::reg(tmp.to_reg())) + (const_insts, AMode::reg(tmp.to_reg())) } } - &MemArg::Label(ref label) => { + &AMode::Label(ref label) => { let off = memlabel_finalize(insn_off, label); - (smallvec![], MemArg::Label(MemLabel::PCRel(off))) + (smallvec![], AMode::Label(MemLabel::PCRel(off))) } _ => (smallvec![], mem.clone()), @@ -226,7 +226,7 @@ fn enc_ldst_reg( Some(ExtendOp::SXTW) => 0b110, Some(ExtendOp::SXTX) => 0b111, None => 0b011, // LSL - _ => panic!("bad extend mode for ld/st MemArg"), + _ => panic!("bad extend mode for ld/st AMode"), }; (op_31_22 << 22) | (1 << 21) @@ -780,32 +780,32 @@ impl MachInstEmit for Inst { } match &mem { - &MemArg::Unscaled(reg, simm9) => { + &AMode::Unscaled(reg, simm9) => { sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd)); } - &MemArg::UnsignedOffset(reg, uimm12scaled) => { + &AMode::UnsignedOffset(reg, uimm12scaled) => { if uimm12scaled.value() != 0 { assert_eq!(bits, ty_bits(uimm12scaled.scale_ty())); } sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd)); } - &MemArg::RegReg(r1, r2) => { + &AMode::RegReg(r1, r2) => { sink.put4(enc_ldst_reg( op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd, )); } - &MemArg::RegScaled(r1, r2, ty) | &MemArg::RegScaledExtended(r1, r2, ty, _) => { + &AMode::RegScaled(r1, r2, ty) | &AMode::RegScaledExtended(r1, r2, ty, _) => { assert_eq!(bits, ty_bits(ty)); let extendop = match &mem { - &MemArg::RegScaled(..) => None, - &MemArg::RegScaledExtended(_, _, _, op) => Some(op), + &AMode::RegScaled(..) => None, + &AMode::RegScaledExtended(_, _, _, op) => Some(op), _ => unreachable!(), }; sink.put4(enc_ldst_reg( op, r1, r2, /* scaled = */ true, extendop, rd, )); } - &MemArg::RegExtended(r1, r2, extendop) => { + &AMode::RegExtended(r1, r2, extendop) => { sink.put4(enc_ldst_reg( op, r1, @@ -815,7 +815,7 @@ impl MachInstEmit for Inst { rd, )); } - &MemArg::Label(ref label) => { + &AMode::Label(ref label) => { let offset = match label { // cast i32 to u32 (two's-complement) &MemLabel::PCRel(off) => off as u32, @@ -843,17 +843,17 @@ impl MachInstEmit for Inst { _ => panic!("Unspported size for LDR from constant pool!"), } } - &MemArg::PreIndexed(reg, simm9) => { + &AMode::PreIndexed(reg, simm9) => { sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd)); } - &MemArg::PostIndexed(reg, simm9) => { + &AMode::PostIndexed(reg, simm9) => { sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd)); } // Eliminated by `mem_finalize()` above. - &MemArg::SPOffset(..) - | &MemArg::FPOffset(..) - | &MemArg::NominalSPOffset(..) => panic!("Should not see stack-offset here!"), - &MemArg::RegOffset(..) => panic!("SHould not see generic reg-offset here!"), + &AMode::SPOffset(..) | &AMode::FPOffset(..) | &AMode::NominalSPOffset(..) => { + panic!("Should not see stack-offset here!") + } + &AMode::RegOffset(..) 
=> panic!("SHould not see generic reg-offset here!"), } } @@ -916,32 +916,31 @@ impl MachInstEmit for Inst { } match &mem { - &MemArg::Unscaled(reg, simm9) => { + &AMode::Unscaled(reg, simm9) => { sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd)); } - &MemArg::UnsignedOffset(reg, uimm12scaled) => { + &AMode::UnsignedOffset(reg, uimm12scaled) => { if uimm12scaled.value() != 0 { assert_eq!(bits, ty_bits(uimm12scaled.scale_ty())); } sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd)); } - &MemArg::RegReg(r1, r2) => { + &AMode::RegReg(r1, r2) => { sink.put4(enc_ldst_reg( op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd, )); } - &MemArg::RegScaled(r1, r2, _ty) - | &MemArg::RegScaledExtended(r1, r2, _ty, _) => { + &AMode::RegScaled(r1, r2, _ty) | &AMode::RegScaledExtended(r1, r2, _ty, _) => { let extendop = match &mem { - &MemArg::RegScaled(..) => None, - &MemArg::RegScaledExtended(_, _, _, op) => Some(op), + &AMode::RegScaled(..) => None, + &AMode::RegScaledExtended(_, _, _, op) => Some(op), _ => unreachable!(), }; sink.put4(enc_ldst_reg( op, r1, r2, /* scaled = */ true, extendop, rd, )); } - &MemArg::RegExtended(r1, r2, extendop) => { + &AMode::RegExtended(r1, r2, extendop) => { sink.put4(enc_ldst_reg( op, r1, @@ -951,33 +950,33 @@ impl MachInstEmit for Inst { rd, )); } - &MemArg::Label(..) => { + &AMode::Label(..) => { panic!("Store to a MemLabel not implemented!"); } - &MemArg::PreIndexed(reg, simm9) => { + &AMode::PreIndexed(reg, simm9) => { sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd)); } - &MemArg::PostIndexed(reg, simm9) => { + &AMode::PostIndexed(reg, simm9) => { sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd)); } // Eliminated by `mem_finalize()` above. - &MemArg::SPOffset(..) - | &MemArg::FPOffset(..) - | &MemArg::NominalSPOffset(..) => panic!("Should not see stack-offset here!"), - &MemArg::RegOffset(..) => panic!("SHould not see generic reg-offset here!"), + &AMode::SPOffset(..) | &AMode::FPOffset(..) | &AMode::NominalSPOffset(..) => { + panic!("Should not see stack-offset here!") + } + &AMode::RegOffset(..) 
=> panic!("SHould not see generic reg-offset here!"), } } &Inst::StoreP64 { rt, rt2, ref mem } => match mem { - &PairMemArg::SignedOffset(reg, simm7) => { + &PairAMode::SignedOffset(reg, simm7) => { assert_eq!(simm7.scale_ty, I64); sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2)); } - &PairMemArg::PreIndexed(reg, simm7) => { + &PairAMode::PreIndexed(reg, simm7) => { assert_eq!(simm7.scale_ty, I64); sink.put4(enc_ldst_pair(0b1010100110, simm7, reg.to_reg(), rt, rt2)); } - &PairMemArg::PostIndexed(reg, simm7) => { + &PairAMode::PostIndexed(reg, simm7) => { assert_eq!(simm7.scale_ty, I64); sink.put4(enc_ldst_pair(0b1010100010, simm7, reg.to_reg(), rt, rt2)); } @@ -986,15 +985,15 @@ impl MachInstEmit for Inst { let rt = rt.to_reg(); let rt2 = rt2.to_reg(); match mem { - &PairMemArg::SignedOffset(reg, simm7) => { + &PairAMode::SignedOffset(reg, simm7) => { assert_eq!(simm7.scale_ty, I64); sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2)); } - &PairMemArg::PreIndexed(reg, simm7) => { + &PairAMode::PreIndexed(reg, simm7) => { assert_eq!(simm7.scale_ty, I64); sink.put4(enc_ldst_pair(0b1010100111, simm7, reg.to_reg(), rt, rt2)); } - &PairMemArg::PostIndexed(reg, simm7) => { + &PairAMode::PostIndexed(reg, simm7) => { assert_eq!(simm7.scale_ty, I64); sink.put4(enc_ldst_pair(0b1010100011, simm7, reg.to_reg(), rt, rt2)); } @@ -1475,7 +1474,7 @@ impl MachInstEmit for Inst { &Inst::LoadFpuConst32 { rd, const_data } => { let inst = Inst::FpuLoad32 { rd, - mem: MemArg::Label(MemLabel::PCRel(8)), + mem: AMode::Label(MemLabel::PCRel(8)), srcloc: None, }; inst.emit(sink, flags, state); @@ -1488,7 +1487,7 @@ impl MachInstEmit for Inst { &Inst::LoadFpuConst64 { rd, const_data } => { let inst = Inst::FpuLoad64 { rd, - mem: MemArg::Label(MemLabel::PCRel(8)), + mem: AMode::Label(MemLabel::PCRel(8)), srcloc: None, }; inst.emit(sink, flags, state); @@ -1501,7 +1500,7 @@ impl MachInstEmit for Inst { &Inst::LoadFpuConst128 { rd, const_data } => { let inst = Inst::FpuLoad128 { rd, - mem: MemArg::Label(MemLabel::PCRel(8)), + mem: AMode::Label(MemLabel::PCRel(8)), srcloc: None, }; inst.emit(sink, flags, state); @@ -1970,7 +1969,7 @@ impl MachInstEmit for Inst { // Load value out of jump table let inst = Inst::SLoad32 { rd: rtmp2, - mem: MemArg::reg_plus_reg_scaled_extended( + mem: AMode::reg_plus_reg_scaled_extended( rtmp1.to_reg(), rtmp2.to_reg(), I32, @@ -2018,7 +2017,7 @@ impl MachInstEmit for Inst { &Inst::LoadConst64 { rd, const_data } => { let inst = Inst::ULoad64 { rd, - mem: MemArg::Label(MemLabel::PCRel(8)), + mem: AMode::Label(MemLabel::PCRel(8)), srcloc: None, // can't cause a user trap. }; inst.emit(sink, flags, state); @@ -2036,7 +2035,7 @@ impl MachInstEmit for Inst { } => { let inst = Inst::ULoad64 { rd, - mem: MemArg::Label(MemLabel::PCRel(8)), + mem: AMode::Label(MemLabel::PCRel(8)), srcloc: None, // can't cause a user trap. 
}; inst.emit(sink, flags, state); @@ -2058,8 +2057,8 @@ impl MachInstEmit for Inst { } let (reg, offset) = match mem { - MemArg::Unscaled(r, simm9) => (r, simm9.value()), - MemArg::UnsignedOffset(r, uimm12scaled) => (r, uimm12scaled.value() as i32), + AMode::Unscaled(r, simm9) => (r, simm9.value()), + AMode::UnsignedOffset(r, uimm12scaled) => (r, uimm12scaled.value() as i32), _ => panic!("Unsupported case for LoadAddr: {:?}", mem), }; let abs_offset = if offset < 0 { @@ -2085,7 +2084,7 @@ impl MachInstEmit for Inst { }; add.emit(sink, flags, state); } else { - // Use `tmp2` here: `reg` may be `spilltmp` if the `MemArg` on this instruction + // Use `tmp2` here: `reg` may be `spilltmp` if the `AMode` on this instruction // was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note // that no other instructions will be inserted here (we're emitting directly), // and a live range of `tmp2` should not span this instruction, so this use diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index 1d6e2070b0..2b2f48f802 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -1079,7 +1079,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad8 { rd: writable_xreg(1), - mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + mem: AMode::Unscaled(xreg(2), SImm9::zero()), srcloc: None, }, "41004038", @@ -1088,7 +1088,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad8 { rd: writable_xreg(1), - mem: MemArg::UnsignedOffset(xreg(2), UImm12Scaled::zero(I8)), + mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::zero(I8)), srcloc: None, }, "41004039", @@ -1097,7 +1097,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad8 { rd: writable_xreg(1), - mem: MemArg::RegReg(xreg(2), xreg(5)), + mem: AMode::RegReg(xreg(2), xreg(5)), srcloc: None, }, "41686538", @@ -1106,7 +1106,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::SLoad8 { rd: writable_xreg(1), - mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + mem: AMode::Unscaled(xreg(2), SImm9::zero()), srcloc: None, }, "41008038", @@ -1115,7 +1115,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::SLoad8 { rd: writable_xreg(1), - mem: MemArg::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(63, I8).unwrap()), + mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(63, I8).unwrap()), srcloc: None, }, "41FC8039", @@ -1124,7 +1124,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::SLoad8 { rd: writable_xreg(1), - mem: MemArg::RegReg(xreg(2), xreg(5)), + mem: AMode::RegReg(xreg(2), xreg(5)), srcloc: None, }, "4168A538", @@ -1133,7 +1133,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad16 { rd: writable_xreg(1), - mem: MemArg::Unscaled(xreg(2), SImm9::maybe_from_i64(5).unwrap()), + mem: AMode::Unscaled(xreg(2), SImm9::maybe_from_i64(5).unwrap()), srcloc: None, }, "41504078", @@ -1142,7 +1142,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad16 { rd: writable_xreg(1), - mem: MemArg::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(8, I16).unwrap()), + mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(8, I16).unwrap()), srcloc: None, }, "41104079", @@ -1151,7 +1151,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad16 { rd: writable_xreg(1), - mem: MemArg::RegScaled(xreg(2), xreg(3), I16), + mem: AMode::RegScaled(xreg(2), xreg(3), I16), srcloc: None, }, "41786378", @@ -1160,7 +1160,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::SLoad16 { rd: 
writable_xreg(1), - mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + mem: AMode::Unscaled(xreg(2), SImm9::zero()), srcloc: None, }, "41008078", @@ -1169,7 +1169,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::SLoad16 { rd: writable_xreg(28), - mem: MemArg::UnsignedOffset(xreg(20), UImm12Scaled::maybe_from_i64(24, I16).unwrap()), + mem: AMode::UnsignedOffset(xreg(20), UImm12Scaled::maybe_from_i64(24, I16).unwrap()), srcloc: None, }, "9C328079", @@ -1178,7 +1178,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::SLoad16 { rd: writable_xreg(28), - mem: MemArg::RegScaled(xreg(20), xreg(20), I16), + mem: AMode::RegScaled(xreg(20), xreg(20), I16), srcloc: None, }, "9C7AB478", @@ -1187,7 +1187,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad32 { rd: writable_xreg(1), - mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + mem: AMode::Unscaled(xreg(2), SImm9::zero()), srcloc: None, }, "410040B8", @@ -1196,7 +1196,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad32 { rd: writable_xreg(12), - mem: MemArg::UnsignedOffset(xreg(0), UImm12Scaled::maybe_from_i64(204, I32).unwrap()), + mem: AMode::UnsignedOffset(xreg(0), UImm12Scaled::maybe_from_i64(204, I32).unwrap()), srcloc: None, }, "0CCC40B9", @@ -1205,7 +1205,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad32 { rd: writable_xreg(1), - mem: MemArg::RegScaled(xreg(2), xreg(12), I32), + mem: AMode::RegScaled(xreg(2), xreg(12), I32), srcloc: None, }, "41786CB8", @@ -1214,7 +1214,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::SLoad32 { rd: writable_xreg(1), - mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + mem: AMode::Unscaled(xreg(2), SImm9::zero()), srcloc: None, }, "410080B8", @@ -1223,7 +1223,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::SLoad32 { rd: writable_xreg(12), - mem: MemArg::UnsignedOffset(xreg(1), UImm12Scaled::maybe_from_i64(16380, I32).unwrap()), + mem: AMode::UnsignedOffset(xreg(1), UImm12Scaled::maybe_from_i64(16380, I32).unwrap()), srcloc: None, }, "2CFCBFB9", @@ -1232,7 +1232,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::SLoad32 { rd: writable_xreg(1), - mem: MemArg::RegScaled(xreg(5), xreg(1), I32), + mem: AMode::RegScaled(xreg(5), xreg(1), I32), srcloc: None, }, "A178A1B8", @@ -1241,7 +1241,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + mem: AMode::Unscaled(xreg(2), SImm9::zero()), srcloc: None, }, "410040F8", @@ -1250,7 +1250,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::Unscaled(xreg(2), SImm9::maybe_from_i64(-256).unwrap()), + mem: AMode::Unscaled(xreg(2), SImm9::maybe_from_i64(-256).unwrap()), srcloc: None, }, "410050F8", @@ -1259,7 +1259,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::Unscaled(xreg(2), SImm9::maybe_from_i64(255).unwrap()), + mem: AMode::Unscaled(xreg(2), SImm9::maybe_from_i64(255).unwrap()), srcloc: None, }, "41F04FF8", @@ -1268,7 +1268,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(32760, I64).unwrap()), + mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(32760, I64).unwrap()), srcloc: None, }, "41FC7FF9", @@ -1277,7 +1277,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::RegReg(xreg(2), xreg(3)), + mem: AMode::RegReg(xreg(2), xreg(3)), srcloc: None, }, "416863F8", @@ -1286,7 +1286,7 @@ fn test_aarch64_binemit() 
{ insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::RegScaled(xreg(2), xreg(3), I64), + mem: AMode::RegScaled(xreg(2), xreg(3), I64), srcloc: None, }, "417863F8", @@ -1295,7 +1295,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::RegScaledExtended(xreg(2), xreg(3), I64, ExtendOp::SXTW), + mem: AMode::RegScaledExtended(xreg(2), xreg(3), I64, ExtendOp::SXTW), srcloc: None, }, "41D863F8", @@ -1304,7 +1304,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::RegExtended(xreg(2), xreg(3), ExtendOp::SXTW), + mem: AMode::RegExtended(xreg(2), xreg(3), ExtendOp::SXTW), srcloc: None, }, "41C863F8", @@ -1313,7 +1313,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::Label(MemLabel::PCRel(64)), + mem: AMode::Label(MemLabel::PCRel(64)), srcloc: None, }, "01020058", @@ -1322,7 +1322,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::PreIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), + mem: AMode::PreIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), srcloc: None, }, "410C41F8", @@ -1331,7 +1331,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::PostIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), + mem: AMode::PostIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), srcloc: None, }, "410441F8", @@ -1340,7 +1340,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::FPOffset(32768, I8), + mem: AMode::FPOffset(32768, I8), srcloc: None, }, "100090D2B063308B010240F9", @@ -1349,7 +1349,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::FPOffset(-32768, I8), + mem: AMode::FPOffset(-32768, I8), srcloc: None, }, "F0FF8F92B063308B010240F9", @@ -1358,7 +1358,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::FPOffset(1048576, I8), // 2^20 + mem: AMode::FPOffset(1048576, I8), // 2^20 srcloc: None, }, "1002A0D2B063308B010240F9", @@ -1367,7 +1367,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::FPOffset(1048576 + 1, I8), // 2^20 + 1 + mem: AMode::FPOffset(1048576 + 1, I8), // 2^20 + 1 srcloc: None, }, "300080D21002A0F2B063308B010240F9", @@ -1377,7 +1377,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::RegOffset(xreg(7), 8, I64), + mem: AMode::RegOffset(xreg(7), 8, I64), srcloc: None, }, "E18040F8", @@ -1387,7 +1387,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::RegOffset(xreg(7), 1024, I64), + mem: AMode::RegOffset(xreg(7), 1024, I64), srcloc: None, }, "E10042F9", @@ -1397,7 +1397,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::RegOffset(xreg(7), 1048576, I64), + mem: AMode::RegOffset(xreg(7), 1048576, I64), srcloc: None, }, "1002A0D2F060308B010240F9", @@ -1407,7 +1407,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::Store8 { rd: xreg(1), - mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + mem: AMode::Unscaled(xreg(2), SImm9::zero()), srcloc: None, }, "41000038", @@ -1416,7 +1416,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::Store8 { rd: xreg(1), - mem: MemArg::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(4095, I8).unwrap()), + mem: AMode::UnsignedOffset(xreg(2), 
UImm12Scaled::maybe_from_i64(4095, I8).unwrap()), srcloc: None, }, "41FC3F39", @@ -1425,7 +1425,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::Store16 { rd: xreg(1), - mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + mem: AMode::Unscaled(xreg(2), SImm9::zero()), srcloc: None, }, "41000078", @@ -1434,7 +1434,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::Store16 { rd: xreg(1), - mem: MemArg::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(8190, I16).unwrap()), + mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(8190, I16).unwrap()), srcloc: None, }, "41FC3F79", @@ -1443,7 +1443,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::Store32 { rd: xreg(1), - mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + mem: AMode::Unscaled(xreg(2), SImm9::zero()), srcloc: None, }, "410000B8", @@ -1452,7 +1452,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::Store32 { rd: xreg(1), - mem: MemArg::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(16380, I32).unwrap()), + mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(16380, I32).unwrap()), srcloc: None, }, "41FC3FB9", @@ -1461,7 +1461,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::Store64 { rd: xreg(1), - mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + mem: AMode::Unscaled(xreg(2), SImm9::zero()), srcloc: None, }, "410000F8", @@ -1470,7 +1470,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::Store64 { rd: xreg(1), - mem: MemArg::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(32760, I64).unwrap()), + mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(32760, I64).unwrap()), srcloc: None, }, "41FC3FF9", @@ -1479,7 +1479,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::Store64 { rd: xreg(1), - mem: MemArg::RegReg(xreg(2), xreg(3)), + mem: AMode::RegReg(xreg(2), xreg(3)), srcloc: None, }, "416823F8", @@ -1488,7 +1488,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::Store64 { rd: xreg(1), - mem: MemArg::RegScaled(xreg(2), xreg(3), I64), + mem: AMode::RegScaled(xreg(2), xreg(3), I64), srcloc: None, }, "417823F8", @@ -1497,7 +1497,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::Store64 { rd: xreg(1), - mem: MemArg::RegScaledExtended(xreg(2), xreg(3), I64, ExtendOp::UXTW), + mem: AMode::RegScaledExtended(xreg(2), xreg(3), I64, ExtendOp::UXTW), srcloc: None, }, "415823F8", @@ -1506,7 +1506,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::Store64 { rd: xreg(1), - mem: MemArg::RegExtended(xreg(2), xreg(3), ExtendOp::UXTW), + mem: AMode::RegExtended(xreg(2), xreg(3), ExtendOp::UXTW), srcloc: None, }, "414823F8", @@ -1515,7 +1515,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::Store64 { rd: xreg(1), - mem: MemArg::PreIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), + mem: AMode::PreIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), srcloc: None, }, "410C01F8", @@ -1524,7 +1524,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::Store64 { rd: xreg(1), - mem: MemArg::PostIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), + mem: AMode::PostIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), srcloc: None, }, "410401F8", @@ -1535,7 +1535,7 @@ fn test_aarch64_binemit() { Inst::StoreP64 { rt: xreg(8), rt2: xreg(9), - mem: PairMemArg::SignedOffset(xreg(10), SImm7Scaled::zero(I64)), + mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::zero(I64)), }, "482500A9", "stp x8, x9, [x10]", @@ -1544,7 +1544,7 @@ fn test_aarch64_binemit() { Inst::StoreP64 { rt: xreg(8), rt2: xreg(9), - mem: PairMemArg::SignedOffset(xreg(10), 
SImm7Scaled::maybe_from_i64(504, I64).unwrap()), + mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(504, I64).unwrap()), }, "48A51FA9", "stp x8, x9, [x10, #504]", @@ -1553,7 +1553,7 @@ fn test_aarch64_binemit() { Inst::StoreP64 { rt: xreg(8), rt2: xreg(9), - mem: PairMemArg::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(-64, I64).unwrap()), + mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(-64, I64).unwrap()), }, "48253CA9", "stp x8, x9, [x10, #-64]", @@ -1562,7 +1562,7 @@ fn test_aarch64_binemit() { Inst::StoreP64 { rt: xreg(21), rt2: xreg(28), - mem: PairMemArg::SignedOffset(xreg(1), SImm7Scaled::maybe_from_i64(-512, I64).unwrap()), + mem: PairAMode::SignedOffset(xreg(1), SImm7Scaled::maybe_from_i64(-512, I64).unwrap()), }, "357020A9", "stp x21, x28, [x1, #-512]", @@ -1571,7 +1571,7 @@ fn test_aarch64_binemit() { Inst::StoreP64 { rt: xreg(8), rt2: xreg(9), - mem: PairMemArg::PreIndexed( + mem: PairAMode::PreIndexed( writable_xreg(10), SImm7Scaled::maybe_from_i64(-64, I64).unwrap(), ), @@ -1583,7 +1583,7 @@ fn test_aarch64_binemit() { Inst::StoreP64 { rt: xreg(15), rt2: xreg(16), - mem: PairMemArg::PostIndexed( + mem: PairAMode::PostIndexed( writable_xreg(20), SImm7Scaled::maybe_from_i64(504, I64).unwrap(), ), @@ -1596,7 +1596,7 @@ fn test_aarch64_binemit() { Inst::LoadP64 { rt: writable_xreg(8), rt2: writable_xreg(9), - mem: PairMemArg::SignedOffset(xreg(10), SImm7Scaled::zero(I64)), + mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::zero(I64)), }, "482540A9", "ldp x8, x9, [x10]", @@ -1605,7 +1605,7 @@ fn test_aarch64_binemit() { Inst::LoadP64 { rt: writable_xreg(8), rt2: writable_xreg(9), - mem: PairMemArg::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(504, I64).unwrap()), + mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(504, I64).unwrap()), }, "48A55FA9", "ldp x8, x9, [x10, #504]", @@ -1614,7 +1614,7 @@ fn test_aarch64_binemit() { Inst::LoadP64 { rt: writable_xreg(8), rt2: writable_xreg(9), - mem: PairMemArg::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(-64, I64).unwrap()), + mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(-64, I64).unwrap()), }, "48257CA9", "ldp x8, x9, [x10, #-64]", @@ -1623,10 +1623,7 @@ fn test_aarch64_binemit() { Inst::LoadP64 { rt: writable_xreg(8), rt2: writable_xreg(9), - mem: PairMemArg::SignedOffset( - xreg(10), - SImm7Scaled::maybe_from_i64(-512, I64).unwrap(), - ), + mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(-512, I64).unwrap()), }, "482560A9", "ldp x8, x9, [x10, #-512]", @@ -1635,7 +1632,7 @@ fn test_aarch64_binemit() { Inst::LoadP64 { rt: writable_xreg(8), rt2: writable_xreg(9), - mem: PairMemArg::PreIndexed( + mem: PairAMode::PreIndexed( writable_xreg(10), SImm7Scaled::maybe_from_i64(-64, I64).unwrap(), ), @@ -1647,7 +1644,7 @@ fn test_aarch64_binemit() { Inst::LoadP64 { rt: writable_xreg(8), rt2: writable_xreg(25), - mem: PairMemArg::PostIndexed( + mem: PairAMode::PostIndexed( writable_xreg(12), SImm7Scaled::maybe_from_i64(504, I64).unwrap(), ), @@ -4143,7 +4140,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::FpuLoad32 { rd: writable_vreg(16), - mem: MemArg::RegScaled(xreg(8), xreg(9), F32), + mem: AMode::RegScaled(xreg(8), xreg(9), F32), srcloc: None, }, "107969BC", @@ -4153,7 +4150,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::FpuLoad64 { rd: writable_vreg(16), - mem: MemArg::RegScaled(xreg(8), xreg(9), F64), + mem: AMode::RegScaled(xreg(8), xreg(9), F64), srcloc: None, }, "107969FC", @@ -4163,7 +4160,7 @@ fn 
test_aarch64_binemit() { insns.push(( Inst::FpuLoad128 { rd: writable_vreg(16), - mem: MemArg::RegScaled(xreg(8), xreg(9), I128), + mem: AMode::RegScaled(xreg(8), xreg(9), I128), srcloc: None, }, "1079E93C", @@ -4173,7 +4170,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::FpuLoad32 { rd: writable_vreg(16), - mem: MemArg::Label(MemLabel::PCRel(8)), + mem: AMode::Label(MemLabel::PCRel(8)), srcloc: None, }, "5000001C", @@ -4183,7 +4180,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::FpuLoad64 { rd: writable_vreg(16), - mem: MemArg::Label(MemLabel::PCRel(8)), + mem: AMode::Label(MemLabel::PCRel(8)), srcloc: None, }, "5000005C", @@ -4193,7 +4190,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::FpuLoad128 { rd: writable_vreg(16), - mem: MemArg::Label(MemLabel::PCRel(8)), + mem: AMode::Label(MemLabel::PCRel(8)), srcloc: None, }, "5000009C", @@ -4203,7 +4200,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::FpuStore32 { rd: vreg(16), - mem: MemArg::RegScaled(xreg(8), xreg(9), F32), + mem: AMode::RegScaled(xreg(8), xreg(9), F32), srcloc: None, }, "107929BC", @@ -4213,7 +4210,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::FpuStore64 { rd: vreg(16), - mem: MemArg::RegScaled(xreg(8), xreg(9), F64), + mem: AMode::RegScaled(xreg(8), xreg(9), F64), srcloc: None, }, "107929FC", @@ -4223,7 +4220,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::FpuStore128 { rd: vreg(16), - mem: MemArg::RegScaled(xreg(8), xreg(9), I128), + mem: AMode::RegScaled(xreg(8), xreg(9), I128), srcloc: None, }, "1079A93C", diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 949189e8d3..27868f96dc 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -463,68 +463,68 @@ pub enum Inst { /// An unsigned (zero-extending) 8-bit load. ULoad8 { rd: Writable, - mem: MemArg, + mem: AMode, srcloc: Option, }, /// A signed (sign-extending) 8-bit load. SLoad8 { rd: Writable, - mem: MemArg, + mem: AMode, srcloc: Option, }, /// An unsigned (zero-extending) 16-bit load. ULoad16 { rd: Writable, - mem: MemArg, + mem: AMode, srcloc: Option, }, /// A signed (sign-extending) 16-bit load. SLoad16 { rd: Writable, - mem: MemArg, + mem: AMode, srcloc: Option, }, /// An unsigned (zero-extending) 32-bit load. ULoad32 { rd: Writable, - mem: MemArg, + mem: AMode, srcloc: Option, }, /// A signed (sign-extending) 32-bit load. SLoad32 { rd: Writable, - mem: MemArg, + mem: AMode, srcloc: Option, }, /// A 64-bit load. ULoad64 { rd: Writable, - mem: MemArg, + mem: AMode, srcloc: Option, }, /// An 8-bit store. Store8 { rd: Reg, - mem: MemArg, + mem: AMode, srcloc: Option, }, /// A 16-bit store. Store16 { rd: Reg, - mem: MemArg, + mem: AMode, srcloc: Option, }, /// A 32-bit store. Store32 { rd: Reg, - mem: MemArg, + mem: AMode, srcloc: Option, }, /// A 64-bit store. Store64 { rd: Reg, - mem: MemArg, + mem: AMode, srcloc: Option, }, @@ -532,13 +532,13 @@ pub enum Inst { StoreP64 { rt: Reg, rt2: Reg, - mem: PairMemArg, + mem: PairAMode, }, /// A load of a pair of registers. LoadP64 { rt: Writable, rt2: Writable, - mem: PairMemArg, + mem: PairAMode, }, /// A MOV instruction. These are encoded as ORR's (AluRRR form) but we @@ -734,37 +734,37 @@ pub enum Inst { /// Floating-point load, single-precision (32 bit). FpuLoad32 { rd: Writable, - mem: MemArg, + mem: AMode, srcloc: Option, }, /// Floating-point store, single-precision (32 bit). 
FpuStore32 { rd: Reg, - mem: MemArg, + mem: AMode, srcloc: Option, }, /// Floating-point load, double-precision (64 bit). FpuLoad64 { rd: Writable, - mem: MemArg, + mem: AMode, srcloc: Option, }, /// Floating-point store, double-precision (64 bit). FpuStore64 { rd: Reg, - mem: MemArg, + mem: AMode, srcloc: Option, }, /// Floating-point/vector load, 128 bit. FpuLoad128 { rd: Writable, - mem: MemArg, + mem: AMode, srcloc: Option, }, /// Floating-point/vector store, 128 bit. FpuStore128 { rd: Reg, - mem: MemArg, + mem: AMode, srcloc: Option, }, @@ -1050,11 +1050,11 @@ pub enum Inst { /// Load address referenced by `mem` into `rd`. LoadAddr { rd: Writable, - mem: MemArg, + mem: AMode, }, /// Marker, no-op in generated code: SP "virtual offset" is adjusted. This - /// controls how MemArg::NominalSPOffset args are lowered. + /// controls how AMode::NominalSPOffset args are lowered. VirtualSPOffsetAdj { offset: i64, }, @@ -1215,45 +1215,119 @@ impl Inst { const_data: value, } } + + /// Generic constructor for a load (zero-extending where appropriate). + pub fn gen_load(into_reg: Writable, mem: AMode, ty: Type) -> Inst { + match ty { + B1 | B8 | I8 => Inst::ULoad8 { + rd: into_reg, + mem, + srcloc: None, + }, + B16 | I16 => Inst::ULoad16 { + rd: into_reg, + mem, + srcloc: None, + }, + B32 | I32 | R32 => Inst::ULoad32 { + rd: into_reg, + mem, + srcloc: None, + }, + B64 | I64 | R64 => Inst::ULoad64 { + rd: into_reg, + mem, + srcloc: None, + }, + F32 => Inst::FpuLoad32 { + rd: into_reg, + mem, + srcloc: None, + }, + F64 => Inst::FpuLoad64 { + rd: into_reg, + mem, + srcloc: None, + }, + _ => unimplemented!("gen_load({})", ty), + } + } + + /// Generic constructor for a store. + pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type) -> Inst { + match ty { + B1 | B8 | I8 => Inst::Store8 { + rd: from_reg, + mem, + srcloc: None, + }, + B16 | I16 => Inst::Store16 { + rd: from_reg, + mem, + srcloc: None, + }, + B32 | I32 | R32 => Inst::Store32 { + rd: from_reg, + mem, + srcloc: None, + }, + B64 | I64 | R64 => Inst::Store64 { + rd: from_reg, + mem, + srcloc: None, + }, + F32 => Inst::FpuStore32 { + rd: from_reg, + mem, + srcloc: None, + }, + F64 => Inst::FpuStore64 { + rd: from_reg, + mem, + srcloc: None, + }, + _ => unimplemented!("gen_store({})", ty), + } + } } //============================================================================= // Instructions: get_regs -fn memarg_regs(memarg: &MemArg, collector: &mut RegUsageCollector) { +fn memarg_regs(memarg: &AMode, collector: &mut RegUsageCollector) { match memarg { - &MemArg::Unscaled(reg, ..) | &MemArg::UnsignedOffset(reg, ..) => { + &AMode::Unscaled(reg, ..) | &AMode::UnsignedOffset(reg, ..) => { collector.add_use(reg); } - &MemArg::RegReg(r1, r2, ..) - | &MemArg::RegScaled(r1, r2, ..) - | &MemArg::RegScaledExtended(r1, r2, ..) - | &MemArg::RegExtended(r1, r2, ..) => { + &AMode::RegReg(r1, r2, ..) + | &AMode::RegScaled(r1, r2, ..) + | &AMode::RegScaledExtended(r1, r2, ..) + | &AMode::RegExtended(r1, r2, ..) => { collector.add_use(r1); collector.add_use(r2); } - &MemArg::Label(..) => {} - &MemArg::PreIndexed(reg, ..) | &MemArg::PostIndexed(reg, ..) => { + &AMode::Label(..) => {} + &AMode::PreIndexed(reg, ..) | &AMode::PostIndexed(reg, ..) => { collector.add_mod(reg); } - &MemArg::FPOffset(..) => { + &AMode::FPOffset(..) => { collector.add_use(fp_reg()); } - &MemArg::SPOffset(..) | &MemArg::NominalSPOffset(..) => { + &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => { collector.add_use(stack_reg()); } - &MemArg::RegOffset(r, ..) 
=> { + &AMode::RegOffset(r, ..) => { collector.add_use(r); } } } -fn pairmemarg_regs(pairmemarg: &PairMemArg, collector: &mut RegUsageCollector) { +fn pairmemarg_regs(pairmemarg: &PairAMode, collector: &mut RegUsageCollector) { match pairmemarg { - &PairMemArg::SignedOffset(reg, ..) => { + &PairAMode::SignedOffset(reg, ..) => { collector.add_use(reg); } - &PairMemArg::PreIndexed(reg, ..) | &PairMemArg::PostIndexed(reg, ..) => { + &PairAMode::PreIndexed(reg, ..) | &PairAMode::PostIndexed(reg, ..) => { collector.add_mod(reg); } } @@ -1627,36 +1701,36 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RUM) { } } - fn map_mem(m: &RUM, mem: &mut MemArg) { + fn map_mem(m: &RUM, mem: &mut AMode) { // N.B.: we take only the pre-map here, but this is OK because the // only addressing modes that update registers (pre/post-increment on // AArch64) both read and write registers, so they are "mods" rather // than "defs", so must be the same in both the pre- and post-map. match mem { - &mut MemArg::Unscaled(ref mut reg, ..) => map_use(m, reg), - &mut MemArg::UnsignedOffset(ref mut reg, ..) => map_use(m, reg), - &mut MemArg::RegReg(ref mut r1, ref mut r2) - | &mut MemArg::RegScaled(ref mut r1, ref mut r2, ..) - | &mut MemArg::RegScaledExtended(ref mut r1, ref mut r2, ..) - | &mut MemArg::RegExtended(ref mut r1, ref mut r2, ..) => { + &mut AMode::Unscaled(ref mut reg, ..) => map_use(m, reg), + &mut AMode::UnsignedOffset(ref mut reg, ..) => map_use(m, reg), + &mut AMode::RegReg(ref mut r1, ref mut r2) + | &mut AMode::RegScaled(ref mut r1, ref mut r2, ..) + | &mut AMode::RegScaledExtended(ref mut r1, ref mut r2, ..) + | &mut AMode::RegExtended(ref mut r1, ref mut r2, ..) => { map_use(m, r1); map_use(m, r2); } - &mut MemArg::Label(..) => {} - &mut MemArg::PreIndexed(ref mut r, ..) => map_mod(m, r), - &mut MemArg::PostIndexed(ref mut r, ..) => map_mod(m, r), - &mut MemArg::FPOffset(..) - | &mut MemArg::SPOffset(..) - | &mut MemArg::NominalSPOffset(..) => {} - &mut MemArg::RegOffset(ref mut r, ..) => map_use(m, r), + &mut AMode::Label(..) => {} + &mut AMode::PreIndexed(ref mut r, ..) => map_mod(m, r), + &mut AMode::PostIndexed(ref mut r, ..) => map_mod(m, r), + &mut AMode::FPOffset(..) + | &mut AMode::SPOffset(..) + | &mut AMode::NominalSPOffset(..) => {} + &mut AMode::RegOffset(ref mut r, ..) => map_use(m, r), }; } - fn map_pairmem(m: &RUM, mem: &mut PairMemArg) { + fn map_pairmem(m: &RUM, mem: &mut PairAMode) { match mem { - &mut PairMemArg::SignedOffset(ref mut reg, ..) => map_use(m, reg), - &mut PairMemArg::PreIndexed(ref mut reg, ..) => map_def(m, reg), - &mut PairMemArg::PostIndexed(ref mut reg, ..) => map_def(m, reg), + &mut PairAMode::SignedOffset(ref mut reg, ..) => map_use(m, reg), + &mut PairAMode::PreIndexed(ref mut reg, ..) => map_def(m, reg), + &mut PairAMode::PostIndexed(ref mut reg, ..) => map_def(m, reg), } } @@ -2432,10 +2506,10 @@ impl MachInst for Inst { // Pretty-printing of instructions. fn mem_finalize_for_show( - mem: &MemArg, + mem: &AMode, mb_rru: Option<&RealRegUniverse>, state: &EmitState, -) -> (String, MemArg) { +) -> (String, AMode) { let (mem_insts, mem) = mem_finalize(0, mem, state); let mut mem_str = mem_insts .into_iter() @@ -2646,7 +2720,7 @@ impl Inst { let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); let is_unscaled = match &mem { - &MemArg::Unscaled(..) => true, + &AMode::Unscaled(..) 
=> true, _ => false, }; let (op, size) = match (self, is_unscaled) { @@ -2694,7 +2768,7 @@ impl Inst { let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); let is_unscaled = match &mem { - &MemArg::Unscaled(..) => true, + &AMode::Unscaled(..) => true, _ => false, }; let (op, size) = match (self, is_unscaled) { @@ -3350,8 +3424,8 @@ impl Inst { ret.push_str(&inst.show_rru(mb_rru)); } let (reg, offset) = match mem { - MemArg::Unscaled(r, simm9) => (r, simm9.value()), - MemArg::UnsignedOffset(r, uimm12scaled) => (r, uimm12scaled.value() as i32), + AMode::Unscaled(r, simm9) => (r, simm9.value()), + AMode::UnsignedOffset(r, uimm12scaled) => (r, uimm12scaled.value() as i32), _ => panic!("Unsupported case for LoadAddr: {:?}", mem), }; let abs_offset = if offset < 0 { diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 4ec0871eb1..07c6b27281 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -663,7 +663,7 @@ pub(crate) fn lower_address>( elem_ty: Type, roots: &[InsnInput], offset: i32, -) -> MemArg { +) -> AMode { // TODO: support base_reg + scale * index_reg. For this, we would need to pattern-match shl or // mul instructions (Load/StoreComplex don't include scale factors). @@ -680,26 +680,26 @@ pub(crate) fn lower_address>( offset ); - // First, decide what the `MemArg` will be. Take one extendee and one 64-bit + // First, decide what the `AMode` will be. Take one extendee and one 64-bit // reg, or two 64-bit regs, or a 64-bit reg and a 32-bit reg with extension, // or some other combination as appropriate. let memarg = if addends64.len() > 0 { if addends32.len() > 0 { let (reg32, extendop) = addends32.pop().unwrap(); let reg64 = addends64.pop().unwrap(); - MemArg::RegExtended(reg64, reg32, extendop) + AMode::RegExtended(reg64, reg32, extendop) } else if offset > 0 && offset < 0x1000 { let reg64 = addends64.pop().unwrap(); let off = offset; offset = 0; - MemArg::RegOffset(reg64, off, elem_ty) + AMode::RegOffset(reg64, off, elem_ty) } else if addends64.len() >= 2 { let reg1 = addends64.pop().unwrap(); let reg2 = addends64.pop().unwrap(); - MemArg::RegReg(reg1, reg2) + AMode::RegReg(reg1, reg2) } else { let reg1 = addends64.pop().unwrap(); - MemArg::reg(reg1) + AMode::reg(reg1) } } else /* addends64.len() == 0 */ @@ -720,9 +720,9 @@ pub(crate) fn lower_address>( to_bits: 64, }); if let Some((reg2, extendop)) = addends32.pop() { - MemArg::RegExtended(tmp.to_reg(), reg2, extendop) + AMode::RegExtended(tmp.to_reg(), reg2, extendop) } else { - MemArg::reg(tmp.to_reg()) + AMode::reg(tmp.to_reg()) } } else /* addends32.len() == 0 */ @@ -730,32 +730,32 @@ pub(crate) fn lower_address>( let off_reg = ctx.alloc_tmp(RegClass::I64, I64); lower_constant_u64(ctx, off_reg, offset as u64); offset = 0; - MemArg::reg(off_reg.to_reg()) + AMode::reg(off_reg.to_reg()) } }; // At this point, if we have any remaining components, we need to allocate a - // temp, replace one of the registers in the MemArg with the temp, and emit + // temp, replace one of the registers in the AMode with the temp, and emit // instructions to add together the remaining components. Return immediately // if this is *not* the case. if offset == 0 && addends32.len() == 0 && addends64.len() == 0 { return memarg; } - // Allocate the temp and shoehorn it into the MemArg. + // Allocate the temp and shoehorn it into the AMode. 
let addr = ctx.alloc_tmp(RegClass::I64, I64); let (reg, memarg) = match memarg { - MemArg::RegExtended(r1, r2, extendop) => { - (r1, MemArg::RegExtended(addr.to_reg(), r2, extendop)) + AMode::RegExtended(r1, r2, extendop) => { + (r1, AMode::RegExtended(addr.to_reg(), r2, extendop)) } - MemArg::RegOffset(r, off, ty) => (r, MemArg::RegOffset(addr.to_reg(), off, ty)), - MemArg::RegReg(r1, r2) => (r2, MemArg::RegReg(addr.to_reg(), r1)), - MemArg::UnsignedOffset(r, imm) => (r, MemArg::UnsignedOffset(addr.to_reg(), imm)), + AMode::RegOffset(r, off, ty) => (r, AMode::RegOffset(addr.to_reg(), off, ty)), + AMode::RegReg(r1, r2) => (r2, AMode::RegReg(addr.to_reg(), r1)), + AMode::UnsignedOffset(r, imm) => (r, AMode::UnsignedOffset(addr.to_reg(), imm)), _ => unreachable!(), }; // If there is any offset, load that first into `addr`, and add the `reg` - // that we kicked out of the `MemArg`; otherwise, start with that reg. + // that we kicked out of the `AMode`; otherwise, start with that reg. if offset != 0 { // If we can fit offset or -offset in an imm12, use an add-imm // to combine the reg and offset. Otherwise, load value first then add. @@ -994,37 +994,6 @@ pub(crate) fn condcode_is_signed(cc: IntCC) -> bool { //============================================================================= // Helpers for instruction lowering. -/// Returns the size (in bits) of a given type. -pub fn ty_bits(ty: Type) -> usize { - match ty { - B1 => 1, - B8 | I8 => 8, - B16 | I16 => 16, - B32 | I32 | F32 | R32 => 32, - B64 | I64 | F64 | R64 => 64, - B128 | I128 => 128, - IFLAGS | FFLAGS => 32, - B8X8 | I8X8 | B16X4 | I16X4 | B32X2 | I32X2 => 64, - B8X16 | I8X16 | B16X8 | I16X8 | B32X4 | I32X4 | B64X2 | I64X2 => 128, - F32X4 | F64X2 => 128, - _ => panic!("ty_bits() on unknown type: {:?}", ty), - } -} - -pub(crate) fn ty_is_int(ty: Type) -> bool { - match ty { - B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 | R32 | R64 => true, - F32 | F64 | B128 | F32X2 | F32X4 | F64X2 | I128 | I8X8 | I8X16 | I16X4 | I16X8 | I32X2 - | I32X4 | I64X2 => false, - IFLAGS | FFLAGS => panic!("Unexpected flags type"), - _ => panic!("ty_is_int() on unknown type: {:?}", ty), - } -} - -pub(crate) fn ty_is_float(ty: Type) -> bool { - !ty_is_int(ty) -} - pub(crate) fn choose_32_64(ty: Type, op32: T, op64: T) -> T { let bits = ty_bits(ty); if bits <= 32 { diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 25dc268dec..5fe62da697 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1010,7 +1010,7 @@ pub(crate) fn lower_insn_to_regs>( | Opcode::Sload32Complex => true, _ => false, }; - let is_float = ty_is_float(elem_ty); + let is_float = ty_has_float_or_vec_representation(elem_ty); let mem = lower_address(ctx, elem_ty, &inputs[..], off); let rd = get_output_reg(ctx, outputs[0]); @@ -1074,7 +1074,7 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Store | Opcode::StoreComplex => ctx.input_ty(insn, 0), _ => unreachable!(), }; - let is_float = ty_is_float(elem_ty); + let is_float = ty_has_float_or_vec_representation(elem_ty); let mem = lower_address(ctx, elem_ty, &inputs[1..], off); let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); @@ -1291,9 +1291,10 @@ pub(crate) fn lower_insn_to_regs>( let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None); let ty = ctx.output_ty(insn, 0); let bits = ty_bits(ty); - if ty_is_float(ty) && bits == 32 { + let is_float = ty_has_float_or_vec_representation(ty); + if 
is_float && bits == 32 { ctx.emit(Inst::FpuCSel32 { cond, rd, rn, rm }); - } else if ty_is_float(ty) && bits == 64 { + } else if is_float && bits == 64 { ctx.emit(Inst::FpuCSel64 { cond, rd, rn, rm }); } else { ctx.emit(Inst::CSel { cond, rd, rn, rm }); @@ -1315,9 +1316,10 @@ pub(crate) fn lower_insn_to_regs>( let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None); let ty = ctx.output_ty(insn, 0); let bits = ty_bits(ty); - if ty_is_float(ty) && bits == 32 { + let is_float = ty_has_float_or_vec_representation(ty); + if is_float && bits == 32 { ctx.emit(Inst::FpuCSel32 { cond, rd, rn, rm }); - } else if ty_is_float(ty) && bits == 64 { + } else if is_float && bits == 64 { ctx.emit(Inst::FpuCSel64 { cond, rd, rn, rm }); } else { ctx.emit(Inst::CSel { cond, rd, rn, rm }); @@ -1521,7 +1523,9 @@ pub(crate) fn lower_insn_to_regs>( let rd = get_output_reg(ctx, outputs[0]); let ity = ctx.input_ty(insn, 0); let oty = ctx.output_ty(insn, 0); - match (ty_is_float(ity), ty_is_float(oty)) { + let ity_vec_reg = ty_has_float_or_vec_representation(ity); + let oty_vec_reg = ty_has_float_or_vec_representation(oty); + match (ity_vec_reg, oty_vec_reg) { (true, true) => { let narrow_mode = if ty_bits(ity) <= 32 && ty_bits(oty) <= 32 { NarrowValueMode::ZeroExtend32 @@ -1809,7 +1813,7 @@ pub(crate) fn lower_insn_to_regs>( let size = VectorSize::from_ty(ctx.input_ty(insn, 0)); let ty = ty.unwrap(); - if ty_is_int(ty) { + if ty_has_int_representation(ty) { ctx.emit(Inst::MovFromVec { rd, rn, idx, size }); // Plain moves are faster on some processors. } else if idx == 0 { @@ -1837,7 +1841,7 @@ pub(crate) fn lower_insn_to_regs>( ctx.emit(Inst::gen_move(rd, rm, ty)); - if ty_is_int(input_ty) { + if ty_has_int_representation(input_ty) { ctx.emit(Inst::MovToVec { rd, rn, idx, size }); } else { ctx.emit(Inst::VecMovElement { @@ -1855,7 +1859,7 @@ pub(crate) fn lower_insn_to_regs>( let rd = get_output_reg(ctx, outputs[0]); let input_ty = ctx.input_ty(insn, 0); let size = VectorSize::from_ty(ty.unwrap()); - let inst = if ty_is_int(input_ty) { + let inst = if ty_has_int_representation(input_ty) { Inst::VecDup { rd, rn, size } } else { Inst::VecDupFromFpu { rd, rn, size } diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs new file mode 100644 index 0000000000..fdf9409ae5 --- /dev/null +++ b/cranelift/codegen/src/machinst/abi_impl.rs @@ -0,0 +1,1160 @@ +//! Implementation of a vanilla ABI, shared between several machines. The +//! implementation here assumes that arguments will be passed in registers +//! first, then additional args on the stack; that the stack grows downward, +//! contains a standard frame (return address and frame pointer), and the +//! compiler is otherwise free to allocate space below that with its choice of +//! layout; and that the machine has some notion of caller- and callee-save +//! registers. Most modern machines, e.g. x86-64 and AArch64, should fit this +//! mold and thus both of these backends use this shared implementation. +//! +//! See the documentation in specific machine backends for the "instantiation" +//! of this generic ABI, i.e., which registers are caller/callee-save, arguments +//! and return values, and any other special requirements. +//! +//! For now the implementation here assumes a 64-bit machine, but we intend to +//! make this 32/64-bit-generic shortly. +//! +//! # Vanilla ABI +//! +//! First, arguments and return values are passed in registers up to a certain +//! fixed count, after which they overflow onto the stack. 
Multiple return
+//! values either fit in registers, or are returned in a separate return-value
+//! area on the stack, given by a hidden extra parameter.
+//!
+//! Note that the exact stack layout is up to us. We settled on the
+//! below design based on several requirements. In particular, we need to be
+//! able to generate instructions (or instruction sequences) to access
+//! arguments, stack slots, and spill slots before we know how many spill slots
+//! or clobber-saves there will be, because of our pass structure. We also
+//! prefer positive offsets to negative offsets because of an asymmetry in
+//! some machines' addressing modes (e.g., on AArch64, positive offsets have a
+//! larger possible range without a long-form sequence to synthesize an
+//! arbitrary offset). Finally, it is not allowed to access memory below the
+//! current SP value.
+//!
+//! We assume that a prologue first pushes the frame pointer (and return address
+//! above that, if the machine does not do that in hardware). We set FP to point
+//! to this two-word frame record. We store all other frame slots below this
+//! two-word frame record, with the stack pointer remaining at or below this
+//! fixed frame storage for the rest of the function. We can then access frame
+//! storage slots using positive offsets from SP. In order to allow codegen for
+//! the latter before knowing how many clobber-saves we have, and also allow it
+//! while SP is being adjusted to set up a call, we implement a "nominal SP"
+//! tracking feature by which a fixup (distance between actual SP and a
+//! "nominal" SP) is known at each instruction.
+//!
+//! # Stack Layout
+//!
+//! The stack looks like:
+//!
+//! ```plain
+//!   (high address)
+//!
+//!                              +---------------------------+
+//!                              |          ...              |
+//!                              | stack args                |
+//!                              | (accessed via FP)         |
+//!                              +---------------------------+
+//! SP at function entry -----> | return address            |
+//!                              +---------------------------+
+//! FP after prologue --------> | FP (pushed by prologue)   |
+//!                              +---------------------------+
+//!                              |          ...              |
+//!                              | spill slots               |
+//!                              | (accessed via nominal SP) |
+//!                              |          ...              |
+//!                              | stack slots               |
+//!                              | (accessed via nominal SP) |
+//! nominal SP ---------------> | (alloc'd by prologue)     |
+//!                              +---------------------------+
+//!                              |          ...              |
+//!                              | clobbered callee-saves    |
+//! SP at end of prologue ----> | (pushed by prologue)      |
+//!                              +---------------------------+
+//!                              | [alignment as needed]     |
+//!                              |          ...              |
+//!                              | args for call             |
+//! SP before making a call --> | (pushed at callsite)      |
+//!                              +---------------------------+
+//!
+//!   (low address)
+//! ```
+//!
+//! # Multi-value Returns
+//!
+//! Note that we support multi-value returns in two ways. First, we allow for
+//! multiple return-value registers. Second, if the appropriate flag is set, we
+//! support the SpiderMonkey Wasm ABI. For details of the multi-value return
+//! ABI, see:
+//!
+//! https://searchfox.org/mozilla-central/rev/bc3600def806859c31b2c7ac06e3d69271052a89/js/src/wasm/WasmStubs.h#134
+//!
+//! In brief:
+//! - Return values are processed in *reverse* order.
+//! - The first return value in this order (so the last return) goes into the
+//!   ordinary return register.
+//! - Any further returns go in a struct-return area, allocated upwards (in
+//!   address order) during the reverse traversal.
+//! - This struct-return area is provided by the caller, and a pointer to its
+//!   start is passed as an invisible last (extra) argument. Normally the caller
+//!   will allocate this area on the stack. When we generate calls, we place it
+//!   just above the on-stack argument area.
+//! - So, for example, a function returning 4 i64's (v0, v1, v2, v3), with no
+//!   formal arguments, would:
+//!   - Accept a pointer `P` to the struct return area as a hidden argument in
+//!     the first argument register on entry.
+//!   - Return v3 in the one and only return-value register.
+//!   - Return v2 in memory at `[P]`.
+//!   - Return v1 in memory at `[P+8]`.
+//!   - Return v0 in memory at `[P+16]`.
+
+use super::abi::*;
+use crate::binemit::StackMap;
+use crate::ir::types::*;
+use crate::ir::{ArgumentExtension, SourceLoc, StackSlot};
+use crate::machinst::*;
+use crate::settings;
+use crate::CodegenResult;
+use crate::{ir, isa};
+use alloc::vec::Vec;
+use log::{debug, trace};
+use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
+use std::marker::PhantomData;
+use std::mem;
+
+/// A location for an argument or return value.
+#[derive(Clone, Copy, Debug)]
+pub enum ABIArg {
+    /// In a real register.
+    Reg(RealReg, ir::Type, ir::ArgumentExtension),
+    /// Arguments only: on stack, at given offset from SP at entry.
+    Stack(i64, ir::Type, ir::ArgumentExtension),
+}
+
+/// Are we computing information about arguments or return values? Much of the
+/// handling is factored out into common routines; this enum allows us to
+/// distinguish which case we're handling.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum ArgsOrRets {
+    /// Arguments.
+    Args,
+    /// Return values.
+    Rets,
+}
+
+/// Abstract location for a machine-specific ABI impl to translate into the
+/// appropriate addressing mode.
+#[derive(Clone, Copy, Debug)]
+pub enum StackAMode {
+    /// Offset from the frame pointer, possibly making use of a specific type
+    /// for a scaled indexing operation.
+    FPOffset(i64, ir::Type),
+    /// Offset from the nominal stack pointer, possibly making use of a specific
+    /// type for a scaled indexing operation.
+    NominalSPOffset(i64, ir::Type),
+    /// Offset from the real stack pointer, possibly making use of a specific
+    /// type for a scaled indexing operation.
+    SPOffset(i64, ir::Type),
+}
+
+/// Trait implemented by machine-specific backend to provide information about
+/// register assignments and to allow generating the specific instructions for
+/// stack loads/saves, prologues/epilogues, etc.
+pub trait ABIMachineImpl {
+    /// The instruction type.
+    type I: VCodeInst;
+
+    /// Process a list of parameters or return values and allocate them to
+    /// registers and stack slots.
+    ///
+    /// Returns the list of argument locations, the stack space used (rounded up
+    /// as alignment requires), and if `add_ret_area_ptr` was passed, the
+    /// index of the extra synthetic arg that was added.
+    fn compute_arg_locs(
+        call_conv: isa::CallConv,
+        params: &[ir::AbiParam],
+        args_or_rets: ArgsOrRets,
+        add_ret_area_ptr: bool,
+    ) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)>;
+
+    /// Returns the offset from FP to the argument area, i.e., jumping over the saved FP, return
+    /// address, and maybe other standard elements depending on ABI (e.g. Wasm TLS reg).
+    fn fp_to_arg_offset(call_conv: isa::CallConv, flags: &settings::Flags) -> i64;
+
+    /// Generate a load from the stack.
+    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I;
+
+    /// Generate a store to the stack.
+    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I;
+
+    /// Generate a move.
+    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self::I;
+
+    /// Generate an integer-extend operation.
+ fn gen_extend( + to_reg: Writable, + from_reg: Reg, + is_signed: bool, + from_bits: u8, + to_bits: u8, + ) -> Self::I; + + /// Generate a return instruction. + fn gen_ret() -> Self::I; + + /// Generate an "epilogue placeholder" instruction, recognized by lowering + /// when using the Baldrdash ABI. + fn gen_epilogue_placeholder() -> Self::I; + + /// Generate an add-with-immediate. Note that even if this uses a scratch + /// register, the sequence must still be correct if the given source or dest + /// is the register returned by `get_fixed_tmp_reg()`; hence, for machines + /// that may need a scratch register to synthesize an arbitrary constant, + /// the machine backend should reserve *another* fixed temp register for + /// this purpose. (E.g., on AArch64, x16 is the ordinary fixed tmp, and x17 + /// is the secondary fixed tmp used to implement this.) + fn gen_add_imm(into_reg: Writable, from_reg: Reg, imm: u64) -> SmallVec<[Self::I; 4]>; + + /// Generate a sequence that traps with a `TrapCode::StackOverflow` code if + /// the stack pointer is less than the given limit register (assuming the + /// stack grows downward). + fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallVec<[Self::I; 2]>; + + /// Generate an instruction to compute an address of a stack slot (FP- or + /// SP-based offset). + fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable, ty: Type) -> Self::I; + + /// Get a fixed (not used by regalloc) temp. This is needed for certain + /// sequences generated after the register allocator has already run. + fn get_fixed_tmp_reg() -> Reg; + + /// Generate a store to the given [base+offset] address. + fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i64, ty: Type) -> Self::I; + + /// Generate a load from the given [base+offset] address. + fn gen_store_base_offset(base: Reg, offset: i64, from_reg: Reg, ty: Type) -> Self::I; + + /// Adjust the stack pointer up or down. + fn gen_sp_reg_adjust(amount: i64) -> SmallVec<[Self::I; 2]>; + + /// Generate a meta-instruction that adjusts the nominal SP offset. + fn gen_nominal_sp_adj(amount: i64) -> Self::I; + + /// Generate the usual frame-setup sequence for this architecture: e.g., + /// `push rbp / mov rbp, rsp` on x86-64, or `stp fp, lr, [sp, #-16]!` on + /// AArch64. + fn gen_prologue_frame_setup() -> SmallVec<[Self::I; 2]>; + + /// Generate the usual frame-restore sequence for this architecture. + fn gen_epilogue_frame_restore() -> SmallVec<[Self::I; 2]>; + + /// Generate a clobber-save sequence. This takes the list of *all* registers + /// written/modified by the function body. The implementation here is + /// responsible for determining which of these are callee-saved according to + /// the ABI. It should return a sequence of instructions that "push" or + /// otherwise save these values to the stack. The sequence of instructions + /// should adjust the stack pointer downward, and should align as necessary + /// according to ABI requirements. + /// + /// Returns stack bytes used as well as instructions. Does not adjust + /// nominal SP offset; caller will do that. + fn gen_clobber_save( + call_conv: isa::CallConv, + clobbers: &Set>, + ) -> (u64, SmallVec<[Self::I; 16]>); + + /// Generate a clobber-restore sequence. This sequence should perform the + /// opposite of the clobber-save sequence generated above, assuming that SP + /// going into the sequence is at the same point that it was left when the + /// clobber-save sequence finished. 
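[Editor's note] The `gen_sp_reg_adjust` / `gen_nominal_sp_adj` pair above is what keeps nominal-SP offsets stable while the real SP moves. A rough, self-contained model of that bookkeeping (names below are invented for illustration): every real SP adjustment is paired with an equal-and-opposite virtual offset, so `nominal SP = real SP + virtual_sp_offset` stays constant until the epilogue.

```rust
/// Editor's sketch of nominal-SP bookkeeping; not the actual Cranelift types.
#[derive(Debug)]
enum PseudoInst {
    /// Adjust the real stack pointer by `amount` bytes (negative = allocate).
    SpAdjust(i64),
    /// Meta-instruction: adjust the recorded "virtual SP offset".
    NominalSpAdjust(i64),
}

#[derive(Default)]
struct Emitter {
    insts: Vec<PseudoInst>,
    /// Distance such that nominal SP = real SP + virtual_sp_offset.
    virtual_sp_offset: i64,
}

impl Emitter {
    /// Move the real SP and compensate the virtual offset so that addresses
    /// expressed relative to nominal SP keep referring to the same memory.
    fn adjust_sp_and_nominal_sp(&mut self, amount: i64) {
        if amount == 0 {
            return;
        }
        self.insts.push(PseudoInst::SpAdjust(amount));
        self.insts.push(PseudoInst::NominalSpAdjust(-amount));
        self.virtual_sp_offset += -amount;
    }
}

fn main() {
    let mut e = Emitter::default();
    // Push a 32-byte outgoing-argument area for a call...
    e.adjust_sp_and_nominal_sp(-32);
    // ...and pop it afterwards.
    e.adjust_sp_and_nominal_sp(32);
    // Nominal SP is back exactly where it started.
    assert_eq!(e.virtual_sp_offset, 0);
    println!("{:?}", e.insts);
}
```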
+ fn gen_clobber_restore( + call_conv: isa::CallConv, + clobbers: &Set>, + ) -> SmallVec<[Self::I; 16]>; + + /// Generate a call instruction/sequence. + fn gen_call( + dest: &CallDest, + uses: Vec, + defs: Vec>, + loc: SourceLoc, + opcode: ir::Opcode, + ) -> SmallVec<[(/* is_safepoint = */ bool, Self::I); 2]>; + + /// Get the number of spillslots required for the given register-class and + /// type. + fn get_spillslot_size(rc: RegClass, ty: Type) -> u32; + + /// Get the current virtual-SP offset from an instruction-emission state. + fn get_virtual_sp_offset_from_state(s: &::State) -> i64; + + /// Get the "nominal SP to FP" offset from an instruction-emission state. + fn get_nominal_sp_to_fp(s: &::State) -> i64; + + /// Get all caller-save registers. + fn get_caller_saves(call_conv: isa::CallConv) -> Vec>; +} + +/// ABI information shared between body (callee) and caller. +struct ABISig { + /// Argument locations (regs or stack slots). Stack offsets are relative to + /// SP on entry to function. + args: Vec, + /// Return-value locations. Stack offsets are relative to the return-area + /// pointer. + rets: Vec, + /// Space on stack used to store arguments. + stack_arg_space: i64, + /// Space on stack used to store return values. + stack_ret_space: i64, + /// Index in `args` of the stack-return-value-area argument. + stack_ret_arg: Option, + /// Calling convention used. + call_conv: isa::CallConv, +} + +impl ABISig { + fn from_func_sig(sig: &ir::Signature) -> CodegenResult { + // Compute args and retvals from signature. Handle retvals first, + // because we may need to add a return-area arg to the args. + let (rets, stack_ret_space, _) = M::compute_arg_locs( + sig.call_conv, + &sig.returns, + ArgsOrRets::Rets, + /* extra ret-area ptr = */ false, + )?; + let need_stack_return_area = stack_ret_space > 0; + let (args, stack_arg_space, stack_ret_arg) = M::compute_arg_locs( + sig.call_conv, + &sig.params, + ArgsOrRets::Args, + need_stack_return_area, + )?; + + trace!( + "ABISig: sig {:?} => args = {:?} rets = {:?} arg stack = {} ret stack = {} stack_ret_arg = {:?}", + sig, + args, + rets, + stack_arg_space, + stack_ret_space, + stack_ret_arg + ); + + Ok(ABISig { + args, + rets, + stack_arg_space, + stack_ret_space, + stack_ret_arg, + call_conv: sig.call_conv, + }) + } +} + +/// ABI object for a function body. +pub struct ABIBodyImpl { + /// Signature: arg and retval regs. + sig: ABISig, + /// Offsets to each stackslot. + stackslots: Vec, + /// Total stack size of all stackslots. + stackslots_size: u32, + /// Clobbered registers, from regalloc. + clobbered: Set>, + /// Total number of spillslots, from regalloc. + spillslots: Option, + /// "Total frame size", as defined by "distance between FP and nominal SP". + /// Some items are pushed below nominal SP, so the function may actually use + /// more stack than this would otherwise imply. It is simply the initial + /// frame/allocation size needed for stackslots and spillslots. + total_frame_size: Option, + /// The register holding the return-area pointer, if needed. + ret_area_ptr: Option>, + /// Calling convention this function expects. + call_conv: isa::CallConv, + /// The settings controlling this function's compilation. + flags: settings::Flags, + /// Whether or not this function is a "leaf", meaning it calls no other + /// functions + is_leaf: bool, + /// If this function has a stack limit specified, then `Reg` is where the + /// stack limit will be located after the instructions specified have been + /// executed. 
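[Editor's note] The `from_func_sig` flow above has one ordering constraint worth calling out: return values are laid out first, because whether a hidden return-area pointer is appended to the argument list depends on whether any return value spilled to the stack. A compact, self-contained model of that decision (a sketch with toy types and an assumed two "registers"; the real work is delegated to `compute_arg_locs`):

```rust
/// Editor's toy model of argument/return-value assignment; not the real
/// `compute_arg_locs`. Assumes 8-byte values and two available registers.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Loc {
    Reg(u8),
    Stack(i64),
}

const NUM_REGS: usize = 2;

/// Assign `n` values to registers first, then to 8-byte stack slots.
/// Returns the locations and the stack space used, rounded up to 16 bytes.
fn assign_locs(n: usize) -> (Vec<Loc>, i64) {
    let mut locs = Vec::with_capacity(n);
    let mut stack_off = 0i64;
    for i in 0..n {
        if i < NUM_REGS {
            locs.push(Loc::Reg(i as u8));
        } else {
            locs.push(Loc::Stack(stack_off));
            stack_off += 8;
        }
    }
    (locs, (stack_off + 15) & !15)
}

/// Mirror of the `from_func_sig` ordering: returns first, then arguments,
/// with a hidden return-area pointer added only if a return hit the stack.
fn toy_sig(num_params: usize, num_rets: usize) -> (Vec<Loc>, Vec<Loc>, Option<usize>) {
    let (rets, stack_ret_space) = assign_locs(num_rets);
    let need_ret_area_ptr = stack_ret_space > 0;
    let extra = if need_ret_area_ptr { 1 } else { 0 };
    let (args, _) = assign_locs(num_params + extra);
    let stack_ret_arg = if need_ret_area_ptr { Some(num_params) } else { None };
    (args, rets, stack_ret_arg)
}

fn main() {
    // Three returns overflow the two "return registers" of this toy model,
    // so the argument list gains a hidden return-area pointer at index 1.
    let (args, rets, ret_arg) = toy_sig(1, 3);
    assert_eq!(rets, vec![Loc::Reg(0), Loc::Reg(1), Loc::Stack(0)]);
    assert_eq!(ret_arg, Some(1));
    assert_eq!(args, vec![Loc::Reg(0), Loc::Reg(1)]);
}
```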
+ /// + /// Note that this is intended for insertion into the prologue, if + /// present. Also note that because the instructions here execute in the + /// prologue this happens after legalization/register allocation/etc so we + /// need to be extremely careful with each instruction. The instructions are + /// manually register-allocated and carefully only use caller-saved + /// registers and keep nothing live after this sequence of instructions. + stack_limit: Option<(Reg, Vec)>, + + _mach: PhantomData, +} + +fn get_special_purpose_param_register( + f: &ir::Function, + abi: &ABISig, + purpose: ir::ArgumentPurpose, +) -> Option { + let idx = f.signature.special_param_index(purpose)?; + match abi.args[idx] { + ABIArg::Reg(reg, ..) => Some(reg.to_reg()), + ABIArg::Stack(..) => None, + } +} + +impl ABIBodyImpl { + /// Create a new body ABI instance. + pub fn new(f: &ir::Function, flags: settings::Flags) -> CodegenResult { + debug!("ABI: func signature {:?}", f.signature); + + let sig = ABISig::from_func_sig::(&f.signature)?; + + let call_conv = f.signature.call_conv; + // Only these calling conventions are supported. + debug_assert!( + call_conv == isa::CallConv::SystemV + || call_conv == isa::CallConv::Fast + || call_conv == isa::CallConv::Cold + || call_conv.extends_baldrdash(), + "Unsupported calling convention: {:?}", + call_conv + ); + + // Compute stackslot locations and total stackslot size. + let mut stack_offset: u32 = 0; + let mut stackslots = vec![]; + for (stackslot, data) in f.stack_slots.iter() { + let off = stack_offset; + stack_offset += data.size; + stack_offset = (stack_offset + 7) & !7; + debug_assert_eq!(stackslot.as_u32() as usize, stackslots.len()); + stackslots.push(off); + } + + // Figure out what instructions, if any, will be needed to check the + // stack limit. This can either be specified as a special-purpose + // argument or as a global value which often calculates the stack limit + // from the arguments. + let stack_limit = + get_special_purpose_param_register(f, &sig, ir::ArgumentPurpose::StackLimit) + .map(|reg| (reg, Vec::new())) + .or_else(|| f.stack_limit.map(|gv| gen_stack_limit::(f, &sig, gv))); + + Ok(Self { + sig, + stackslots, + stackslots_size: stack_offset, + clobbered: Set::empty(), + spillslots: None, + total_frame_size: None, + ret_area_ptr: None, + call_conv, + flags, + is_leaf: f.is_leaf(), + stack_limit, + _mach: PhantomData, + }) + } + + /// Inserts instructions necessary for checking the stack limit into the + /// prologue. + /// + /// This function will generate instructions necessary for perform a stack + /// check at the header of a function. The stack check is intended to trap + /// if the stack pointer goes below a particular threshold, preventing stack + /// overflow in wasm or other code. The `stack_limit` argument here is the + /// register which holds the threshold below which we're supposed to trap. + /// This function is known to allocate `stack_size` bytes and we'll push + /// instructions onto `insts`. + /// + /// Note that the instructions generated here are special because this is + /// happening so late in the pipeline (e.g. after register allocation). This + /// means that we need to do manual register allocation here and also be + /// careful to not clobber any callee-saved or argument registers. For now + /// this routine makes do with the `spilltmp_reg` as one temporary + /// register, and a second register of `tmp2` which is caller-saved. 
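[Editor's note] Before the body of `insert_stack_check` (which follows), it may help to see the shape of its decision in isolation. This is a sketch with invented pseudo-ops, assuming the same three cases described above: no explicit frame, a frame large enough (taken as 32 KiB and up here) to warrant a pre-check guarding against overflow in the addition, and the common case of adding the frame size to the limit and trapping if SP falls below the result.

```rust
/// Editor's sketch: the checks emitted for a stack limit, as pseudo-ops.
#[derive(Debug, PartialEq)]
enum Check {
    /// Trap if SP is below the given (symbolic) register.
    TrapIfSpBelow(&'static str),
    /// scratch := limit + imm (may itself need a temp on some ISAs).
    AddImmToLimit(u32),
}

fn stack_check_plan(stack_size: u32) -> Vec<Check> {
    let mut plan = Vec::new();
    if stack_size == 0 {
        // Nothing allocated: just compare SP against the limit directly.
        plan.push(Check::TrapIfSpBelow("limit"));
        return plan;
    }
    if stack_size >= 32 * 1024 {
        // Guard against overflow in the addition below by checking the
        // un-adjusted limit first.
        plan.push(Check::TrapIfSpBelow("limit"));
    }
    // scratch := limit + stack_size; trap if SP < scratch.
    plan.push(Check::AddImmToLimit(stack_size));
    plan.push(Check::TrapIfSpBelow("scratch"));
    plan
}

fn main() {
    assert_eq!(stack_check_plan(0), vec![Check::TrapIfSpBelow("limit")]);
    assert_eq!(stack_check_plan(64).len(), 2);
    assert_eq!(stack_check_plan(1 << 20).len(), 3);
}
```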
This + /// should be fine for us since no spills should happen in this sequence of + /// instructions, so our register won't get accidentally clobbered. + /// + /// No values can be live after the prologue, but in this case that's ok + /// because we just need to perform a stack check before progressing with + /// the rest of the function. + fn insert_stack_check(&self, stack_limit: Reg, stack_size: u32, insts: &mut Vec) { + // With no explicit stack allocated we can just emit the simple check of + // the stack registers against the stack limit register, and trap if + // it's out of bounds. + if stack_size == 0 { + insts.extend(M::gen_stack_lower_bound_trap(stack_limit)); + return; + } + + // Note that the 32k stack size here is pretty special. See the + // documentation in x86/abi.rs for why this is here. The general idea is + // that we're protecting against overflow in the addition that happens + // below. + if stack_size >= 32 * 1024 { + insts.extend(M::gen_stack_lower_bound_trap(stack_limit)); + } + + // Add the `stack_size` to `stack_limit`, placing the result in + // `scratch`. + // + // Note though that `stack_limit`'s register may be the same as + // `scratch`. If our stack size doesn't fit into an immediate this + // means we need a second scratch register for loading the stack size + // into a register. + let scratch = Writable::from_reg(M::get_fixed_tmp_reg()); + let stack_size = u64::from(stack_size); + insts.extend(M::gen_add_imm(scratch, stack_limit, stack_size).into_iter()); + insts.extend(M::gen_stack_lower_bound_trap(scratch.to_reg())); + } +} + +/// Generates the instructions necessary for the `gv` to be materialized into a +/// register. +/// +/// This function will return a register that will contain the result of +/// evaluating `gv`. It will also return any instructions necessary to calculate +/// the value of the register. +/// +/// Note that global values are typically lowered to instructions via the +/// standard legalization pass. Unfortunately though prologue generation happens +/// so late in the pipeline that we can't use these legalization passes to +/// generate the instructions for `gv`. As a result we duplicate some lowering +/// of `gv` here and support only some global values. This is similar to what +/// the x86 backend does for now, and hopefully this can be somewhat cleaned up +/// in the future too! +/// +/// Also note that this function will make use of `writable_spilltmp_reg()` as a +/// temporary register to store values in if necessary. Currently after we write +/// to this register there's guaranteed to be no spilled values between where +/// it's used, because we're not participating in register allocation anyway! +fn gen_stack_limit( + f: &ir::Function, + abi: &ABISig, + gv: ir::GlobalValue, +) -> (Reg, Vec) { + let mut insts = Vec::new(); + let reg = generate_gv::(f, abi, gv, &mut insts); + return (reg, insts); +} + +fn generate_gv( + f: &ir::Function, + abi: &ABISig, + gv: ir::GlobalValue, + insts: &mut Vec, +) -> Reg { + match f.global_values[gv] { + // Return the direct register the vmcontext is in + ir::GlobalValueData::VMContext => { + get_special_purpose_param_register(f, abi, ir::ArgumentPurpose::VMContext) + .expect("no vmcontext parameter found") + } + // Load our base value into a register, then load from that register + // in to a temporary register. 
+ ir::GlobalValueData::Load { + base, + offset, + global_type: _, + readonly: _, + } => { + let base = generate_gv::(f, abi, base, insts); + let into_reg = Writable::from_reg(M::get_fixed_tmp_reg()); + insts.push(M::gen_load_base_offset(into_reg, base, offset.into(), I64)); + return into_reg.to_reg(); + } + ref other => panic!("global value for stack limit not supported: {}", other), + } +} + +/// Return a type either from an optional type hint, or if not, from the default +/// type associated with the given register's class. This is used to generate +/// loads/spills appropriately given the type of value loaded/stored (which may +/// be narrower than the spillslot). We usually have the type because the +/// regalloc usually provides the vreg being spilled/reloaded, and we know every +/// vreg's type. However, the regalloc *can* request a spill/reload without an +/// associated vreg when needed to satisfy a safepoint (which requires all +/// ref-typed values, even those in real registers in the original vcode, to be +/// in spillslots). +fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option) -> Type { + match (ty, r.get_class()) { + // If the type is provided + (Some(t), _) => t, + // If no type is provided, this should be a register spill for a + // safepoint, so we only expect I64 (integer) registers. + (None, RegClass::I64) => I64, + _ => panic!("Unexpected register class!"), + } +} + +impl ABIBody for ABIBodyImpl { + type I = M::I; + + fn temp_needed(&self) -> bool { + self.sig.stack_ret_arg.is_some() + } + + fn init(&mut self, maybe_tmp: Option>) { + if self.sig.stack_ret_arg.is_some() { + assert!(maybe_tmp.is_some()); + self.ret_area_ptr = maybe_tmp; + } + } + + fn flags(&self) -> &settings::Flags { + &self.flags + } + + fn liveins(&self) -> Set { + let mut set: Set = Set::empty(); + for &arg in &self.sig.args { + if let ABIArg::Reg(r, ..) = arg { + set.insert(r); + } + } + set + } + + fn liveouts(&self) -> Set { + let mut set: Set = Set::empty(); + for &ret in &self.sig.rets { + if let ABIArg::Reg(r, ..) = ret { + set.insert(r); + } + } + set + } + + fn num_args(&self) -> usize { + self.sig.args.len() + } + + fn num_retvals(&self) -> usize { + self.sig.rets.len() + } + + fn num_stackslots(&self) -> usize { + self.stackslots.len() + } + + fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable) -> Self::I { + match &self.sig.args[idx] { + // Extension mode doesn't matter (we're copying out, not in; we + // ignore high bits by convention). + &ABIArg::Reg(r, ty, _) => M::gen_move(into_reg, r.to_reg(), ty), + &ABIArg::Stack(off, ty, _) => M::gen_load_stack( + StackAMode::FPOffset(M::fp_to_arg_offset(self.call_conv, &self.flags) + off, ty), + into_reg, + ty, + ), + } + } + + fn gen_copy_reg_to_retval(&self, idx: usize, from_reg: Writable) -> Vec { + let mut ret = Vec::new(); + match &self.sig.rets[idx] { + &ABIArg::Reg(r, ty, ext) => { + let from_bits = ty_bits(ty) as u8; + let dest_reg = Writable::from_reg(r.to_reg()); + match (ext, from_bits) { + (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n) if n < 64 => { + let signed = ext == ArgumentExtension::Sext; + ret.push(M::gen_extend( + dest_reg, + from_reg.to_reg(), + signed, + from_bits, + /* to_bits = */ 64, + )); + } + _ => ret.push(M::gen_move(dest_reg, from_reg.to_reg(), ty)), + }; + } + &ABIArg::Stack(off, ty, ext) => { + let from_bits = ty_bits(ty) as u8; + // Trash the from_reg; it should be its last use. 
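[Editor's note] The `generate_gv` helper above mirrors, very late in the pipeline, what legalization would normally do for a global value: hand back the VMContext parameter register directly, or recursively materialize the base and emit one load per `Load` node, reusing a single fixed temporary. A standalone sketch of that recursion over a simplified global-value tree (an illustration; the real code emits machine instructions through the `ABIMachineImpl` hooks):

```rust
/// Editor's simplified model of the global-value shapes the prologue
/// stack-limit path supports: the VMContext parameter, or a chain of loads.
enum Gv {
    VmContext,
    Load { base: Box<Gv>, offset: i64 },
}

/// A pseudo-instruction: load `dst` from `[base + offset]`.
#[derive(Debug, PartialEq)]
struct Load {
    dst: &'static str,
    base: &'static str,
    offset: i64,
}

/// Resolve `gv` to a symbolic register, pushing the loads needed to
/// materialize it. VMContext is assumed to live in "vmctx"; every Load in
/// the chain reuses one scratch register, like the fixed temp above.
fn resolve_gv(gv: &Gv, insts: &mut Vec<Load>) -> &'static str {
    match gv {
        Gv::VmContext => "vmctx",
        Gv::Load { base, offset } => {
            let base_reg = resolve_gv(base, insts);
            insts.push(Load { dst: "scratch", base: base_reg, offset: *offset });
            "scratch"
        }
    }
}

fn main() {
    // stack_limit = *(*(vmctx + 8) + 16), a typical two-level chain.
    let gv = Gv::Load {
        base: Box::new(Gv::Load { base: Box::new(Gv::VmContext), offset: 8 }),
        offset: 16,
    };
    let mut insts = Vec::new();
    let reg = resolve_gv(&gv, &mut insts);
    assert_eq!(reg, "scratch");
    assert_eq!(insts.len(), 2);
    assert_eq!(insts[0], Load { dst: "scratch", base: "vmctx", offset: 8 });
    println!("{:?}", insts);
}
```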
+ match (ext, from_bits) { + (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n) if n < 64 => { + let signed = ext == ArgumentExtension::Sext; + ret.push(M::gen_extend( + from_reg, + from_reg.to_reg(), + signed, + from_bits, + /* to_bits = */ 64, + )); + } + _ => {} + }; + ret.push(M::gen_store_base_offset( + self.ret_area_ptr.unwrap().to_reg(), + off, + from_reg.to_reg(), + ty, + )); + } + } + ret + } + + fn gen_retval_area_setup(&self) -> Option { + if let Some(i) = self.sig.stack_ret_arg { + let inst = self.gen_copy_arg_to_reg(i, self.ret_area_ptr.unwrap()); + trace!( + "gen_retval_area_setup: inst {:?}; ptr reg is {:?}", + inst, + self.ret_area_ptr.unwrap().to_reg() + ); + Some(inst) + } else { + trace!("gen_retval_area_setup: not needed"); + None + } + } + + fn gen_ret(&self) -> Self::I { + M::gen_ret() + } + + fn gen_epilogue_placeholder(&self) -> Self::I { + M::gen_epilogue_placeholder() + } + + fn set_num_spillslots(&mut self, slots: usize) { + self.spillslots = Some(slots); + } + + fn set_clobbered(&mut self, clobbered: Set>) { + self.clobbered = clobbered; + } + + /// Load from a stackslot. + fn load_stackslot( + &self, + slot: StackSlot, + offset: u32, + ty: Type, + into_reg: Writable, + ) -> Self::I { + // Offset from beginning of stackslot area, which is at nominal SP (see + // [MemArg::NominalSPOffset] for more details on nominal SP tracking). + let stack_off = self.stackslots[slot.as_u32() as usize] as i64; + let sp_off: i64 = stack_off + (offset as i64); + trace!("load_stackslot: slot {} -> sp_off {}", slot, sp_off); + M::gen_load_stack(StackAMode::NominalSPOffset(sp_off, ty), into_reg, ty) + } + + /// Store to a stackslot. + fn store_stackslot(&self, slot: StackSlot, offset: u32, ty: Type, from_reg: Reg) -> Self::I { + // Offset from beginning of stackslot area, which is at nominal SP (see + // [MemArg::NominalSPOffset] for more details on nominal SP tracking). + let stack_off = self.stackslots[slot.as_u32() as usize] as i64; + let sp_off: i64 = stack_off + (offset as i64); + trace!("store_stackslot: slot {} -> sp_off {}", slot, sp_off); + M::gen_store_stack(StackAMode::NominalSPOffset(sp_off, ty), from_reg, ty) + } + + /// Produce an instruction that computes a stackslot address. + fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable) -> Self::I { + // Offset from beginning of stackslot area, which is at nominal SP (see + // [MemArg::NominalSPOffset] for more details on nominal SP tracking). + let stack_off = self.stackslots[slot.as_u32() as usize] as i64; + let sp_off: i64 = stack_off + (offset as i64); + M::gen_get_stack_addr(StackAMode::NominalSPOffset(sp_off, I8), into_reg, I8) + } + + /// Load from a spillslot. + fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable) -> Self::I { + // Offset from beginning of spillslot area, which is at nominal SP + stackslots_size. + let islot = slot.get() as i64; + let spill_off = islot * 8; // FIXME: 64-bit machine assumed. + let sp_off = self.stackslots_size as i64 + spill_off; + trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off); + M::gen_load_stack(StackAMode::NominalSPOffset(sp_off, ty), into_reg, ty) + } + + /// Store to a spillslot. + fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Self::I { + // Offset from beginning of spillslot area, which is at nominal SP + stackslots_size. + let islot = slot.get() as i64; + let spill_off = islot * 8; // FIXME: 64-bit machine assumed. 
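[Editor's note] The stackslot and spillslot accessors above all reduce to the same arithmetic: a nominal-SP-relative byte offset, with stackslots packed first and spill slots following at `stackslots_size`. A small self-contained sketch of just that arithmetic (illustration only; 8-byte spill slots, matching the 64-bit-only assumption noted above):

```rust
/// Editor's sketch of the frame-offset arithmetic used above.
struct FrameLayout {
    /// Byte offset of each stackslot from nominal SP (packed, 8-byte aligned).
    stackslot_offsets: Vec<i64>,
    /// Total size of the stackslot area; spill slots start here.
    stackslots_size: i64,
}

impl FrameLayout {
    /// Lay out stackslots of the given sizes, 8-byte aligning after each,
    /// mirroring the loop in `ABIBodyImpl::new`.
    fn new(slot_sizes: &[i64]) -> Self {
        let mut off = 0i64;
        let mut stackslot_offsets = Vec::new();
        for &size in slot_sizes {
            stackslot_offsets.push(off);
            off += size;
            off = (off + 7) & !7;
        }
        FrameLayout { stackslot_offsets, stackslots_size: off }
    }

    /// Nominal-SP offset of byte `byte_off` within stackslot `slot`.
    fn stackslot_addr(&self, slot: usize, byte_off: i64) -> i64 {
        self.stackslot_offsets[slot] + byte_off
    }

    /// Nominal-SP offset of spillslot `slot` (8 bytes each, 64-bit machine).
    fn spillslot_addr(&self, slot: i64) -> i64 {
        self.stackslots_size + slot * 8
    }
}

fn main() {
    // Two stackslots of 12 and 8 bytes: offsets 0 and 16, area size 24.
    let fl = FrameLayout::new(&[12, 8]);
    assert_eq!(fl.stackslot_addr(0, 4), 4);
    assert_eq!(fl.stackslot_addr(1, 0), 16);
    // Spill slots follow the stackslot area.
    assert_eq!(fl.spillslot_addr(0), 24);
    assert_eq!(fl.spillslot_addr(3), 48);
}
```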
+ let sp_off = self.stackslots_size as i64 + spill_off; + trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off); + M::gen_store_stack(StackAMode::NominalSPOffset(sp_off, ty), from_reg, ty) + } + + fn spillslots_to_stack_map( + &self, + slots: &[SpillSlot], + state: &::State, + ) -> StackMap { + let virtual_sp_offset = M::get_virtual_sp_offset_from_state(state); + let nominal_sp_to_fp = M::get_nominal_sp_to_fp(state); + assert!(virtual_sp_offset >= 0); + trace!( + "spillslots_to_stackmap: slots = {:?}, state = {:?}", + slots, + state + ); + let map_size = (virtual_sp_offset + nominal_sp_to_fp) as u32; + let map_words = (map_size + 7) / 8; // FIXME: 64-bit machine assumed. + let mut bits = std::iter::repeat(false) + .take(map_words as usize) + .collect::>(); + + let first_spillslot_word = ((self.stackslots_size + virtual_sp_offset as u32) / 8) as usize; + for &slot in slots { + let slot = slot.get() as usize; + bits[first_spillslot_word + slot] = true; + } + + StackMap::from_slice(&bits[..]) + } + + fn gen_prologue(&mut self) -> Vec { + let mut insts = vec![]; + if !self.call_conv.extends_baldrdash() { + // set up frame + insts.extend(M::gen_prologue_frame_setup().into_iter()); + } + + let mut total_stacksize = self.stackslots_size + 8 * self.spillslots.unwrap() as u32; + if self.call_conv.extends_baldrdash() { + debug_assert!( + !self.flags.enable_probestack(), + "baldrdash does not expect cranelift to emit stack probes" + ); + // FIXME: 64-bit machine assumed. + total_stacksize += self.flags.baldrdash_prologue_words() as u32 * 8; + } + let total_stacksize = (total_stacksize + 15) & !15; // 16-align the stack. + + let mut total_sp_adjust = 0; + + if !self.call_conv.extends_baldrdash() { + // Leaf functions with zero stack don't need a stack check if one's + // specified, otherwise always insert the stack check. + if total_stacksize > 0 || !self.is_leaf { + if let Some((reg, stack_limit_load)) = &self.stack_limit { + insts.extend_from_slice(stack_limit_load); + self.insert_stack_check(*reg, total_stacksize, &mut insts); + } + } + if total_stacksize > 0 { + total_sp_adjust += total_stacksize as u64; + } + } + + // N.B.: "nominal SP", which we use to refer to stackslots and + // spillslots, is defined to be equal to the stack pointer at this point + // in the prologue. + // + // If we push any clobbers below, we emit a virtual-SP adjustment + // meta-instruction so that the nominal SP references behave as if SP + // were still at this point. See documentation for + // [crate::machinst::abi_impl](this module) for more details on + // stackframe layout and nominal SP maintenance. + + if total_sp_adjust > 0 { + // sub sp, sp, #total_stacksize + let adj = total_sp_adjust as i64; + insts.extend(M::gen_sp_reg_adjust(-adj)); + } + + // Save clobbered registers. + let (clobber_size, clobber_insts) = M::gen_clobber_save(self.call_conv, &self.clobbered); + insts.extend(clobber_insts); + + if clobber_size > 0 { + insts.push(M::gen_nominal_sp_adj(clobber_size as i64)); + } + + self.total_frame_size = Some(total_stacksize); + insts + } + + fn gen_epilogue(&self) -> Vec { + let mut insts = vec![]; + + // Restore clobbered registers. 
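[Editor's note] The `spillslots_to_stack_map` conversion above is easy to get off-by-one on, so here is the index arithmetic in isolation (a sketch assuming 8-byte words, as in the 64-bit-only code): the map covers everything from the current real SP up to the nominal-SP-to-FP distance, and a spill slot's bit lands at its slot index offset by the stackslot area and any outstanding virtual SP adjustment.

```rust
/// Editor's sketch of the stack-map bit layout used above (8-byte words).
fn spillslots_to_bitmap(
    slots: &[usize],        // spill-slot indices holding references
    stackslots_size: u32,   // bytes of stackslot area below the spill area
    virtual_sp_offset: i64, // outstanding nominal-SP fixup (e.g. pushed args)
    nominal_sp_to_fp: i64,  // distance from nominal SP up to FP
) -> Vec<bool> {
    assert!(virtual_sp_offset >= 0);
    let map_size = (virtual_sp_offset + nominal_sp_to_fp) as u32;
    let map_words = (map_size + 7) / 8;
    let mut bits = vec![false; map_words as usize];
    // Spill slots sit above the stackslot area; if SP has been pushed further
    // down (virtual_sp_offset > 0), everything shifts up by that amount.
    let first_spillslot_word =
        ((stackslots_size + virtual_sp_offset as u32) / 8) as usize;
    for &slot in slots {
        bits[first_spillslot_word + slot] = true;
    }
    bits
}

fn main() {
    // 16 bytes of stackslots, no outstanding SP adjustment, 48 bytes of frame:
    // spill slot 1 maps to word 2 + 1 = 3 of a 6-word map.
    let bits = spillslots_to_bitmap(&[1], 16, 0, 48);
    assert_eq!(bits.len(), 6);
    assert!(bits[3]);
    assert!(!bits[2]);
}
```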
+ insts.extend(M::gen_clobber_restore(self.call_conv, &self.clobbered)); + + // N.B.: we do *not* emit a nominal SP adjustment here, because (i) there will be no + // references to nominal SP offsets before the return below, and (ii) the instruction + // emission tracks running SP offset linearly (in straight-line order), not according to + // the CFG, so early returns in the middle of function bodies would cause an incorrect + // offset for the rest of the body. + + if !self.call_conv.extends_baldrdash() { + insts.extend(M::gen_epilogue_frame_restore()); + insts.push(M::gen_ret()); + } + + debug!("Epilogue: {:?}", insts); + insts + } + + fn frame_size(&self) -> u32 { + self.total_frame_size + .expect("frame size not computed before prologue generation") + } + + fn stack_args_size(&self) -> u32 { + self.sig.stack_arg_space as u32 + } + + fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 { + M::get_spillslot_size(rc, ty) + } + + fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option) -> Self::I { + let ty = ty_from_ty_hint_or_reg_class(from_reg.to_reg(), ty); + self.store_spillslot(to_slot, ty, from_reg.to_reg()) + } + + fn gen_reload( + &self, + to_reg: Writable, + from_slot: SpillSlot, + ty: Option, + ) -> Self::I { + let ty = ty_from_ty_hint_or_reg_class(to_reg.to_reg().to_reg(), ty); + self.load_spillslot(from_slot, ty, to_reg.map(|r| r.to_reg())) + } +} + +fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec>) { + // Compute uses: all arg regs. + let mut uses = Vec::new(); + for arg in &sig.args { + match arg { + &ABIArg::Reg(reg, ..) => uses.push(reg.to_reg()), + _ => {} + } + } + + // Compute defs: all retval regs, and all caller-save (clobbered) regs. + let mut defs = M::get_caller_saves(sig.call_conv); + for ret in &sig.rets { + match ret { + &ABIArg::Reg(reg, ..) => defs.push(Writable::from_reg(reg.to_reg())), + _ => {} + } + } + + (uses, defs) +} + +/// ABI object for a callsite. +pub struct ABICallImpl { + /// The called function's signature. + sig: ABISig, + /// All uses for the callsite, i.e., function args. + uses: Vec, + /// All defs for the callsite, i.e., return values and caller-saves. + defs: Vec>, + /// Call destination. + dest: CallDest, + /// Location of callsite. + loc: ir::SourceLoc, + /// Actuall call opcode; used to distinguish various types of calls. + opcode: ir::Opcode, + + _mach: PhantomData, +} + +/// Destination for a call. +#[derive(Debug, Clone)] +pub enum CallDest { + /// Call to an ExtName (named function symbol). + ExtName(ir::ExternalName, RelocDistance), + /// Indirect call to a function pointer in a register. + Reg(Reg), +} + +impl ABICallImpl { + /// Create a callsite ABI object for a call directly to the specified function. + pub fn from_func( + sig: &ir::Signature, + extname: &ir::ExternalName, + dist: RelocDistance, + loc: ir::SourceLoc, + ) -> CodegenResult> { + let sig = ABISig::from_func_sig::(sig)?; + let (uses, defs) = abisig_to_uses_and_defs::(&sig); + Ok(ABICallImpl { + sig, + uses, + defs, + dest: CallDest::ExtName(extname.clone(), dist), + loc, + opcode: ir::Opcode::Call, + _mach: PhantomData, + }) + } + + /// Create a callsite ABI object for a call to a function pointer with the + /// given signature. 
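[Editor's note] The `abisig_to_uses_and_defs` helper above captures the register allocator's view of a call: everything passed in a register is a use, and everything the callee may write, i.e. register return values plus all caller-saved registers, is a def. A toy version over symbolic register names (a sketch; `CALLER_SAVES` here stands in for `get_caller_saves`, and unlike the real code it de-duplicates for readability):

```rust
/// Editor's toy model of a signature's register-resident args and rets.
struct ToySig {
    reg_args: Vec<&'static str>,
    reg_rets: Vec<&'static str>,
}

/// Stand-in for `ABIMachineImpl::get_caller_saves`.
const CALLER_SAVES: &[&str] = &["r0", "r1", "r2", "r3"];

/// Uses = argument registers; defs = return registers plus every
/// caller-saved register (the callee is free to clobber those).
fn uses_and_defs(sig: &ToySig) -> (Vec<&'static str>, Vec<&'static str>) {
    let uses = sig.reg_args.clone();
    let mut defs: Vec<&'static str> = CALLER_SAVES.to_vec();
    for &r in &sig.reg_rets {
        if !defs.contains(&r) {
            defs.push(r);
        }
    }
    (uses, defs)
}

fn main() {
    let sig = ToySig {
        reg_args: vec!["r0", "r1"],
        reg_rets: vec!["r0"],
    };
    let (uses, defs) = uses_and_defs(&sig);
    assert_eq!(uses, vec!["r0", "r1"]);
    // r0 is already in the caller-save set, so nothing new is added here.
    assert_eq!(defs.len(), 4);
}
```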
+ pub fn from_ptr( + sig: &ir::Signature, + ptr: Reg, + loc: ir::SourceLoc, + opcode: ir::Opcode, + ) -> CodegenResult> { + let sig = ABISig::from_func_sig::(sig)?; + let (uses, defs) = abisig_to_uses_and_defs::(&sig); + Ok(ABICallImpl { + sig, + uses, + defs, + dest: CallDest::Reg(ptr), + loc, + opcode, + _mach: PhantomData, + }) + } +} + +fn adjust_stack_and_nominal_sp>( + ctx: &mut C, + off: u64, + is_sub: bool, +) { + if off == 0 { + return; + } + let off = off as i64; + let amt = if is_sub { -off } else { off }; + for inst in M::gen_sp_reg_adjust(amt) { + ctx.emit(inst); + } + ctx.emit(M::gen_nominal_sp_adj(-amt)); +} + +impl ABICall for ABICallImpl { + type I = M::I; + + fn num_args(&self) -> usize { + if self.sig.stack_ret_arg.is_some() { + self.sig.args.len() - 1 + } else { + self.sig.args.len() + } + } + + fn emit_stack_pre_adjust>(&self, ctx: &mut C) { + let off = self.sig.stack_arg_space + self.sig.stack_ret_space; + adjust_stack_and_nominal_sp::(ctx, off as u64, /* is_sub = */ true) + } + + fn emit_stack_post_adjust>(&self, ctx: &mut C) { + let off = self.sig.stack_arg_space + self.sig.stack_ret_space; + adjust_stack_and_nominal_sp::(ctx, off as u64, /* is_sub = */ false) + } + + fn emit_copy_reg_to_arg>( + &self, + ctx: &mut C, + idx: usize, + from_reg: Reg, + ) { + match &self.sig.args[idx] { + &ABIArg::Reg(reg, ty, ext) + if ext != ir::ArgumentExtension::None && ty_bits(ty) < 64 => + { + assert_eq!(RegClass::I64, reg.get_class()); + let signed = match ext { + ir::ArgumentExtension::Uext => false, + ir::ArgumentExtension::Sext => true, + _ => unreachable!(), + }; + ctx.emit(M::gen_extend( + Writable::from_reg(reg.to_reg()), + from_reg, + signed, + ty_bits(ty) as u8, + 64, + )); + } + &ABIArg::Reg(reg, ty, _) => { + ctx.emit(M::gen_move(Writable::from_reg(reg.to_reg()), from_reg, ty)); + } + &ABIArg::Stack(off, ty, ext) => { + if ext != ir::ArgumentExtension::None && ty_bits(ty) < 64 { + assert_eq!(RegClass::I64, from_reg.get_class()); + let signed = match ext { + ir::ArgumentExtension::Uext => false, + ir::ArgumentExtension::Sext => true, + _ => unreachable!(), + }; + // Extend in place in the source register. Our convention is to + // treat high bits as undefined for values in registers, so this + // is safe, even for an argument that is nominally read-only. + ctx.emit(M::gen_extend( + Writable::from_reg(from_reg), + from_reg, + signed, + ty_bits(ty) as u8, + 64, + )); + } + ctx.emit(M::gen_store_stack( + StackAMode::SPOffset(off, ty), + from_reg, + ty, + )); + } + } + } + + fn emit_copy_retval_to_reg>( + &self, + ctx: &mut C, + idx: usize, + into_reg: Writable, + ) { + match &self.sig.rets[idx] { + // Extension mode doesn't matter because we're copying out, not in, + // and we ignore high bits in our own registers by convention. 
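[Editor's note] Both `emit_copy_reg_to_arg` above and `gen_copy_reg_to_retval` earlier apply the same rule: a value narrower than 64 bits whose ABI slot is marked uext/sext is widened before it is moved or stored, and otherwise it is passed through untouched (high bits are undefined by convention). A compact sketch of just that rule, with stand-in types:

```rust
/// Stand-ins for `ir::ArgumentExtension` and the decision made above.
#[derive(Clone, Copy, PartialEq, Debug)]
enum Ext {
    None,
    Uext,
    Sext,
}

#[derive(Debug, PartialEq)]
enum Widen {
    /// No extension needed: pass the register through as-is.
    AsIs,
    /// Zero- or sign-extend from `from_bits` up to 64 before using the value.
    Extend { signed: bool, from_bits: u8 },
}

fn widen_for_abi(ext: Ext, ty_bits: u8) -> Widen {
    match ext {
        Ext::Uext | Ext::Sext if ty_bits < 64 => Widen::Extend {
            signed: ext == Ext::Sext,
            from_bits: ty_bits,
        },
        _ => Widen::AsIs,
    }
}

fn main() {
    // An i32 argument marked sext is sign-extended to the full register.
    assert_eq!(
        widen_for_abi(Ext::Sext, 32),
        Widen::Extend { signed: true, from_bits: 32 }
    );
    // A full-width i64 needs nothing, regardless of the extension attribute.
    assert_eq!(widen_for_abi(Ext::Uext, 64), Widen::AsIs);
    assert_eq!(widen_for_abi(Ext::None, 8), Widen::AsIs);
}
```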
+ &ABIArg::Reg(reg, ty, _) => ctx.emit(M::gen_move(into_reg, reg.to_reg(), ty)), + &ABIArg::Stack(off, ty, _) => { + let ret_area_base = self.sig.stack_arg_space; + ctx.emit(M::gen_load_stack( + StackAMode::SPOffset(off + ret_area_base, ty), + into_reg, + ty, + )); + } + } + } + + fn emit_call>(&mut self, ctx: &mut C) { + let (uses, defs) = ( + mem::replace(&mut self.uses, Default::default()), + mem::replace(&mut self.defs, Default::default()), + ); + if let Some(i) = self.sig.stack_ret_arg { + let rd = ctx.alloc_tmp(RegClass::I64, I64); + let ret_area_base = self.sig.stack_arg_space; + ctx.emit(M::gen_get_stack_addr( + StackAMode::SPOffset(ret_area_base, I8), + rd, + I8, + )); + self.emit_copy_reg_to_arg(ctx, i, rd.to_reg()); + } + for (is_safepoint, inst) in + M::gen_call(&self.dest, uses, defs, self.loc, self.opcode).into_iter() + { + if is_safepoint { + ctx.emit_safepoint(inst); + } else { + ctx.emit(inst); + } + } + } +} diff --git a/cranelift/codegen/src/machinst/helpers.rs b/cranelift/codegen/src/machinst/helpers.rs new file mode 100644 index 0000000000..3231258921 --- /dev/null +++ b/cranelift/codegen/src/machinst/helpers.rs @@ -0,0 +1,18 @@ +//! Miscellaneous helpers for machine backends. + +use crate::ir::Type; + +/// Returns the size (in bits) of a given type. +pub fn ty_bits(ty: Type) -> usize { + usize::from(ty.bits()) +} + +/// Is the type represented by an integer (not float) at the machine level? +pub(crate) fn ty_has_int_representation(ty: Type) -> bool { + ty.is_int() || ty.is_bool() || ty.is_ref() +} + +/// Is the type represented by a float or vector value at the machine level? +pub(crate) fn ty_has_float_or_vec_representation(ty: Type) -> bool { + ty.is_vector() || ty.is_float() +} diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index 7d14be82d2..b8ec275133 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -123,12 +123,16 @@ pub mod blockorder; pub use blockorder::*; pub mod abi; pub use abi::*; +pub mod abi_impl; +pub use abi_impl::*; pub mod pretty_print; pub use pretty_print::*; pub mod buffer; pub use buffer::*; pub mod adapter; pub use adapter::*; +pub mod helpers; +pub use helpers::*; /// A machine instruction. pub trait MachInst: Clone + Debug {
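[Editor's note] Tying the callsite pieces together: `emit_stack_pre_adjust` reserves one contiguous region for outgoing arguments plus the return area, `emit_call` materializes the return-area base (`SP + stack_arg_space`) into the hidden pointer argument, and `emit_copy_retval_to_reg` reads stack return values back at `SP + stack_arg_space + off`. A small sketch of that address arithmetic (an illustration, not the patch's API):

```rust
/// Editor's sketch of the callsite stack region described above.
struct CallFrame {
    stack_arg_space: i64,
    stack_ret_space: i64,
}

impl CallFrame {
    /// Total adjustment applied by `emit_stack_pre_adjust` (and undone by
    /// `emit_stack_post_adjust`).
    fn total_space(&self) -> i64 {
        self.stack_arg_space + self.stack_ret_space
    }

    /// SP-relative address passed as the hidden return-area pointer.
    fn ret_area_base(&self) -> i64 {
        self.stack_arg_space
    }

    /// SP-relative offset at which a stack return value is read back.
    fn ret_offset(&self, off: i64) -> i64 {
        self.ret_area_base() + off
    }
}

fn main() {
    // 16 bytes of outgoing args, 24 bytes of return area.
    let cf = CallFrame { stack_arg_space: 16, stack_ret_space: 24 };
    assert_eq!(cf.total_space(), 40);   // reserved before the call
    assert_eq!(cf.ret_area_base(), 16); // hidden pointer = SP + 16
    assert_eq!(cf.ret_offset(8), 24);   // second 8-byte stack return value
}
```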