From 615362068f41df52f59c6053a590645701e170c4 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 27 May 2020 14:01:49 -0700 Subject: [PATCH 1/2] Multi-value return support. --- cranelift/codegen/src/isa/aarch64/abi.rs | 269 ++++++++++++++---- cranelift/codegen/src/isa/x64/abi.rs | 10 + cranelift/codegen/src/machinst/abi.rs | 13 + cranelift/codegen/src/machinst/lower.rs | 10 + .../vcode/aarch64/multivalue-ret.clif | 18 ++ .../filetests/wasm/multi-val-f32.clif | 1 + .../filetests/wasm/multi-val-f64.clif | 1 + .../filetests/wasm/multi-val-i32.clif | 1 + .../filetests/wasm/multi-val-i64.clif | 1 + .../wasm/multi-val-tons-of-results.clif | 1 + 10 files changed, 269 insertions(+), 56 deletions(-) create mode 100644 cranelift/filetests/filetests/vcode/aarch64/multivalue-ret.clif diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index bb8d2de30c..235d9031a8 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -58,13 +58,44 @@ //! //! (low address) //! ``` +//! +//! # Multi-value Returns +//! +//! Note that we support multi-value returns by adopting the SpiderMonkey Wasm +//! ABI internally. Because we do not support linking with externally-compiled +//! multi-value-returning functions (yet), this choice is arbitrary and we are +//! free to make it as we please. Wasmtime generates trampolines to enter +//! toplevel multi-value-returning functions, so this does not concern the +//! Wasmtime embedding. +//! +//! For details of the multi-value return ABI, see: +//! +//! https://searchfox.org/mozilla-central/rev/bc3600def806859c31b2c7ac06e3d69271052a89/js/src/wasm/WasmStubs.h#134 +//! +//! In brief: +//! - Returns are processed in *reverse* order. +//! - The first return in this order (so the last return) goes into the ordinary +//! return register, X0. +//! - Any further returns go in a struct-return area, allocated upwards (in +//! address order) during the reverse traversal. +//! - This struct-return area is provided by the caller, and a pointer to its +//! start is passed as an invisible last (extra) argument. Normally the caller +//! will allocate this area on the stack. When we generate calls, we place it +//! just above the on-stack argument area. +//! - So, for example, a function returning 4 i64's (v0, v1, v2, v3), with no +//! formal arguments, would: +//! - Accept a pointer P to the struct return area in x0 on entry. +//! - Return v3 in x0. +//! - Return v2 in memory at [P]. +//! - Return v1 in memory at [P+8]. +//! - Return v0 in memory at [P+16]. use crate::ir; use crate::ir::types; use crate::ir::types::*; use crate::ir::{ArgumentExtension, StackSlot}; use crate::isa; -use crate::isa::aarch64::{self, inst::*}; +use crate::isa::aarch64::{inst::*, lower::ty_bits}; use crate::machinst::*; use crate::settings; @@ -87,9 +118,19 @@ enum ABIArg { /// AArch64 ABI information shared between body (callee) and caller. struct ABISig { + /// Argument locations (regs or stack slots). Stack offsets are relative to + /// SP on entry to function. args: Vec, + /// Return-value locations. Stack offsets are relative to the return-area + /// pointer. rets: Vec, + /// Space on stack used to store arguments. stack_arg_space: i64, + /// Space on stack used to store return values. + stack_ret_space: i64, + /// Index in `args` of the stack-return-value-area argument. + stack_ret_arg: Option, + /// Calling convention used. 
call_conv: isa::CallConv, } @@ -155,19 +196,49 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt } } +/// Are we computing information about arguments or return values? Much of the +/// handling is factored out into common routines; this enum allows us to +/// distinguish which case we're handling. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum ArgsOrRets { + Args, + Rets, +} + /// Process a list of parameters or return values and allocate them to X-regs, /// V-regs, and stack slots. /// /// Returns the list of argument locations, and the stack-space used (rounded up /// to a 16-byte-aligned boundary). -fn compute_arg_locs(call_conv: isa::CallConv, params: &[ir::AbiParam]) -> (Vec, i64) { +fn compute_arg_locs( + call_conv: isa::CallConv, + params: &[ir::AbiParam], + args_or_rets: ArgsOrRets, + add_ret_area_ptr: bool, +) -> (Vec, i64) { + let is_baldrdash = call_conv.extends_baldrdash(); + // See AArch64 ABI (https://c9x.me/compile/bib/abi-arm64.pdf), sections 5.4. let mut next_xreg = 0; let mut next_vreg = 0; let mut next_stack: u64 = 0; let mut ret = vec![]; - for param in params { + let max_reg_vals = match (args_or_rets, is_baldrdash) { + (ArgsOrRets::Args, _) => 8, // x0-x7, v0-v7 + (ArgsOrRets::Rets, false) => 8, // x0-x7, v0-v7 + (ArgsOrRets::Rets, true) => 1, // x0 or v0 + }; + + for i in 0..params.len() { + // Process returns backward, according to the SpiderMonkey ABI (which we + // adopt internally if `is_baldrdash` is set). + let param = match (args_or_rets, is_baldrdash) { + (ArgsOrRets::Args, _) => ¶ms[i], + (ArgsOrRets::Rets, false) => ¶ms[i], + (ArgsOrRets::Rets, true) => ¶ms[params.len() - 1 - i], + }; + // Validate "purpose". match ¶m.purpose { &ir::ArgumentPurpose::VMContext @@ -180,31 +251,52 @@ fn compute_arg_locs(call_conv: isa::CallConv, params: &[ir::AbiParam]) -> (Vec 8, - _ => panic!("Unsupported vector-reg argument type"), - }; - // Align. - debug_assert!(size.is_power_of_two()); - next_stack = (next_stack + size - 1) & !(size - 1); - ret.push(ABIArg::Stack(next_stack as i64, param.value_type)); - next_stack += size; - } + vreg(*next_reg) + }; + ret.push(ABIArg::Reg(reg.to_real_reg(), param.value_type)); + *next_reg += 1; + } else { + // Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte + // stack alignment happens separately after all args.) + let size = (ty_bits(param.value_type) / 8) as u64; + let size = std::cmp::max(size, 8); + // Align. + debug_assert!(size.is_power_of_two()); + next_stack = (next_stack + size - 1) & !(size - 1); + ret.push(ABIArg::Stack(next_stack as i64, param.value_type)); + next_stack += size; + } + } + + if args_or_rets == ArgsOrRets::Rets && is_baldrdash { + ret.reverse(); + } + + if add_ret_area_ptr { + debug_assert!(args_or_rets == ArgsOrRets::Args); + if next_xreg < max_reg_vals { + ret.push(ABIArg::Reg(xreg(next_xreg).to_real_reg(), I64)); + } else { + ret.push(ABIArg::Stack(next_stack as i64, I64)); + next_stack += 8; } } @@ -215,22 +307,43 @@ fn compute_arg_locs(call_conv: isa::CallConv, params: &[ir::AbiParam]) -> (Vec ABISig { - // Compute args and retvals from signature. - // TODO: pass in arg-mode or ret-mode. (Does not matter - // for the types of arguments/return values that we support.) - let (args, stack_arg_space) = compute_arg_locs(sig.call_conv, &sig.params); - let (rets, _) = compute_arg_locs(sig.call_conv, &sig.returns); + // Compute args and retvals from signature. Handle retvals first, + // because we may need to add a return-area arg to the args. 
+ let (rets, stack_ret_space) = compute_arg_locs( + sig.call_conv, + &sig.returns, + ArgsOrRets::Rets, + /* extra ret-area ptr = */ false, + ); + let need_stack_return_area = stack_ret_space > 0; + let (args, stack_arg_space) = compute_arg_locs( + sig.call_conv, + &sig.params, + ArgsOrRets::Args, + need_stack_return_area, + ); + let stack_ret_arg = if need_stack_return_area { + Some(args.len() - 1) + } else { + None + }; - // Verify that there are no return values on the stack. - debug_assert!(rets.iter().all(|a| match a { - &ABIArg::Stack(..) => false, - _ => true, - })); + trace!( + "ABISig: sig {:?} => args = {:?} rets = {:?} arg stack = {} ret stack = {} stack_ret_arg = {:?}", + sig, + args, + rets, + stack_arg_space, + stack_ret_space, + stack_ret_arg + ); ABISig { args, rets, stack_arg_space, + stack_ret_space, + stack_ret_arg, call_conv: sig.call_conv, } } @@ -250,6 +363,8 @@ pub struct AArch64ABIBody { spillslots: Option, /// Total frame size. total_frame_size: Option, + /// The register holding the return-area pointer, if needed. + ret_area_ptr: Option>, /// Calling convention this function expects. call_conv: isa::CallConv, /// The settings controlling this function's compilation. @@ -409,6 +524,7 @@ impl AArch64ABIBody { clobbered: Set::empty(), spillslots: None, total_frame_size: None, + ret_area_ptr: None, call_conv, flags, is_leaf: f.is_leaf(), @@ -675,6 +791,20 @@ fn get_caller_saves(call_conv: isa::CallConv) -> Vec> { impl ABIBody for AArch64ABIBody { type I = Inst; + fn needed_tmps(&self) -> usize { + if self.sig.stack_ret_arg.is_some() { + 1 + } else { + 0 + } + } + + fn init_with_tmps(&mut self, tmps: &[Writable]) { + if self.sig.stack_ret_arg.is_some() { + self.ret_area_ptr = Some(tmps[0]); + } + } + fn flags(&self) -> &settings::Flags { &self.flags } @@ -722,6 +852,21 @@ impl ABIBody for AArch64ABIBody { } } + fn gen_retval_area_setup(&self) -> Vec { + if let Some(i) = self.sig.stack_ret_arg { + let inst = self.gen_copy_arg_to_reg(i, self.ret_area_ptr.unwrap()); + trace!( + "gen_retval_area_setup: inst {:?}; ptr reg is {:?}", + inst, + self.ret_area_ptr.unwrap().to_reg() + ); + vec![inst] + } else { + trace!("gen_retval_area_setup: not needed"); + vec![] + } + } + fn gen_copy_reg_to_retval( &self, idx: usize, @@ -731,7 +876,7 @@ impl ABIBody for AArch64ABIBody { let mut ret = Vec::new(); match &self.sig.rets[idx] { &ABIArg::Reg(r, ty) => { - let from_bits = aarch64::lower::ty_bits(ty) as u8; + let from_bits = ty_bits(ty) as u8; let dest_reg = Writable::from_reg(r.to_reg()); match (ext, from_bits) { (ArgumentExtension::Uext, n) if n < 64 => { @@ -756,7 +901,7 @@ impl ABIBody for AArch64ABIBody { }; } &ABIArg::Stack(off, ty) => { - let from_bits = aarch64::lower::ty_bits(ty) as u8; + let from_bits = ty_bits(ty) as u8; // Trash the from_reg; it should be its last use. 
match (ext, from_bits) { (ArgumentExtension::Uext, n) if n < 64 => { @@ -779,11 +924,9 @@ impl ABIBody for AArch64ABIBody { } _ => {} }; - ret.push(store_stack( - MemArg::FPOffset(self.fp_to_arg_offset() + off), - from_reg.to_reg(), - ty, - )) + let mem = MemArg::reg_maybe_offset(self.ret_area_ptr.unwrap().to_reg(), off, ty) + .expect("Return-value area is too large"); + ret.push(store_stack(mem, from_reg.to_reg(), ty)) } } ret @@ -1222,23 +1365,21 @@ impl ABICall for AArch64ABICall { type I = Inst; fn num_args(&self) -> usize { - self.sig.args.len() + if self.sig.stack_ret_arg.is_some() { + self.sig.args.len() - 1 + } else { + self.sig.args.len() + } } fn emit_stack_pre_adjust>(&self, ctx: &mut C) { - adjust_stack( - ctx, - self.sig.stack_arg_space as u64, - /* is_sub = */ true, - ) + let off = self.sig.stack_arg_space + self.sig.stack_ret_space; + adjust_stack(ctx, off as u64, /* is_sub = */ true) } fn emit_stack_post_adjust>(&self, ctx: &mut C) { - adjust_stack( - ctx, - self.sig.stack_arg_space as u64, - /* is_sub = */ false, - ) + let off = self.sig.stack_arg_space + self.sig.stack_ret_space; + adjust_stack(ctx, off as u64, /* is_sub = */ false) } fn emit_copy_reg_to_arg>( @@ -1265,7 +1406,14 @@ impl ABICall for AArch64ABICall { ) { match &self.sig.rets[idx] { &ABIArg::Reg(reg, ty) => ctx.emit(Inst::gen_move(into_reg, reg.to_reg(), ty)), - _ => unimplemented!(), + &ABIArg::Stack(off, ty) => { + let ret_area_base = self.sig.stack_arg_space; + ctx.emit(load_stack( + MemArg::SPOffset(off + ret_area_base), + into_reg, + ty, + )); + } } } @@ -1274,6 +1422,15 @@ impl ABICall for AArch64ABICall { mem::replace(&mut self.uses, Default::default()), mem::replace(&mut self.defs, Default::default()), ); + if let Some(i) = self.sig.stack_ret_arg { + let rd = ctx.alloc_tmp(RegClass::I64, I64); + let ret_area_base = self.sig.stack_arg_space; + ctx.emit(Inst::LoadAddr { + rd, + mem: MemArg::SPOffset(ret_area_base), + }); + self.emit_copy_reg_to_arg(ctx, i, rd.to_reg()); + } match &self.dest { &CallDest::ExtName(ref name, RelocDistance::Near) => ctx.emit(Inst::Call { info: Box::new(CallInfo { diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 2deb47fb89..294bdc215a 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -184,6 +184,12 @@ impl X64ABIBody { impl ABIBody for X64ABIBody { type I = Inst; + fn needed_tmps(&self) -> usize { + 0 + } + + fn init_with_tmps(&mut self, _: &[Writable]) {} + fn flags(&self) -> &settings::Flags { &self.flags } @@ -233,6 +239,10 @@ impl ABIBody for X64ABIBody { } } + fn gen_retval_area_setup(&self) -> Vec { + vec![] + } + fn gen_copy_reg_to_retval( &self, idx: usize, diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index e8fbf25db1..7075b10689 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -12,6 +12,12 @@ pub trait ABIBody { /// The instruction type for the ISA associated with this ABI. type I: VCodeInst; + /// How many temps are needed? + fn needed_tmps(&self) -> usize; + + /// Initialize, providing the requersted temps. + fn init_with_tmps(&mut self, tmps: &[Writable]); + /// Get the settings controlling this function's compilation. fn flags(&self) -> &settings::Flags; @@ -34,6 +40,13 @@ pub trait ABIBody { /// register. fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable) -> Self::I; + /// Generate any setup instructions needed to save values to the + /// return-value area. 
This is usually used when there are multiple return
+    /// values or an otherwise large return value that must be passed on the
+    /// stack; typically the ABI specifies an extra hidden argument that is a
+    /// pointer to that memory.
+    fn gen_retval_area_setup(&self) -> Vec<Self::I>;
+
     /// Generate an instruction which copies a source register to a return value slot.
     fn gen_copy_reg_to_retval(
         &self,
         idx: usize,
diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs
index fabfdecc6a..b83d400b74 100644
--- a/cranelift/codegen/src/machinst/lower.rs
+++ b/cranelift/codegen/src/machinst/lower.rs
@@ -6,6 +6,7 @@ use crate::entity::SecondaryMap;
 use crate::fx::{FxHashMap, FxHashSet};
 use crate::inst_predicates::{has_side_effect_or_load, is_constant_64bit};
 use crate::ir::instructions::BranchInfo;
+use crate::ir::types::I64;
 use crate::ir::{
     ArgumentExtension, Block, Constant, ConstantData, ExternalName, Function, GlobalValueData,
     Inst, InstructionData, MemFlags, Opcode, Signature, SourceLoc, Type, Value, ValueDef,
@@ -382,6 +383,9 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
             let insn = self.vcode.abi().gen_copy_arg_to_reg(i, reg);
             self.emit(insn);
         }
+        for insn in self.vcode.abi().gen_retval_area_setup().into_iter() {
+            self.emit(insn);
+        }
     }
 }
@@ -648,6 +652,12 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
     pub fn lower<B: LowerBackend<MInst = I>>(mut self, backend: &B) -> CodegenResult<VCode<I>> {
         debug!("about to lower function: {:?}", self.f);
 
+        // Initialize the ABI object with any temps it needs.
+        let tmps: SmallVec<[Writable<Reg>; 4]> = (0..self.vcode.abi().needed_tmps())
+            .map(|_| self.alloc_tmp(RegClass::I64, I64))
+            .collect();
+        self.vcode.abi().init_with_tmps(&tmps[..]);
+
         // Get the pinned reg here (we only parameterize this function on `B`,
         // not the whole `Lower` impl).
         self.pinned_reg = backend.maybe_pinned_reg();
diff --git a/cranelift/filetests/filetests/vcode/aarch64/multivalue-ret.clif b/cranelift/filetests/filetests/vcode/aarch64/multivalue-ret.clif
new file mode 100644
index 0000000000..e20130fd3c
--- /dev/null
+++ b/cranelift/filetests/filetests/vcode/aarch64/multivalue-ret.clif
@@ -0,0 +1,18 @@
+test compile
+target aarch64
+
+;; Test default (non-SpiderMonkey) ABI.
+function %f() -> i64, i64 {
+block1:
+  v0 = iconst.i64 1
+  v1 = iconst.i64 2
+  return v0, v1
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp +; nextln: movz x0, #1 +; nextln: movz x1, #2 +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret diff --git a/cranelift/filetests/filetests/wasm/multi-val-f32.clif b/cranelift/filetests/filetests/wasm/multi-val-f32.clif index b69b71e047..96a212b605 100644 --- a/cranelift/filetests/filetests/wasm/multi-val-f32.clif +++ b/cranelift/filetests/filetests/wasm/multi-val-f32.clif @@ -1,5 +1,6 @@ test compile target x86_64 haswell +target aarch64 ;; Returning many f32s diff --git a/cranelift/filetests/filetests/wasm/multi-val-f64.clif b/cranelift/filetests/filetests/wasm/multi-val-f64.clif index afb6585efc..ce7c057bcb 100644 --- a/cranelift/filetests/filetests/wasm/multi-val-f64.clif +++ b/cranelift/filetests/filetests/wasm/multi-val-f64.clif @@ -1,5 +1,6 @@ test compile target x86_64 haswell +target aarch64 ;; Returning many f64s diff --git a/cranelift/filetests/filetests/wasm/multi-val-i32.clif b/cranelift/filetests/filetests/wasm/multi-val-i32.clif index 035cc2e332..983e382150 100644 --- a/cranelift/filetests/filetests/wasm/multi-val-i32.clif +++ b/cranelift/filetests/filetests/wasm/multi-val-i32.clif @@ -1,5 +1,6 @@ test compile target x86_64 haswell +target aarch64 ;; Returning many i32s diff --git a/cranelift/filetests/filetests/wasm/multi-val-i64.clif b/cranelift/filetests/filetests/wasm/multi-val-i64.clif index bacaf8240f..3a1c919af8 100644 --- a/cranelift/filetests/filetests/wasm/multi-val-i64.clif +++ b/cranelift/filetests/filetests/wasm/multi-val-i64.clif @@ -1,5 +1,6 @@ test compile target x86_64 haswell +target aarch64 ;; Returning many i64s diff --git a/cranelift/filetests/filetests/wasm/multi-val-tons-of-results.clif b/cranelift/filetests/filetests/wasm/multi-val-tons-of-results.clif index f394bdd904..6c04476f57 100644 --- a/cranelift/filetests/filetests/wasm/multi-val-tons-of-results.clif +++ b/cranelift/filetests/filetests/wasm/multi-val-tons-of-results.clif @@ -1,5 +1,6 @@ test compile target x86_64 haswell +target aarch64 function %return_20_i32s() -> i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 { block0: From fe97659813b8ec3fc7a29ce7be8a489c210904a5 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 2 Jun 2020 16:57:50 -0700 Subject: [PATCH 2/2] Address review comments. --- cranelift/codegen/src/isa/aarch64/abi.rs | 179 ++++++++++-------- .../codegen/src/isa/aarch64/inst/args.rs | 28 ++- .../codegen/src/isa/aarch64/inst/emit.rs | 70 +++---- .../src/isa/aarch64/inst/emit_tests.rs | 38 +++- .../codegen/src/isa/aarch64/inst/imms.rs | 7 +- cranelift/codegen/src/isa/aarch64/inst/mod.rs | 4 + cranelift/codegen/src/isa/aarch64/lower.rs | 6 +- .../codegen/src/isa/aarch64/lower_inst.rs | 4 +- cranelift/codegen/src/isa/aarch64/mod.rs | 2 +- cranelift/codegen/src/isa/x64/abi.rs | 10 +- cranelift/codegen/src/machinst/abi.rs | 15 +- cranelift/codegen/src/machinst/lower.rs | 14 +- .../filetests/vcode/aarch64/stack-limit.clif | 16 +- 13 files changed, 224 insertions(+), 169 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 235d9031a8..9439ef55df 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -73,9 +73,9 @@ //! https://searchfox.org/mozilla-central/rev/bc3600def806859c31b2c7ac06e3d69271052a89/js/src/wasm/WasmStubs.h#134 //! //! In brief: -//! - Returns are processed in *reverse* order. -//! - The first return in this order (so the last return) goes into the ordinary -//! 
return register, X0. +//! - Return values are processed in *reverse* order. +//! - The first return value in this order (so the last return) goes into the +//! ordinary return register, X0. //! - Any further returns go in a struct-return area, allocated upwards (in //! address order) during the reverse traversal. //! - This struct-return area is provided by the caller, and a pointer to its @@ -98,6 +98,7 @@ use crate::isa; use crate::isa::aarch64::{inst::*, lower::ty_bits}; use crate::machinst::*; use crate::settings; +use crate::{CodegenError, CodegenResult}; use alloc::boxed::Box; use alloc::vec::Vec; @@ -134,6 +135,11 @@ struct ABISig { call_conv: isa::CallConv, } +/// This is the limit for the size of argument and return-value areas on the +/// stack. We place a reasonable limit here to avoid integer overflow issues +/// with 32-bit arithmetic: for now, 128 MB. +static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024; + // Spidermonkey specific ABI convention. /// This is SpiderMonkey's `WasmTableCallSigReg`. @@ -208,14 +214,15 @@ enum ArgsOrRets { /// Process a list of parameters or return values and allocate them to X-regs, /// V-regs, and stack slots. /// -/// Returns the list of argument locations, and the stack-space used (rounded up -/// to a 16-byte-aligned boundary). +/// Returns the list of argument locations, the stack-space used (rounded up +/// to a 16-byte-aligned boundary), and if `add_ret_area_ptr` was passed, the +/// index of the extra synthetic arg that was added. fn compute_arg_locs( call_conv: isa::CallConv, params: &[ir::AbiParam], args_or_rets: ArgsOrRets, add_ret_area_ptr: bool, -) -> (Vec, i64) { +) -> CodegenResult<(Vec, i64, Option)> { let is_baldrdash = call_conv.extends_baldrdash(); // See AArch64 ABI (https://c9x.me/compile/bib/abi-arm64.pdf), sections 5.4. @@ -290,7 +297,7 @@ fn compute_arg_locs( ret.reverse(); } - if add_ret_area_ptr { + let extra_arg = if add_ret_area_ptr { debug_assert!(args_or_rets == ArgsOrRets::Args); if next_xreg < max_reg_vals { ret.push(ABIArg::Reg(xreg(next_xreg).to_real_reg(), I64)); @@ -298,35 +305,39 @@ fn compute_arg_locs( ret.push(ABIArg::Stack(next_stack as i64, I64)); next_stack += 8; } - } + Some(ret.len() - 1) + } else { + None + }; next_stack = (next_stack + 15) & !15; - (ret, next_stack as i64) + // To avoid overflow issues, limit the arg/return size to something + // reasonable -- here, 128 MB. + if next_stack > STACK_ARG_RET_SIZE_LIMIT { + return Err(CodegenError::ImplLimitExceeded); + } + + Ok((ret, next_stack as i64, extra_arg)) } impl ABISig { - fn from_func_sig(sig: &ir::Signature) -> ABISig { + fn from_func_sig(sig: &ir::Signature) -> CodegenResult { // Compute args and retvals from signature. Handle retvals first, // because we may need to add a return-area arg to the args. 
- let (rets, stack_ret_space) = compute_arg_locs( + let (rets, stack_ret_space, _) = compute_arg_locs( sig.call_conv, &sig.returns, ArgsOrRets::Rets, /* extra ret-area ptr = */ false, - ); + )?; let need_stack_return_area = stack_ret_space > 0; - let (args, stack_arg_space) = compute_arg_locs( + let (args, stack_arg_space, stack_ret_arg) = compute_arg_locs( sig.call_conv, &sig.params, ArgsOrRets::Args, need_stack_return_area, - ); - let stack_ret_arg = if need_stack_return_area { - Some(args.len() - 1) - } else { - None - }; + )?; trace!( "ABISig: sig {:?} => args = {:?} rets = {:?} arg stack = {} ret stack = {} stack_ret_arg = {:?}", @@ -338,14 +349,14 @@ impl ABISig { stack_ret_arg ); - ABISig { + Ok(ABISig { args, rets, stack_arg_space, stack_ret_space, stack_ret_arg, call_conv: sig.call_conv, - } + }) } } @@ -446,15 +457,7 @@ fn gen_stack_limit(f: &ir::Function, abi: &ABISig, gv: ir::GlobalValue) -> (Reg, } => { let base = generate_gv(f, abi, base, insts); let into_reg = writable_spilltmp_reg(); - let mem = if let Some(offset) = - UImm12Scaled::maybe_from_i64(offset.into(), ir::types::I8) - { - MemArg::UnsignedOffset(base, offset) - } else { - let offset: i64 = offset.into(); - insts.extend(Inst::load_constant(into_reg, offset as u64)); - MemArg::RegReg(base, into_reg.to_reg()) - }; + let mem = MemArg::RegOffset(base, offset.into(), I64); insts.push(Inst::ULoad64 { rd: into_reg, mem, @@ -481,10 +484,10 @@ fn get_special_purpose_param_register( impl AArch64ABIBody { /// Create a new body ABI instance. - pub fn new(f: &ir::Function, flags: settings::Flags) -> Self { + pub fn new(f: &ir::Function, flags: settings::Flags) -> CodegenResult { debug!("AArch64 ABI: func signature {:?}", f.signature); - let sig = ABISig::from_func_sig(&f.signature); + let sig = ABISig::from_func_sig(&f.signature)?; let call_conv = f.signature.call_conv; // Only these calling conventions are supported. 
@@ -517,7 +520,7 @@ impl AArch64ABIBody { .map(|reg| (reg, Vec::new())) .or_else(|| f.stack_limit.map(|gv| gen_stack_limit(f, &sig, gv))); - Self { + Ok(Self { sig, stackslots, stackslots_size: stack_offset, @@ -529,7 +532,7 @@ impl AArch64ABIBody { flags, is_leaf: f.is_leaf(), stack_limit, - } + }) } /// Returns the offset from FP to the argument area, i.e., jumping over the saved FP, return @@ -635,15 +638,22 @@ impl AArch64ABIBody { fn load_stack(mem: MemArg, into_reg: Writable, ty: Type) -> Inst { match ty { - types::B1 - | types::B8 - | types::I8 - | types::B16 - | types::I16 - | types::B32 - | types::I32 - | types::B64 - | types::I64 => Inst::ULoad64 { + types::B1 | types::B8 | types::I8 => Inst::ULoad8 { + rd: into_reg, + mem, + srcloc: None, + }, + types::B16 | types::I16 => Inst::ULoad16 { + rd: into_reg, + mem, + srcloc: None, + }, + types::B32 | types::I32 => Inst::ULoad32 { + rd: into_reg, + mem, + srcloc: None, + }, + types::B64 | types::I64 => Inst::ULoad64 { rd: into_reg, mem, srcloc: None, @@ -664,15 +674,22 @@ fn load_stack(mem: MemArg, into_reg: Writable, ty: Type) -> Inst { fn store_stack(mem: MemArg, from_reg: Reg, ty: Type) -> Inst { match ty { - types::B1 - | types::B8 - | types::I8 - | types::B16 - | types::I16 - | types::B32 - | types::I32 - | types::B64 - | types::I64 => Inst::Store64 { + types::B1 | types::B8 | types::I8 => Inst::Store8 { + rd: from_reg, + mem, + srcloc: None, + }, + types::B16 | types::I16 => Inst::Store16 { + rd: from_reg, + mem, + srcloc: None, + }, + types::B32 | types::I32 => Inst::Store32 { + rd: from_reg, + mem, + srcloc: None, + }, + types::B64 | types::I64 => Inst::Store64 { rd: from_reg, mem, srcloc: None, @@ -791,17 +808,14 @@ fn get_caller_saves(call_conv: isa::CallConv) -> Vec> { impl ABIBody for AArch64ABIBody { type I = Inst; - fn needed_tmps(&self) -> usize { - if self.sig.stack_ret_arg.is_some() { - 1 - } else { - 0 - } + fn temp_needed(&self) -> bool { + self.sig.stack_ret_arg.is_some() } - fn init_with_tmps(&mut self, tmps: &[Writable]) { + fn init(&mut self, maybe_tmp: Option>) { if self.sig.stack_ret_arg.is_some() { - self.ret_area_ptr = Some(tmps[0]); + assert!(maybe_tmp.is_some()); + self.ret_area_ptr = maybe_tmp; } } @@ -845,14 +859,14 @@ impl ABIBody for AArch64ABIBody { match &self.sig.args[idx] { &ABIArg::Reg(r, ty) => Inst::gen_move(into_reg, r.to_reg(), ty), &ABIArg::Stack(off, ty) => load_stack( - MemArg::FPOffset(self.fp_to_arg_offset() + off), + MemArg::FPOffset(self.fp_to_arg_offset() + off, ty), into_reg, ty, ), } } - fn gen_retval_area_setup(&self) -> Vec { + fn gen_retval_area_setup(&self) -> Option { if let Some(i) = self.sig.stack_ret_arg { let inst = self.gen_copy_arg_to_reg(i, self.ret_area_ptr.unwrap()); trace!( @@ -860,10 +874,10 @@ impl ABIBody for AArch64ABIBody { inst, self.ret_area_ptr.unwrap().to_reg() ); - vec![inst] + Some(inst) } else { trace!("gen_retval_area_setup: not needed"); - vec![] + None } } @@ -924,8 +938,7 @@ impl ABIBody for AArch64ABIBody { } _ => {} }; - let mem = MemArg::reg_maybe_offset(self.ret_area_ptr.unwrap().to_reg(), off, ty) - .expect("Return-value area is too large"); + let mem = MemArg::RegOffset(self.ret_area_ptr.unwrap().to_reg(), off, ty); ret.push(store_stack(mem, from_reg.to_reg(), ty)) } } @@ -961,7 +974,7 @@ impl ABIBody for AArch64ABIBody { let stack_off = self.stackslots[slot.as_u32() as usize] as i64; let sp_off: i64 = stack_off + (offset as i64); trace!("load_stackslot: slot {} -> sp_off {}", slot, sp_off); - load_stack(MemArg::NominalSPOffset(sp_off), into_reg, 
ty) + load_stack(MemArg::NominalSPOffset(sp_off, ty), into_reg, ty) } /// Store to a stackslot. @@ -971,7 +984,7 @@ impl ABIBody for AArch64ABIBody { let stack_off = self.stackslots[slot.as_u32() as usize] as i64; let sp_off: i64 = stack_off + (offset as i64); trace!("store_stackslot: slot {} -> sp_off {}", slot, sp_off); - store_stack(MemArg::NominalSPOffset(sp_off), from_reg, ty) + store_stack(MemArg::NominalSPOffset(sp_off, ty), from_reg, ty) } /// Produce an instruction that computes a stackslot address. @@ -982,7 +995,7 @@ impl ABIBody for AArch64ABIBody { let sp_off: i64 = stack_off + (offset as i64); Inst::LoadAddr { rd: into_reg, - mem: MemArg::NominalSPOffset(sp_off), + mem: MemArg::NominalSPOffset(sp_off, I8), } } @@ -993,7 +1006,7 @@ impl ABIBody for AArch64ABIBody { let spill_off = islot * 8; let sp_off = self.stackslots_size as i64 + spill_off; trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off); - load_stack(MemArg::NominalSPOffset(sp_off), into_reg, ty) + load_stack(MemArg::NominalSPOffset(sp_off, ty), into_reg, ty) } /// Store to a spillslot. @@ -1003,7 +1016,7 @@ impl ABIBody for AArch64ABIBody { let spill_off = islot * 8; let sp_off = self.stackslots_size as i64 + spill_off; trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off); - store_stack(MemArg::NominalSPOffset(sp_off), from_reg, ty) + store_stack(MemArg::NominalSPOffset(sp_off, ty), from_reg, ty) } fn gen_prologue(&mut self) -> Vec { @@ -1290,17 +1303,17 @@ impl AArch64ABICall { extname: &ir::ExternalName, dist: RelocDistance, loc: ir::SourceLoc, - ) -> AArch64ABICall { - let sig = ABISig::from_func_sig(sig); + ) -> CodegenResult { + let sig = ABISig::from_func_sig(sig)?; let (uses, defs) = abisig_to_uses_and_defs(&sig); - AArch64ABICall { + Ok(AArch64ABICall { sig, uses, defs, dest: CallDest::ExtName(extname.clone(), dist), loc, opcode: ir::Opcode::Call, - } + }) } /// Create a callsite ABI object for a call to a function pointer with the @@ -1310,17 +1323,17 @@ impl AArch64ABICall { ptr: Reg, loc: ir::SourceLoc, opcode: ir::Opcode, - ) -> AArch64ABICall { - let sig = ABISig::from_func_sig(sig); + ) -> CodegenResult { + let sig = ABISig::from_func_sig(sig)?; let (uses, defs) = abisig_to_uses_and_defs(&sig); - AArch64ABICall { + Ok(AArch64ABICall { sig, uses, defs, dest: CallDest::Reg(ptr), loc, opcode, - } + }) } } @@ -1394,7 +1407,9 @@ impl ABICall for AArch64ABICall { from_reg, ty, )), - &ABIArg::Stack(off, ty) => ctx.emit(store_stack(MemArg::SPOffset(off), from_reg, ty)), + &ABIArg::Stack(off, ty) => { + ctx.emit(store_stack(MemArg::SPOffset(off, ty), from_reg, ty)) + } } } @@ -1409,7 +1424,7 @@ impl ABICall for AArch64ABICall { &ABIArg::Stack(off, ty) => { let ret_area_base = self.sig.stack_arg_space; ctx.emit(load_stack( - MemArg::SPOffset(off + ret_area_base), + MemArg::SPOffset(off + ret_area_base, ty), into_reg, ty, )); @@ -1427,7 +1442,7 @@ impl ABICall for AArch64ABICall { let ret_area_base = self.sig.stack_arg_space; ctx.emit(Inst::LoadAddr { rd, - mem: MemArg::SPOffset(ret_area_base), + mem: MemArg::SPOffset(ret_area_base, I8), }); self.emit_copy_reg_to_arg(ctx, i, rd.to_reg()); } diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs index 0ea61a0404..dd41912479 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/args.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs @@ -145,11 +145,15 @@ pub enum MemArg { /// Reference to a "label": e.g., a symbol. Label(MemLabel), + /// Arbitrary offset from a register. 
Converted to generation of large + /// offsets with multiple instructions as necessary during code emission. + RegOffset(Reg, i64, Type), + /// Offset from the stack pointer. - SPOffset(i64), + SPOffset(i64, Type), /// Offset from the frame pointer. - FPOffset(i64), + FPOffset(i64, Type), /// Offset from the "nominal stack pointer", which is where the real SP is /// just after stack and spill slots are allocated in the function prologue. @@ -163,7 +167,7 @@ pub enum MemArg { /// SP" is where the actual SP is after the function prologue and before /// clobber pushes. See the diagram in the documentation for /// [crate::isa::aarch64::abi](the ABI module) for more details. - NominalSPOffset(i64), + NominalSPOffset(i64, Type), } impl MemArg { @@ -174,17 +178,6 @@ impl MemArg { MemArg::UnsignedOffset(reg, UImm12Scaled::zero(I64)) } - /// Memory reference using an address in a register and an offset, if possible. - pub fn reg_maybe_offset(reg: Reg, offset: i64, value_type: Type) -> Option { - if let Some(simm9) = SImm9::maybe_from_i64(offset) { - Some(MemArg::Unscaled(reg, simm9)) - } else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(offset, value_type) { - Some(MemArg::UnsignedOffset(reg, uimm12s)) - } else { - None - } - } - /// Memory reference using the sum of two registers as an address. pub fn reg_plus_reg(reg1: Reg, reg2: Reg) -> MemArg { MemArg::RegReg(reg1, reg2) @@ -431,8 +424,11 @@ impl ShowWithRRU for MemArg { simm9.show_rru(mb_rru) ), // Eliminated by `mem_finalize()`. - &MemArg::SPOffset(..) | &MemArg::FPOffset(..) | &MemArg::NominalSPOffset(..) => { - panic!("Unexpected stack-offset mem-arg mode!") + &MemArg::SPOffset(..) + | &MemArg::FPOffset(..) + | &MemArg::NominalSPOffset(..) + | &MemArg::RegOffset(..) => { + panic!("Unexpected pseudo mem-arg mode (stack-offset or generic reg-offset)!") } } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 99aade9b30..81b238adc4 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -5,6 +5,7 @@ use crate::ir::constant::ConstantData; use crate::ir::types::*; use crate::ir::TrapCode; use crate::isa::aarch64::inst::*; +use crate::isa::aarch64::lower::ty_bits; use regalloc::{Reg, RegClass, Writable}; @@ -29,8 +30,12 @@ pub fn mem_finalize( state: &EmitState, ) -> (SmallVec<[Inst; 4]>, MemArg) { match mem { - &MemArg::SPOffset(off) | &MemArg::FPOffset(off) | &MemArg::NominalSPOffset(off) => { + &MemArg::RegOffset(_, off, ty) + | &MemArg::SPOffset(off, ty) + | &MemArg::FPOffset(off, ty) + | &MemArg::NominalSPOffset(off, ty) => { let basereg = match mem { + &MemArg::RegOffset(reg, _, _) => reg, &MemArg::SPOffset(..) | &MemArg::NominalSPOffset(..) => stack_reg(), &MemArg::FPOffset(..) => fp_reg(), _ => unreachable!(), @@ -52,6 +57,9 @@ pub fn mem_finalize( if let Some(simm9) = SImm9::maybe_from_i64(off) { let mem = MemArg::Unscaled(basereg, simm9); (smallvec![], mem) + } else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(off, ty) { + let mem = MemArg::UnsignedOffset(basereg, uimm12s); + (smallvec![], mem) } else { let tmp = writable_spilltmp_reg(); let mut const_insts = Inst::load_constant(tmp, off as u64); @@ -654,17 +662,17 @@ impl MachInstEmit for Inst { // This is the base opcode (top 10 bits) for the "unscaled // immediate" form (Unscaled). Other addressing modes will OR in // other values for bits 24/25 (bits 1/2 of this constant). - let op = match self { - &Inst::ULoad8 { .. 
} => 0b0011100001, - &Inst::SLoad8 { .. } => 0b0011100010, - &Inst::ULoad16 { .. } => 0b0111100001, - &Inst::SLoad16 { .. } => 0b0111100010, - &Inst::ULoad32 { .. } => 0b1011100001, - &Inst::SLoad32 { .. } => 0b1011100010, - &Inst::ULoad64 { .. } => 0b1111100001, - &Inst::FpuLoad32 { .. } => 0b1011110001, - &Inst::FpuLoad64 { .. } => 0b1111110001, - &Inst::FpuLoad128 { .. } => 0b0011110011, + let (op, bits) = match self { + &Inst::ULoad8 { .. } => (0b0011100001, 8), + &Inst::SLoad8 { .. } => (0b0011100010, 8), + &Inst::ULoad16 { .. } => (0b0111100001, 16), + &Inst::SLoad16 { .. } => (0b0111100010, 16), + &Inst::ULoad32 { .. } => (0b1011100001, 32), + &Inst::SLoad32 { .. } => (0b1011100010, 32), + &Inst::ULoad64 { .. } => (0b1111100001, 64), + &Inst::FpuLoad32 { .. } => (0b1011110001, 32), + &Inst::FpuLoad64 { .. } => (0b1111110001, 64), + &Inst::FpuLoad128 { .. } => (0b0011110011, 128), _ => unreachable!(), }; @@ -678,6 +686,9 @@ impl MachInstEmit for Inst { sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd)); } &MemArg::UnsignedOffset(reg, uimm12scaled) => { + if uimm12scaled.value() != 0 { + assert_eq!(bits, ty_bits(uimm12scaled.scale_ty())); + } sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd)); } &MemArg::RegReg(r1, r2) => { @@ -686,19 +697,7 @@ impl MachInstEmit for Inst { )); } &MemArg::RegScaled(r1, r2, ty) | &MemArg::RegScaledExtended(r1, r2, ty, _) => { - match (ty, self) { - (I8, &Inst::ULoad8 { .. }) => {} - (I8, &Inst::SLoad8 { .. }) => {} - (I16, &Inst::ULoad16 { .. }) => {} - (I16, &Inst::SLoad16 { .. }) => {} - (I32, &Inst::ULoad32 { .. }) => {} - (I32, &Inst::SLoad32 { .. }) => {} - (I64, &Inst::ULoad64 { .. }) => {} - (F32, &Inst::FpuLoad32 { .. }) => {} - (F64, &Inst::FpuLoad64 { .. }) => {} - (I128, &Inst::FpuLoad128 { .. }) => {} - _ => panic!("Mismatching reg-scaling type in MemArg"), - } + assert_eq!(bits, ty_bits(ty)); let extendop = match &mem { &MemArg::RegScaled(..) => None, &MemArg::RegScaledExtended(_, _, _, op) => Some(op), @@ -746,6 +745,7 @@ impl MachInstEmit for Inst { &MemArg::SPOffset(..) | &MemArg::FPOffset(..) | &MemArg::NominalSPOffset(..) => panic!("Should not see stack-offset here!"), + &MemArg::RegOffset(..) => panic!("SHould not see generic reg-offset here!"), } } @@ -791,14 +791,14 @@ impl MachInstEmit for Inst { inst.emit(sink, flags, state); } - let op = match self { - &Inst::Store8 { .. } => 0b0011100000, - &Inst::Store16 { .. } => 0b0111100000, - &Inst::Store32 { .. } => 0b1011100000, - &Inst::Store64 { .. } => 0b1111100000, - &Inst::FpuStore32 { .. } => 0b1011110000, - &Inst::FpuStore64 { .. } => 0b1111110000, - &Inst::FpuStore128 { .. } => 0b0011110010, + let (op, bits) = match self { + &Inst::Store8 { .. } => (0b0011100000, 8), + &Inst::Store16 { .. } => (0b0111100000, 16), + &Inst::Store32 { .. } => (0b1011100000, 32), + &Inst::Store64 { .. } => (0b1111100000, 64), + &Inst::FpuStore32 { .. } => (0b1011110000, 32), + &Inst::FpuStore64 { .. } => (0b1111110000, 64), + &Inst::FpuStore128 { .. } => (0b0011110010, 128), _ => unreachable!(), }; @@ -812,6 +812,9 @@ impl MachInstEmit for Inst { sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd)); } &MemArg::UnsignedOffset(reg, uimm12scaled) => { + if uimm12scaled.value() != 0 { + assert_eq!(bits, ty_bits(uimm12scaled.scale_ty())); + } sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd)); } &MemArg::RegReg(r1, r2) => { @@ -843,6 +846,7 @@ impl MachInstEmit for Inst { &MemArg::SPOffset(..) | &MemArg::FPOffset(..) | &MemArg::NominalSPOffset(..) 
=> panic!("Should not see stack-offset here!"), + &MemArg::RegOffset(..) => panic!("SHould not see generic reg-offset here!"), } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index f9cd9237f9..6f302501d2 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -1311,7 +1311,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::FPOffset(32768), + mem: MemArg::FPOffset(32768, I8), srcloc: None, }, "100090D2B063308B010240F9", @@ -1320,7 +1320,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::FPOffset(-32768), + mem: MemArg::FPOffset(-32768, I8), srcloc: None, }, "F0FF8F92B063308B010240F9", @@ -1329,7 +1329,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::FPOffset(1048576), // 2^20 + mem: MemArg::FPOffset(1048576, I8), // 2^20 srcloc: None, }, "1002A0D2B063308B010240F9", @@ -1338,13 +1338,43 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::FPOffset(1048576 + 1), // 2^20 + 1 + mem: MemArg::FPOffset(1048576 + 1, I8), // 2^20 + 1 srcloc: None, }, "300080D21002A0F2B063308B010240F9", "movz x16, #1 ; movk x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]", )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::RegOffset(xreg(7), 8, I64), + srcloc: None, + }, + "E18040F8", + "ldur x1, [x7, #8]", + )); + + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::RegOffset(xreg(7), 1024, I64), + srcloc: None, + }, + "E10042F9", + "ldr x1, [x7, #1024]", + )); + + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::RegOffset(xreg(7), 1048576, I64), + srcloc: None, + }, + "1002A0D2F060308B010240F9", + "movz x16, #16, LSL #16 ; add x16, x7, x16, UXTX ; ldr x1, [x16]", + )); + insns.push(( Inst::Store8 { rd: xreg(1), diff --git a/cranelift/codegen/src/isa/aarch64/inst/imms.rs b/cranelift/codegen/src/isa/aarch64/inst/imms.rs index 7c473a83d2..20a8225e7a 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/imms.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/imms.rs @@ -259,7 +259,12 @@ impl UImm12Scaled { /// Value after scaling. pub fn value(&self) -> u32 { - self.value as u32 * self.scale_ty.bytes() + self.value as u32 + } + + /// The value type which is the scaling base. + pub fn scale_ty(&self) -> Type { + self.scale_ty } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 1d21613e04..6fb559dbb9 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -1004,6 +1004,9 @@ fn memarg_regs(memarg: &MemArg, collector: &mut RegUsageCollector) { &MemArg::SPOffset(..) | &MemArg::NominalSPOffset(..) => { collector.add_use(stack_reg()); } + &MemArg::RegOffset(r, ..) => { + collector.add_use(r); + } } } @@ -1318,6 +1321,7 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RUM) { &mut MemArg::FPOffset(..) | &mut MemArg::SPOffset(..) | &mut MemArg::NominalSPOffset(..) => {} + &mut MemArg::RegOffset(ref mut r, ..) 
=> map_use(m, r), }; } diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 10db3b1f07..129b332295 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -539,12 +539,10 @@ pub(crate) fn lower_address>( // TODO: support base_reg + scale * index_reg. For this, we would need to pattern-match shl or // mul instructions (Load/StoreComplex don't include scale factors). - // Handle one reg and offset that fits in immediate, if possible. + // Handle one reg and offset. if addends.len() == 1 { let reg = input_to_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64); - if let Some(memarg) = MemArg::reg_maybe_offset(reg, offset as i64, elem_ty) { - return memarg; - } + return MemArg::RegOffset(reg, offset as i64, elem_ty); } // Handle two regs and a zero offset, if possible. diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index f59c0a3f19..56fd628932 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1335,7 +1335,7 @@ pub(crate) fn lower_insn_to_regs>( assert!(inputs.len() == sig.params.len()); assert!(outputs.len() == sig.returns.len()); ( - AArch64ABICall::from_func(sig, &extname, dist, loc), + AArch64ABICall::from_func(sig, &extname, dist, loc)?, &inputs[..], ) } @@ -1344,7 +1344,7 @@ pub(crate) fn lower_insn_to_regs>( let sig = ctx.call_sig(insn).unwrap(); assert!(inputs.len() - 1 == sig.params.len()); assert!(outputs.len() == sig.returns.len()); - (AArch64ABICall::from_ptr(sig, ptr, loc, op), &inputs[1..]) + (AArch64ABICall::from_ptr(sig, ptr, loc, op)?, &inputs[1..]) } _ => unreachable!(), }; diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs index 3aa8c779aa..1b061045c0 100644 --- a/cranelift/codegen/src/isa/aarch64/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/mod.rs @@ -46,7 +46,7 @@ impl AArch64Backend { func: &Function, flags: settings::Flags, ) -> CodegenResult> { - let abi = Box::new(abi::AArch64ABIBody::new(func, flags)); + let abi = Box::new(abi::AArch64ABIBody::new(func, flags)?); compile::compile::(func, self, abi) } } diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 294bdc215a..4ba75e394c 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -184,11 +184,11 @@ impl X64ABIBody { impl ABIBody for X64ABIBody { type I = Inst; - fn needed_tmps(&self) -> usize { - 0 + fn temp_needed(&self) -> bool { + false } - fn init_with_tmps(&mut self, _: &[Writable]) {} + fn init(&mut self, _: Option>) {} fn flags(&self) -> &settings::Flags { &self.flags @@ -239,8 +239,8 @@ impl ABIBody for X64ABIBody { } } - fn gen_retval_area_setup(&self) -> Vec { - vec![] + fn gen_retval_area_setup(&self) -> Option { + None } fn gen_copy_reg_to_retval( diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index 7075b10689..6e5170c3f6 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -12,11 +12,14 @@ pub trait ABIBody { /// The instruction type for the ISA associated with this ABI. type I: VCodeInst; - /// How many temps are needed? - fn needed_tmps(&self) -> usize; + /// Does the ABI-body code need a temp reg? One will be provided to `init()` + /// as the `maybe_tmp` arg if so. + fn temp_needed(&self) -> bool; - /// Initialize, providing the requersted temps. 
-    fn init_with_tmps(&mut self, tmps: &[Writable<Reg>]);
+    /// Initialize. This is called after the ABIBody is constructed because it
+    /// may be provided with a temp vreg, which can only be allocated once the
+    /// lowering context exists.
+    fn init(&mut self, maybe_tmp: Option<Writable<Reg>>);
 
     /// Get the settings controlling this function's compilation.
     fn flags(&self) -> &settings::Flags;
@@ -40,12 +43,12 @@ pub trait ABIBody {
     /// register.
     fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Self::I;
 
-    /// Generate any setup instructions needed to save values to the
+    /// Generate any setup instruction needed to save values to the
     /// return-value area. This is usually used when there are multiple return
     /// values or an otherwise large return value that must be passed on the
     /// stack; typically the ABI specifies an extra hidden argument that is a
     /// pointer to that memory.
-    fn gen_retval_area_setup(&self) -> Vec<Self::I>;
+    fn gen_retval_area_setup(&self) -> Option<Self::I>;
 
     /// Generate an instruction which copies a source register to a return value slot.
     fn gen_copy_reg_to_retval(
diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs
index b83d400b74..a4bd09cfa0 100644
--- a/cranelift/codegen/src/machinst/lower.rs
+++ b/cranelift/codegen/src/machinst/lower.rs
@@ -383,7 +383,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
             let insn = self.vcode.abi().gen_copy_arg_to_reg(i, reg);
             self.emit(insn);
         }
-        for insn in self.vcode.abi().gen_retval_area_setup().into_iter() {
+        if let Some(insn) = self.vcode.abi().gen_retval_area_setup() {
             self.emit(insn);
         }
     }
@@ -652,11 +652,13 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
     pub fn lower<B: LowerBackend<MInst = I>>(mut self, backend: &B) -> CodegenResult<VCode<I>> {
         debug!("about to lower function: {:?}", self.f);
 
-        // Initialize the ABI object with any temps it needs.
-        let tmps: SmallVec<[Writable<Reg>; 4]> = (0..self.vcode.abi().needed_tmps())
-            .map(|_| self.alloc_tmp(RegClass::I64, I64))
-            .collect();
-        self.vcode.abi().init_with_tmps(&tmps[..]);
+        // Initialize the ABI object, giving it a temp if requested.
+        let maybe_tmp = if self.vcode.abi().temp_needed() {
+            Some(self.alloc_tmp(RegClass::I64, I64))
+        } else {
+            None
+        };
+        self.vcode.abi().init(maybe_tmp);
 
         // Get the pinned reg here (we only parameterize this function on `B`,
         // not the whole `Lower` impl).
diff --git a/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif b/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif
index b7dc3bf342..ffb8b9b599 100644
--- a/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif
+++ b/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif
@@ -64,8 +64,8 @@ block0(v0: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: ldr x16, [x0]
-; nextln: ldr x16, [x16, #4]
+; nextln: ldur x16, [x0]
+; nextln: ldur x16, [x16, #4]
 ; nextln: subs xzr, sp, x16
 ; nextln: b.hs 8
 ; nextln: udf
@@ -128,8 +128,8 @@ block0(v0: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: ldr x16, [x0]
-; nextln: ldr x16, [x16, #4]
+; nextln: ldur x16, [x0]
+; nextln: ldur x16, [x16, #4]
 ; nextln: add x16, x16, #32
 ; nextln: subs xzr, sp, x16
 ; nextln: b.hs 8
@@ -151,8 +151,8 @@ block0(v0: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: ldr x16, [x0]
-; nextln: ldr x16, [x16, #4]
+; nextln: ldur x16, [x0]
+; nextln: ldur x16, [x16, #4]
 ; nextln: subs xzr, sp, x16
 ; nextln: b.hs 8
 ; nextln: udf
@@ -179,9 +179,7 @@ block0(v0: i64):
 ; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp -; nextln: movz x16, #6784 -; nextln: movk x16, #6, LSL #16 -; nextln: ldr x16, [x0, x16] +; nextln: movz x16, #6784 ; movk x16, #6, LSL #16 ; add x16, x0, x16, UXTX ; ldr x16, [x16] ; nextln: add x16, x16, #32 ; nextln: subs xzr, sp, x16 ; nextln: b.hs 8
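
For illustration only (this is a note appended after the patch, not part of it): the
standalone Rust sketch below mirrors the return-value placement that the aarch64
abi.rs module documentation above describes, under the assumptions that every
return value is an i64 (so each stack slot is 8 bytes) and that only one ordinary
return register (x0) is available, as in the baldrdash case. The names `RetLoc`
and `ret_locs` are hypothetical and exist only for this sketch; the real logic
lives in `compute_arg_locs` with `ArgsOrRets::Rets`.

// Sketch: where each of `n` i64 return values lands under the SpiderMonkey
// multi-value return convention described in the module docs.
#[derive(Debug)]
enum RetLoc {
    /// An ordinary return register.
    Reg(&'static str),
    /// Offset in bytes from the caller-provided return-area pointer P.
    Stack(i64),
}

fn ret_locs(n: usize) -> Vec<(usize, RetLoc)> {
    let mut locs = Vec::new();
    let mut next_stack = 0i64;
    // Traverse return values in *reverse* order, as the docs describe.
    for i in (0..n).rev() {
        if locs.is_empty() {
            // The last return value gets the ordinary return register.
            locs.push((i, RetLoc::Reg("x0")));
        } else {
            // Further values go into the struct-return area, allocated
            // upward (in address order) during the reverse traversal.
            locs.push((i, RetLoc::Stack(next_stack)));
            next_stack += 8;
        }
    }
    locs
}

fn main() {
    // Reproduces the 4-value example from the module docs:
    // v3 -> x0, v2 -> [P], v1 -> [P+8], v0 -> [P+16].
    for (idx, loc) in ret_locs(4) {
        println!("v{} -> {:?}", idx, loc);
    }
}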