diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 235d9031a8..9439ef55df 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -73,9 +73,9 @@ //! https://searchfox.org/mozilla-central/rev/bc3600def806859c31b2c7ac06e3d69271052a89/js/src/wasm/WasmStubs.h#134 //! //! In brief: -//! - Returns are processed in *reverse* order. -//! - The first return in this order (so the last return) goes into the ordinary -//! return register, X0. +//! - Return values are processed in *reverse* order. +//! - The first return value in this order (so the last return) goes into the +//! ordinary return register, X0. //! - Any further returns go in a struct-return area, allocated upwards (in //! address order) during the reverse traversal. //! - This struct-return area is provided by the caller, and a pointer to its @@ -98,6 +98,7 @@ use crate::isa; use crate::isa::aarch64::{inst::*, lower::ty_bits}; use crate::machinst::*; use crate::settings; +use crate::{CodegenError, CodegenResult}; use alloc::boxed::Box; use alloc::vec::Vec; @@ -134,6 +135,11 @@ struct ABISig { call_conv: isa::CallConv, } +/// This is the limit for the size of argument and return-value areas on the +/// stack. We place a reasonable limit here to avoid integer overflow issues +/// with 32-bit arithmetic: for now, 128 MB. +static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024; + // Spidermonkey specific ABI convention. /// This is SpiderMonkey's `WasmTableCallSigReg`. @@ -208,14 +214,15 @@ enum ArgsOrRets { /// Process a list of parameters or return values and allocate them to X-regs, /// V-regs, and stack slots. /// -/// Returns the list of argument locations, and the stack-space used (rounded up -/// to a 16-byte-aligned boundary). +/// Returns the list of argument locations, the stack-space used (rounded up +/// to a 16-byte-aligned boundary), and if `add_ret_area_ptr` was passed, the +/// index of the extra synthetic arg that was added. fn compute_arg_locs( call_conv: isa::CallConv, params: &[ir::AbiParam], args_or_rets: ArgsOrRets, add_ret_area_ptr: bool, -) -> (Vec, i64) { +) -> CodegenResult<(Vec, i64, Option)> { let is_baldrdash = call_conv.extends_baldrdash(); // See AArch64 ABI (https://c9x.me/compile/bib/abi-arm64.pdf), sections 5.4. @@ -290,7 +297,7 @@ fn compute_arg_locs( ret.reverse(); } - if add_ret_area_ptr { + let extra_arg = if add_ret_area_ptr { debug_assert!(args_or_rets == ArgsOrRets::Args); if next_xreg < max_reg_vals { ret.push(ABIArg::Reg(xreg(next_xreg).to_real_reg(), I64)); @@ -298,35 +305,39 @@ fn compute_arg_locs( ret.push(ABIArg::Stack(next_stack as i64, I64)); next_stack += 8; } - } + Some(ret.len() - 1) + } else { + None + }; next_stack = (next_stack + 15) & !15; - (ret, next_stack as i64) + // To avoid overflow issues, limit the arg/return size to something + // reasonable -- here, 128 MB. + if next_stack > STACK_ARG_RET_SIZE_LIMIT { + return Err(CodegenError::ImplLimitExceeded); + } + + Ok((ret, next_stack as i64, extra_arg)) } impl ABISig { - fn from_func_sig(sig: &ir::Signature) -> ABISig { + fn from_func_sig(sig: &ir::Signature) -> CodegenResult { // Compute args and retvals from signature. Handle retvals first, // because we may need to add a return-area arg to the args. 
- let (rets, stack_ret_space) = compute_arg_locs( + let (rets, stack_ret_space, _) = compute_arg_locs( sig.call_conv, &sig.returns, ArgsOrRets::Rets, /* extra ret-area ptr = */ false, - ); + )?; let need_stack_return_area = stack_ret_space > 0; - let (args, stack_arg_space) = compute_arg_locs( + let (args, stack_arg_space, stack_ret_arg) = compute_arg_locs( sig.call_conv, &sig.params, ArgsOrRets::Args, need_stack_return_area, - ); - let stack_ret_arg = if need_stack_return_area { - Some(args.len() - 1) - } else { - None - }; + )?; trace!( "ABISig: sig {:?} => args = {:?} rets = {:?} arg stack = {} ret stack = {} stack_ret_arg = {:?}", @@ -338,14 +349,14 @@ impl ABISig { stack_ret_arg ); - ABISig { + Ok(ABISig { args, rets, stack_arg_space, stack_ret_space, stack_ret_arg, call_conv: sig.call_conv, - } + }) } } @@ -446,15 +457,7 @@ fn gen_stack_limit(f: &ir::Function, abi: &ABISig, gv: ir::GlobalValue) -> (Reg, } => { let base = generate_gv(f, abi, base, insts); let into_reg = writable_spilltmp_reg(); - let mem = if let Some(offset) = - UImm12Scaled::maybe_from_i64(offset.into(), ir::types::I8) - { - MemArg::UnsignedOffset(base, offset) - } else { - let offset: i64 = offset.into(); - insts.extend(Inst::load_constant(into_reg, offset as u64)); - MemArg::RegReg(base, into_reg.to_reg()) - }; + let mem = MemArg::RegOffset(base, offset.into(), I64); insts.push(Inst::ULoad64 { rd: into_reg, mem, @@ -481,10 +484,10 @@ fn get_special_purpose_param_register( impl AArch64ABIBody { /// Create a new body ABI instance. - pub fn new(f: &ir::Function, flags: settings::Flags) -> Self { + pub fn new(f: &ir::Function, flags: settings::Flags) -> CodegenResult { debug!("AArch64 ABI: func signature {:?}", f.signature); - let sig = ABISig::from_func_sig(&f.signature); + let sig = ABISig::from_func_sig(&f.signature)?; let call_conv = f.signature.call_conv; // Only these calling conventions are supported. 
@@ -517,7 +520,7 @@ impl AArch64ABIBody { .map(|reg| (reg, Vec::new())) .or_else(|| f.stack_limit.map(|gv| gen_stack_limit(f, &sig, gv))); - Self { + Ok(Self { sig, stackslots, stackslots_size: stack_offset, @@ -529,7 +532,7 @@ impl AArch64ABIBody { flags, is_leaf: f.is_leaf(), stack_limit, - } + }) } /// Returns the offset from FP to the argument area, i.e., jumping over the saved FP, return @@ -635,15 +638,22 @@ impl AArch64ABIBody { fn load_stack(mem: MemArg, into_reg: Writable, ty: Type) -> Inst { match ty { - types::B1 - | types::B8 - | types::I8 - | types::B16 - | types::I16 - | types::B32 - | types::I32 - | types::B64 - | types::I64 => Inst::ULoad64 { + types::B1 | types::B8 | types::I8 => Inst::ULoad8 { + rd: into_reg, + mem, + srcloc: None, + }, + types::B16 | types::I16 => Inst::ULoad16 { + rd: into_reg, + mem, + srcloc: None, + }, + types::B32 | types::I32 => Inst::ULoad32 { + rd: into_reg, + mem, + srcloc: None, + }, + types::B64 | types::I64 => Inst::ULoad64 { rd: into_reg, mem, srcloc: None, @@ -664,15 +674,22 @@ fn load_stack(mem: MemArg, into_reg: Writable, ty: Type) -> Inst { fn store_stack(mem: MemArg, from_reg: Reg, ty: Type) -> Inst { match ty { - types::B1 - | types::B8 - | types::I8 - | types::B16 - | types::I16 - | types::B32 - | types::I32 - | types::B64 - | types::I64 => Inst::Store64 { + types::B1 | types::B8 | types::I8 => Inst::Store8 { + rd: from_reg, + mem, + srcloc: None, + }, + types::B16 | types::I16 => Inst::Store16 { + rd: from_reg, + mem, + srcloc: None, + }, + types::B32 | types::I32 => Inst::Store32 { + rd: from_reg, + mem, + srcloc: None, + }, + types::B64 | types::I64 => Inst::Store64 { rd: from_reg, mem, srcloc: None, @@ -791,17 +808,14 @@ fn get_caller_saves(call_conv: isa::CallConv) -> Vec> { impl ABIBody for AArch64ABIBody { type I = Inst; - fn needed_tmps(&self) -> usize { - if self.sig.stack_ret_arg.is_some() { - 1 - } else { - 0 - } + fn temp_needed(&self) -> bool { + self.sig.stack_ret_arg.is_some() } - fn init_with_tmps(&mut self, tmps: &[Writable]) { + fn init(&mut self, maybe_tmp: Option>) { if self.sig.stack_ret_arg.is_some() { - self.ret_area_ptr = Some(tmps[0]); + assert!(maybe_tmp.is_some()); + self.ret_area_ptr = maybe_tmp; } } @@ -845,14 +859,14 @@ impl ABIBody for AArch64ABIBody { match &self.sig.args[idx] { &ABIArg::Reg(r, ty) => Inst::gen_move(into_reg, r.to_reg(), ty), &ABIArg::Stack(off, ty) => load_stack( - MemArg::FPOffset(self.fp_to_arg_offset() + off), + MemArg::FPOffset(self.fp_to_arg_offset() + off, ty), into_reg, ty, ), } } - fn gen_retval_area_setup(&self) -> Vec { + fn gen_retval_area_setup(&self) -> Option { if let Some(i) = self.sig.stack_ret_arg { let inst = self.gen_copy_arg_to_reg(i, self.ret_area_ptr.unwrap()); trace!( @@ -860,10 +874,10 @@ impl ABIBody for AArch64ABIBody { inst, self.ret_area_ptr.unwrap().to_reg() ); - vec![inst] + Some(inst) } else { trace!("gen_retval_area_setup: not needed"); - vec![] + None } } @@ -924,8 +938,7 @@ impl ABIBody for AArch64ABIBody { } _ => {} }; - let mem = MemArg::reg_maybe_offset(self.ret_area_ptr.unwrap().to_reg(), off, ty) - .expect("Return-value area is too large"); + let mem = MemArg::RegOffset(self.ret_area_ptr.unwrap().to_reg(), off, ty); ret.push(store_stack(mem, from_reg.to_reg(), ty)) } } @@ -961,7 +974,7 @@ impl ABIBody for AArch64ABIBody { let stack_off = self.stackslots[slot.as_u32() as usize] as i64; let sp_off: i64 = stack_off + (offset as i64); trace!("load_stackslot: slot {} -> sp_off {}", slot, sp_off); - load_stack(MemArg::NominalSPOffset(sp_off), into_reg, 
ty) + load_stack(MemArg::NominalSPOffset(sp_off, ty), into_reg, ty) } /// Store to a stackslot. @@ -971,7 +984,7 @@ impl ABIBody for AArch64ABIBody { let stack_off = self.stackslots[slot.as_u32() as usize] as i64; let sp_off: i64 = stack_off + (offset as i64); trace!("store_stackslot: slot {} -> sp_off {}", slot, sp_off); - store_stack(MemArg::NominalSPOffset(sp_off), from_reg, ty) + store_stack(MemArg::NominalSPOffset(sp_off, ty), from_reg, ty) } /// Produce an instruction that computes a stackslot address. @@ -982,7 +995,7 @@ impl ABIBody for AArch64ABIBody { let sp_off: i64 = stack_off + (offset as i64); Inst::LoadAddr { rd: into_reg, - mem: MemArg::NominalSPOffset(sp_off), + mem: MemArg::NominalSPOffset(sp_off, I8), } } @@ -993,7 +1006,7 @@ impl ABIBody for AArch64ABIBody { let spill_off = islot * 8; let sp_off = self.stackslots_size as i64 + spill_off; trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off); - load_stack(MemArg::NominalSPOffset(sp_off), into_reg, ty) + load_stack(MemArg::NominalSPOffset(sp_off, ty), into_reg, ty) } /// Store to a spillslot. @@ -1003,7 +1016,7 @@ impl ABIBody for AArch64ABIBody { let spill_off = islot * 8; let sp_off = self.stackslots_size as i64 + spill_off; trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off); - store_stack(MemArg::NominalSPOffset(sp_off), from_reg, ty) + store_stack(MemArg::NominalSPOffset(sp_off, ty), from_reg, ty) } fn gen_prologue(&mut self) -> Vec { @@ -1290,17 +1303,17 @@ impl AArch64ABICall { extname: &ir::ExternalName, dist: RelocDistance, loc: ir::SourceLoc, - ) -> AArch64ABICall { - let sig = ABISig::from_func_sig(sig); + ) -> CodegenResult { + let sig = ABISig::from_func_sig(sig)?; let (uses, defs) = abisig_to_uses_and_defs(&sig); - AArch64ABICall { + Ok(AArch64ABICall { sig, uses, defs, dest: CallDest::ExtName(extname.clone(), dist), loc, opcode: ir::Opcode::Call, - } + }) } /// Create a callsite ABI object for a call to a function pointer with the @@ -1310,17 +1323,17 @@ impl AArch64ABICall { ptr: Reg, loc: ir::SourceLoc, opcode: ir::Opcode, - ) -> AArch64ABICall { - let sig = ABISig::from_func_sig(sig); + ) -> CodegenResult { + let sig = ABISig::from_func_sig(sig)?; let (uses, defs) = abisig_to_uses_and_defs(&sig); - AArch64ABICall { + Ok(AArch64ABICall { sig, uses, defs, dest: CallDest::Reg(ptr), loc, opcode, - } + }) } } @@ -1394,7 +1407,9 @@ impl ABICall for AArch64ABICall { from_reg, ty, )), - &ABIArg::Stack(off, ty) => ctx.emit(store_stack(MemArg::SPOffset(off), from_reg, ty)), + &ABIArg::Stack(off, ty) => { + ctx.emit(store_stack(MemArg::SPOffset(off, ty), from_reg, ty)) + } } } @@ -1409,7 +1424,7 @@ impl ABICall for AArch64ABICall { &ABIArg::Stack(off, ty) => { let ret_area_base = self.sig.stack_arg_space; ctx.emit(load_stack( - MemArg::SPOffset(off + ret_area_base), + MemArg::SPOffset(off + ret_area_base, ty), into_reg, ty, )); @@ -1427,7 +1442,7 @@ impl ABICall for AArch64ABICall { let ret_area_base = self.sig.stack_arg_space; ctx.emit(Inst::LoadAddr { rd, - mem: MemArg::SPOffset(ret_area_base), + mem: MemArg::SPOffset(ret_area_base, I8), }); self.emit_copy_reg_to_arg(ctx, i, rd.to_reg()); } diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs index 0ea61a0404..dd41912479 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/args.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs @@ -145,11 +145,15 @@ pub enum MemArg { /// Reference to a "label": e.g., a symbol. Label(MemLabel), + /// Arbitrary offset from a register. 
Converted to generation of large + /// offsets with multiple instructions as necessary during code emission. + RegOffset(Reg, i64, Type), + /// Offset from the stack pointer. - SPOffset(i64), + SPOffset(i64, Type), /// Offset from the frame pointer. - FPOffset(i64), + FPOffset(i64, Type), /// Offset from the "nominal stack pointer", which is where the real SP is /// just after stack and spill slots are allocated in the function prologue. @@ -163,7 +167,7 @@ pub enum MemArg { /// SP" is where the actual SP is after the function prologue and before /// clobber pushes. See the diagram in the documentation for /// [crate::isa::aarch64::abi](the ABI module) for more details. - NominalSPOffset(i64), + NominalSPOffset(i64, Type), } impl MemArg { @@ -174,17 +178,6 @@ impl MemArg { MemArg::UnsignedOffset(reg, UImm12Scaled::zero(I64)) } - /// Memory reference using an address in a register and an offset, if possible. - pub fn reg_maybe_offset(reg: Reg, offset: i64, value_type: Type) -> Option { - if let Some(simm9) = SImm9::maybe_from_i64(offset) { - Some(MemArg::Unscaled(reg, simm9)) - } else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(offset, value_type) { - Some(MemArg::UnsignedOffset(reg, uimm12s)) - } else { - None - } - } - /// Memory reference using the sum of two registers as an address. pub fn reg_plus_reg(reg1: Reg, reg2: Reg) -> MemArg { MemArg::RegReg(reg1, reg2) @@ -431,8 +424,11 @@ impl ShowWithRRU for MemArg { simm9.show_rru(mb_rru) ), // Eliminated by `mem_finalize()`. - &MemArg::SPOffset(..) | &MemArg::FPOffset(..) | &MemArg::NominalSPOffset(..) => { - panic!("Unexpected stack-offset mem-arg mode!") + &MemArg::SPOffset(..) + | &MemArg::FPOffset(..) + | &MemArg::NominalSPOffset(..) + | &MemArg::RegOffset(..) => { + panic!("Unexpected pseudo mem-arg mode (stack-offset or generic reg-offset)!") } } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 99aade9b30..81b238adc4 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -5,6 +5,7 @@ use crate::ir::constant::ConstantData; use crate::ir::types::*; use crate::ir::TrapCode; use crate::isa::aarch64::inst::*; +use crate::isa::aarch64::lower::ty_bits; use regalloc::{Reg, RegClass, Writable}; @@ -29,8 +30,12 @@ pub fn mem_finalize( state: &EmitState, ) -> (SmallVec<[Inst; 4]>, MemArg) { match mem { - &MemArg::SPOffset(off) | &MemArg::FPOffset(off) | &MemArg::NominalSPOffset(off) => { + &MemArg::RegOffset(_, off, ty) + | &MemArg::SPOffset(off, ty) + | &MemArg::FPOffset(off, ty) + | &MemArg::NominalSPOffset(off, ty) => { let basereg = match mem { + &MemArg::RegOffset(reg, _, _) => reg, &MemArg::SPOffset(..) | &MemArg::NominalSPOffset(..) => stack_reg(), &MemArg::FPOffset(..) => fp_reg(), _ => unreachable!(), @@ -52,6 +57,9 @@ pub fn mem_finalize( if let Some(simm9) = SImm9::maybe_from_i64(off) { let mem = MemArg::Unscaled(basereg, simm9); (smallvec![], mem) + } else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(off, ty) { + let mem = MemArg::UnsignedOffset(basereg, uimm12s); + (smallvec![], mem) } else { let tmp = writable_spilltmp_reg(); let mut const_insts = Inst::load_constant(tmp, off as u64); @@ -654,17 +662,17 @@ impl MachInstEmit for Inst { // This is the base opcode (top 10 bits) for the "unscaled // immediate" form (Unscaled). Other addressing modes will OR in // other values for bits 24/25 (bits 1/2 of this constant). - let op = match self { - &Inst::ULoad8 { .. 
} => 0b0011100001, - &Inst::SLoad8 { .. } => 0b0011100010, - &Inst::ULoad16 { .. } => 0b0111100001, - &Inst::SLoad16 { .. } => 0b0111100010, - &Inst::ULoad32 { .. } => 0b1011100001, - &Inst::SLoad32 { .. } => 0b1011100010, - &Inst::ULoad64 { .. } => 0b1111100001, - &Inst::FpuLoad32 { .. } => 0b1011110001, - &Inst::FpuLoad64 { .. } => 0b1111110001, - &Inst::FpuLoad128 { .. } => 0b0011110011, + let (op, bits) = match self { + &Inst::ULoad8 { .. } => (0b0011100001, 8), + &Inst::SLoad8 { .. } => (0b0011100010, 8), + &Inst::ULoad16 { .. } => (0b0111100001, 16), + &Inst::SLoad16 { .. } => (0b0111100010, 16), + &Inst::ULoad32 { .. } => (0b1011100001, 32), + &Inst::SLoad32 { .. } => (0b1011100010, 32), + &Inst::ULoad64 { .. } => (0b1111100001, 64), + &Inst::FpuLoad32 { .. } => (0b1011110001, 32), + &Inst::FpuLoad64 { .. } => (0b1111110001, 64), + &Inst::FpuLoad128 { .. } => (0b0011110011, 128), _ => unreachable!(), }; @@ -678,6 +686,9 @@ impl MachInstEmit for Inst { sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd)); } &MemArg::UnsignedOffset(reg, uimm12scaled) => { + if uimm12scaled.value() != 0 { + assert_eq!(bits, ty_bits(uimm12scaled.scale_ty())); + } sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd)); } &MemArg::RegReg(r1, r2) => { @@ -686,19 +697,7 @@ impl MachInstEmit for Inst { )); } &MemArg::RegScaled(r1, r2, ty) | &MemArg::RegScaledExtended(r1, r2, ty, _) => { - match (ty, self) { - (I8, &Inst::ULoad8 { .. }) => {} - (I8, &Inst::SLoad8 { .. }) => {} - (I16, &Inst::ULoad16 { .. }) => {} - (I16, &Inst::SLoad16 { .. }) => {} - (I32, &Inst::ULoad32 { .. }) => {} - (I32, &Inst::SLoad32 { .. }) => {} - (I64, &Inst::ULoad64 { .. }) => {} - (F32, &Inst::FpuLoad32 { .. }) => {} - (F64, &Inst::FpuLoad64 { .. }) => {} - (I128, &Inst::FpuLoad128 { .. }) => {} - _ => panic!("Mismatching reg-scaling type in MemArg"), - } + assert_eq!(bits, ty_bits(ty)); let extendop = match &mem { &MemArg::RegScaled(..) => None, &MemArg::RegScaledExtended(_, _, _, op) => Some(op), @@ -746,6 +745,7 @@ impl MachInstEmit for Inst { &MemArg::SPOffset(..) | &MemArg::FPOffset(..) | &MemArg::NominalSPOffset(..) => panic!("Should not see stack-offset here!"), + &MemArg::RegOffset(..) => panic!("SHould not see generic reg-offset here!"), } } @@ -791,14 +791,14 @@ impl MachInstEmit for Inst { inst.emit(sink, flags, state); } - let op = match self { - &Inst::Store8 { .. } => 0b0011100000, - &Inst::Store16 { .. } => 0b0111100000, - &Inst::Store32 { .. } => 0b1011100000, - &Inst::Store64 { .. } => 0b1111100000, - &Inst::FpuStore32 { .. } => 0b1011110000, - &Inst::FpuStore64 { .. } => 0b1111110000, - &Inst::FpuStore128 { .. } => 0b0011110010, + let (op, bits) = match self { + &Inst::Store8 { .. } => (0b0011100000, 8), + &Inst::Store16 { .. } => (0b0111100000, 16), + &Inst::Store32 { .. } => (0b1011100000, 32), + &Inst::Store64 { .. } => (0b1111100000, 64), + &Inst::FpuStore32 { .. } => (0b1011110000, 32), + &Inst::FpuStore64 { .. } => (0b1111110000, 64), + &Inst::FpuStore128 { .. } => (0b0011110010, 128), _ => unreachable!(), }; @@ -812,6 +812,9 @@ impl MachInstEmit for Inst { sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd)); } &MemArg::UnsignedOffset(reg, uimm12scaled) => { + if uimm12scaled.value() != 0 { + assert_eq!(bits, ty_bits(uimm12scaled.scale_ty())); + } sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd)); } &MemArg::RegReg(r1, r2) => { @@ -843,6 +846,7 @@ impl MachInstEmit for Inst { &MemArg::SPOffset(..) | &MemArg::FPOffset(..) | &MemArg::NominalSPOffset(..) 
=> panic!("Should not see stack-offset here!"), + &MemArg::RegOffset(..) => panic!("SHould not see generic reg-offset here!"), } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index f9cd9237f9..6f302501d2 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -1311,7 +1311,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::FPOffset(32768), + mem: MemArg::FPOffset(32768, I8), srcloc: None, }, "100090D2B063308B010240F9", @@ -1320,7 +1320,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::FPOffset(-32768), + mem: MemArg::FPOffset(-32768, I8), srcloc: None, }, "F0FF8F92B063308B010240F9", @@ -1329,7 +1329,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::FPOffset(1048576), // 2^20 + mem: MemArg::FPOffset(1048576, I8), // 2^20 srcloc: None, }, "1002A0D2B063308B010240F9", @@ -1338,13 +1338,43 @@ fn test_aarch64_binemit() { insns.push(( Inst::ULoad64 { rd: writable_xreg(1), - mem: MemArg::FPOffset(1048576 + 1), // 2^20 + 1 + mem: MemArg::FPOffset(1048576 + 1, I8), // 2^20 + 1 srcloc: None, }, "300080D21002A0F2B063308B010240F9", "movz x16, #1 ; movk x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]", )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::RegOffset(xreg(7), 8, I64), + srcloc: None, + }, + "E18040F8", + "ldur x1, [x7, #8]", + )); + + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::RegOffset(xreg(7), 1024, I64), + srcloc: None, + }, + "E10042F9", + "ldr x1, [x7, #1024]", + )); + + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::RegOffset(xreg(7), 1048576, I64), + srcloc: None, + }, + "1002A0D2F060308B010240F9", + "movz x16, #16, LSL #16 ; add x16, x7, x16, UXTX ; ldr x1, [x16]", + )); + insns.push(( Inst::Store8 { rd: xreg(1), diff --git a/cranelift/codegen/src/isa/aarch64/inst/imms.rs b/cranelift/codegen/src/isa/aarch64/inst/imms.rs index 7c473a83d2..20a8225e7a 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/imms.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/imms.rs @@ -259,7 +259,12 @@ impl UImm12Scaled { /// Value after scaling. pub fn value(&self) -> u32 { - self.value as u32 * self.scale_ty.bytes() + self.value as u32 + } + + /// The value type which is the scaling base. + pub fn scale_ty(&self) -> Type { + self.scale_ty } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 1d21613e04..6fb559dbb9 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -1004,6 +1004,9 @@ fn memarg_regs(memarg: &MemArg, collector: &mut RegUsageCollector) { &MemArg::SPOffset(..) | &MemArg::NominalSPOffset(..) => { collector.add_use(stack_reg()); } + &MemArg::RegOffset(r, ..) => { + collector.add_use(r); + } } } @@ -1318,6 +1321,7 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RUM) { &mut MemArg::FPOffset(..) | &mut MemArg::SPOffset(..) | &mut MemArg::NominalSPOffset(..) => {} + &mut MemArg::RegOffset(ref mut r, ..) 
=> map_use(m, r), }; } diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 10db3b1f07..129b332295 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -539,12 +539,10 @@ pub(crate) fn lower_address>( // TODO: support base_reg + scale * index_reg. For this, we would need to pattern-match shl or // mul instructions (Load/StoreComplex don't include scale factors). - // Handle one reg and offset that fits in immediate, if possible. + // Handle one reg and offset. if addends.len() == 1 { let reg = input_to_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64); - if let Some(memarg) = MemArg::reg_maybe_offset(reg, offset as i64, elem_ty) { - return memarg; - } + return MemArg::RegOffset(reg, offset as i64, elem_ty); } // Handle two regs and a zero offset, if possible. diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index f59c0a3f19..56fd628932 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1335,7 +1335,7 @@ pub(crate) fn lower_insn_to_regs>( assert!(inputs.len() == sig.params.len()); assert!(outputs.len() == sig.returns.len()); ( - AArch64ABICall::from_func(sig, &extname, dist, loc), + AArch64ABICall::from_func(sig, &extname, dist, loc)?, &inputs[..], ) } @@ -1344,7 +1344,7 @@ pub(crate) fn lower_insn_to_regs>( let sig = ctx.call_sig(insn).unwrap(); assert!(inputs.len() - 1 == sig.params.len()); assert!(outputs.len() == sig.returns.len()); - (AArch64ABICall::from_ptr(sig, ptr, loc, op), &inputs[1..]) + (AArch64ABICall::from_ptr(sig, ptr, loc, op)?, &inputs[1..]) } _ => unreachable!(), }; diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs index 3aa8c779aa..1b061045c0 100644 --- a/cranelift/codegen/src/isa/aarch64/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/mod.rs @@ -46,7 +46,7 @@ impl AArch64Backend { func: &Function, flags: settings::Flags, ) -> CodegenResult> { - let abi = Box::new(abi::AArch64ABIBody::new(func, flags)); + let abi = Box::new(abi::AArch64ABIBody::new(func, flags)?); compile::compile::(func, self, abi) } } diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 294bdc215a..4ba75e394c 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -184,11 +184,11 @@ impl X64ABIBody { impl ABIBody for X64ABIBody { type I = Inst; - fn needed_tmps(&self) -> usize { - 0 + fn temp_needed(&self) -> bool { + false } - fn init_with_tmps(&mut self, _: &[Writable]) {} + fn init(&mut self, _: Option>) {} fn flags(&self) -> &settings::Flags { &self.flags @@ -239,8 +239,8 @@ impl ABIBody for X64ABIBody { } } - fn gen_retval_area_setup(&self) -> Vec { - vec![] + fn gen_retval_area_setup(&self) -> Option { + None } fn gen_copy_reg_to_retval( diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index 7075b10689..6e5170c3f6 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -12,11 +12,14 @@ pub trait ABIBody { /// The instruction type for the ISA associated with this ABI. type I: VCodeInst; - /// How many temps are needed? - fn needed_tmps(&self) -> usize; + /// Does the ABI-body code need a temp reg? One will be provided to `init()` + /// as the `maybe_tmp` arg if so. + fn temp_needed(&self) -> bool; - /// Initialize, providing the requersted temps. 
- fn init_with_tmps(&mut self, tmps: &[Writable]); + /// Initialize. This is called after the ABIBody is constructed because it + /// may be provided with a temp vreg, which can only be allocated once the + /// lowering context exists. + fn init(&mut self, maybe_tmp: Option>); /// Get the settings controlling this function's compilation. fn flags(&self) -> &settings::Flags; @@ -40,12 +43,12 @@ pub trait ABIBody { /// register. fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable) -> Self::I; - /// Generate any setup instructions needed to save values to the + /// Generate any setup instruction needed to save values to the /// return-value area. This is usually used when were are multiple return /// values or an otherwise large return value that must be passed on the /// stack; typically the ABI specifies an extra hidden argument that is a /// pointer to that memory. - fn gen_retval_area_setup(&self) -> Vec; + fn gen_retval_area_setup(&self) -> Option; /// Generate an instruction which copies a source register to a return value slot. fn gen_copy_reg_to_retval( diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index b83d400b74..a4bd09cfa0 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -383,7 +383,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> { let insn = self.vcode.abi().gen_copy_arg_to_reg(i, reg); self.emit(insn); } - for insn in self.vcode.abi().gen_retval_area_setup().into_iter() { + if let Some(insn) = self.vcode.abi().gen_retval_area_setup() { self.emit(insn); } } @@ -652,11 +652,13 @@ impl<'func, I: VCodeInst> Lower<'func, I> { pub fn lower>(mut self, backend: &B) -> CodegenResult> { debug!("about to lower function: {:?}", self.f); - // Initialize the ABI object with any temps it needs. - let tmps: SmallVec<[Writable; 4]> = (0..self.vcode.abi().needed_tmps()) - .map(|_| self.alloc_tmp(RegClass::I64, I64)) - .collect(); - self.vcode.abi().init_with_tmps(&tmps[..]); + // Initialize the ABI object, giving it a temp if requested. + let maybe_tmp = if self.vcode.abi().temp_needed() { + Some(self.alloc_tmp(RegClass::I64, I64)) + } else { + None + }; + self.vcode.abi().init(maybe_tmp); // Get the pinned reg here (we only parameterize this function on `B`, // not the whole `Lower` impl). diff --git a/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif b/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif index b7dc3bf342..ffb8b9b599 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif @@ -64,8 +64,8 @@ block0(v0: i64): ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp -; nextln: ldr x16, [x0] -; nextln: ldr x16, [x16, #4] +; nextln: ldur x16, [x0] +; nextln: ldur x16, [x16, #4] ; nextln: subs xzr, sp, x16 ; nextln: b.hs 8 ; nextln: udf @@ -128,8 +128,8 @@ block0(v0: i64): ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp -; nextln: ldr x16, [x0] -; nextln: ldr x16, [x16, #4] +; nextln: ldur x16, [x0] +; nextln: ldur x16, [x16, #4] ; nextln: add x16, x16, #32 ; nextln: subs xzr, sp, x16 ; nextln: b.hs 8 @@ -151,8 +151,8 @@ block0(v0: i64): ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp -; nextln: ldr x16, [x0] -; nextln: ldr x16, [x16, #4] +; nextln: ldur x16, [x0] +; nextln: ldur x16, [x16, #4] ; nextln: subs xzr, sp, x16 ; nextln: b.hs 8 ; nextln: udf @@ -179,9 +179,7 @@ block0(v0: i64): ; check: stp fp, lr, [sp, #-16]! 
 ; nextln: mov fp, sp
-; nextln: movz x16, #6784
-; nextln: movk x16, #6, LSL #16
-; nextln: ldr x16, [x0, x16]
+; nextln: movz x16, #6784 ; movk x16, #6, LSL #16 ; add x16, x0, x16, UXTX ; ldr x16, [x16]
 ; nextln: add x16, x16, #32
 ; nextln: subs xzr, sp, x16
 ; nextln: b.hs 8
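
For reference, a minimal standalone sketch of the offset-classification order that the new `MemArg::RegOffset` and the typed `SPOffset`/`FPOffset`/`NominalSPOffset` forms rely on in `mem_finalize`: the 9-bit signed unscaled immediate (ldur/stur) is tried first, then the 12-bit unsigned size-scaled immediate (ldr/str), and anything else is materialized through the spill temp. This is an illustration only, assuming 8-byte accesses; `OffsetKind` and `classify_offset` are hypothetical names, not part of this patch.

// Sketch only -- a hypothetical helper mirroring the decision order in mem_finalize().
#[derive(Debug, PartialEq)]
enum OffsetKind {
    /// Fits the 9-bit signed "unscaled" immediate form (ldur/stur).
    Simm9,
    /// Fits the 12-bit unsigned immediate scaled by the access size (ldr/str).
    Uimm12Scaled,
    /// Needs the offset materialized into the spill temp (movz/movk + add).
    NeedsTemp,
}

fn classify_offset(off: i64, access_bytes: i64) -> OffsetKind {
    if (-256..=255).contains(&off) {
        OffsetKind::Simm9
    } else if off >= 0 && off % access_bytes == 0 && off / access_bytes <= 4095 {
        OffsetKind::Uimm12Scaled
    } else {
        OffsetKind::NeedsTemp
    }
}

fn main() {
    // Mirrors the three new ULoad64 cases added to emit_tests.rs above:
    assert_eq!(classify_offset(8, 8), OffsetKind::Simm9);           // ldur x1, [x7, #8]
    assert_eq!(classify_offset(1024, 8), OffsetKind::Uimm12Scaled); // ldr x1, [x7, #1024]
    assert_eq!(classify_offset(1048576, 8), OffsetKind::NeedsTemp); // movz/add/ldr sequence
}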