From 0b13d8c848aa40e2b643e8994e796ca47433a8ef Mon Sep 17 00:00:00 2001
From: Benjamin Bouvier
Date: Thu, 23 Apr 2020 19:23:16 +0200
Subject: [PATCH] aarch64: copy SP whenever it's involved in an address lowering with an explicit add;

---
 build.rs                                      |  6 +-
 cranelift/codegen/src/isa/aarch64/abi.rs      | 71 +++++++++++++++++--
 .../codegen/src/isa/aarch64/inst/emit.rs      | 34 +++++++--
 cranelift/codegen/src/isa/aarch64/inst/mod.rs |  3 +
 cranelift/codegen/src/isa/aarch64/lower.rs    | 20 +++++-
 cranelift/codegen/src/machinst/abi.rs         |  9 ++-
 6 files changed, 124 insertions(+), 19 deletions(-)

diff --git a/build.rs b/build.rs
index f94f0d3980..98a31a2c10 100644
--- a/build.rs
+++ b/build.rs
@@ -237,11 +237,7 @@ fn should_panic(testsuite: &str, testname: &str) -> bool {
     }
     match (testsuite, testname) {
         // FIXME(#1521)
-        ("misc_testsuite", "func_400_params")
-        | ("simd", _)
-        | ("multi_value", "call")
-        | ("spec_testsuite", "call") => true,
-
+        ("simd", _) => true,
         _ => false,
     }
 }
diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs
index 1b314037f7..a8fc48d794 100644
--- a/cranelift/codegen/src/isa/aarch64/abi.rs
+++ b/cranelift/codegen/src/isa/aarch64/abi.rs
@@ -295,6 +295,10 @@ fn load_stack_from_fp(fp_offset: i64, into_reg: Writable<Reg>, ty: Type) -> Inst
 }
 
 fn store_stack(mem: MemArg, from_reg: Reg, ty: Type) -> Inst {
+    debug_assert!(match &mem {
+        MemArg::SPOffset(off) => SImm9::maybe_from_i64(*off).is_some(),
+        _ => true,
+    });
     match ty {
         types::B1
         | types::B8
@@ -323,6 +327,49 @@ fn store_stack(mem: MemArg, from_reg: Reg, ty: Type) -> Inst {
     }
 }
 
+fn store_stack_fp(fp_offset: i64, from_reg: Reg, ty: Type) -> Inst {
+    store_stack(MemArg::FPOffset(fp_offset), from_reg, ty)
+}
+
+fn store_stack_sp(
+    sp_offset: i64,
+    from_reg: Reg,
+    ty: Type,
+    tmp1: Writable<Reg>,
+    tmp2: Writable<Reg>,
+) -> Vec<Inst> {
+    if SImm9::maybe_from_i64(sp_offset).is_some() {
+        vec![store_stack(MemArg::SPOffset(sp_offset), from_reg, ty)]
+    } else {
+        // mem_finalize will try to generate an add, but in an addition, x31 is the zero register,
+        // not sp! So we have to synthesize the full add here.
+        let mut result = Vec::new();
+        // tmp1 := sp
+        result.push(Inst::Mov {
+            rd: tmp1,
+            rm: stack_reg(),
+        });
+        // tmp2 := offset
+        for inst in Inst::load_constant(tmp2, sp_offset as u64) {
+            result.push(inst);
+        }
+        // tmp1 := add tmp1, tmp2
+        result.push(Inst::AluRRR {
+            alu_op: ALUOp::Add64,
+            rd: tmp1,
+            rn: tmp1.to_reg(),
+            rm: tmp2.to_reg(),
+        });
+        // Actual store.
+        result.push(store_stack(
+            MemArg::Unscaled(tmp1.to_reg(), SImm9::maybe_from_i64(0).unwrap()),
+            from_reg,
+            ty,
+        ));
+        result
+    }
+}
+
 fn is_callee_save(call_conv: isa::CallConv, r: RealReg) -> bool {
     if call_conv.extends_baldrdash() {
         match r.get_class() {
@@ -523,8 +570,8 @@ impl ABIBody for AArch64ABIBody {
             }
             _ => {}
         };
-        ret.push(store_stack(
-            MemArg::FPOffset(off + self.frame_size()),
+        ret.push(store_stack_fp(
+            off + self.frame_size(),
             from_reg.to_reg(),
             ty,
         ))
@@ -566,7 +613,7 @@ impl ABIBody for AArch64ABIBody {
         // Offset from beginning of stackslot area, which is at FP - stackslots_size.
         let stack_off = self.stackslots[slot.as_u32() as usize] as i64;
         let fp_off: i64 = -(self.stackslots_size as i64) + stack_off + (offset as i64);
-        store_stack(MemArg::FPOffset(fp_off), from_reg, ty)
+        store_stack_fp(fp_off, from_reg, ty)
     }
 
     fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable<Reg>) -> Inst {
@@ -596,7 +643,7 @@ impl ABIBody for AArch64ABIBody {
         let islot = slot.get() as i64;
         let ty_size = self.get_spillslot_size(from_reg.get_class(), ty) * 8;
         let fp_off: i64 = -(self.stackslots_size as i64) - (8 * islot) - ty_size as i64;
-        store_stack(MemArg::FPOffset(fp_off), from_reg, ty)
+        store_stack_fp(fp_off, from_reg, ty)
     }
 
     fn gen_prologue(&mut self) -> Vec<Inst> {
@@ -927,10 +974,20 @@ impl ABICall for AArch64ABICall {
         adjust_stack(self.sig.stack_arg_space as u64, /* is_sub = */ false)
     }
 
-    fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> Inst {
+    fn gen_copy_reg_to_arg(
+        &self,
+        idx: usize,
+        from_reg: Reg,
+        tmp1: Writable<Reg>,
+        tmp2: Writable<Reg>,
+    ) -> Vec<Inst> {
         match &self.sig.args[idx] {
-            &ABIArg::Reg(reg, ty) => Inst::gen_move(Writable::from_reg(reg.to_reg()), from_reg, ty),
-            &ABIArg::Stack(off, ty) => store_stack(MemArg::SPOffset(off), from_reg, ty),
+            &ABIArg::Reg(reg, ty) => vec![Inst::gen_move(
+                Writable::from_reg(reg.to_reg()),
+                from_reg,
+                ty,
+            )],
+            &ABIArg::Stack(off, ty) => store_stack_sp(off, from_reg, ty, tmp1, tmp2),
         }
     }
 
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
index b137ad3017..d012521ecd 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -35,6 +35,14 @@ pub fn mem_finalize(insn_off: CodeOffset, mem: &MemArg) -> (Vec<Inst>, MemArg) {
             let mem = MemArg::Unscaled(basereg, simm9);
             (vec![], mem)
         } else {
+            // In an addition, x31 is the zero register, not sp; we have only one temporary
+            // so we can't do the proper add here.
+            debug_assert_ne!(
+                basereg,
+                stack_reg(),
+                "should have diverted SP before mem_finalize"
+            );
+
             let tmp = writable_spilltmp_reg();
             let mut const_insts = Inst::load_constant(tmp, off as u64);
             let add_inst = Inst::AluRRR {
@@ -363,7 +371,11 @@ impl MachInstEmit for Inst {
                     ALUOp::Lsl32 | ALUOp::Lsl64 => 0b001000,
                     _ => 0b000000,
                 };
-                assert_ne!(writable_stack_reg(), rd);
+                debug_assert_ne!(writable_stack_reg(), rd);
+                // The stack pointer is the zero register in this context, so this might be an
+                // indication that something is wrong.
+                debug_assert_ne!(stack_reg(), rn);
+                debug_assert_ne!(stack_reg(), rm);
                 sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm));
             }
             &Inst::AluRRRR {
@@ -818,11 +830,25 @@ impl MachInstEmit for Inst {
             &Inst::Mov { rd, rm } => {
                 assert!(rd.to_reg().get_class() == rm.get_class());
                 assert!(rm.get_class() == RegClass::I64);
+
                 // MOV to SP is interpreted as MOV to XZR instead. And our codegen
                 // should never MOV to XZR.
-                assert!(machreg_to_gpr(rd.to_reg()) != 31);
-                // Encoded as ORR rd, rm, zero.
-                sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm));
+                assert!(rd.to_reg() != stack_reg());
+
+                if rm == stack_reg() {
+                    // We can't use ORR here, so use an `add rd, sp, #0` instead.
+                    let imm12 = Imm12::maybe_from_u64(0).unwrap();
+                    sink.put4(enc_arith_rr_imm12(
+                        0b100_10001,
+                        imm12.shift_bits(),
+                        imm12.imm_bits(),
+                        rm,
+                        rd,
+                    ));
+                } else {
+                    // Encoded as ORR rd, rm, zero.
+                    sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm));
+                }
             }
             &Inst::Mov32 { rd, rm } => {
                 // MOV to SP is interpreted as MOV to XZR instead. And our codegen
diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
index 101e217ae7..52330e66e6 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -1719,6 +1719,9 @@ impl MachInst for Inst {
 
     fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
         match self {
+            // TODO a regalloc assertion is triggered if we don't have this, see also #1586 on
+            // wasmtime, as well as https://github.com/bytecodealliance/regalloc.rs/issues/52.
+            &Inst::Mov { rm, .. } if rm == stack_reg() => None,
             &Inst::Mov { rd, rm } => Some((rd, rm)),
             &Inst::FpuMove64 { rd, rn } => Some((rd, rn)),
             _ => None,
diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs
index e9e72dd6d5..66e708249d 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -595,6 +595,20 @@ fn lower_address<C: LowerCtx<I = Inst>>(
     // Add each addend to the address.
     for addend in addends {
         let reg = input_to_reg(ctx, *addend, NarrowValueMode::ZeroExtend64);
+
+        // In an addition, the stack register is the zero register, so divert it to another
+        // register just before doing the actual add.
+        let reg = if reg == stack_reg() {
+            let tmp = ctx.tmp(RegClass::I64, I64);
+            ctx.emit(Inst::Mov {
+                rd: tmp,
+                rm: stack_reg(),
+            });
+            tmp.to_reg()
+        } else {
+            reg
+        };
+
         ctx.emit(Inst::AluRRR {
             alu_op: ALUOp::Add64,
             rd: addr.clone(),
@@ -1968,9 +1982,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
                 ctx.emit(inst);
             }
             assert!(inputs.len() == abi.num_args());
+            let tmp1 = ctx.tmp(RegClass::I64, I64);
+            let tmp2 = ctx.tmp(RegClass::I64, I64);
             for (i, input) in inputs.iter().enumerate() {
                 let arg_reg = input_to_reg(ctx, *input, NarrowValueMode::None);
-                ctx.emit(abi.gen_copy_reg_to_arg(i, arg_reg));
+                for inst in abi.gen_copy_reg_to_arg(i, arg_reg, tmp1, tmp2) {
+                    ctx.emit(inst);
+                }
             }
             for inst in abi.gen_call().into_iter() {
                 ctx.emit(inst);
diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs
index e22bc0bdc4..b5bbbb9cb5 100644
--- a/cranelift/codegen/src/machinst/abi.rs
+++ b/cranelift/codegen/src/machinst/abi.rs
@@ -132,9 +132,14 @@ pub trait ABICall {
     /// Get the number of arguments expected.
     fn num_args(&self) -> usize;
 
-    /// Save the clobbered registers.
     /// Copy an argument value from a source register, prior to the call.
-    fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> Self::I;
+    fn gen_copy_reg_to_arg(
+        &self,
+        idx: usize,
+        from_reg: Reg,
+        tmp1: Writable<Reg>,
+        tmp2: Writable<Reg>,
+    ) -> Vec<Self::I>;
 
     /// Copy a return value into a destination register, after the call returns.
     fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Self::I;
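
Reviewer's note on the encoding rule behind this patch, separated from the Cranelift plumbing: in base A64 integer instructions, register number 31 names SP in the immediate form of ADD and in load/store base operands, but names XZR in the three-register ALU forms and in ORR, which is what the plain `mov xd, xn` alias expands to. The only register-to-register "move" that still sees SP is `add xd, sp, #0`, which is why the emitter above switches to that form and why address lowering first copies SP into a scratch register. The sketch below is a minimal illustration of that rule, not Cranelift code: the `Reg`, `Inst`, `SP`, and `divert_sp` names, and the choice of x16 as a temporary, are made up for this example.

// Illustration only: simplified stand-ins, not the real Cranelift definitions.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct Reg(u8);

// Register number 31: SP in add-immediate and memory operands, XZR in
// three-register ALU operands.
const SP: Reg = Reg(31);

#[derive(Debug)]
enum Inst {
    // `add rd, rn, #imm12`: rn = 31 is read as SP here.
    AddImm { rd: Reg, rn: Reg, imm12: u16 },
    // `add rd, rn, rm`: rn/rm = 31 are read as XZR here, never SP.
    AddRRR { rd: Reg, rn: Reg, rm: Reg },
}

// If `reg` is SP, emit `add tmp, sp, #0` (the canonical `mov tmp, sp`) and use
// `tmp` instead, so a later three-register add never sees register 31.
fn divert_sp(reg: Reg, tmp: Reg, out: &mut Vec<Inst>) -> Reg {
    if reg == SP {
        out.push(Inst::AddImm { rd: tmp, rn: SP, imm12: 0 });
        tmp
    } else {
        reg
    }
}

fn main() {
    let mut insts = Vec::new();
    let x0 = Reg(0);
    let x16 = Reg(16); // stand-in for a temp handed out by the register allocator
    // Lowering `addr = sp + x0`: divert SP first, then do the register-register add.
    let base = divert_sp(SP, x16, &mut insts);
    insts.push(Inst::AddRRR { rd: Reg(1), rn: base, rm: x0 });
    for inst in &insts {
        println!("{:?}", inst);
    }
}

Compiled with rustc, this prints the SP copy followed by the register-register add. Skipping the `divert_sp` step is exactly the situation the new `debug_assert_ne!` in `mem_finalize` and the emit-time asserts on `AluRRR` operands are intended to catch.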