aarch64: copy SP whenever it's involved in an address lowering with an explicit add;
build.rs | 6 +-----
@@ -237,11 +237,7 @@ fn should_panic(testsuite: &str, testname: &str) -> bool {
     }
     match (testsuite, testname) {
         // FIXME(#1521)
-        ("misc_testsuite", "func_400_params")
-        | ("simd", _)
-        | ("multi_value", "call")
-        | ("spec_testsuite", "call") => true,
-
+        ("simd", _) => true,
         _ => false,
     }
 }
@@ -295,6 +295,10 @@ fn load_stack_from_fp(fp_offset: i64, into_reg: Writable<Reg>, ty: Type) -> Inst
 }
 
 fn store_stack(mem: MemArg, from_reg: Reg, ty: Type) -> Inst {
+    debug_assert!(match &mem {
+        MemArg::SPOffset(off) => SImm9::maybe_from_i64(*off).is_some(),
+        _ => true,
+    });
     match ty {
         types::B1
         | types::B8
@@ -323,6 +327,49 @@ fn store_stack(mem: MemArg, from_reg: Reg, ty: Type) -> Inst {
     }
 }
 
+fn store_stack_fp(fp_offset: i64, from_reg: Reg, ty: Type) -> Inst {
+    store_stack(MemArg::FPOffset(fp_offset), from_reg, ty)
+}
+
+fn store_stack_sp(
+    sp_offset: i64,
+    from_reg: Reg,
+    ty: Type,
+    tmp1: Writable<Reg>,
+    tmp2: Writable<Reg>,
+) -> Vec<Inst> {
+    if SImm9::maybe_from_i64(sp_offset).is_some() {
+        vec![store_stack(MemArg::SPOffset(sp_offset), from_reg, ty)]
+    } else {
+        // mem_finalize will try to generate an add, but in an addition, x31 is the zero register,
+        // not sp! So we have to synthesize the full add here.
+        let mut result = Vec::new();
+        // tmp1 := sp
+        result.push(Inst::Mov {
+            rd: tmp1,
+            rm: stack_reg(),
+        });
+        // tmp2 := offset
+        for inst in Inst::load_constant(tmp2, sp_offset as u64) {
+            result.push(inst);
+        }
+        // tmp1 := add tmp1, tmp2
+        result.push(Inst::AluRRR {
+            alu_op: ALUOp::Add64,
+            rd: tmp1,
+            rn: tmp1.to_reg(),
+            rm: tmp2.to_reg(),
+        });
+        // Actual store.
+        result.push(store_stack(
+            MemArg::Unscaled(tmp1.to_reg(), SImm9::maybe_from_i64(0).unwrap()),
+            from_reg,
+            ty,
+        ));
+        result
+    }
+}
+
 fn is_callee_save(call_conv: isa::CallConv, r: RealReg) -> bool {
     if call_conv.extends_baldrdash() {
         match r.get_class() {
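When the offset fits, a single SP-relative store suffices; otherwise the helper expands to four steps: copy SP into tmp1, load the offset into tmp2, add, then store with an always-encodable offset of 0. A hypothetical standalone model of the resulting shape (illustrative mnemonics and an x16/x17 register choice; not the backend's real API or printer output):

    // Model of the code shape store_stack_sp produces for storing x0 at [sp + off].
    fn spill_to_sp_offset(off: i64) -> Vec<String> {
        if (-256..=255).contains(&off) {
            // Fits STUR's signed 9-bit immediate: one instruction.
            vec![format!("stur x0, [sp, #{}]", off)]
        } else {
            // One-movz form shown for brevity; larger offsets need extra movk steps.
            assert!((0..65536).contains(&off));
            vec![
                "mov  x16, sp".to_string(),       // divert SP into an ordinary GPR first
                format!("movz x17, #{}", off),    // materialize the large offset
                "add  x16, x16, x17".to_string(), // reg 31 would mean XZR here, hence the mov
                "stur x0, [x16]".to_string(),     // offset 0 always encodes
            ]
        }
    }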
@@ -523,8 +570,8 @@ impl ABIBody for AArch64ABIBody {
                 }
                 _ => {}
             };
-            ret.push(store_stack(
-                MemArg::FPOffset(off + self.frame_size()),
+            ret.push(store_stack_fp(
+                off + self.frame_size(),
                 from_reg.to_reg(),
                 ty,
             ))
@@ -566,7 +613,7 @@ impl ABIBody for AArch64ABIBody {
         // Offset from beginning of stackslot area, which is at FP - stackslots_size.
         let stack_off = self.stackslots[slot.as_u32() as usize] as i64;
         let fp_off: i64 = -(self.stackslots_size as i64) + stack_off + (offset as i64);
-        store_stack(MemArg::FPOffset(fp_off), from_reg, ty)
+        store_stack_fp(fp_off, from_reg, ty)
     }
 
     fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable<Reg>) -> Inst {
@@ -596,7 +643,7 @@ impl ABIBody for AArch64ABIBody {
         let islot = slot.get() as i64;
         let ty_size = self.get_spillslot_size(from_reg.get_class(), ty) * 8;
         let fp_off: i64 = -(self.stackslots_size as i64) - (8 * islot) - ty_size as i64;
-        store_stack(MemArg::FPOffset(fp_off), from_reg, ty)
+        store_stack_fp(fp_off, from_reg, ty)
     }
 
     fn gen_prologue(&mut self) -> Vec<Inst> {
@@ -927,10 +974,20 @@ impl ABICall for AArch64ABICall {
         adjust_stack(self.sig.stack_arg_space as u64, /* is_sub = */ false)
     }
 
-    fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> Inst {
+    fn gen_copy_reg_to_arg(
+        &self,
+        idx: usize,
+        from_reg: Reg,
+        tmp1: Writable<Reg>,
+        tmp2: Writable<Reg>,
+    ) -> Vec<Inst> {
         match &self.sig.args[idx] {
-            &ABIArg::Reg(reg, ty) => Inst::gen_move(Writable::from_reg(reg.to_reg()), from_reg, ty),
-            &ABIArg::Stack(off, ty) => store_stack(MemArg::SPOffset(off), from_reg, ty),
+            &ABIArg::Reg(reg, ty) => vec![Inst::gen_move(
+                Writable::from_reg(reg.to_reg()),
+                from_reg,
+                ty,
+            )],
+            &ABIArg::Stack(off, ty) => store_stack_sp(off, from_reg, ty, tmp1, tmp2),
         }
     }
 
@@ -35,6 +35,14 @@ pub fn mem_finalize(insn_off: CodeOffset, mem: &MemArg) -> (Vec<Inst>, MemArg) {
             let mem = MemArg::Unscaled(basereg, simm9);
             (vec![], mem)
         } else {
+            // In an addition, x31 is the zero register, not sp; we have only one temporary
+            // so we can't do the proper add here.
+            debug_assert_ne!(
+                basereg,
+                stack_reg(),
+                "should have diverted SP before mem_finalize"
+            );
+
             let tmp = writable_spilltmp_reg();
             let mut const_insts = Inst::load_constant(tmp, off as u64);
             let add_inst = Inst::AluRRR {
@@ -363,7 +371,11 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
                     ALUOp::Lsl32 | ALUOp::Lsl64 => 0b001000,
                     _ => 0b000000,
                 };
-                assert_ne!(writable_stack_reg(), rd);
+                debug_assert_ne!(writable_stack_reg(), rd);
+                // The stack pointer is the zero register in this context, so this might be an
+                // indication that something is wrong.
+                debug_assert_ne!(stack_reg(), rn);
+                debug_assert_ne!(stack_reg(), rm);
                 sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm));
             }
             &Inst::AluRRRR {
@@ -818,12 +830,26 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
             &Inst::Mov { rd, rm } => {
                 assert!(rd.to_reg().get_class() == rm.get_class());
                 assert!(rm.get_class() == RegClass::I64);
+
                 // MOV to SP is interpreted as MOV to XZR instead. And our codegen
                 // should never MOV to XZR.
-                assert!(machreg_to_gpr(rd.to_reg()) != 31);
+                assert!(rd.to_reg() != stack_reg());
+
+                if rm == stack_reg() {
+                    // We can't use ORR here, so use an `add rd, sp, #0` instead.
+                    let imm12 = Imm12::maybe_from_u64(0).unwrap();
+                    sink.put4(enc_arith_rr_imm12(
+                        0b100_10001,
+                        imm12.shift_bits(),
+                        imm12.imm_bits(),
+                        rm,
+                        rd,
+                    ));
+                } else {
                     // Encoded as ORR rd, rm, zero.
                     sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm));
+                }
             }
             &Inst::Mov32 { rd, rm } => {
                 // MOV to SP is interpreted as MOV to XZR instead. And our codegen
                 // should never MOV to XZR.
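The special case exists because register number 31 decodes differently per instruction class: it is XZR in ORR (shifted register) but SP in ADD (immediate). A minimal re-creation of the two encoders used above, checked against the architectural words for `mov x0, sp` and `mov x0, x1` (simplified sketch: raw register numbers stand in for Reg/Writable<Reg>):

    // Simplified stand-ins for the enc_arith_rr_imm12 / enc_arith_rrr helpers.
    fn enc_arith_rr_imm12(bits_31_24: u32, sh: u32, imm12: u32, rn: u32, rd: u32) -> u32 {
        (bits_31_24 << 24) | (sh << 22) | (imm12 << 10) | (rn << 5) | rd
    }

    fn enc_arith_rrr(bits_31_21: u32, bits_15_10: u32, rd: u32, rn: u32, rm: u32) -> u32 {
        (bits_31_21 << 21) | (rm << 16) | (bits_15_10 << 10) | (rn << 5) | rd
    }

    fn main() {
        // `mov x0, sp` must be `add x0, sp, #0`: in ADD (immediate), reg 31 is SP.
        assert_eq!(enc_arith_rr_imm12(0b100_10001, 0, 0, 31, 0), 0x9100_03E0);
        // `mov x0, x1` is `orr x0, xzr, x1`: in ORR (shifted register), reg 31 is XZR.
        assert_eq!(enc_arith_rrr(0b10101010_000, 0b000_000, 0, 31, 1), 0xAA01_03E0);
    }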
@@ -1719,6 +1719,9 @@ impl MachInst for Inst {
 
     fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
         match self {
+            // TODO a regalloc assertion is triggered if we don't have this, see also #1586 on
+            // wasmtime, as well as https://github.com/bytecodealliance/regalloc.rs/issues/52.
+            &Inst::Mov { rm, .. } if rm == stack_reg() => None,
             &Inst::Mov { rd, rm } => Some((rd, rm)),
             &Inst::FpuMove64 { rd, rn } => Some((rd, rn)),
             _ => None,
@@ -595,6 +595,20 @@ fn lower_address<C: LowerCtx<I = Inst>>(
     // Add each addend to the address.
     for addend in addends {
         let reg = input_to_reg(ctx, *addend, NarrowValueMode::ZeroExtend64);
+
+        // In an addition, the stack register is the zero register, so divert it to another
+        // register just before doing the actual add.
+        let reg = if reg == stack_reg() {
+            let tmp = ctx.tmp(RegClass::I64, I64);
+            ctx.emit(Inst::Mov {
+                rd: tmp,
+                rm: stack_reg(),
+            });
+            tmp.to_reg()
+        } else {
+            reg
+        };
+
         ctx.emit(Inst::AluRRR {
             alu_op: ALUOp::Add64,
             rd: addr.clone(),
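The add emitted just below is the three-operand ADD (shifted register) form, which has no way to name SP: register number 31 in its Rn slot decodes as XZR, so without the diversion an address based on SP would silently be computed from zero instead. A small check of that fact, in the same simplified-encoder style (hypothetical helper name):

    // ADD (shifted register), 64-bit: bits 31..21 are 1 0 0 01011 00 0.
    fn enc_add64_shifted(rd: u32, rn: u32, rm: u32) -> u32 {
        (0b10001011_000 << 21) | (rm << 16) | (rn << 5) | rd
    }

    fn main() {
        // Passing 31 as Rn yields exactly the word for `add x0, xzr, x1`;
        // `add x0, sp, x1` is not expressible in this encoding.
        assert_eq!(enc_add64_shifted(0, 31, 1), 0x8B01_03E0);
    }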
@@ -1968,9 +1982,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
             ctx.emit(inst);
         }
         assert!(inputs.len() == abi.num_args());
+        let tmp1 = ctx.tmp(RegClass::I64, I64);
+        let tmp2 = ctx.tmp(RegClass::I64, I64);
         for (i, input) in inputs.iter().enumerate() {
             let arg_reg = input_to_reg(ctx, *input, NarrowValueMode::None);
-            ctx.emit(abi.gen_copy_reg_to_arg(i, arg_reg));
+            for inst in abi.gen_copy_reg_to_arg(i, arg_reg, tmp1, tmp2) {
+                ctx.emit(inst);
+            }
         }
         for inst in abi.gen_call().into_iter() {
             ctx.emit(inst);
@@ -132,9 +132,14 @@ pub trait ABICall {
     /// Get the number of arguments expected.
     fn num_args(&self) -> usize;
 
-    /// Save the clobbered registers.
     /// Copy an argument value from a source register, prior to the call.
-    fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> Self::I;
+    fn gen_copy_reg_to_arg(
+        &self,
+        idx: usize,
+        from_reg: Reg,
+        tmp1: Writable<Reg>,
+        tmp2: Writable<Reg>,
+    ) -> Vec<Self::I>;
 
     /// Copy a return value into a destination register, after the call returns.
     fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Self::I;