Merge pull request #2548 from cfallin/fix-aarch64-sp

aarch64: fix reg/imm `sub` insts that read `SP`, not the zero register.
This commit is contained in:
Nick Fitzgerald
2021-01-05 16:38:25 -08:00
committed by GitHub
2 changed files with 41 additions and 4 deletions

View File

@@ -215,9 +215,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let ty = ty.unwrap(); let ty = ty.unwrap();
if !ty.is_vector() { if !ty.is_vector() {
let rn = zero_reg(); let rn = zero_reg();
let rm = put_input_in_rse_imm12(ctx, inputs[0], NarrowValueMode::None); let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64); let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm)); ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm });
} else { } else {
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
ctx.emit(Inst::VecMisc { ctx.emit(Inst::VecMisc {
@@ -693,9 +693,14 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let rm = if is_right_shift { let rm = if is_right_shift {
// Right shifts are implemented with a negative left shift. // Right shifts are implemented with a negative left shift.
let tmp = ctx.alloc_tmp(RegClass::I64, I32); let tmp = ctx.alloc_tmp(RegClass::I64, I32);
let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None); let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rn = zero_reg(); let rn = zero_reg();
ctx.emit(alu_inst_imm12(ALUOp::Sub32, tmp, rn, rm)); ctx.emit(Inst::AluRRR {
alu_op: ALUOp::Sub32,
rd: tmp,
rn,
rm,
});
tmp.to_reg() tmp.to_reg()
} else { } else {
put_input_in_reg(ctx, inputs[1], NarrowValueMode::None) put_input_in_reg(ctx, inputs[1], NarrowValueMode::None)

View File

@@ -422,3 +422,35 @@ block0(v0: i64):
; nextln: mov sp, fp ; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16 ; nextln: ldp fp, lr, [sp], #16
; nextln: ret ; nextln: ret
function %f29(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 1
v2 = ineg v1
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #1
; nextln: sub x0, xzr, x0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f30(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iconst.i64 1
v2 = ushr.i8x16 v0, v1
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #1
; nextln: sub w0, wzr, w0
; nextln: dup v1.16b, w0
; nextln: ushl v0.16b, v0.16b, v1.16b
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret