From aac3751025e63a2ea978df97eac81f3fe73563eb Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 5 Jan 2021 15:48:07 -0800 Subject: [PATCH] aarch64: fix reg/imm `sub` insts that read `SP`, not the zero register. On AArch64, the zero register (xzr) and the stack pointer (xsp) are alternately named by the same index `31` in machine code depending on context. In particular, in the reg-reg-immediate ALU instruction form, add/subtract will use the stack pointer, not the zero register, if index 31 is given for the first (register) source arg. In a few places, we were emitting subtract instructions with the zero register as an argument and a reg/immediate as the second argument. When an immediate could be incorporated directly (we have the `iconst` definition visible), this would result in incorrect code being generated. This issue was found in `ineg` and in the sequence for vector right-shifts. Reported by Ian Cullinan; thanks! --- .../codegen/src/isa/aarch64/lower_inst.rs | 13 +++++--- .../filetests/isa/aarch64/arithmetic.clif | 32 +++++++++++++++++++ 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 89bcd517f4..504fcba438 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -215,9 +215,9 @@ pub(crate) fn lower_insn_to_regs>( let ty = ty.unwrap(); if !ty.is_vector() { let rn = zero_reg(); - let rm = put_input_in_rse_imm12(ctx, inputs[0], NarrowValueMode::None); + let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64); - ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm)); + ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); } else { let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); ctx.emit(Inst::VecMisc { @@ -693,9 +693,14 @@ pub(crate) fn lower_insn_to_regs>( let rm = if is_right_shift { // Right shifts are implemented with a negative left shift. let tmp = ctx.alloc_tmp(RegClass::I64, I32); - let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None); + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let rn = zero_reg(); - ctx.emit(alu_inst_imm12(ALUOp::Sub32, tmp, rn, rm)); + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::Sub32, + rd: tmp, + rn, + rm, + }); tmp.to_reg() } else { put_input_in_reg(ctx, inputs[1], NarrowValueMode::None) diff --git a/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif b/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif index aa52cb7436..2ffc63ce71 100644 --- a/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif +++ b/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif @@ -422,3 +422,35 @@ block0(v0: i64): ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret + +function %f29(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 1 + v2 = ineg v1 + return v2 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: movz x0, #1 +; nextln: sub x0, xzr, x0 +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + +function %f30(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = iconst.i64 1 + v2 = ushr.i8x16 v0, v1 + return v2 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: movz x0, #1 +; nextln: sub w0, wzr, w0 +; nextln: dup v1.16b, w0 +; nextln: ushl v0.16b, v0.16b, v1.16b +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret