From aac3751025e63a2ea978df97eac81f3fe73563eb Mon Sep 17 00:00:00 2001
From: Chris Fallin <chris@cfallin.org>
Date: Tue, 5 Jan 2021 15:48:07 -0800
Subject: [PATCH] aarch64: fix reg/imm `sub` insts that read `SP`, not the zero
 register.

On AArch64, the zero register (xzr) and the stack pointer (xsp) are
alternately named by the same index `31` in machine code depending on
context. In particular, in the reg-reg-immediate ALU instruction form,
add/subtract will use the stack pointer, not the zero register, if index
31 is given for the first (register) source arg.

In a few places, we were emitting subtract instructions with the zero
register as an argument and a reg/immediate as the second argument. When
an immediate could be incorporated directly (we have the `iconst`
definition visible), this would result in incorrect code being
generated.

This issue was found in `ineg` and in the sequence for vector
right-shifts.

Reported by Ian Cullinan; thanks!
---
 .../codegen/src/isa/aarch64/lower_inst.rs     | 13 +++++---
 .../filetests/isa/aarch64/arithmetic.clif     | 32 +++++++++++++++++++
 2 files changed, 41 insertions(+), 4 deletions(-)
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index 89bcd517f4..504fcba438 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -215,9 +215,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let ty = ty.unwrap();
             if !ty.is_vector() {
                 let rn = zero_reg();
-                let rm = put_input_in_rse_imm12(ctx, inputs[0], NarrowValueMode::None);
+                let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
                 let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
-                ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
+                ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm });
             } else {
                 let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
                 ctx.emit(Inst::VecMisc {
@@ -693,9 +693,14 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 let rm = if is_right_shift {
                     // Right shifts are implemented with a negative left shift.
                     let tmp = ctx.alloc_tmp(RegClass::I64, I32);
-                    let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
+                    let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
                     let rn = zero_reg();
-                    ctx.emit(alu_inst_imm12(ALUOp::Sub32, tmp, rn, rm));
+                    ctx.emit(Inst::AluRRR {
+                        alu_op: ALUOp::Sub32,
+                        rd: tmp,
+                        rn,
+                        rm,
+                    });
                     tmp.to_reg()
                 } else {
                     put_input_in_reg(ctx, inputs[1], NarrowValueMode::None)
diff --git a/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif b/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif
index aa52cb7436..2ffc63ce71 100644
--- a/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif
@@ -422,3 +422,35 @@ block0(v0: i64):
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
+
+function %f29(i64) -> i64 {
+block0(v0: i64):
+  v1 = iconst.i64 1
+  v2 = ineg v1
+  return v2
+}
+
+; check:  stp fp, lr, [sp, #-16]!
+; nextln:  mov fp, sp
+; nextln:  movz x0, #1
+; nextln:  sub x0, xzr, x0
+; nextln:  mov sp, fp
+; nextln:  ldp fp, lr, [sp], #16
+; nextln:  ret
+
+function %f30(i8x16) -> i8x16 {
+block0(v0: i8x16):
+  v1 = iconst.i64 1
+  v2 = ushr.i8x16 v0, v1
+  return v2
+}
+
+; check:  stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: movz x0, #1
+; nextln: sub w0, wzr, w0
+; nextln: dup v1.16b, w0
+; nextln: ushl v0.16b, v0.16b, v1.16b
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret