aarch64: Fix lowering amounts for shifts

This commit addresses two issues: * A panic when shifting any non i128 type by i128 amounts (#3064) * Wrong results when lowering shifts with small types (i8, i16) In these types when shifting for amounts larger than the size of the type, we would not get the wrapping behaviour that we see on i32 and i64. This is because in these larger types, the wrapping behaviour is automatically implemented by using the appropriate instruction, however we do not have i8 and i16 specific instructions, so we have to manually wrap the shift amount with an AND instruction. This issue is also found on x86_64 and s390x, and a separate issue will be filed for those. Closes #3064
2021-07-09 14:40:15 +01:00
parent 6c3d7092b9
commit db5566dadb
8 changed files with 1199 additions and 165 deletions
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -107,6 +107,15 @@ pub(crate) enum ResultRegImmShift {
    ImmShift(ImmShift),
 }

+impl ResultRegImmShift {
+    pub fn unwrap_reg(self) -> Reg {
+        match self {
+            ResultRegImmShift::Reg(r) => r,
+            _ => panic!("Unwrapped ResultRegImmShift, expected reg, got: {:?}", self),
+        }
+    }
+}
+
 //============================================================================
 // Lowering: convert instruction inputs to forms that we can use.

@@ -1468,6 +1477,43 @@ pub(crate) fn materialize_bool_result<C: LowerCtx<I = Inst>>(
    }
 }

+pub(crate) fn lower_shift_amt<C: LowerCtx<I = Inst>>(
+    ctx: &mut C,
+    amt_input: InsnInput,
+    dst_ty: Type,
+    tmp_reg: Writable<Reg>,
+) -> ResultRegImmShift {
+    let amt_ty = ctx.input_ty(amt_input.insn, amt_input.input);
+
+    match (dst_ty, amt_ty) {
+        // When shifting for amounts larger than the size of the type, the CLIF shift
+        // instructions implement a "wrapping" behaviour, such that an i8 << 8 is
+        // equivalent to i8 << 0
+        //
+        // On i32 and i64 types this matches what the aarch64 spec does, but on smaller
+        // types (i16, i8) we need to do this manually, so we wrap the shift amount
+        // with an AND instruction
+        (I16 | I8, _) => {
+            // We can ignore the top half of the shift amount register if the type is I128
+            let amt_reg = put_input_in_regs(ctx, amt_input).regs()[0];
+            let mask = (ty_bits(dst_ty) - 1) as u64;
+            ctx.emit(Inst::AluRRImmLogic {
+                alu_op: ALUOp::And32,
+                rd: tmp_reg,
+                rn: amt_reg,
+                imml: ImmLogic::maybe_from_u64(mask, I32).unwrap(),
+            });
+            ResultRegImmShift::Reg(tmp_reg.to_reg())
+        }
+        // TODO: We can use immlogic for i128 types here
+        (I128, _) | (_, I128) => {
+            // For I128 shifts, we need a register without immlogic
+            ResultRegImmShift::Reg(put_input_in_regs(ctx, amt_input).regs()[0])
+        }
+        _ => put_input_in_reg_immshift(ctx, amt_input, ty_bits(dst_ty)),
+    }
+}
+
 /// This is target-word-size dependent.  And it excludes booleans and reftypes.
 pub(crate) fn is_valid_atomic_transaction_ty(ty: Type) -> bool {
    match ty {
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -801,10 +801,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            let out_regs = get_output_reg(ctx, outputs[0]);
            let ty = ty.unwrap();
            if ty == I128 {
-                // TODO: We can use immlogic here
                let src = put_input_in_regs(ctx, inputs[0]);
-                // We can ignore the top half of the shift amount register
-                let amt = put_input_in_regs(ctx, inputs[1]).regs()[0];
+                let amt = lower_shift_amt(ctx, inputs[1], ty, out_regs.regs()[0]).unwrap_reg();

                match op {
                    Opcode::Ishl => emit_shl_i128(ctx, src, out_regs, amt),
@@ -828,7 +826,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                    _ => unreachable!(),
                };
                let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
-                let rm = put_input_in_reg_immshift(ctx, inputs[1], ty_bits(ty));
+                let rm = lower_shift_amt(ctx, inputs[1], ty, out_regs.regs()[0]);
                let alu_op = match op {
                    Opcode::Ishl => choose_32_64(ty, ALUOp::Lsl32, ALUOp::Lsl64),
                    Opcode::Ushr => choose_32_64(ty, ALUOp::Lsr32, ALUOp::Lsr64),