From d3b525fa29ae55da9b78111e1c87af93dfd3427f Mon Sep 17 00:00:00 2001
From: Afonso Bordado
Date: Fri, 14 May 2021 18:12:13 +0100
Subject: [PATCH] aarch64: Implement iadd for i128 operands

---
 .../codegen/src/isa/aarch64/inst/emit.rs      |   2 +
 .../src/isa/aarch64/inst/emit_tests.rs        |  20 +++
 cranelift/codegen/src/isa/aarch64/inst/mod.rs |  55 +++++----
 .../codegen/src/isa/aarch64/lower_inst.rs     | 115 +++++++++++-------
 .../filetests/isa/aarch64/arithmetic-run.clif |  39 ++++++
 .../filetests/isa/aarch64/arithmetic.clif     |  15 +++
 6 files changed, 179 insertions(+), 67 deletions(-)
 create mode 100644 cranelift/filetests/filetests/isa/aarch64/arithmetic-run.clif

diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
index 6621e3f409..89c4f88b4b 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -597,6 +597,8 @@ impl MachInstEmit for Inst {
                 let top11 = match alu_op {
                     ALUOp::Add32 => 0b00001011_000,
                     ALUOp::Add64 => 0b10001011_000,
+                    ALUOp::Adc32 => 0b00011010_000,
+                    ALUOp::Adc64 => 0b10011010_000,
                     ALUOp::Sub32 => 0b01001011_000,
                     ALUOp::Sub64 => 0b11001011_000,
                     ALUOp::Orr32 => 0b00101010_000,
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
index 9f628fced6..e568829955 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -50,6 +50,26 @@ fn test_aarch64_binemit() {
         "A400068B",
         "add x4, x5, x6",
     ));
+    insns.push((
+        Inst::AluRRR {
+            alu_op: ALUOp::Adc32,
+            rd: writable_xreg(1),
+            rn: xreg(2),
+            rm: xreg(3),
+        },
+        "4100031A",
+        "adc w1, w2, w3",
+    ));
+    insns.push((
+        Inst::AluRRR {
+            alu_op: ALUOp::Adc64,
+            rd: writable_xreg(4),
+            rn: xreg(5),
+            rm: xreg(6),
+        },
+        "A400069A",
+        "adc x4, x5, x6",
+    ));
     insns.push((
         Inst::AluRRR {
             alu_op: ALUOp::Sub32,
diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
index 35903c18d0..15073a8247 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -84,6 +84,9 @@ pub enum ALUOp {
     Asr64,
     Lsl32,
     Lsl64,
+    /// Add with carry
+    Adc32,
+    Adc64,
 }
 
 /// An ALU operation with three arguments.
@@ -1365,6 +1368,23 @@ impl Inst {
         }
     }
 
+    /// Create instructions that load a 128-bit constant.
+    pub fn load_constant128(to_regs: ValueRegs<Writable<Reg>>, value: u128) -> SmallVec<[Inst; 4]> {
+        assert_eq!(to_regs.len(), 2, "Expected to load i128 into two registers");
+
+        let lower = value as u64;
+        let upper = (value >> 64) as u64;
+
+        let lower_reg = to_regs.regs()[0];
+        let upper_reg = to_regs.regs()[1];
+
+        let mut load_ins = Inst::load_constant(lower_reg, lower);
+        let load_upper = Inst::load_constant(upper_reg, upper);
+
+        load_ins.extend(load_upper.into_iter());
+        load_ins
+    }
+
     /// Create instructions that load a 32-bit floating-point constant.
     pub fn load_fp_constant32<F: FnMut(Type) -> Writable<Reg>>(
         rd: Writable<Reg>,
@@ -3033,30 +3053,15 @@ impl MachInst for Inst {
         ty: Type,
         alloc_tmp: F,
     ) -> SmallVec<[Inst; 4]> {
-        let to_reg = to_regs
-            .only_reg()
-            .expect("multi-reg values not supported yet");
-        let value = value as u64;
-        if ty == F64 {
-            Inst::load_fp_constant64(to_reg, value, alloc_tmp)
-        } else if ty == F32 {
-            Inst::load_fp_constant32(to_reg, value as u32, alloc_tmp)
-        } else {
-            // Must be an integer type.
-            debug_assert!(
-                ty == B1
-                    || ty == I8
-                    || ty == B8
-                    || ty == I16
-                    || ty == B16
-                    || ty == I32
-                    || ty == B32
-                    || ty == I64
-                    || ty == B64
-                    || ty == R32
-                    || ty == R64
-            );
-            Inst::load_constant(to_reg, value)
+        let to_reg = to_regs.only_reg();
+        match ty {
+            F64 => Inst::load_fp_constant64(to_reg.unwrap(), value as u64, alloc_tmp),
+            F32 => Inst::load_fp_constant32(to_reg.unwrap(), value as u32, alloc_tmp),
+            B1 | B8 | B16 | B32 | B64 | I8 | I16 | I32 | I64 | R32 | R64 => {
+                Inst::load_constant(to_reg.unwrap(), value as u64)
+            }
+            I128 => Inst::load_constant128(to_regs, value),
+            _ => panic!("Cannot generate constant for type: {}", ty),
         }
     }
 
@@ -3202,6 +3207,8 @@ impl Inst {
             ALUOp::Asr64 => ("asr", OperandSize::Size64),
             ALUOp::Lsl32 => ("lsl", OperandSize::Size32),
             ALUOp::Lsl64 => ("lsl", OperandSize::Size64),
+            ALUOp::Adc32 => ("adc", OperandSize::Size32),
+            ALUOp::Adc64 => ("adc", OperandSize::Size64),
         }
     }
 
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index fe56d1358d..d6b8933fc4 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -64,59 +64,88 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             lower_constant_f64(ctx, rd, value);
         }
         Opcode::Iadd => {
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let ty = ty.unwrap();
-            if !ty.is_vector() {
-                let mul_insn =
-                    if let Some(mul_insn) = maybe_input_insn(ctx, inputs[1], Opcode::Imul) {
+            match ty.unwrap() {
+                ty if ty.is_vector() => {
+                    let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+                    let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+                    let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+                    ctx.emit(Inst::VecRRR {
+                        rd,
+                        rn,
+                        rm,
+                        alu_op: VecALUOp::Add,
+                        size: VectorSize::from_ty(ty),
+                    });
+                }
+                I128 => {
+                    let lhs = put_input_in_regs(ctx, inputs[0]);
+                    let rhs = put_input_in_regs(ctx, inputs[1]);
+                    let dst = get_output_reg(ctx, outputs[0]);
+                    assert_eq!(lhs.len(), 2);
+                    assert_eq!(rhs.len(), 2);
+                    assert_eq!(dst.len(), 2);
+
+                    // adds x0, x0, x2
+                    // adc x1, x1, x3
+
+                    // Add lower
+                    ctx.emit(Inst::AluRRR {
+                        alu_op: ALUOp::AddS64,
+                        rd: dst.regs()[0],
+                        rn: lhs.regs()[0],
+                        rm: rhs.regs()[0],
+                    });
+                    ctx.emit(Inst::AluRRR {
+                        alu_op: ALUOp::Adc64,
+                        rd: dst.regs()[1],
+                        rn: lhs.regs()[1],
+                        rm: rhs.regs()[1],
+                    });
+                }
+                ty => {
+                    let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+                    let mul_insn = if let Some(mul_insn) =
+                        maybe_input_insn(ctx, inputs[1], Opcode::Imul)
+                    {
                         Some((mul_insn, 0))
                     } else if let Some(mul_insn) = maybe_input_insn(ctx, inputs[0], Opcode::Imul) {
                         Some((mul_insn, 1))
                     } else {
                         None
                     };
-                // If possible combine mul + add into madd.
-                if let Some((insn, addend_idx)) = mul_insn {
-                    let alu_op = choose_32_64(ty, ALUOp3::MAdd32, ALUOp3::MAdd64);
-                    let rn_input = InsnInput { insn, input: 0 };
-                    let rm_input = InsnInput { insn, input: 1 };
-
-                    let rn = put_input_in_reg(ctx, rn_input, NarrowValueMode::None);
-                    let rm = put_input_in_reg(ctx, rm_input, NarrowValueMode::None);
-                    let ra = put_input_in_reg(ctx, inputs[addend_idx], NarrowValueMode::None);
+                    // If possible combine mul + add into madd.
+                    if let Some((insn, addend_idx)) = mul_insn {
+                        let alu_op = choose_32_64(ty, ALUOp3::MAdd32, ALUOp3::MAdd64);
+                        let rn_input = InsnInput { insn, input: 0 };
+                        let rm_input = InsnInput { insn, input: 1 };
+
-
-                    ctx.emit(Inst::AluRRRR {
-                        alu_op,
-                        rd,
-                        rn,
-                        rm,
-                        ra,
-                    });
-                } else {
-                    let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-                    let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
-                        ctx,
-                        inputs[1],
-                        ty_bits(ty),
-                        NarrowValueMode::None,
-                    );
-                    let alu_op = if !negated {
-                        choose_32_64(ty, ALUOp::Add32, ALUOp::Add64)
+                        let rn = put_input_in_reg(ctx, rn_input, NarrowValueMode::None);
+                        let rm = put_input_in_reg(ctx, rm_input, NarrowValueMode::None);
+                        let ra = put_input_in_reg(ctx, inputs[addend_idx], NarrowValueMode::None);
+
+                        ctx.emit(Inst::AluRRRR {
+                            alu_op,
+                            rd,
+                            rn,
+                            rm,
+                            ra,
+                        });
                     } else {
-                        choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64)
-                    };
-                    ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
+                        let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+                        let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
+                            ctx,
+                            inputs[1],
+                            ty_bits(ty),
+                            NarrowValueMode::None,
+                        );
+                        let alu_op = if !negated {
+                            choose_32_64(ty, ALUOp::Add32, ALUOp::Add64)
+                        } else {
+                            choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64)
+                        };
+                        ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
+                    }
                 }
-            } else {
-                let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-                let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-                ctx.emit(Inst::VecRRR {
-                    rd,
-                    rn,
-                    rm,
-                    alu_op: VecALUOp::Add,
-                    size: VectorSize::from_ty(ty),
-                });
             }
         }
         Opcode::Isub => {
diff --git a/cranelift/filetests/filetests/isa/aarch64/arithmetic-run.clif b/cranelift/filetests/filetests/isa/aarch64/arithmetic-run.clif
new file mode 100644
index 0000000000..01bfdb115a
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/aarch64/arithmetic-run.clif
@@ -0,0 +1,39 @@
+test run
+target aarch64
+
+; i128 tests
+; TODO: It would be nice if we had native support for i128 immediates in CLIF's parser
+function %i128_const_0() -> i64, i64 {
+block0:
+    v1 = iconst.i128 0
+    v2, v3 = isplit v1
+    return v2, v3
+}
+; run: %i128_const_0() == [0, 0]
+
+; TODO: Blocked by https://github.com/bytecodealliance/wasmtime/issues/2906
+;function %i128_const_neg_1() -> i64, i64 {
+;block0:
+;    v1 = iconst.i128 -1
+;    v2, v3 = isplit v1
+;    return v2, v3
+;}
+; r-un: %i128_const_neg_1() == [0xffffffff_ffffffff, 0xffffffff_ffffffff]
+
+
+function %add_i128(i64, i64, i64, i64) -> i64, i64 {
+block0(v0: i64,v1: i64,v2: i64,v3: i64):
+    v4 = iconcat v0, v1
+    v5 = iconcat v2, v3
+
+    v6 = iadd v4, v5
+
+    v7, v8 = isplit v6
+    return v7, v8
+}
+; run: %add_i128(0, 0, 0, 0) == [0, 0]
+; run: %add_i128(0, -1, -1, 0) == [-1, -1]
+; run: %add_i128(1, 0, 0, 0) == [1, 0]
+; run: %add_i128(1, 0, 1, 0) == [2, 0]
+; run: %add_i128(1, 0, -1, -1) == [0, 0]
+; run: %add_i128(-1, 0, 1, 0) == [0, 1]
\ No newline at end of file
diff --git a/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif b/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif
index 5fc88c97c7..91d4987d89 100644
--- a/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif
@@ -425,3 +425,18 @@ block0(v0: i8x16):
 ; nextln: ushl v0.16b, v0.16b, v1.16b
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
+
+
+
+function %add_i128(i128, i128) -> i128 {
+block0(v0: i128, v1: i128):
+    v2 = iadd v0, v1
+    return v2
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: adds x0, x0, x2
+; nextln: adc x1, x1, x3
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
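
The adds/adc pair checked above is the whole of the 128-bit addition: adds produces the low 64 bits and records the unsigned carry-out in the flags, and adc folds that carry into the sum of the high 64 bits. A minimal Rust sketch of the same semantics on explicit (low, high) halves (illustrative only; the function and names below are not part of the patch):

// Sketch of what "adds x0, x0, x2" / "adc x1, x1, x3" compute, using
// (low, high) u64 pairs in place of the register pairs chosen by regalloc.
fn add_i128_by_halves(lhs: (u64, u64), rhs: (u64, u64)) -> (u64, u64) {
    // adds: add the low halves and capture the carry-out (the C flag).
    let (lo, carry) = lhs.0.overflowing_add(rhs.0);
    // adc: add the high halves plus the carry from the low half.
    let hi = lhs.1.wrapping_add(rhs.1).wrapping_add(carry as u64);
    (lo, hi)
}

For example, add_i128_by_halves((u64::MAX, 0), (1, 0)) yields (0, 1), matching the %add_i128(-1, 0, 1, 0) == [0, 1] case in the runtest file above.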