Merge pull request #2016 from jgouly/saturating-math

arm64: Implement saturating SIMD arithmetic
2020-07-14 11:24:10 -07:00
parent fad2affad0 aa84a4173c
commit 4ba3ee3368
5 changed files with 260 additions and 64 deletions
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -1338,18 +1338,22 @@ impl MachInstEmit for Inst {
                        debug_assert_eq!(I64, ty);
                        (0b010_11110_11_1, 0b000011)
                    }
+                    VecALUOp::Sqadd => (0b010_01110_00_1 | enc_size << 1, 0b000011),
                    VecALUOp::SQSubScalar => {
                        debug_assert_eq!(I64, ty);
                        (0b010_11110_11_1, 0b001011)
                    }
+                    VecALUOp::Sqsub => (0b010_01110_00_1 | enc_size << 1, 0b001011),
                    VecALUOp::UQAddScalar => {
                        debug_assert_eq!(I64, ty);
                        (0b011_11110_11_1, 0b000011)
                    }
+                    VecALUOp::Uqadd => (0b011_01110_00_1 | enc_size << 1, 0b000011),
                    VecALUOp::UQSubScalar => {
                        debug_assert_eq!(I64, ty);
                        (0b011_11110_11_1, 0b001011)
                    }
+                    VecALUOp::Uqsub => (0b011_01110_00_1 | enc_size << 1, 0b001011),
                    VecALUOp::Cmeq => (0b011_01110_00_1 | enc_size << 1, 0b100011),
                    VecALUOp::Cmge => (0b010_01110_00_1 | enc_size << 1, 0b001111),
                    VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size << 1, 0b001101),
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -2049,6 +2049,198 @@ fn test_aarch64_binemit() {
        "sqsub d21, d22, d23",
    ));

+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Sqadd,
+            rd: writable_vreg(1),
+            rn: vreg(2),
+            rm: vreg(8),
+            ty: I8X16,
+        },
+        "410C284E",
+        "sqadd v1.16b, v2.16b, v8.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Sqadd,
+            rd: writable_vreg(1),
+            rn: vreg(12),
+            rm: vreg(28),
+            ty: I16X8,
+        },
+        "810D7C4E",
+        "sqadd v1.8h, v12.8h, v28.8h",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Sqadd,
+            rd: writable_vreg(12),
+            rn: vreg(2),
+            rm: vreg(6),
+            ty: I32X4,
+        },
+        "4C0CA64E",
+        "sqadd v12.4s, v2.4s, v6.4s",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Sqadd,
+            rd: writable_vreg(20),
+            rn: vreg(7),
+            rm: vreg(13),
+            ty: I64X2,
+        },
+        "F40CED4E",
+        "sqadd v20.2d, v7.2d, v13.2d",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Sqsub,
+            rd: writable_vreg(1),
+            rn: vreg(2),
+            rm: vreg(8),
+            ty: I8X16,
+        },
+        "412C284E",
+        "sqsub v1.16b, v2.16b, v8.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Sqsub,
+            rd: writable_vreg(1),
+            rn: vreg(12),
+            rm: vreg(28),
+            ty: I16X8,
+        },
+        "812D7C4E",
+        "sqsub v1.8h, v12.8h, v28.8h",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Sqsub,
+            rd: writable_vreg(12),
+            rn: vreg(2),
+            rm: vreg(6),
+            ty: I32X4,
+        },
+        "4C2CA64E",
+        "sqsub v12.4s, v2.4s, v6.4s",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Sqsub,
+            rd: writable_vreg(20),
+            rn: vreg(7),
+            rm: vreg(13),
+            ty: I64X2,
+        },
+        "F42CED4E",
+        "sqsub v20.2d, v7.2d, v13.2d",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Uqadd,
+            rd: writable_vreg(1),
+            rn: vreg(2),
+            rm: vreg(8),
+            ty: I8X16,
+        },
+        "410C286E",
+        "uqadd v1.16b, v2.16b, v8.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Uqadd,
+            rd: writable_vreg(1),
+            rn: vreg(12),
+            rm: vreg(28),
+            ty: I16X8,
+        },
+        "810D7C6E",
+        "uqadd v1.8h, v12.8h, v28.8h",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Uqadd,
+            rd: writable_vreg(12),
+            rn: vreg(2),
+            rm: vreg(6),
+            ty: I32X4,
+        },
+        "4C0CA66E",
+        "uqadd v12.4s, v2.4s, v6.4s",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Uqadd,
+            rd: writable_vreg(20),
+            rn: vreg(7),
+            rm: vreg(13),
+            ty: I64X2,
+        },
+        "F40CED6E",
+        "uqadd v20.2d, v7.2d, v13.2d",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Uqsub,
+            rd: writable_vreg(1),
+            rn: vreg(2),
+            rm: vreg(8),
+            ty: I8X16,
+        },
+        "412C286E",
+        "uqsub v1.16b, v2.16b, v8.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Uqsub,
+            rd: writable_vreg(1),
+            rn: vreg(12),
+            rm: vreg(28),
+            ty: I16X8,
+        },
+        "812D7C6E",
+        "uqsub v1.8h, v12.8h, v28.8h",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Uqsub,
+            rd: writable_vreg(12),
+            rn: vreg(2),
+            rm: vreg(6),
+            ty: I32X4,
+        },
+        "4C2CA66E",
+        "uqsub v12.4s, v2.4s, v6.4s",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Uqsub,
+            rd: writable_vreg(20),
+            rn: vreg(7),
+            rm: vreg(13),
+            ty: I64X2,
+        },
+        "F42CED6E",
+        "uqsub v20.2d, v7.2d, v13.2d",
+    ));
+
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Cmeq,
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -209,12 +209,16 @@ pub enum VecExtendOp {
 pub enum VecALUOp {
    /// Signed saturating add
    SQAddScalar,
+    Sqadd,
    /// Unsigned saturating add
    UQAddScalar,
+    Uqadd,
    /// Signed saturating subtract
    SQSubScalar,
+    Sqsub,
    /// Unsigned saturating subtract
    UQSubScalar,
+    Uqsub,
    /// Compare bitwise equal
    Cmeq,
    /// Compare signed greater than or equal
@@ -2755,9 +2759,13 @@ impl Inst {
            } => {
                let (op, vector, ty) = match alu_op {
                    VecALUOp::SQAddScalar => ("sqadd", false, ty),
+                    VecALUOp::Sqadd => ("sqadd", true, ty),
                    VecALUOp::UQAddScalar => ("uqadd", false, ty),
+                    VecALUOp::Uqadd => ("uqadd", true, ty),
                    VecALUOp::SQSubScalar => ("sqsub", false, ty),
+                    VecALUOp::Sqsub => ("sqsub", true, ty),
                    VecALUOp::UQSubScalar => ("uqsub", false, ty),
+                    VecALUOp::Uqsub => ("uqsub", true, ty),
                    VecALUOp::Cmeq => ("cmeq", true, ty),
                    VecALUOp::Cmge => ("cmge", true, ty),
                    VecALUOp::Cmgt => ("cmgt", true, ty),