Cranelift AArch64: Improve the Popcnt implementation

2021-01-11 18:23:03 +00:00
parent c7de8f5efb
commit 043a8434d2
6 changed files with 106 additions and 182 deletions
--- a/cranelift/codegen/src/isa/aarch64/inst/args.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs
@@ -601,6 +601,14 @@ impl ScalarSize {
        }
    }

+    /// Convert an integer operand size into the scalar size of the same width (32 or 64 bits).
+    pub fn from_operand_size(size: OperandSize) -> ScalarSize {
+        match size {
+            OperandSize::Size32 => ScalarSize::Size32,
+            OperandSize::Size64 => ScalarSize::Size64,
+        }
+    }
+
    /// Convert from a type into the smallest size that fits.
    pub fn from_ty(ty: Type) -> ScalarSize {
        Self::from_bits(ty_bits(ty))
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -1463,12 +1463,18 @@ impl MachInstEmit for Inst {
                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                        (0b0, 0b11000, enc_size | 0b10)
                    }
+                    VecMisc2::Cnt => {
+                        debug_assert!(size == VectorSize::Size8x8 || size == VectorSize::Size8x16);
+                        (0b0, 0b00101, enc_size)
+                    }
                };
                sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
            }
            &Inst::VecLanes { op, rd, rn, size } => {
                let (q, size) = match size {
+                    VectorSize::Size8x8 => (0b0, 0b00),
                    VectorSize::Size8x16 => (0b1, 0b00),
+                    VectorSize::Size16x4 => (0b0, 0b01),
                    VectorSize::Size16x8 => (0b1, 0b01),
                    VectorSize::Size32x4 => (0b1, 0b10),
                    _ => unreachable!(),
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -3792,6 +3792,28 @@ fn test_aarch64_binemit() {
        "frintp v12.2d, v17.2d",
    ));

+    insns.push((
+        Inst::VecMisc {
+            op: VecMisc2::Cnt,
+            rd: writable_vreg(23),
+            rn: vreg(5),
+            size: VectorSize::Size8x8,
+        },
+        "B758200E",
+        "cnt v23.8b, v5.8b",
+    ));
+
+    insns.push((
+        Inst::VecLanes {
+            op: VecLanesOp::Uminv,
+            rd: writable_vreg(0),
+            rn: vreg(31),
+            size: VectorSize::Size8x8,
+        },
+        "E0AB312E",
+        "uminv b0, v31.8b",
+    ));
+
    insns.push((
        Inst::VecLanes {
            op: VecLanesOp::Uminv,
@@ -3836,6 +3858,17 @@ fn test_aarch64_binemit() {
        "addv b2, v29.16b",
    ));

+    insns.push((
+        Inst::VecLanes {
+            op: VecLanesOp::Addv,
+            rd: writable_vreg(15),
+            rn: vreg(7),
+            size: VectorSize::Size16x4,
+        },
+        "EFB8710E",
+        "addv h15, v7.4h",
+    ));
+
    insns.push((
        Inst::VecLanes {
            op: VecLanesOp::Addv,
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -331,6 +331,8 @@ pub enum VecMisc2 {
    Frintm,
    /// Floating point round to integral, rounding towards plus infinity
    Frintp,
+    /// Population count per byte
+    Cnt,
 }

 /// A Vector narrowing operation with two registers.
@@ -3752,6 +3754,7 @@ impl Inst {
                    VecMisc2::Frintz => ("frintz", size),
                    VecMisc2::Frintm => ("frintm", size),
                    VecMisc2::Frintp => ("frintp", size),
+                    VecMisc2::Cnt => ("cnt", size),
                };

                let rd_size = if is_shll { size.widen() } else { size };