CL/aarch64 back end: implement the wasm SIMD bitmask instructions

The `bitmask.{8x16,16x8,32x4}` instructions do not map neatly to any single
AArch64 SIMD instruction, and instead need a sequence of around ten
instructions.  Because of this, this patch is somewhat longer and more complex
than it would be for (eg) x64.
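
For orientation (this sketch is not part of the patch): each `bitmask` variant
takes the most significant (sign) bit of each lane and packs those bits into a
scalar mask, with the lowest lane in bit 0.  A minimal scalar model of the
8x16 case, in plain Rust:

    /// Scalar model of wasm `i8x16.bitmask` (CLIF `vhigh_bits` on an 8x16
    /// vector): bit `i` of the result is the sign bit of lane `i`.
    fn bitmask_8x16(lanes: [i8; 16]) -> u32 {
        let mut mask = 0u32;
        for (i, &lane) in lanes.iter().enumerate() {
            if lane < 0 {
                mask |= 1 << i;
            }
        }
        mask
    }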

Main changes are:

* the relevant testsuite test (`simd_boolean.wast`) has been enabled on aarch64.

* at the CLIF level, add a new instruction `vhigh_bits`, into which these wasm
  instructions are to be translated.

* in the wasm->CLIF translation (code_translator.rs), translate into
  `vhigh_bits`.  This is straightforward.

* in the CLIF->AArch64 translation (lower_inst.rs), translate `vhigh_bits`
  into equivalent sequences of AArch64 instructions.  There is a different
  sequence for each of the `{8x16, 16x8, 32x4}` variants; a rough sketch of
  the 8x16 case follows this list.
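
As a rough illustration only (a sketch, not the actual code in lower_inst.rs:
`ctx.emit` is assumed to be the usual lowering-context emit helper,
`rn`/`tmp`/`tmp2` are placeholders for the real register choices, and the
steps that reuse pre-existing instructions are left as comments), the 8x16
lowering boils down to a sequence along these lines:

    // Sketch of a `vhigh_bits` 8x16 lowering using the Inst variants added
    // by this patch; constant materialisation, the AND, and the final
    // vector-to-GPR move use instructions that already existed and are only
    // described in comments here.

    // sshr tmp.16b, rn.16b, #7   -- each lane becomes all-ones or all-zeroes
    ctx.emit(Inst::VecShiftImm {
        op: VecShiftImmOp::Sshr,
        rd: tmp,
        rn,
        size: VectorSize::Size8x16,
        imm: 7,
    });
    // (materialise the per-lane constants 0x01, 0x02, ... 0x80 and AND them
    //  into tmp, so lane i holds either 0 or 1 << (i % 8) -- elided)
    // ext tmp2.16b, tmp.16b, tmp.16b, #8   -- rotate the high half down
    ctx.emit(Inst::VecExtract {
        rd: tmp2,
        rn: tmp.to_reg(),
        rm: tmp.to_reg(),
        imm4: 8,
    });
    // zip1 tmp.16b, tmp.16b, tmp2.16b   -- interleave the low and high halves
    ctx.emit(Inst::VecRRR {
        alu_op: VecALUOp::Zip1,
        rd: tmp,
        rn: tmp.to_reg(),
        rm: tmp2.to_reg(),
        size: VectorSize::Size8x16,
    });
    // addv h_tmp, tmp.8h   -- sum across 16-bit lanes: the low byte collects
    //                         mask bits 0..7, the high byte bits 8..15
    ctx.emit(Inst::VecLanes {
        op: VecLanesOp::Addv,
        rd: tmp,
        rn: tmp.to_reg(),
        size: VectorSize::Size16x8,
    });
    // (umov 16-bit lane 0 of tmp into the destination GPR -- elided)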

All other changes are AArch64-specific, and add instruction definitions needed
by the previous step:

* Add two new families of AArch64 instructions: `VecShiftImm` (vector shift by
  immediate) and `VecExtract` (effectively a double-length vector shift).

* To the existing AArch64 family `VecRRR`, add a `zip1` variant.  To the
  `VecLanesOp` family, add an `addv` variant.

* Add supporting code for the above changes to AArch64 instructions:
  - getting the register uses (`aarch64_get_regs`)
  - mapping the registers (`aarch64_map_regs`)
  - printing instructions
  - emitting instructions (`impl MachInstEmit for Inst`).  The handling of
    `VecShiftImm` is a bit complex; see the immediate-encoding sketch after
    this list.
  - emission tests for new instructions and variants.
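
On the `VecShiftImm` emission being a bit complex: the AArch64 vector
shift-by-immediate encodings fold the lane size and the shift amount into a
single 7-bit immh:immb field, and right shifts are encoded "backwards", which
is why a zero right-shift cannot be represented.  As a standalone illustration
of that arithmetic (not the emitter's actual code), using the `VecShiftImmOp`
enum added by this patch:

    /// Illustrative only: how the 7-bit immh:immb field of an AArch64
    /// shift-by-immediate instruction combines the lane size and the shift
    /// amount.  `lane_bits` is 8, 16, 32 or 64.
    fn immh_immb(op: VecShiftImmOp, lane_bits: u8, imm: u8) -> u8 {
        match op {
            // Left shift: valid imm is 0 ..= lane_bits - 1.
            VecShiftImmOp::Shl => {
                assert!(imm < lane_bits);
                lane_bits + imm
            }
            // Right shifts: valid imm is 1 ..= lane_bits, encoded downwards
            // from 2 * lane_bits, so imm == 0 has no encoding.
            VecShiftImmOp::Ushr | VecShiftImmOp::Sshr => {
                assert!(1 <= imm && imm <= lane_bits);
                2 * lane_bits - imm
            }
        }
    }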

Author: Julian Seward, 2020-10-22 16:02:46 +02:00 (committed by julian-seward1)
parent b10e027fef, commit 2702942050
8 changed files with 570 additions and 5 deletions

@@ -287,6 +287,8 @@ pub enum VecALUOp {
Addp,
/// Unsigned multiply add long
Umlal,
/// Zip vectors (primary) [meaning, high halves]
Zip1,
}
/// A Vector miscellaneous operation with two registers.
@@ -332,10 +334,23 @@ pub enum VecMiscNarrowOp {
/// An operation across the lanes of vectors.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecLanesOp {
/// Integer addition across a vector
Addv,
/// Unsigned minimum across a vector
Uminv,
}
/// A shift-by-immediate operation on each lane of a vector.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecShiftImmOp {
// Unsigned shift left
Shl,
// Unsigned shift right
Ushr,
// Signed shift right
Sshr,
}
/// An operation on the bits of a register. This can be paired with several instruction formats
/// below (see `Inst`) in any combination.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
@@ -949,6 +964,28 @@ pub enum Inst {
size: VectorSize,
},
/// Vector shift by immediate: Shift Left (immediate), Unsigned Shift Right (immediate),
/// Signed Shift Right (immediate). These are somewhat unusual in that, for right shifts,
/// the allowed range of `imm` values is 1 to lane-size-in-bits, inclusive. A zero
/// right-shift cannot be encoded. Left shifts are "normal", though, having valid `imm`
/// values from 0 to lane-size-in-bits - 1 inclusive.
VecShiftImm {
op: VecShiftImmOp,
rd: Writable<Reg>,
rn: Reg,
size: VectorSize,
imm: u8,
},
/// Vector extract - create a new vector, being the concatenation of the lowest `imm4` bytes
/// of `rm` followed by the uppermost `16 - imm4` bytes of `rn`.
VecExtract {
rd: Writable<Reg>,
rn: Reg,
rm: Reg,
imm4: u8,
},
/// Table vector lookup - single register table. The table consists of 8-bit elements and is
/// stored in `rn`, while `rm` contains 8-bit element indices. `is_extension` specifies whether
/// to emit a TBX or a TBL instruction, i.e. whether to leave the elements in the destination
@@ -1577,6 +1614,15 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(rd);
collector.add_use(rn);
}
&Inst::VecShiftImm { rd, rn, .. } => {
collector.add_def(rd);
collector.add_use(rn);
}
&Inst::VecExtract { rd, rn, rm, .. } => {
collector.add_def(rd);
collector.add_use(rn);
collector.add_use(rm);
}
&Inst::VecTbl {
rd,
rn,
@@ -2157,6 +2203,24 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_def(mapper, rd);
map_use(mapper, rn);
}
&mut Inst::VecShiftImm {
ref mut rd,
ref mut rn,
..
} => {
map_def(mapper, rd);
map_use(mapper, rn);
}
&mut Inst::VecExtract {
ref mut rd,
ref mut rn,
ref mut rm,
..
} => {
map_def(mapper, rd);
map_use(mapper, rn);
map_use(mapper, rm);
}
&mut Inst::VecTbl {
ref mut rd,
ref mut rn,
@@ -3330,6 +3394,7 @@ impl Inst {
VecALUOp::Fmul => ("fmul", size),
VecALUOp::Addp => ("addp", size),
VecALUOp::Umlal => ("umlal", size),
VecALUOp::Zip1 => ("zip1", size),
};
let rd_size = if alu_op == VecALUOp::Umlal {
size.widen()
@@ -3381,11 +3446,28 @@ impl Inst {
&Inst::VecLanes { op, rd, rn, size } => {
let op = match op {
VecLanesOp::Uminv => "uminv",
VecLanesOp::Addv => "addv",
};
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size());
let rn = show_vreg_vector(rn, mb_rru, size);
format!("{} {}, {}", op, rd, rn)
}
&Inst::VecShiftImm { op, rd, rn, size, imm } => {
let op = match op {
VecShiftImmOp::Shl => "shl",
VecShiftImmOp::Ushr => "ushr",
VecShiftImmOp::Sshr => "sshr",
};
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
let rn = show_vreg_vector(rn, mb_rru, size);
format!("{} {}, {}, #{}", op, rd, rn, imm)
}
&Inst::VecExtract { rd, rn, rm, imm4 } => {
let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
format!("ext {}, {}, {}, #{}", rd, rn, rm, imm4)
}
&Inst::VecTbl {
rd,
rn,