CL/aarch64 back end: implement the wasm SIMD bitmask instructions

The `bitmask.{8x16,16x8,32x4}` instructions do not map neatly onto any single
AArch64 SIMD instruction; each instead needs a sequence of around ten
instructions.  Because of this, this patch is somewhat longer and more complex
than it would be for (e.g.) x64.
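
Even though the lowering is long, the semantics being implemented are simple:
bit `i` of the result is the top bit of lane `i`.  A minimal scalar sketch of
this (illustrative only, not code from the patch):

```rust
// Scalar model of `bitmask.8x16` (and hence of `vhigh_bits`, below):
// bit i of the result is the sign (top) bit of lane i.
fn bitmask_8x16(lanes: [i8; 16]) -> u32 {
    let mut mask = 0u32;
    for (i, lane) in lanes.iter().enumerate() {
        if *lane < 0 {
            mask |= 1 << i;
        }
    }
    mask
}
```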

Main changes are:

* the relevant testsuite test (`simd_boolean.wast`) has been enabled on aarch64.

* at the CLIF level, add a new instruction `vhigh_bits`, into which these wasm
  instructions are to be translated.

* in the wasm->CLIF translation (code_translator.rs), translate these into
  `vhigh_bits`.  This is straightforward; a sketch of the translation is shown
  after this list.

* in the CLIF->AArch64 translation (lower_inst.rs), translate `vhigh_bits`
  into equivalent sequences of AArch64 instructions.  There is a different
  sequence for each of the `{8x16, 16x8, 32x4}` variants.
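
As a sketch of the translation step mentioned above (the helper names and the
exact result type are assumptions based on the usual shape of
code_translator.rs, not verbatim from the patch):

```rust
// Hypothetical wasm->CLIF match arm for one of the three variants:
// pop the v128 operand as an i8x16 and emit `vhigh_bits`, whose
// scalar result becomes the wasm instruction's i32 result.
Operator::I8x16Bitmask => {
    let arg = pop1_with_bitcast(state, I8X16, builder);
    state.push1(builder.ins().vhigh_bits(I32, arg));
}
```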

All other changes are AArch64-specific, and add instruction definitions needed
by the previous step:

* Add two new families of AArch64 instructions: `VecShiftImm` (vector shift by
  immediate) and `VecExtract` (effectively a double-length vector shift).

* To the existing AArch64 family `VecRRR`, add a `zip1` variant.  To the
  `VecLanesOp` family add an `addv` variant.

* Add supporting code for the above changes to AArch64 instructions:
  - getting the register uses (`aarch64_get_regs`)
  - mapping the registers (`aarch64_map_regs`)
  - printing instructions
  - emitting instructions (`impl MachInstEmit for Inst`).  The handling of
    `VecShiftImm` is a bit complex; see the inline note in the diff below.
  - emission tests for new instructions and variants (an illustrative entry is
    sketched after this list).
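
As an illustration of the last point, an emission test for the new
shift-by-immediate might look like the entry below.  The register choices are
arbitrary, and the expected bytes (little-endian, following the style of the
existing emission tests) are derived from the `VecShiftImm` template
arithmetic shown in the diff: immh:immb = 0b0001_000 | 7, giving the word
0x4F0F54BB.

```rust
// Illustrative test vector (not verbatim from the patch):
// shl v27.16b, v5.16b, #7
insns.push((
    Inst::VecShiftImm {
        op: VecShiftImmOp::Shl,
        rd: writable_vreg(27),
        rn: vreg(5),
        size: VectorSize::Size8x16,
        imm: 7,
    },
    "BB540F4F",
    "shl v27.16b, v5.16b, #7",
));
```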
Author:    Julian Seward
Date:      2020-10-22 16:02:46 +02:00
Committer: julian-seward1
Parent:    b10e027fef
Commit:    2702942050

8 changed files with 570 additions and 5 deletions

@@ -1441,9 +1441,67 @@ impl MachInstEmit for Inst {
                };
                let (u, opcode) = match op {
                    VecLanesOp::Uminv => (0b1, 0b11010),
                    VecLanesOp::Addv => (0b0, 0b11011),
                };
                sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
            }
            &Inst::VecShiftImm {
                op,
                rd,
                rn,
                size,
                imm,
            } => {
                let (is_shr, template) = match op {
                    VecShiftImmOp::Ushr => (true, 0b_011_011110_0000_000_000001_00000_00000_u32),
                    VecShiftImmOp::Sshr => (true, 0b_010_011110_0000_000_000001_00000_00000_u32),
                    VecShiftImmOp::Shl => (false, 0b_010_011110_0000_000_010101_00000_00000_u32),
                };
                let imm = imm as u32;
                // Deal with the somewhat strange encoding scheme for, and limits on,
                // the shift amount.
                let immh_immb = match (size, is_shr) {
                    (VectorSize::Size64x2, true) if imm >= 1 && imm <= 64 => {
                        0b_1000_000_u32 | (64 - imm)
                    }
                    (VectorSize::Size32x4, true) if imm >= 1 && imm <= 32 => {
                        0b_0100_000_u32 | (32 - imm)
                    }
                    (VectorSize::Size16x8, true) if imm >= 1 && imm <= 16 => {
                        0b_0010_000_u32 | (16 - imm)
                    }
                    (VectorSize::Size8x16, true) if imm >= 1 && imm <= 8 => {
                        0b_0001_000_u32 | (8 - imm)
                    }
                    (VectorSize::Size64x2, false) if imm <= 63 => 0b_1000_000_u32 | imm,
                    (VectorSize::Size32x4, false) if imm <= 31 => 0b_0100_000_u32 | imm,
                    (VectorSize::Size16x8, false) if imm <= 15 => 0b_0010_000_u32 | imm,
                    (VectorSize::Size8x16, false) if imm <= 7 => 0b_0001_000_u32 | imm,
                    _ => panic!(
                        "aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {:?}, {:?}, {:?}",
                        op, size, imm
                    ),
                };
                let rn_enc = machreg_to_vec(rn);
                let rd_enc = machreg_to_vec(rd.to_reg());
                sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
            }
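            // Illustrative note, not part of the original patch: for right
            // shifts, immh:immb works out to (2 * lane_size) - shift, and for
            // left shifts to lane_size + shift.  E.g. `sshr v1.4s, v2.4s, #3`
            // gives immh:immb = 0b_0100_000 | (32 - 3) = 64 - 3 = 61.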
            &Inst::VecExtract { rd, rn, rm, imm4 } => {
                if imm4 < 16 {
                    let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32;
                    let rm_enc = machreg_to_vec(rm);
                    let rn_enc = machreg_to_vec(rn);
                    let rd_enc = machreg_to_vec(rd.to_reg());
                    sink.put4(
                        template | (rm_enc << 16) | ((imm4 as u32) << 11) | (rn_enc << 5) | rd_enc,
                    );
                } else {
                    panic!(
                        "aarch64: Inst::VecExtract: emit: invalid extract index {}",
                        imm4
                    );
                }
            }
            &Inst::VecTbl {
                rd,
                rn,
@@ -1827,6 +1885,7 @@ impl MachInstEmit for Inst {
                        debug_assert!(!size.is_128bits());
                        (0b001_01110_00_1 | enc_size << 1, 0b100000)
                    }
                    VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
                };
                let top11 = if is_float {
                    top11 | enc_float_size << 1