CL/aarch64 back end: implement the wasm SIMD bitmask instructions

The `bitmask.{8x16,16x8,32x4}` instructions do not map neatly onto any single
AArch64 SIMD instruction; each instead needs a sequence of around ten
instructions.  Because of this, this patch is somewhat longer and more complex
than it would be for (e.g.) x64.
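
Even though the lowering is long, the semantics being implemented are simple:
bit `i` of the result is the top bit of lane `i`.  A minimal scalar sketch of
this (illustrative only, not code from the patch):

```rust
// Scalar model of `bitmask.8x16` (and hence of `vhigh_bits`, below):
// bit i of the result is the sign (top) bit of lane i.
fn bitmask_8x16(lanes: [i8; 16]) -> u32 {
    let mut mask = 0u32;
    for (i, lane) in lanes.iter().enumerate() {
        if *lane < 0 {
            mask |= 1 << i;
        }
    }
    mask
}
```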

Main changes are:

* the relevant testsuite test (`simd_boolean.wast`) has been enabled on aarch64.

* at the CLIF level, add a new instruction `vhigh_bits`, into which these wasm
  instructions are to be translated.

* in the wasm->CLIF translation (code_translator.rs), translate these into
  `vhigh_bits`.  This is straightforward; a sketch of the translation is shown
  after this list.

* in the CLIF->AArch64 translation (lower_inst.rs), translate `vhigh_bits`
  into equivalent sequences of AArch64 instructions.  There is a different
  sequence for each of the `{8x16, 16x8, 32x4}` variants.
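
As a sketch of the translation step mentioned above (the helper names and the
exact result type are assumptions based on the usual shape of
code_translator.rs, not verbatim from the patch):

```rust
// Hypothetical wasm->CLIF match arm for one of the three variants:
// pop the v128 operand as an i8x16 and emit `vhigh_bits`, whose
// scalar result becomes the wasm instruction's i32 result.
Operator::I8x16Bitmask => {
    let arg = pop1_with_bitcast(state, I8X16, builder);
    state.push1(builder.ins().vhigh_bits(I32, arg));
}
```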

All other changes are AArch64-specific, and add instruction definitions needed
by the previous step:

* Add two new families of AArch64 instructions: `VecShiftImm` (vector shift by
  immediate) and `VecExtract` (effectively a double-length vector shift).

* To the existing AArch64 family `VecRRR`, add a `zip1` variant.  To the
  `VecLanesOp` family add an `addv` variant.

* Add supporting code for the above changes to AArch64 instructions:
  - getting the register uses (`aarch64_get_regs`)
  - mapping the registers (`aarch64_map_regs`)
  - printing instructions
  - emitting instructions (`impl MachInstEmit for Inst`).  The handling of
    `VecShiftImm` is a bit complex; see the inline note in the diff below.
  - emission tests for new instructions and variants (an illustrative entry is
    sketched after this list).
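
As an illustration of the last point, an emission test for the new
shift-by-immediate might look like the entry below.  The register choices are
arbitrary, and the expected bytes (little-endian, following the style of the
existing emission tests) are derived from the `VecShiftImm` template
arithmetic shown in the diff: immh:immb = 0b0001_000 | 7, giving the word
0x4F0F54BB.

```rust
// Illustrative test vector (not verbatim from the patch):
// shl v27.16b, v5.16b, #7
insns.push((
    Inst::VecShiftImm {
        op: VecShiftImmOp::Shl,
        rd: writable_vreg(27),
        rn: vreg(5),
        size: VectorSize::Size8x16,
        imm: 7,
    },
    "BB540F4F",
    "shl v27.16b, v5.16b, #7",
));
```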
Author:    Julian Seward
Date:      2020-10-22 16:02:46 +02:00
Committer: julian-seward1
Parent:    b10e027fef
Commit:    2702942050

8 changed files with 570 additions and 5 deletions

@@ -1441,9 +1441,67 @@ impl MachInstEmit for Inst {
                };
                let (u, opcode) = match op {
                    VecLanesOp::Uminv => (0b1, 0b11010),
                    VecLanesOp::Addv => (0b0, 0b11011),
                };
                sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
            }
            &Inst::VecShiftImm {
                op,
                rd,
                rn,
                size,
                imm,
            } => {
                let (is_shr, template) = match op {
                    VecShiftImmOp::Ushr => (true, 0b_011_011110_0000_000_000001_00000_00000_u32),
                    VecShiftImmOp::Sshr => (true, 0b_010_011110_0000_000_000001_00000_00000_u32),
                    VecShiftImmOp::Shl => (false, 0b_010_011110_0000_000_010101_00000_00000_u32),
                };
                let imm = imm as u32;
                // Deal with the somewhat strange encoding scheme for, and limits on,
                // the shift amount.
                let immh_immb = match (size, is_shr) {
                    (VectorSize::Size64x2, true) if imm >= 1 && imm <= 64 => {
                        0b_1000_000_u32 | (64 - imm)
                    }
                    (VectorSize::Size32x4, true) if imm >= 1 && imm <= 32 => {
                        0b_0100_000_u32 | (32 - imm)
                    }
                    (VectorSize::Size16x8, true) if imm >= 1 && imm <= 16 => {
                        0b_0010_000_u32 | (16 - imm)
                    }
                    (VectorSize::Size8x16, true) if imm >= 1 && imm <= 8 => {
                        0b_0001_000_u32 | (8 - imm)
                    }
                    (VectorSize::Size64x2, false) if imm <= 63 => 0b_1000_000_u32 | imm,
                    (VectorSize::Size32x4, false) if imm <= 31 => 0b_0100_000_u32 | imm,
                    (VectorSize::Size16x8, false) if imm <= 15 => 0b_0010_000_u32 | imm,
                    (VectorSize::Size8x16, false) if imm <= 7 => 0b_0001_000_u32 | imm,
                    _ => panic!(
                        "aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {:?}, {:?}, {:?}",
                        op, size, imm
                    ),
                };
                let rn_enc = machreg_to_vec(rn);
                let rd_enc = machreg_to_vec(rd.to_reg());
                sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
            }
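            // Illustrative note, not part of the original patch: for right
            // shifts, immh:immb works out to (2 * lane_size) - shift, and for
            // left shifts to lane_size + shift.  E.g. `sshr v1.4s, v2.4s, #3`
            // gives immh:immb = 0b_0100_000 | (32 - 3) = 64 - 3 = 61.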
            &Inst::VecExtract { rd, rn, rm, imm4 } => {
                if imm4 < 16 {
                    let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32;
                    let rm_enc = machreg_to_vec(rm);
                    let rn_enc = machreg_to_vec(rn);
                    let rd_enc = machreg_to_vec(rd.to_reg());
                    sink.put4(
                        template | (rm_enc << 16) | ((imm4 as u32) << 11) | (rn_enc << 5) | rd_enc,
                    );
                } else {
                    panic!(
                        "aarch64: Inst::VecExtract: emit: invalid extract index {}",
                        imm4
                    );
                }
            }
            &Inst::VecTbl {
                rd,
                rn,
@@ -1827,6 +1885,7 @@ impl MachInstEmit for Inst {
                        debug_assert!(!size.is_128bits());
                        (0b001_01110_00_1 | enc_size << 1, 0b100000)
                    }
                    VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
                };
                let top11 = if is_float {
                    top11 | enc_float_size << 1