Support wasm select instruction with V128-typed operands on AArch64.

* this requires upgrading to wasmparser 0.67.0. * There are no CLIF side changes because the CLIF `select` instruction is polymorphic enough. * on aarch64, there is unfortunately no conditional-move (csel) instruction on vectors. This patch adds a synthetic instruction `VecCSel` which *does* behave like that. At emit time, this is emitted as an if-then-else diamond (4 insns). * aarch64 implementation is otherwise straightforwards.
2020-11-11 17:09:57 +01:00
parent 9ced345aed
commit 41e87a2f99
15 changed files with 122 additions and 18 deletions
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -1909,6 +1909,46 @@ impl MachInstEmit for Inst {

                sink.put4(enc_ldst_vec(q, size, rn, rd));
            }
+            &Inst::VecCSel { rd, rn, rm, cond } => {
+                /* Emit this:
+                      b.cond  else
+                      mov     rd, rm
+                      b       out
+                     else:
+                      mov     rd, rn
+                     out:
+
+                   Note, we could do better in the cases where rd == rn or rd == rm.
+                */
+                let else_label = sink.get_label();
+                let out_label = sink.get_label();
+
+                // b.cond else
+                let br_else_offset = sink.cur_offset();
+                sink.put4(enc_conditional_br(
+                    BranchTarget::Label(else_label),
+                    CondBrKind::Cond(cond),
+                ));
+                sink.use_label_at_offset(br_else_offset, else_label, LabelUse::Branch19);
+
+                // mov rd, rm
+                sink.put4(enc_vecmov(/* 16b = */ true, rd, rm));
+
+                // b out
+                let b_out_offset = sink.cur_offset();
+                sink.use_label_at_offset(b_out_offset, out_label, LabelUse::Branch26);
+                sink.add_uncond_branch(b_out_offset, b_out_offset + 4, out_label);
+                sink.put4(enc_jump26(0b000101, 0 /* will be fixed up later */));
+
+                // else:
+                sink.bind_label(else_label);
+
+                // mov rd, rn
+                sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
+
+                // out:
+                sink.bind_label(out_label);
+            }
            &Inst::MovToNZCV { rn } => {
                sink.put4(0xd51b4200 | machreg_to_gpr(rn));
            }
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -3879,6 +3879,17 @@ fn test_aarch64_binemit() {
        "ld1r { v0.8b }, [x25]",
    ));

+    insns.push((
+        Inst::VecCSel {
+            rd: writable_vreg(5),
+            rn: vreg(10),
+            rm: vreg(19),
+            cond: Cond::Gt,
+        },
+        "6C000054651EB34E02000014451DAA4E",
+        "vcsel v5.16b, v10.16b, v19.16b, gt (if-then-else diamond)",
+    ));
+
    insns.push((
        Inst::Extend {
            rd: writable_xreg(1),
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -1016,6 +1016,15 @@ pub enum Inst {
        size: VectorSize,
    },

+    /// Vector conditional select, 128 bit.  A synthetic instruction, which generates a 4-insn
+    /// control-flow diamond.
+    VecCSel {
+        rd: Writable<Reg>,
+        rn: Reg,
+        rm: Reg,
+        cond: Cond,
+    },
+
    /// Move to the NZCV flags (actually a `MSR NZCV, Xn` insn).
    MovToNZCV {
        rn: Reg,
@@ -1732,6 +1741,11 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
            collector.add_def(rd);
            collector.add_use(rn);
        }
+        &Inst::VecCSel { rd, rn, rm, .. } => {
+            collector.add_def(rd);
+            collector.add_use(rn);
+            collector.add_use(rm);
+        }
        &Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => {
            collector.add_use(rn);
            collector.add_use(rm);
@@ -2343,6 +2357,16 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
+        &mut Inst::VecCSel {
+            ref mut rd,
+            ref mut rn,
+            ref mut rm,
+            ..
+        } => {
+            map_def(mapper, rd);
+            map_use(mapper, rn);
+            map_use(mapper, rm);
+        }
        &mut Inst::FpuCmp32 {
            ref mut rn,
            ref mut rm,
@@ -3591,6 +3615,13 @@ impl Inst {

                format!("ld1r {{ {} }}, [{}]", rd, rn)
            }
+            &Inst::VecCSel { rd, rn, rm, cond } => {
+                let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
+                let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
+                let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
+                let cond = cond.show_rru(mb_rru);
+                format!("vcsel {}, {}, {}, {} (if-then-else diamond)", rd, rn, rm, cond)
+            }
            &Inst::MovToNZCV { rn } => {
                let rn = rn.show_rru(mb_rru);
                format!("msr nzcv, {}", rn)
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -1412,6 +1412,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                ctx.emit(Inst::FpuCSel32 { cond, rd, rn, rm });
            } else if is_float && bits == 64 {
                ctx.emit(Inst::FpuCSel64 { cond, rd, rn, rm });
+            } else if is_float && bits == 128 {
+                ctx.emit(Inst::VecCSel { cond, rd, rn, rm });
            } else {
                ctx.emit(Inst::CSel { cond, rd, rn, rm });
            }