[x64] Add i64x2.abs

This instruction has a single-instruction lowering in AVX512F/VL and a three-instruction lowering in AVX, but neither is currently supported in the x64 backend. To implement this, we instead subtract the vector from 0 and use a blending instruction to pick, for each lane, whichever of the original and the negated value is the absolute value.
2021-03-02 09:54:19 -08:00
parent 3c57c1b2bc
commit 508f8fa5a9
6 changed files with 36 additions and 6 deletions
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -1853,7 +1853,27 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            let src = input_to_reg_mem(ctx, inputs[0]);
            let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            let ty = ty.unwrap();
-            if ty.is_vector() {
+            if ty == types::I64X2 {
+                // This lowering could be a single instruction with AVX512F/VL's VPABSQ instruction.
+                // Instead, we use a separate register, `tmp`, to contain the results of `0 - src`
+                // and then use `BLENDVPD` to restore the original `src` lane wherever the MSB of
+                // `tmp` is 1 (i.e. `tmp` is negative, which means `src` was originally positive).
+
+                // Emit all 0s into the `tmp` register.
+                let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
+                ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), tmp));
+                // Subtract the lanes from 0 and set up `dst`.
+                ctx.emit(Inst::xmm_rm_r(SseOpcode::Psubq, src.clone(), tmp));
+                ctx.emit(Inst::gen_move(dst, tmp.to_reg(), ty));
+                // Keep the subtracted lanes in `dst` unless `tmp` has an MSB of 1, in which case
+                // the original `src` lane is chosen. BLENDVPD requires the mask to be in XMM0.
+                ctx.emit(Inst::gen_move(
+                    Writable::from_reg(regs::xmm0()),
+                    tmp.to_reg(),
+                    ty,
+                ));
+                ctx.emit(Inst::xmm_rm_r(SseOpcode::Blendvpd, src, dst));
+            } else if ty.is_vector() {
                let opcode = match ty {
                    types::I8X16 => SseOpcode::Pabsb,
                    types::I16X8 => SseOpcode::Pabsw,