Adds x64 packed negation for the new backend

2020-09-01 22:53:44 -07:00
parent ba9908dd0f
commit a64af55cda
5 changed files with 44 additions and 1 deletions
--- a/cranelift/codegen/src/isa/x64/inst/args.rs
+++ b/cranelift/codegen/src/isa/x64/inst/args.rs
@@ -412,6 +412,7 @@ pub enum SseOpcode {
    Psubd,
    Psubq,
    Psubw,
    Pxor,
    Rcpss,
    Roundss,
    Roundsd,
@@ -512,6 +513,7 @@ impl SseOpcode {
            | SseOpcode::Psubd
            | SseOpcode::Psubq
            | SseOpcode::Psubw
            | SseOpcode::Pxor
            | SseOpcode::Sqrtpd
            | SseOpcode::Sqrtsd
            | SseOpcode::Subpd
@@ -607,6 +609,7 @@ impl fmt::Debug for SseOpcode {
            SseOpcode::Psubd => "psubd",
            SseOpcode::Psubq => "psubq",
            SseOpcode::Psubw => "psubw",
            SseOpcode::Pxor => "pxor",
            SseOpcode::Rcpss => "rcpss",
            SseOpcode::Roundss => "roundss",
            SseOpcode::Roundsd => "roundsd",
--- a/cranelift/codegen/src/isa/x64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -1703,6 +1703,7 @@ pub(crate) fn emit(
                SseOpcode::Psubd => (LegacyPrefixes::_66, 0x0FFA, 2),
                SseOpcode::Psubq => (LegacyPrefixes::_66, 0x0FFB, 2),
                SseOpcode::Psubw => (LegacyPrefixes::_66, 0x0FF9, 2),
                SseOpcode::Pxor => (LegacyPrefixes::_66, 0x0FEF, 2),
                SseOpcode::Subps => (LegacyPrefixes::None, 0x0F5C, 2),
                SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2),
                SseOpcode::Subss => (LegacyPrefixes::_F3, 0x0F5C, 2),
--- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
@@ -3083,6 +3083,12 @@ fn test_x64_emit() {
        "pmuludq %xmm8, %xmm9",
    ));
    insns.push((
        Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(xmm11), w_xmm2),
        "66410FEFD3",
        "pxor    %xmm11, %xmm2",
    ));
    // XMM_Mov_R_M: float stores
    insns.push((
        Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12), None),
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -1101,7 +1101,9 @@ impl Inst {
            Self::XMM_RM_R { op, src, dst, .. } => {
                src.to_reg() == Some(dst.to_reg())
-                    && (*op == SseOpcode::Xorps || *op == SseOpcode::Xorpd)
+                    && (*op == SseOpcode::Xorps
                        || *op == SseOpcode::Xorpd
                        || *op == SseOpcode::Pxor)
            }
            Self::XmmRmRImm { op, src, dst, imm } => {
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -653,6 +653,37 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            ctx.emit(Inst::shift_r(is_64, shift_kind, count, dst));
        }
        Opcode::Ineg => {
            // Zeroes out a temporary register and then does a packed subtraction
            // of the input from that register.
            let src = input_to_reg_mem(ctx, inputs[0]);
            let dst = output_to_reg(ctx, outputs[0]);
            let tmp = ctx.alloc_tmp(RegClass::V128, types::I32X4);
            let ty = ty.unwrap();
            let subtract_opcode = match ty {
                types::I8X16 => SseOpcode::Psubb,
                types::I16X8 => SseOpcode::Psubw,
                types::I32X4 => SseOpcode::Psubd,
                types::I64X2 => SseOpcode::Psubq,
                _ => panic!("Unsupported type for Ineg instruction, found {}", ty),
            };
            // Note we must zero out a tmp instead of using the destination register since
            // the destination could be an alias for the source input register.
            ctx.emit(Inst::xmm_rm_r(
                SseOpcode::Pxor,
                RegMem::reg(tmp.to_reg()),
                tmp,
            ));
            ctx.emit(Inst::xmm_rm_r(subtract_opcode, src, tmp));
            ctx.emit(Inst::xmm_unary_rm_r(
                SseOpcode::Movapd,
                RegMem::reg(tmp.to_reg()),
                dst,
            ));
        }
        Opcode::Clz => {
            // TODO when the x86 flags have use_lzcnt, we can use LZCNT.