diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 51e3f03c89..fcb9955cb8 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -412,6 +412,7 @@ pub enum SseOpcode { Psubd, Psubq, Psubw, + Pxor, Rcpss, Roundss, Roundsd, @@ -512,6 +513,7 @@ impl SseOpcode { | SseOpcode::Psubd | SseOpcode::Psubq | SseOpcode::Psubw + | SseOpcode::Pxor | SseOpcode::Sqrtpd | SseOpcode::Sqrtsd | SseOpcode::Subpd @@ -607,6 +609,7 @@ impl fmt::Debug for SseOpcode { SseOpcode::Psubd => "psubd", SseOpcode::Psubq => "psubq", SseOpcode::Psubw => "psubw", + SseOpcode::Pxor => "pxor", SseOpcode::Rcpss => "rcpss", SseOpcode::Roundss => "roundss", SseOpcode::Roundsd => "roundsd", diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index b091a21eff..4d76eec23f 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1703,6 +1703,7 @@ pub(crate) fn emit( SseOpcode::Psubd => (LegacyPrefixes::_66, 0x0FFA, 2), SseOpcode::Psubq => (LegacyPrefixes::_66, 0x0FFB, 2), SseOpcode::Psubw => (LegacyPrefixes::_66, 0x0FF9, 2), + SseOpcode::Pxor => (LegacyPrefixes::_66, 0x0FEF, 2), SseOpcode::Subps => (LegacyPrefixes::None, 0x0F5C, 2), SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2), SseOpcode::Subss => (LegacyPrefixes::_F3, 0x0F5C, 2), diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index cb1a6b855a..12c2ecf707 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -3083,6 +3083,12 @@ fn test_x64_emit() { "pmuludq %xmm8, %xmm9", )); + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(xmm11), w_xmm2), + "66410FEFD3", + "pxor %xmm11, %xmm2", + )); + // XMM_Mov_R_M: float stores insns.push(( Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12), None), 
diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index a39b0e6857..74355dff40 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -1101,7 +1101,9 @@ impl Inst { Self::XMM_RM_R { op, src, dst, .. } => { src.to_reg() == Some(dst.to_reg()) - && (*op == SseOpcode::Xorps || *op == SseOpcode::Xorpd) + && (*op == SseOpcode::Xorps + || *op == SseOpcode::Xorpd + || *op == SseOpcode::Pxor) } Self::XmmRmRImm { op, src, dst, imm } => { diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index f3e3903c80..41af65cfcc 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -653,6 +653,37 @@ fn lower_insn_to_regs>( ctx.emit(Inst::shift_r(is_64, shift_kind, count, dst)); } + Opcode::Ineg => { + // Zeroes out a temporary register and then does a packed subtraction + // of the input from that register. + let src = input_to_reg_mem(ctx, inputs[0]); + let dst = output_to_reg(ctx, outputs[0]); + let tmp = ctx.alloc_tmp(RegClass::V128, types::I32X4); + let ty = ty.unwrap(); + + let subtract_opcode = match ty { + types::I8X16 => SseOpcode::Psubb, + types::I16X8 => SseOpcode::Psubw, + types::I32X4 => SseOpcode::Psubd, + types::I64X2 => SseOpcode::Psubq, + _ => panic!("Unsupported type for Ineg instruction, found {}", ty), + }; + + // Note we must zero out a tmp instead of using the destination register since + // the destination could be an alias for the source input register. + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Pxor, + RegMem::reg(tmp.to_reg()), + tmp, + )); + ctx.emit(Inst::xmm_rm_r(subtract_opcode, src, tmp)); + ctx.emit(Inst::xmm_unary_rm_r( + SseOpcode::Movapd, + RegMem::reg(tmp.to_reg()), + dst, + )); + } + Opcode::Clz => { // TODO when the x86 flags have use_lzcnt, we can use LZCNT.