Adds x64 packed negation for the new backend
This commit is contained in:
@@ -412,6 +412,7 @@ pub enum SseOpcode {
|
||||
Psubd,
|
||||
Psubq,
|
||||
Psubw,
|
||||
Pxor,
|
||||
Rcpss,
|
||||
Roundss,
|
||||
Roundsd,
|
||||
@@ -512,6 +513,7 @@ impl SseOpcode {
|
||||
| SseOpcode::Psubd
|
||||
| SseOpcode::Psubq
|
||||
| SseOpcode::Psubw
|
||||
| SseOpcode::Pxor
|
||||
| SseOpcode::Sqrtpd
|
||||
| SseOpcode::Sqrtsd
|
||||
| SseOpcode::Subpd
|
||||
@@ -607,6 +609,7 @@ impl fmt::Debug for SseOpcode {
|
||||
SseOpcode::Psubd => "psubd",
|
||||
SseOpcode::Psubq => "psubq",
|
||||
SseOpcode::Psubw => "psubw",
|
||||
SseOpcode::Pxor => "pxor",
|
||||
SseOpcode::Rcpss => "rcpss",
|
||||
SseOpcode::Roundss => "roundss",
|
||||
SseOpcode::Roundsd => "roundsd",
|
||||
|
||||
@@ -1703,6 +1703,7 @@ pub(crate) fn emit(
|
||||
SseOpcode::Psubd => (LegacyPrefixes::_66, 0x0FFA, 2),
|
||||
SseOpcode::Psubq => (LegacyPrefixes::_66, 0x0FFB, 2),
|
||||
SseOpcode::Psubw => (LegacyPrefixes::_66, 0x0FF9, 2),
|
||||
SseOpcode::Pxor => (LegacyPrefixes::_66, 0x0FEF, 2),
|
||||
SseOpcode::Subps => (LegacyPrefixes::None, 0x0F5C, 2),
|
||||
SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2),
|
||||
SseOpcode::Subss => (LegacyPrefixes::_F3, 0x0F5C, 2),
|
||||
|
||||
@@ -3083,6 +3083,12 @@ fn test_x64_emit() {
|
||||
"pmuludq %xmm8, %xmm9",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(xmm11), w_xmm2),
|
||||
"66410FEFD3",
|
||||
"pxor %xmm11, %xmm2",
|
||||
));
|
||||
|
||||
// XMM_Mov_R_M: float stores
|
||||
insns.push((
|
||||
Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12), None),
|
||||
|
||||
@@ -1101,7 +1101,9 @@ impl Inst {
|
||||
|
||||
Self::XMM_RM_R { op, src, dst, .. } => {
|
||||
src.to_reg() == Some(dst.to_reg())
|
||||
&& (*op == SseOpcode::Xorps || *op == SseOpcode::Xorpd)
|
||||
&& (*op == SseOpcode::Xorps
|
||||
|| *op == SseOpcode::Xorpd
|
||||
|| *op == SseOpcode::Pxor)
|
||||
}
|
||||
|
||||
Self::XmmRmRImm { op, src, dst, imm } => {
|
||||
|
||||
@@ -653,6 +653,37 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
ctx.emit(Inst::shift_r(is_64, shift_kind, count, dst));
|
||||
}
|
||||
|
||||
Opcode::Ineg => {
|
||||
// Zeroes out a register and then does a packed subtraction
|
||||
// of the input from the register.
|
||||
let src = input_to_reg_mem(ctx, inputs[0]);
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
let tmp = ctx.alloc_tmp(RegClass::V128, types::I32X4);
|
||||
let ty = ty.unwrap();
|
||||
|
||||
let subtract_opcode = match ty {
|
||||
types::I8X16 => SseOpcode::Psubb,
|
||||
types::I16X8 => SseOpcode::Psubw,
|
||||
types::I32X4 => SseOpcode::Psubd,
|
||||
types::I64X2 => SseOpcode::Psubq,
|
||||
_ => panic!("Unsupported type for Ineg instruction, found {}", ty),
|
||||
};
|
||||
|
||||
// Note we must zero out a tmp instead of using the destination register since
|
||||
// the destination could be an alias for the source input register
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pxor,
|
||||
RegMem::reg(tmp.to_reg()),
|
||||
tmp,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(subtract_opcode, src, tmp));
|
||||
ctx.emit(Inst::xmm_unary_rm_r(
|
||||
SseOpcode::Movapd,
|
||||
RegMem::reg(tmp.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
|
||||
Opcode::Clz => {
|
||||
// TODO when the x86 flags have use_lzcnt, we can use LZCNT.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user