Adds x64 packed negation for the new backend
This commit is contained in:
@@ -412,6 +412,7 @@ pub enum SseOpcode {
|
|||||||
Psubd,
|
Psubd,
|
||||||
Psubq,
|
Psubq,
|
||||||
Psubw,
|
Psubw,
|
||||||
|
Pxor,
|
||||||
Rcpss,
|
Rcpss,
|
||||||
Roundss,
|
Roundss,
|
||||||
Roundsd,
|
Roundsd,
|
||||||
@@ -512,6 +513,7 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Psubd
|
| SseOpcode::Psubd
|
||||||
| SseOpcode::Psubq
|
| SseOpcode::Psubq
|
||||||
| SseOpcode::Psubw
|
| SseOpcode::Psubw
|
||||||
|
| SseOpcode::Pxor
|
||||||
| SseOpcode::Sqrtpd
|
| SseOpcode::Sqrtpd
|
||||||
| SseOpcode::Sqrtsd
|
| SseOpcode::Sqrtsd
|
||||||
| SseOpcode::Subpd
|
| SseOpcode::Subpd
|
||||||
@@ -607,6 +609,7 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Psubd => "psubd",
|
SseOpcode::Psubd => "psubd",
|
||||||
SseOpcode::Psubq => "psubq",
|
SseOpcode::Psubq => "psubq",
|
||||||
SseOpcode::Psubw => "psubw",
|
SseOpcode::Psubw => "psubw",
|
||||||
|
SseOpcode::Pxor => "pxor",
|
||||||
SseOpcode::Rcpss => "rcpss",
|
SseOpcode::Rcpss => "rcpss",
|
||||||
SseOpcode::Roundss => "roundss",
|
SseOpcode::Roundss => "roundss",
|
||||||
SseOpcode::Roundsd => "roundsd",
|
SseOpcode::Roundsd => "roundsd",
|
||||||
|
|||||||
@@ -1703,6 +1703,7 @@ pub(crate) fn emit(
|
|||||||
SseOpcode::Psubd => (LegacyPrefixes::_66, 0x0FFA, 2),
|
SseOpcode::Psubd => (LegacyPrefixes::_66, 0x0FFA, 2),
|
||||||
SseOpcode::Psubq => (LegacyPrefixes::_66, 0x0FFB, 2),
|
SseOpcode::Psubq => (LegacyPrefixes::_66, 0x0FFB, 2),
|
||||||
SseOpcode::Psubw => (LegacyPrefixes::_66, 0x0FF9, 2),
|
SseOpcode::Psubw => (LegacyPrefixes::_66, 0x0FF9, 2),
|
||||||
|
SseOpcode::Pxor => (LegacyPrefixes::_66, 0x0FEF, 2),
|
||||||
SseOpcode::Subps => (LegacyPrefixes::None, 0x0F5C, 2),
|
SseOpcode::Subps => (LegacyPrefixes::None, 0x0F5C, 2),
|
||||||
SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2),
|
SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2),
|
||||||
SseOpcode::Subss => (LegacyPrefixes::_F3, 0x0F5C, 2),
|
SseOpcode::Subss => (LegacyPrefixes::_F3, 0x0F5C, 2),
|
||||||
|
|||||||
@@ -3083,6 +3083,12 @@ fn test_x64_emit() {
|
|||||||
"pmuludq %xmm8, %xmm9",
|
"pmuludq %xmm8, %xmm9",
|
||||||
));
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(xmm11), w_xmm2),
|
||||||
|
"66410FEFD3",
|
||||||
|
"pxor %xmm11, %xmm2",
|
||||||
|
));
|
||||||
|
|
||||||
// XMM_Mov_R_M: float stores
|
// XMM_Mov_R_M: float stores
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12), None),
|
Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12), None),
|
||||||
|
|||||||
@@ -1101,7 +1101,9 @@ impl Inst {
|
|||||||
|
|
||||||
Self::XMM_RM_R { op, src, dst, .. } => {
|
Self::XMM_RM_R { op, src, dst, .. } => {
|
||||||
src.to_reg() == Some(dst.to_reg())
|
src.to_reg() == Some(dst.to_reg())
|
||||||
&& (*op == SseOpcode::Xorps || *op == SseOpcode::Xorpd)
|
&& (*op == SseOpcode::Xorps
|
||||||
|
|| *op == SseOpcode::Xorpd
|
||||||
|
|| *op == SseOpcode::Pxor)
|
||||||
}
|
}
|
||||||
|
|
||||||
Self::XmmRmRImm { op, src, dst, imm } => {
|
Self::XmmRmRImm { op, src, dst, imm } => {
|
||||||
|
|||||||
@@ -653,6 +653,37 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
ctx.emit(Inst::shift_r(is_64, shift_kind, count, dst));
|
ctx.emit(Inst::shift_r(is_64, shift_kind, count, dst));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Opcode::Ineg => {
|
||||||
|
// Zeroes out a register and then does a packed subtraction
|
||||||
|
// of the input from the register.
|
||||||
|
let src = input_to_reg_mem(ctx, inputs[0]);
|
||||||
|
let dst = output_to_reg(ctx, outputs[0]);
|
||||||
|
let tmp = ctx.alloc_tmp(RegClass::V128, types::I32X4);
|
||||||
|
let ty = ty.unwrap();
|
||||||
|
|
||||||
|
let subtract_opcode = match ty {
|
||||||
|
types::I8X16 => SseOpcode::Psubb,
|
||||||
|
types::I16X8 => SseOpcode::Psubw,
|
||||||
|
types::I32X4 => SseOpcode::Psubd,
|
||||||
|
types::I64X2 => SseOpcode::Psubq,
|
||||||
|
_ => panic!("Unsupported type for Ineg instruction, found {}", ty),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Note we must zero out a tmp instead of using the destination register since
|
||||||
|
// the destination could be an alias for the source input register
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Pxor,
|
||||||
|
RegMem::reg(tmp.to_reg()),
|
||||||
|
tmp,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(subtract_opcode, src, tmp));
|
||||||
|
ctx.emit(Inst::xmm_unary_rm_r(
|
||||||
|
SseOpcode::Movapd,
|
||||||
|
RegMem::reg(tmp.to_reg()),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
Opcode::Clz => {
|
Opcode::Clz => {
|
||||||
// TODO when the x86 flags have use_lzcnt, we can use LZCNT.
|
// TODO when the x86 flags have use_lzcnt, we can use LZCNT.
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user