Add support for some packed multiplication for new x64 backend
Adds support for i32x4 and i16x8, and lowering for pmuludq in preparation for i64x2.
This commit is contained in:
@@ -395,6 +395,9 @@ pub enum SseOpcode {
|
|||||||
Paddd,
|
Paddd,
|
||||||
Paddq,
|
Paddq,
|
||||||
Paddw,
|
Paddw,
|
||||||
|
Pmulld,
|
||||||
|
Pmullw,
|
||||||
|
Pmuludq,
|
||||||
Psllw,
|
Psllw,
|
||||||
Pslld,
|
Pslld,
|
||||||
Psllq,
|
Psllq,
|
||||||
@@ -491,6 +494,8 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Paddd
|
| SseOpcode::Paddd
|
||||||
| SseOpcode::Paddq
|
| SseOpcode::Paddq
|
||||||
| SseOpcode::Paddw
|
| SseOpcode::Paddw
|
||||||
|
| SseOpcode::Pmullw
|
||||||
|
| SseOpcode::Pmuludq
|
||||||
| SseOpcode::Psllw
|
| SseOpcode::Psllw
|
||||||
| SseOpcode::Pslld
|
| SseOpcode::Pslld
|
||||||
| SseOpcode::Psllq
|
| SseOpcode::Psllq
|
||||||
@@ -510,7 +515,9 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Ucomisd
|
| SseOpcode::Ucomisd
|
||||||
| SseOpcode::Xorpd => SSE2,
|
| SseOpcode::Xorpd => SSE2,
|
||||||
|
|
||||||
SseOpcode::Insertps | SseOpcode::Roundss | SseOpcode::Roundsd => SSE41,
|
SseOpcode::Insertps | SseOpcode::Pmulld | SseOpcode::Roundss | SseOpcode::Roundsd => {
|
||||||
|
SSE41
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -579,6 +586,9 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Paddd => "paddd",
|
SseOpcode::Paddd => "paddd",
|
||||||
SseOpcode::Paddq => "paddq",
|
SseOpcode::Paddq => "paddq",
|
||||||
SseOpcode::Paddw => "paddw",
|
SseOpcode::Paddw => "paddw",
|
||||||
|
SseOpcode::Pmulld => "pmulld",
|
||||||
|
SseOpcode::Pmullw => "pmullw",
|
||||||
|
SseOpcode::Pmuludq => "pmuludq",
|
||||||
SseOpcode::Psllw => "psllw",
|
SseOpcode::Psllw => "psllw",
|
||||||
SseOpcode::Pslld => "pslld",
|
SseOpcode::Pslld => "pslld",
|
||||||
SseOpcode::Psllq => "psllq",
|
SseOpcode::Psllq => "psllq",
|
||||||
|
|||||||
@@ -1632,57 +1632,60 @@ pub(crate) fn emit(
|
|||||||
dst: reg_g,
|
dst: reg_g,
|
||||||
} => {
|
} => {
|
||||||
let rex = RexFlags::clear_w();
|
let rex = RexFlags::clear_w();
|
||||||
let (prefix, opcode) = match op {
|
let (prefix, opcode, length) = match op {
|
||||||
SseOpcode::Addps => (LegacyPrefix::None, 0x0F58),
|
SseOpcode::Addps => (LegacyPrefix::None, 0x0F58, 2),
|
||||||
SseOpcode::Addpd => (LegacyPrefix::_66, 0x0F58),
|
SseOpcode::Addpd => (LegacyPrefix::_66, 0x0F58, 2),
|
||||||
SseOpcode::Addss => (LegacyPrefix::_F3, 0x0F58),
|
SseOpcode::Addss => (LegacyPrefix::_F3, 0x0F58, 2),
|
||||||
SseOpcode::Addsd => (LegacyPrefix::_F2, 0x0F58),
|
SseOpcode::Addsd => (LegacyPrefix::_F2, 0x0F58, 2),
|
||||||
SseOpcode::Andpd => (LegacyPrefix::_66, 0x0F54),
|
SseOpcode::Andpd => (LegacyPrefix::_66, 0x0F54, 2),
|
||||||
SseOpcode::Andps => (LegacyPrefix::None, 0x0F54),
|
SseOpcode::Andps => (LegacyPrefix::None, 0x0F54, 2),
|
||||||
SseOpcode::Andnps => (LegacyPrefix::None, 0x0F55),
|
SseOpcode::Andnps => (LegacyPrefix::None, 0x0F55, 2),
|
||||||
SseOpcode::Andnpd => (LegacyPrefix::_66, 0x0F55),
|
SseOpcode::Andnpd => (LegacyPrefix::_66, 0x0F55, 2),
|
||||||
SseOpcode::Divps => (LegacyPrefix::None, 0x0F5E),
|
SseOpcode::Divps => (LegacyPrefix::None, 0x0F5E, 2),
|
||||||
SseOpcode::Divpd => (LegacyPrefix::_66, 0x0F5E),
|
SseOpcode::Divpd => (LegacyPrefix::_66, 0x0F5E, 2),
|
||||||
SseOpcode::Divss => (LegacyPrefix::_F3, 0x0F5E),
|
SseOpcode::Divss => (LegacyPrefix::_F3, 0x0F5E, 2),
|
||||||
SseOpcode::Divsd => (LegacyPrefix::_F2, 0x0F5E),
|
SseOpcode::Divsd => (LegacyPrefix::_F2, 0x0F5E, 2),
|
||||||
SseOpcode::Minps => (LegacyPrefix::None, 0x0F5D),
|
SseOpcode::Minps => (LegacyPrefix::None, 0x0F5D, 2),
|
||||||
SseOpcode::Minpd => (LegacyPrefix::_66, 0x0F5D),
|
SseOpcode::Minpd => (LegacyPrefix::_66, 0x0F5D, 2),
|
||||||
SseOpcode::Minss => (LegacyPrefix::_F3, 0x0F5D),
|
SseOpcode::Minss => (LegacyPrefix::_F3, 0x0F5D, 2),
|
||||||
SseOpcode::Minsd => (LegacyPrefix::_F2, 0x0F5D),
|
SseOpcode::Minsd => (LegacyPrefix::_F2, 0x0F5D, 2),
|
||||||
SseOpcode::Maxps => (LegacyPrefix::None, 0x0F5F),
|
SseOpcode::Maxps => (LegacyPrefix::None, 0x0F5F, 2),
|
||||||
SseOpcode::Maxpd => (LegacyPrefix::_66, 0x0F5F),
|
SseOpcode::Maxpd => (LegacyPrefix::_66, 0x0F5F, 2),
|
||||||
SseOpcode::Maxss => (LegacyPrefix::_F3, 0x0F5F),
|
SseOpcode::Maxss => (LegacyPrefix::_F3, 0x0F5F, 2),
|
||||||
SseOpcode::Maxsd => (LegacyPrefix::_F2, 0x0F5F),
|
SseOpcode::Maxsd => (LegacyPrefix::_F2, 0x0F5F, 2),
|
||||||
SseOpcode::Mulps => (LegacyPrefix::None, 0x0F59),
|
SseOpcode::Mulps => (LegacyPrefix::None, 0x0F59, 2),
|
||||||
SseOpcode::Mulpd => (LegacyPrefix::_66, 0x0F59),
|
SseOpcode::Mulpd => (LegacyPrefix::_66, 0x0F59, 2),
|
||||||
SseOpcode::Mulss => (LegacyPrefix::_F3, 0x0F59),
|
SseOpcode::Mulss => (LegacyPrefix::_F3, 0x0F59, 2),
|
||||||
SseOpcode::Mulsd => (LegacyPrefix::_F2, 0x0F59),
|
SseOpcode::Mulsd => (LegacyPrefix::_F2, 0x0F59, 2),
|
||||||
SseOpcode::Orpd => (LegacyPrefix::_66, 0x0F56),
|
SseOpcode::Orpd => (LegacyPrefix::_66, 0x0F56, 2),
|
||||||
SseOpcode::Orps => (LegacyPrefix::None, 0x0F56),
|
SseOpcode::Orps => (LegacyPrefix::None, 0x0F56, 2),
|
||||||
SseOpcode::Paddb => (LegacyPrefix::_66, 0x0FFC),
|
SseOpcode::Paddb => (LegacyPrefix::_66, 0x0FFC, 2),
|
||||||
SseOpcode::Paddd => (LegacyPrefix::_66, 0x0FFE),
|
SseOpcode::Paddd => (LegacyPrefix::_66, 0x0FFE, 2),
|
||||||
SseOpcode::Paddq => (LegacyPrefix::_66, 0x0FD4),
|
SseOpcode::Paddq => (LegacyPrefix::_66, 0x0FD4, 2),
|
||||||
SseOpcode::Paddw => (LegacyPrefix::_66, 0x0FFD),
|
SseOpcode::Paddw => (LegacyPrefix::_66, 0x0FFD, 2),
|
||||||
SseOpcode::Psubb => (LegacyPrefix::_66, 0x0FF8),
|
SseOpcode::Pmulld => (LegacyPrefix::_66, 0x0F3840, 3),
|
||||||
SseOpcode::Psubd => (LegacyPrefix::_66, 0x0FFA),
|
SseOpcode::Pmullw => (LegacyPrefix::_66, 0x0FD5, 2),
|
||||||
SseOpcode::Psubq => (LegacyPrefix::_66, 0x0FFB),
|
SseOpcode::Pmuludq => (LegacyPrefix::_66, 0x0FF4, 2),
|
||||||
SseOpcode::Psubw => (LegacyPrefix::_66, 0x0FF9),
|
SseOpcode::Psubb => (LegacyPrefix::_66, 0x0FF8, 2),
|
||||||
SseOpcode::Subps => (LegacyPrefix::None, 0x0F5C),
|
SseOpcode::Psubd => (LegacyPrefix::_66, 0x0FFA, 2),
|
||||||
SseOpcode::Subpd => (LegacyPrefix::_66, 0x0F5C),
|
SseOpcode::Psubq => (LegacyPrefix::_66, 0x0FFB, 2),
|
||||||
SseOpcode::Subss => (LegacyPrefix::_F3, 0x0F5C),
|
SseOpcode::Psubw => (LegacyPrefix::_66, 0x0FF9, 2),
|
||||||
SseOpcode::Subsd => (LegacyPrefix::_F2, 0x0F5C),
|
SseOpcode::Subps => (LegacyPrefix::None, 0x0F5C, 2),
|
||||||
SseOpcode::Xorps => (LegacyPrefix::None, 0x0F57),
|
SseOpcode::Subpd => (LegacyPrefix::_66, 0x0F5C, 2),
|
||||||
SseOpcode::Xorpd => (LegacyPrefix::_66, 0x0F57),
|
SseOpcode::Subss => (LegacyPrefix::_F3, 0x0F5C, 2),
|
||||||
|
SseOpcode::Subsd => (LegacyPrefix::_F2, 0x0F5C, 2),
|
||||||
|
SseOpcode::Xorps => (LegacyPrefix::None, 0x0F57, 2),
|
||||||
|
SseOpcode::Xorpd => (LegacyPrefix::_66, 0x0F57, 2),
|
||||||
_ => unimplemented!("Opcode {:?} not implemented", op),
|
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||||
};
|
};
|
||||||
|
|
||||||
match src_e {
|
match src_e {
|
||||||
RegMem::Reg { reg: reg_e } => {
|
RegMem::Reg { reg: reg_e } => {
|
||||||
emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex);
|
emit_std_reg_reg(sink, prefix, opcode, length, reg_g.to_reg(), *reg_e, rex);
|
||||||
}
|
}
|
||||||
RegMem::Mem { addr } => {
|
RegMem::Mem { addr } => {
|
||||||
let addr = &addr.finalize(state);
|
let addr = &addr.finalize(state);
|
||||||
emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex);
|
emit_std_reg_mem(sink, prefix, opcode, length, reg_g.to_reg(), addr, rex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3062,6 +3062,24 @@ fn test_x64_emit() {
|
|||||||
"psubq %xmm8, %xmm1",
|
"psubq %xmm8, %xmm1",
|
||||||
));
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Pmulld, RegMem::reg(xmm15), w_xmm6),
|
||||||
|
"66410F3840F7",
|
||||||
|
"pmulld %xmm15, %xmm6",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(xmm14), w_xmm1),
|
||||||
|
"66410FD5CE",
|
||||||
|
"pmullw %xmm14, %xmm1",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Pmuludq, RegMem::reg(xmm8), w_xmm9),
|
||||||
|
"66450FF4C8",
|
||||||
|
"pmuludq %xmm8, %xmm9",
|
||||||
|
));
|
||||||
|
|
||||||
// XMM_Mov_R_M: float stores
|
// XMM_Mov_R_M: float stores
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12), None),
|
Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12), None),
|
||||||
|
|||||||
@@ -357,6 +357,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
types::I64X2 => SseOpcode::Psubq,
|
types::I64X2 => SseOpcode::Psubq,
|
||||||
_ => panic!("Unsupported type for packed Isub instruction"),
|
_ => panic!("Unsupported type for packed Isub instruction"),
|
||||||
},
|
},
|
||||||
|
Opcode::Imul => match ty {
|
||||||
|
types::I16X8 => SseOpcode::Pmullw,
|
||||||
|
types::I32X4 => SseOpcode::Pmulld,
|
||||||
|
_ => panic!("Unsupported type for packed Imul instruction"),
|
||||||
|
},
|
||||||
_ => panic!("Unsupported packed instruction"),
|
_ => panic!("Unsupported packed instruction"),
|
||||||
};
|
};
|
||||||
let lhs = input_to_reg(ctx, inputs[0]);
|
let lhs = input_to_reg(ctx, inputs[0]);
|
||||||
|
|||||||
Reference in New Issue
Block a user