Add support for some packed multiplication for new x64 backend
Adds support for i32x4 and i16x8, and lowering for pmuludq in preparation for i64x2.
This commit is contained in:
@@ -395,6 +395,9 @@ pub enum SseOpcode {
|
||||
Paddd,
|
||||
Paddq,
|
||||
Paddw,
|
||||
Pmulld,
|
||||
Pmullw,
|
||||
Pmuludq,
|
||||
Psllw,
|
||||
Pslld,
|
||||
Psllq,
|
||||
@@ -491,6 +494,8 @@ impl SseOpcode {
|
||||
| SseOpcode::Paddd
|
||||
| SseOpcode::Paddq
|
||||
| SseOpcode::Paddw
|
||||
| SseOpcode::Pmullw
|
||||
| SseOpcode::Pmuludq
|
||||
| SseOpcode::Psllw
|
||||
| SseOpcode::Pslld
|
||||
| SseOpcode::Psllq
|
||||
@@ -510,7 +515,9 @@ impl SseOpcode {
|
||||
| SseOpcode::Ucomisd
|
||||
| SseOpcode::Xorpd => SSE2,
|
||||
|
||||
SseOpcode::Insertps | SseOpcode::Roundss | SseOpcode::Roundsd => SSE41,
|
||||
SseOpcode::Insertps | SseOpcode::Pmulld | SseOpcode::Roundss | SseOpcode::Roundsd => {
|
||||
SSE41
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -579,6 +586,9 @@ impl fmt::Debug for SseOpcode {
|
||||
SseOpcode::Paddd => "paddd",
|
||||
SseOpcode::Paddq => "paddq",
|
||||
SseOpcode::Paddw => "paddw",
|
||||
SseOpcode::Pmulld => "pmulld",
|
||||
SseOpcode::Pmullw => "pmullw",
|
||||
SseOpcode::Pmuludq => "pmuludq",
|
||||
SseOpcode::Psllw => "psllw",
|
||||
SseOpcode::Pslld => "pslld",
|
||||
SseOpcode::Psllq => "psllq",
|
||||
|
||||
@@ -1632,57 +1632,60 @@ pub(crate) fn emit(
|
||||
dst: reg_g,
|
||||
} => {
|
||||
let rex = RexFlags::clear_w();
|
||||
let (prefix, opcode) = match op {
|
||||
SseOpcode::Addps => (LegacyPrefix::None, 0x0F58),
|
||||
SseOpcode::Addpd => (LegacyPrefix::_66, 0x0F58),
|
||||
SseOpcode::Addss => (LegacyPrefix::_F3, 0x0F58),
|
||||
SseOpcode::Addsd => (LegacyPrefix::_F2, 0x0F58),
|
||||
SseOpcode::Andpd => (LegacyPrefix::_66, 0x0F54),
|
||||
SseOpcode::Andps => (LegacyPrefix::None, 0x0F54),
|
||||
SseOpcode::Andnps => (LegacyPrefix::None, 0x0F55),
|
||||
SseOpcode::Andnpd => (LegacyPrefix::_66, 0x0F55),
|
||||
SseOpcode::Divps => (LegacyPrefix::None, 0x0F5E),
|
||||
SseOpcode::Divpd => (LegacyPrefix::_66, 0x0F5E),
|
||||
SseOpcode::Divss => (LegacyPrefix::_F3, 0x0F5E),
|
||||
SseOpcode::Divsd => (LegacyPrefix::_F2, 0x0F5E),
|
||||
SseOpcode::Minps => (LegacyPrefix::None, 0x0F5D),
|
||||
SseOpcode::Minpd => (LegacyPrefix::_66, 0x0F5D),
|
||||
SseOpcode::Minss => (LegacyPrefix::_F3, 0x0F5D),
|
||||
SseOpcode::Minsd => (LegacyPrefix::_F2, 0x0F5D),
|
||||
SseOpcode::Maxps => (LegacyPrefix::None, 0x0F5F),
|
||||
SseOpcode::Maxpd => (LegacyPrefix::_66, 0x0F5F),
|
||||
SseOpcode::Maxss => (LegacyPrefix::_F3, 0x0F5F),
|
||||
SseOpcode::Maxsd => (LegacyPrefix::_F2, 0x0F5F),
|
||||
SseOpcode::Mulps => (LegacyPrefix::None, 0x0F59),
|
||||
SseOpcode::Mulpd => (LegacyPrefix::_66, 0x0F59),
|
||||
SseOpcode::Mulss => (LegacyPrefix::_F3, 0x0F59),
|
||||
SseOpcode::Mulsd => (LegacyPrefix::_F2, 0x0F59),
|
||||
SseOpcode::Orpd => (LegacyPrefix::_66, 0x0F56),
|
||||
SseOpcode::Orps => (LegacyPrefix::None, 0x0F56),
|
||||
SseOpcode::Paddb => (LegacyPrefix::_66, 0x0FFC),
|
||||
SseOpcode::Paddd => (LegacyPrefix::_66, 0x0FFE),
|
||||
SseOpcode::Paddq => (LegacyPrefix::_66, 0x0FD4),
|
||||
SseOpcode::Paddw => (LegacyPrefix::_66, 0x0FFD),
|
||||
SseOpcode::Psubb => (LegacyPrefix::_66, 0x0FF8),
|
||||
SseOpcode::Psubd => (LegacyPrefix::_66, 0x0FFA),
|
||||
SseOpcode::Psubq => (LegacyPrefix::_66, 0x0FFB),
|
||||
SseOpcode::Psubw => (LegacyPrefix::_66, 0x0FF9),
|
||||
SseOpcode::Subps => (LegacyPrefix::None, 0x0F5C),
|
||||
SseOpcode::Subpd => (LegacyPrefix::_66, 0x0F5C),
|
||||
SseOpcode::Subss => (LegacyPrefix::_F3, 0x0F5C),
|
||||
SseOpcode::Subsd => (LegacyPrefix::_F2, 0x0F5C),
|
||||
SseOpcode::Xorps => (LegacyPrefix::None, 0x0F57),
|
||||
SseOpcode::Xorpd => (LegacyPrefix::_66, 0x0F57),
|
||||
let (prefix, opcode, length) = match op {
|
||||
SseOpcode::Addps => (LegacyPrefix::None, 0x0F58, 2),
|
||||
SseOpcode::Addpd => (LegacyPrefix::_66, 0x0F58, 2),
|
||||
SseOpcode::Addss => (LegacyPrefix::_F3, 0x0F58, 2),
|
||||
SseOpcode::Addsd => (LegacyPrefix::_F2, 0x0F58, 2),
|
||||
SseOpcode::Andpd => (LegacyPrefix::_66, 0x0F54, 2),
|
||||
SseOpcode::Andps => (LegacyPrefix::None, 0x0F54, 2),
|
||||
SseOpcode::Andnps => (LegacyPrefix::None, 0x0F55, 2),
|
||||
SseOpcode::Andnpd => (LegacyPrefix::_66, 0x0F55, 2),
|
||||
SseOpcode::Divps => (LegacyPrefix::None, 0x0F5E, 2),
|
||||
SseOpcode::Divpd => (LegacyPrefix::_66, 0x0F5E, 2),
|
||||
SseOpcode::Divss => (LegacyPrefix::_F3, 0x0F5E, 2),
|
||||
SseOpcode::Divsd => (LegacyPrefix::_F2, 0x0F5E, 2),
|
||||
SseOpcode::Minps => (LegacyPrefix::None, 0x0F5D, 2),
|
||||
SseOpcode::Minpd => (LegacyPrefix::_66, 0x0F5D, 2),
|
||||
SseOpcode::Minss => (LegacyPrefix::_F3, 0x0F5D, 2),
|
||||
SseOpcode::Minsd => (LegacyPrefix::_F2, 0x0F5D, 2),
|
||||
SseOpcode::Maxps => (LegacyPrefix::None, 0x0F5F, 2),
|
||||
SseOpcode::Maxpd => (LegacyPrefix::_66, 0x0F5F, 2),
|
||||
SseOpcode::Maxss => (LegacyPrefix::_F3, 0x0F5F, 2),
|
||||
SseOpcode::Maxsd => (LegacyPrefix::_F2, 0x0F5F, 2),
|
||||
SseOpcode::Mulps => (LegacyPrefix::None, 0x0F59, 2),
|
||||
SseOpcode::Mulpd => (LegacyPrefix::_66, 0x0F59, 2),
|
||||
SseOpcode::Mulss => (LegacyPrefix::_F3, 0x0F59, 2),
|
||||
SseOpcode::Mulsd => (LegacyPrefix::_F2, 0x0F59, 2),
|
||||
SseOpcode::Orpd => (LegacyPrefix::_66, 0x0F56, 2),
|
||||
SseOpcode::Orps => (LegacyPrefix::None, 0x0F56, 2),
|
||||
SseOpcode::Paddb => (LegacyPrefix::_66, 0x0FFC, 2),
|
||||
SseOpcode::Paddd => (LegacyPrefix::_66, 0x0FFE, 2),
|
||||
SseOpcode::Paddq => (LegacyPrefix::_66, 0x0FD4, 2),
|
||||
SseOpcode::Paddw => (LegacyPrefix::_66, 0x0FFD, 2),
|
||||
SseOpcode::Pmulld => (LegacyPrefix::_66, 0x0F3840, 3),
|
||||
SseOpcode::Pmullw => (LegacyPrefix::_66, 0x0FD5, 2),
|
||||
SseOpcode::Pmuludq => (LegacyPrefix::_66, 0x0FF4, 2),
|
||||
SseOpcode::Psubb => (LegacyPrefix::_66, 0x0FF8, 2),
|
||||
SseOpcode::Psubd => (LegacyPrefix::_66, 0x0FFA, 2),
|
||||
SseOpcode::Psubq => (LegacyPrefix::_66, 0x0FFB, 2),
|
||||
SseOpcode::Psubw => (LegacyPrefix::_66, 0x0FF9, 2),
|
||||
SseOpcode::Subps => (LegacyPrefix::None, 0x0F5C, 2),
|
||||
SseOpcode::Subpd => (LegacyPrefix::_66, 0x0F5C, 2),
|
||||
SseOpcode::Subss => (LegacyPrefix::_F3, 0x0F5C, 2),
|
||||
SseOpcode::Subsd => (LegacyPrefix::_F2, 0x0F5C, 2),
|
||||
SseOpcode::Xorps => (LegacyPrefix::None, 0x0F57, 2),
|
||||
SseOpcode::Xorpd => (LegacyPrefix::_66, 0x0F57, 2),
|
||||
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||
};
|
||||
|
||||
match src_e {
|
||||
RegMem::Reg { reg: reg_e } => {
|
||||
emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex);
|
||||
emit_std_reg_reg(sink, prefix, opcode, length, reg_g.to_reg(), *reg_e, rex);
|
||||
}
|
||||
RegMem::Mem { addr } => {
|
||||
let addr = &addr.finalize(state);
|
||||
emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex);
|
||||
emit_std_reg_mem(sink, prefix, opcode, length, reg_g.to_reg(), addr, rex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3062,6 +3062,24 @@ fn test_x64_emit() {
|
||||
"psubq %xmm8, %xmm1",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmulld, RegMem::reg(xmm15), w_xmm6),
|
||||
"66410F3840F7",
|
||||
"pmulld %xmm15, %xmm6",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(xmm14), w_xmm1),
|
||||
"66410FD5CE",
|
||||
"pmullw %xmm14, %xmm1",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmuludq, RegMem::reg(xmm8), w_xmm9),
|
||||
"66450FF4C8",
|
||||
"pmuludq %xmm8, %xmm9",
|
||||
));
|
||||
|
||||
// XMM_Mov_R_M: float stores
|
||||
insns.push((
|
||||
Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12), None),
|
||||
|
||||
@@ -357,6 +357,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
types::I64X2 => SseOpcode::Psubq,
|
||||
_ => panic!("Unsupported type for packed Isub instruction"),
|
||||
},
|
||||
Opcode::Imul => match ty {
|
||||
types::I16X8 => SseOpcode::Pmullw,
|
||||
types::I32X4 => SseOpcode::Pmulld,
|
||||
_ => panic!("Unsupported type for packed Imul instruction"),
|
||||
},
|
||||
_ => panic!("Unsupported packed instruction"),
|
||||
};
|
||||
let lhs = input_to_reg(ctx, inputs[0]);
|
||||
|
||||
Reference in New Issue
Block a user