Add support for some packed multiplication for new x64 backend

Adds support for i32x4 and i16x8, and lowering for pmuludq, in
preparation for i64x2.
This commit is contained in:
Johnnie Birch
2020-08-17 13:44:10 -07:00
parent 81b3450114
commit a31336996c
4 changed files with 80 additions and 44 deletions

View File

@@ -1632,57 +1632,60 @@ pub(crate) fn emit(
dst: reg_g,
} => {
let rex = RexFlags::clear_w();
let (prefix, opcode) = match op {
SseOpcode::Addps => (LegacyPrefix::None, 0x0F58),
SseOpcode::Addpd => (LegacyPrefix::_66, 0x0F58),
SseOpcode::Addss => (LegacyPrefix::_F3, 0x0F58),
SseOpcode::Addsd => (LegacyPrefix::_F2, 0x0F58),
SseOpcode::Andpd => (LegacyPrefix::_66, 0x0F54),
SseOpcode::Andps => (LegacyPrefix::None, 0x0F54),
SseOpcode::Andnps => (LegacyPrefix::None, 0x0F55),
SseOpcode::Andnpd => (LegacyPrefix::_66, 0x0F55),
SseOpcode::Divps => (LegacyPrefix::None, 0x0F5E),
SseOpcode::Divpd => (LegacyPrefix::_66, 0x0F5E),
SseOpcode::Divss => (LegacyPrefix::_F3, 0x0F5E),
SseOpcode::Divsd => (LegacyPrefix::_F2, 0x0F5E),
SseOpcode::Minps => (LegacyPrefix::None, 0x0F5D),
SseOpcode::Minpd => (LegacyPrefix::_66, 0x0F5D),
SseOpcode::Minss => (LegacyPrefix::_F3, 0x0F5D),
SseOpcode::Minsd => (LegacyPrefix::_F2, 0x0F5D),
SseOpcode::Maxps => (LegacyPrefix::None, 0x0F5F),
SseOpcode::Maxpd => (LegacyPrefix::_66, 0x0F5F),
SseOpcode::Maxss => (LegacyPrefix::_F3, 0x0F5F),
SseOpcode::Maxsd => (LegacyPrefix::_F2, 0x0F5F),
SseOpcode::Mulps => (LegacyPrefix::None, 0x0F59),
SseOpcode::Mulpd => (LegacyPrefix::_66, 0x0F59),
SseOpcode::Mulss => (LegacyPrefix::_F3, 0x0F59),
SseOpcode::Mulsd => (LegacyPrefix::_F2, 0x0F59),
SseOpcode::Orpd => (LegacyPrefix::_66, 0x0F56),
SseOpcode::Orps => (LegacyPrefix::None, 0x0F56),
SseOpcode::Paddb => (LegacyPrefix::_66, 0x0FFC),
SseOpcode::Paddd => (LegacyPrefix::_66, 0x0FFE),
SseOpcode::Paddq => (LegacyPrefix::_66, 0x0FD4),
SseOpcode::Paddw => (LegacyPrefix::_66, 0x0FFD),
SseOpcode::Psubb => (LegacyPrefix::_66, 0x0FF8),
SseOpcode::Psubd => (LegacyPrefix::_66, 0x0FFA),
SseOpcode::Psubq => (LegacyPrefix::_66, 0x0FFB),
SseOpcode::Psubw => (LegacyPrefix::_66, 0x0FF9),
SseOpcode::Subps => (LegacyPrefix::None, 0x0F5C),
SseOpcode::Subpd => (LegacyPrefix::_66, 0x0F5C),
SseOpcode::Subss => (LegacyPrefix::_F3, 0x0F5C),
SseOpcode::Subsd => (LegacyPrefix::_F2, 0x0F5C),
SseOpcode::Xorps => (LegacyPrefix::None, 0x0F57),
SseOpcode::Xorpd => (LegacyPrefix::_66, 0x0F57),
let (prefix, opcode, length) = match op {
SseOpcode::Addps => (LegacyPrefix::None, 0x0F58, 2),
SseOpcode::Addpd => (LegacyPrefix::_66, 0x0F58, 2),
SseOpcode::Addss => (LegacyPrefix::_F3, 0x0F58, 2),
SseOpcode::Addsd => (LegacyPrefix::_F2, 0x0F58, 2),
SseOpcode::Andpd => (LegacyPrefix::_66, 0x0F54, 2),
SseOpcode::Andps => (LegacyPrefix::None, 0x0F54, 2),
SseOpcode::Andnps => (LegacyPrefix::None, 0x0F55, 2),
SseOpcode::Andnpd => (LegacyPrefix::_66, 0x0F55, 2),
SseOpcode::Divps => (LegacyPrefix::None, 0x0F5E, 2),
SseOpcode::Divpd => (LegacyPrefix::_66, 0x0F5E, 2),
SseOpcode::Divss => (LegacyPrefix::_F3, 0x0F5E, 2),
SseOpcode::Divsd => (LegacyPrefix::_F2, 0x0F5E, 2),
SseOpcode::Minps => (LegacyPrefix::None, 0x0F5D, 2),
SseOpcode::Minpd => (LegacyPrefix::_66, 0x0F5D, 2),
SseOpcode::Minss => (LegacyPrefix::_F3, 0x0F5D, 2),
SseOpcode::Minsd => (LegacyPrefix::_F2, 0x0F5D, 2),
SseOpcode::Maxps => (LegacyPrefix::None, 0x0F5F, 2),
SseOpcode::Maxpd => (LegacyPrefix::_66, 0x0F5F, 2),
SseOpcode::Maxss => (LegacyPrefix::_F3, 0x0F5F, 2),
SseOpcode::Maxsd => (LegacyPrefix::_F2, 0x0F5F, 2),
SseOpcode::Mulps => (LegacyPrefix::None, 0x0F59, 2),
SseOpcode::Mulpd => (LegacyPrefix::_66, 0x0F59, 2),
SseOpcode::Mulss => (LegacyPrefix::_F3, 0x0F59, 2),
SseOpcode::Mulsd => (LegacyPrefix::_F2, 0x0F59, 2),
SseOpcode::Orpd => (LegacyPrefix::_66, 0x0F56, 2),
SseOpcode::Orps => (LegacyPrefix::None, 0x0F56, 2),
SseOpcode::Paddb => (LegacyPrefix::_66, 0x0FFC, 2),
SseOpcode::Paddd => (LegacyPrefix::_66, 0x0FFE, 2),
SseOpcode::Paddq => (LegacyPrefix::_66, 0x0FD4, 2),
SseOpcode::Paddw => (LegacyPrefix::_66, 0x0FFD, 2),
SseOpcode::Pmulld => (LegacyPrefix::_66, 0x0F3840, 3),
SseOpcode::Pmullw => (LegacyPrefix::_66, 0x0FD5, 2),
SseOpcode::Pmuludq => (LegacyPrefix::_66, 0x0FF4, 2),
SseOpcode::Psubb => (LegacyPrefix::_66, 0x0FF8, 2),
SseOpcode::Psubd => (LegacyPrefix::_66, 0x0FFA, 2),
SseOpcode::Psubq => (LegacyPrefix::_66, 0x0FFB, 2),
SseOpcode::Psubw => (LegacyPrefix::_66, 0x0FF9, 2),
SseOpcode::Subps => (LegacyPrefix::None, 0x0F5C, 2),
SseOpcode::Subpd => (LegacyPrefix::_66, 0x0F5C, 2),
SseOpcode::Subss => (LegacyPrefix::_F3, 0x0F5C, 2),
SseOpcode::Subsd => (LegacyPrefix::_F2, 0x0F5C, 2),
SseOpcode::Xorps => (LegacyPrefix::None, 0x0F57, 2),
SseOpcode::Xorpd => (LegacyPrefix::_66, 0x0F57, 2),
_ => unimplemented!("Opcode {:?} not implemented", op),
};
match src_e {
RegMem::Reg { reg: reg_e } => {
emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex);
emit_std_reg_reg(sink, prefix, opcode, length, reg_g.to_reg(), *reg_e, rex);
}
RegMem::Mem { addr } => {
let addr = &addr.finalize(state);
emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex);
emit_std_reg_mem(sink, prefix, opcode, length, reg_g.to_reg(), addr, rex);
}
}
}