[machinst x64]: add packed min/max implementations

This commit is contained in:
Andrew Brown
2020-09-21 12:32:32 -07:00
parent 7546d98844
commit ac2bf9d246
4 changed files with 166 additions and 3 deletions

View File

@@ -403,6 +403,18 @@ pub enum SseOpcode {
Paddw,
Pavgb,
Pavgw,
Pmaxsb,
Pmaxsw,
Pmaxsd,
Pmaxub,
Pmaxuw,
Pmaxud,
Pminsb,
Pminsw,
Pminsd,
Pminub,
Pminuw,
Pminud,
Pmulld,
Pmullw,
Pmuludq,
@@ -507,6 +519,10 @@ impl SseOpcode {
| SseOpcode::Paddw
| SseOpcode::Pavgb
| SseOpcode::Pavgw
| SseOpcode::Pmaxsw
| SseOpcode::Pmaxub
| SseOpcode::Pminsw
| SseOpcode::Pminub
| SseOpcode::Pmullw
| SseOpcode::Pmuludq
| SseOpcode::Psllw
@@ -531,9 +547,18 @@ impl SseOpcode {
SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd => SSSE3,
SseOpcode::Insertps | SseOpcode::Pmulld | SseOpcode::Roundss | SseOpcode::Roundsd => {
SSE41
}
SseOpcode::Insertps
| SseOpcode::Pmaxsb
| SseOpcode::Pmaxsd
| SseOpcode::Pmaxuw
| SseOpcode::Pmaxud
| SseOpcode::Pminsb
| SseOpcode::Pminsd
| SseOpcode::Pminuw
| SseOpcode::Pminud
| SseOpcode::Pmulld
| SseOpcode::Roundss
| SseOpcode::Roundsd => SSE41,
}
}
@@ -609,6 +634,18 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Paddw => "paddw",
SseOpcode::Pavgb => "pavgb",
SseOpcode::Pavgw => "pavgw",
SseOpcode::Pmaxsb => "pmaxsb",
SseOpcode::Pmaxsw => "pmaxsw",
SseOpcode::Pmaxsd => "pmaxsd",
SseOpcode::Pmaxub => "pmaxub",
SseOpcode::Pmaxuw => "pmaxuw",
SseOpcode::Pmaxud => "pmaxud",
SseOpcode::Pminsb => "pminsb",
SseOpcode::Pminsw => "pminsw",
SseOpcode::Pminsd => "pminsd",
SseOpcode::Pminub => "pminub",
SseOpcode::Pminuw => "pminuw",
SseOpcode::Pminud => "pminud",
SseOpcode::Pmulld => "pmulld",
SseOpcode::Pmullw => "pmullw",
SseOpcode::Pmuludq => "pmuludq",

View File

@@ -1780,6 +1780,18 @@ pub(crate) fn emit(
SseOpcode::Paddw => (LegacyPrefixes::_66, 0x0FFD, 2),
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2),
SseOpcode::Pmaxsb => (LegacyPrefixes::_66, 0x0F383C, 3),
SseOpcode::Pmaxsw => (LegacyPrefixes::_66, 0x0FEE, 2),
SseOpcode::Pmaxsd => (LegacyPrefixes::_66, 0x0F383D, 3),
SseOpcode::Pmaxub => (LegacyPrefixes::_66, 0x0FDE, 2),
SseOpcode::Pmaxuw => (LegacyPrefixes::_66, 0x0F383E, 3),
SseOpcode::Pmaxud => (LegacyPrefixes::_66, 0x0F383F, 3),
SseOpcode::Pminsb => (LegacyPrefixes::_66, 0x0F3838, 3),
SseOpcode::Pminsw => (LegacyPrefixes::_66, 0x0FEA, 2),
SseOpcode::Pminsd => (LegacyPrefixes::_66, 0x0F3839, 3),
SseOpcode::Pminub => (LegacyPrefixes::_66, 0x0FDA, 2),
SseOpcode::Pminuw => (LegacyPrefixes::_66, 0x0F383A, 3),
SseOpcode::Pminud => (LegacyPrefixes::_66, 0x0F383B, 3),
SseOpcode::Pmulld => (LegacyPrefixes::_66, 0x0F3840, 3),
SseOpcode::Pmullw => (LegacyPrefixes::_66, 0x0FD5, 2),
SseOpcode::Pmuludq => (LegacyPrefixes::_66, 0x0FF4, 2),

View File

@@ -3165,6 +3165,78 @@ fn test_x64_emit() {
"pmuludq %xmm8, %xmm9",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxsb, RegMem::reg(xmm15), w_xmm6),
"66410F383CF7",
"pmaxsb %xmm15, %xmm6",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxsw, RegMem::reg(xmm15), w_xmm6),
"66410FEEF7",
"pmaxsw %xmm15, %xmm6",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxsd, RegMem::reg(xmm15), w_xmm6),
"66410F383DF7",
"pmaxsd %xmm15, %xmm6",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxub, RegMem::reg(xmm14), w_xmm1),
"66410FDECE",
"pmaxub %xmm14, %xmm1",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxuw, RegMem::reg(xmm14), w_xmm1),
"66410F383ECE",
"pmaxuw %xmm14, %xmm1",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxud, RegMem::reg(xmm14), w_xmm1),
"66410F383FCE",
"pmaxud %xmm14, %xmm1",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pminsb, RegMem::reg(xmm8), w_xmm9),
"66450F3838C8",
"pminsb %xmm8, %xmm9",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pminsw, RegMem::reg(xmm8), w_xmm9),
"66450FEAC8",
"pminsw %xmm8, %xmm9",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pminsd, RegMem::reg(xmm8), w_xmm9),
"66450F3839C8",
"pminsd %xmm8, %xmm9",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pminub, RegMem::reg(xmm3), w_xmm2),
"660FDAD3",
"pminub %xmm3, %xmm2",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pminuw, RegMem::reg(xmm3), w_xmm2),
"660F383AD3",
"pminuw %xmm3, %xmm2",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pminud, RegMem::reg(xmm3), w_xmm2),
"660F383BD3",
"pminud %xmm3, %xmm2",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(xmm11), w_xmm2),
"66410FEFD3",