Adds x86 SIMD support for Ceil, Floor, Trunc, and Nearest

This commit is contained in:
Johnnie Birch
2020-11-28 19:57:03 -08:00
parent dcbc4768c9
commit a33e755cb2
4 changed files with 82 additions and 16 deletions

View File

@@ -550,6 +550,8 @@ pub enum SseOpcode {
Punpcklbw,
Pxor,
Rcpss,
Roundps,
Roundpd,
Roundss,
Roundsd,
Rsqrtss,
@@ -729,6 +731,8 @@ impl SseOpcode {
| SseOpcode::Pmovzxdq
| SseOpcode::Pmulld
| SseOpcode::Ptest
| SseOpcode::Roundps
| SseOpcode::Roundpd
| SseOpcode::Roundss
| SseOpcode::Roundsd => SSE41,
@@ -890,6 +894,8 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Punpcklbw => "punpcklbw",
SseOpcode::Pxor => "pxor",
SseOpcode::Rcpss => "rcpss",
SseOpcode::Roundps => "roundps",
SseOpcode::Roundpd => "roundpd",
SseOpcode::Roundss => "roundss",
SseOpcode::Roundsd => "roundsd",
SseOpcode::Rsqrtss => "rsqrtss",
@@ -1238,6 +1244,20 @@ impl From<FloatCC> for FcmpImm {
}
}
/// Rounding modes used as part of the Rounding Control field of a rounding immediate.
///
/// Each variant's discriminant is the value placed in that field, and
/// [`RoundImm::encode`] returns it as a byte. Only the rounding mode is
/// represented here; every discriminant fits in the two low bits, so all other
/// bits of the encoded byte are zero.
///
/// The type is `Copy` so a mode can be encoded (which takes `self` by value)
/// and still be used afterwards, matching the other small immediate/size enums
/// in this file.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum RoundImm {
    /// Round to the nearest value (encoded as `0b00`).
    RoundNearest = 0x00,
    /// Round down (encoded as `0b01`).
    RoundDown = 0x01,
    /// Round up (encoded as `0b10`).
    RoundUp = 0x02,
    /// Round toward zero, i.e. truncate (encoded as `0b11`).
    RoundZero = 0x03,
}

impl RoundImm {
    /// Returns the rounding-control bits for this mode as the immediate byte.
    ///
    /// The result is the variant's explicit discriminant, so it is always in
    /// the range `0..=3`.
    pub(crate) fn encode(self) -> u8 {
        // Fieldless enum with explicit discriminants: the cast yields exactly
        // the value written on the variant.
        self as u8
    }
}
/// An operand's size in bits.
#[derive(Clone, Copy, PartialEq)]
pub enum OperandSize {

View File

@@ -1981,6 +1981,8 @@ pub(crate) fn emit(
SseOpcode::Pextrw => (LegacyPrefixes::_66, 0x0FC5, 2),
SseOpcode::Pextrd => (LegacyPrefixes::_66, 0x0F3A16, 3),
SseOpcode::Pshufd => (LegacyPrefixes::_66, 0x0F70, 2),
SseOpcode::Roundps => (LegacyPrefixes::_66, 0x0F3A08, 3),
SseOpcode::Roundpd => (LegacyPrefixes::_66, 0x0F3A09, 3),
_ => unimplemented!("Opcode {:?} not implemented", op),
};
let rex = if *is64 {

View File

@@ -3505,6 +3505,27 @@ fn test_x64_emit() {
"palignr $3, %xmm1, %xmm9",
));
insns.push((
Inst::xmm_rm_r_imm(SseOpcode::Roundps, RegMem::reg(xmm7), w_xmm8, 3, false),
"66440F3A08C703",
"roundps $3, %xmm7, %xmm8",
));
insns.push((
Inst::xmm_rm_r_imm(SseOpcode::Roundpd, RegMem::reg(xmm10), w_xmm7, 2, false),
"66410F3A09FA02",
"roundpd $2, %xmm10, %xmm7",
));
insns.push((
Inst::xmm_rm_r_imm(SseOpcode::Roundps, RegMem::reg(xmm4), w_xmm8, 1, false),
"66440F3A08C401",
"roundps $1, %xmm4, %xmm8",
));
insns.push((
Inst::xmm_rm_r_imm(SseOpcode::Roundpd, RegMem::reg(xmm15), w_xmm15, 0, false),
"66450F3A09FF00",
"roundpd $0, %xmm15, %xmm15",
));
// ========================================================
// Pertaining to atomics.
let am1: SyntheticAmode = Amode::imm_reg_reg_shift(321, r10, rdx, 2).into();