Adds x86 SIMD support for Ceil, Floor, Trunc, and Nearest
This commit is contained in:
@@ -550,6 +550,8 @@ pub enum SseOpcode {
|
|||||||
Punpcklbw,
|
Punpcklbw,
|
||||||
Pxor,
|
Pxor,
|
||||||
Rcpss,
|
Rcpss,
|
||||||
|
Roundps,
|
||||||
|
Roundpd,
|
||||||
Roundss,
|
Roundss,
|
||||||
Roundsd,
|
Roundsd,
|
||||||
Rsqrtss,
|
Rsqrtss,
|
||||||
@@ -729,6 +731,8 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Pmovzxdq
|
| SseOpcode::Pmovzxdq
|
||||||
| SseOpcode::Pmulld
|
| SseOpcode::Pmulld
|
||||||
| SseOpcode::Ptest
|
| SseOpcode::Ptest
|
||||||
|
| SseOpcode::Roundps
|
||||||
|
| SseOpcode::Roundpd
|
||||||
| SseOpcode::Roundss
|
| SseOpcode::Roundss
|
||||||
| SseOpcode::Roundsd => SSE41,
|
| SseOpcode::Roundsd => SSE41,
|
||||||
|
|
||||||
@@ -890,6 +894,8 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Punpcklbw => "punpcklbw",
|
SseOpcode::Punpcklbw => "punpcklbw",
|
||||||
SseOpcode::Pxor => "pxor",
|
SseOpcode::Pxor => "pxor",
|
||||||
SseOpcode::Rcpss => "rcpss",
|
SseOpcode::Rcpss => "rcpss",
|
||||||
|
SseOpcode::Roundps => "roundps",
|
||||||
|
SseOpcode::Roundpd => "roundpd",
|
||||||
SseOpcode::Roundss => "roundss",
|
SseOpcode::Roundss => "roundss",
|
||||||
SseOpcode::Roundsd => "roundsd",
|
SseOpcode::Roundsd => "roundsd",
|
||||||
SseOpcode::Rsqrtss => "rsqrtss",
|
SseOpcode::Rsqrtss => "rsqrtss",
|
||||||
@@ -1238,6 +1244,20 @@ impl From<FloatCC> for FcmpImm {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Rounding modes as encoded in the Rounding Control field of the immediate
/// operand taken by the SSE4.1 `ROUNDPS`/`ROUNDPD` instructions.
///
/// Each discriminant is the exact value placed in the immediate byte, so the
/// numeric values must not be changed or reordered.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u8)]
pub(crate) enum RoundImm {
    /// Round to the nearest representable value; used to lower `Nearest`.
    RoundNearest = 0x00,
    /// Round toward negative infinity; used to lower `Floor`.
    RoundDown = 0x01,
    /// Round toward positive infinity; used to lower `Ceil`.
    RoundUp = 0x02,
    /// Round toward zero (truncate); used to lower `Trunc`.
    RoundZero = 0x03,
}

impl RoundImm {
    /// Returns the immediate byte for this rounding mode.
    ///
    /// The enum is `Copy`, so taking `self` by value does not consume the
    /// caller's copy.
    pub(crate) fn encode(self) -> u8 {
        // The discriminants are defined to be the hardware encoding.
        self as u8
    }
}
|
||||||
|
|
||||||
/// An operand's size in bits.
|
/// An operand's size in bits.
|
||||||
#[derive(Clone, Copy, PartialEq)]
|
#[derive(Clone, Copy, PartialEq)]
|
||||||
pub enum OperandSize {
|
pub enum OperandSize {
|
||||||
|
|||||||
@@ -1981,6 +1981,8 @@ pub(crate) fn emit(
|
|||||||
SseOpcode::Pextrw => (LegacyPrefixes::_66, 0x0FC5, 2),
|
SseOpcode::Pextrw => (LegacyPrefixes::_66, 0x0FC5, 2),
|
||||||
SseOpcode::Pextrd => (LegacyPrefixes::_66, 0x0F3A16, 3),
|
SseOpcode::Pextrd => (LegacyPrefixes::_66, 0x0F3A16, 3),
|
||||||
SseOpcode::Pshufd => (LegacyPrefixes::_66, 0x0F70, 2),
|
SseOpcode::Pshufd => (LegacyPrefixes::_66, 0x0F70, 2),
|
||||||
|
SseOpcode::Roundps => (LegacyPrefixes::_66, 0x0F3A08, 3),
|
||||||
|
SseOpcode::Roundpd => (LegacyPrefixes::_66, 0x0F3A09, 3),
|
||||||
_ => unimplemented!("Opcode {:?} not implemented", op),
|
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||||
};
|
};
|
||||||
let rex = if *is64 {
|
let rex = if *is64 {
|
||||||
|
|||||||
@@ -3505,6 +3505,27 @@ fn test_x64_emit() {
|
|||||||
"palignr $3, %xmm1, %xmm9",
|
"palignr $3, %xmm1, %xmm9",
|
||||||
));
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r_imm(SseOpcode::Roundps, RegMem::reg(xmm7), w_xmm8, 3, false),
|
||||||
|
"66440F3A08C703",
|
||||||
|
"roundps $3, %xmm7, %xmm8",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r_imm(SseOpcode::Roundpd, RegMem::reg(xmm10), w_xmm7, 2, false),
|
||||||
|
"66410F3A09FA02",
|
||||||
|
"roundpd $2, %xmm10, %xmm7",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r_imm(SseOpcode::Roundps, RegMem::reg(xmm4), w_xmm8, 1, false),
|
||||||
|
"66440F3A08C401",
|
||||||
|
"roundps $1, %xmm4, %xmm8",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r_imm(SseOpcode::Roundpd, RegMem::reg(xmm15), w_xmm15, 0, false),
|
||||||
|
"66450F3A09FF00",
|
||||||
|
"roundpd $0, %xmm15, %xmm15",
|
||||||
|
));
|
||||||
|
|
||||||
// ========================================================
|
// ========================================================
|
||||||
// Pertaining to atomics.
|
// Pertaining to atomics.
|
||||||
let am1: SyntheticAmode = Amode::imm_reg_reg_shift(321, r10, rdx, 2).into();
|
let am1: SyntheticAmode = Amode::imm_reg_reg_shift(321, r10, rdx, 2).into();
|
||||||
|
|||||||
@@ -3207,22 +3207,45 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
|
|
||||||
// Lower to VM calls when there's no access to SSE4.1.
|
// Lower to VM calls when there's no access to SSE4.1.
|
||||||
let ty = ty.unwrap();
|
let ty = ty.unwrap();
|
||||||
let libcall = match (ty, op) {
|
if !ty.is_vector() {
|
||||||
(types::F32, Opcode::Ceil) => LibCall::CeilF32,
|
let libcall = match (op, ty) {
|
||||||
(types::F64, Opcode::Ceil) => LibCall::CeilF64,
|
(Opcode::Ceil, types::F32) => LibCall::CeilF32,
|
||||||
(types::F32, Opcode::Floor) => LibCall::FloorF32,
|
(Opcode::Ceil, types::F64) => LibCall::CeilF64,
|
||||||
(types::F64, Opcode::Floor) => LibCall::FloorF64,
|
(Opcode::Floor, types::F32) => LibCall::FloorF32,
|
||||||
(types::F32, Opcode::Nearest) => LibCall::NearestF32,
|
(Opcode::Floor, types::F64) => LibCall::FloorF64,
|
||||||
(types::F64, Opcode::Nearest) => LibCall::NearestF64,
|
(Opcode::Nearest, types::F32) => LibCall::NearestF32,
|
||||||
(types::F32, Opcode::Trunc) => LibCall::TruncF32,
|
(Opcode::Nearest, types::F64) => LibCall::NearestF64,
|
||||||
(types::F64, Opcode::Trunc) => LibCall::TruncF64,
|
(Opcode::Trunc, types::F32) => LibCall::TruncF32,
|
||||||
|
(Opcode::Trunc, types::F64) => LibCall::TruncF64,
|
||||||
_ => panic!(
|
_ => panic!(
|
||||||
"unexpected type/opcode {:?}/{:?} in Ceil/Floor/Nearest/Trunc",
|
"unexpected type/opcode {:?}/{:?} in Ceil/Floor/Nearest/Trunc",
|
||||||
ty, op
|
ty, op
|
||||||
),
|
),
|
||||||
};
|
};
|
||||||
|
|
||||||
emit_vm_call(ctx, flags, triple, libcall, insn, inputs, outputs)?;
|
emit_vm_call(ctx, flags, triple, libcall, insn, inputs, outputs)?;
|
||||||
|
} else {
|
||||||
|
let (op, mode) = match (op, ty) {
|
||||||
|
(Opcode::Ceil, types::F32X4) => (SseOpcode::Roundps, RoundImm::RoundUp),
|
||||||
|
(Opcode::Ceil, types::F64X2) => (SseOpcode::Roundpd, RoundImm::RoundUp),
|
||||||
|
(Opcode::Floor, types::F32X4) => (SseOpcode::Roundps, RoundImm::RoundDown),
|
||||||
|
(Opcode::Floor, types::F64X2) => (SseOpcode::Roundpd, RoundImm::RoundDown),
|
||||||
|
(Opcode::Trunc, types::F32X4) => (SseOpcode::Roundps, RoundImm::RoundZero),
|
||||||
|
(Opcode::Trunc, types::F64X2) => (SseOpcode::Roundpd, RoundImm::RoundZero),
|
||||||
|
(Opcode::Nearest, types::F32X4) => (SseOpcode::Roundps, RoundImm::RoundNearest),
|
||||||
|
(Opcode::Nearest, types::F64X2) => (SseOpcode::Roundpd, RoundImm::RoundNearest),
|
||||||
|
_ => panic!("Unknown op/ty combination (vector){:?}", ty),
|
||||||
|
};
|
||||||
|
let src = put_input_in_reg(ctx, inputs[0]);
|
||||||
|
let dst = get_output_reg(ctx, outputs[0]);
|
||||||
|
ctx.emit(Inst::gen_move(dst, src, ty));
|
||||||
|
ctx.emit(Inst::xmm_rm_r_imm(
|
||||||
|
op,
|
||||||
|
RegMem::reg(dst.to_reg()),
|
||||||
|
dst,
|
||||||
|
mode.encode(),
|
||||||
|
false,
|
||||||
|
));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Load
|
Opcode::Load
|
||||||
|
|||||||
Reference in New Issue
Block a user