x64: Add native lowering for scalar fma (#4539)
Use `vfmadd213{ss,sd}` for these lowerings.
This commit is contained in:
@@ -1383,6 +1383,8 @@ impl fmt::Display for SseOpcode {
|
||||
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub enum AvxOpcode {
|
||||
Vfmadd213ss,
|
||||
Vfmadd213sd,
|
||||
Vfmadd213ps,
|
||||
Vfmadd213pd,
|
||||
}
|
||||
@@ -1391,8 +1393,10 @@ impl AvxOpcode {
|
||||
/// Which `InstructionSet`s support the opcode?
|
||||
pub(crate) fn available_from(&self) -> SmallVec<[InstructionSet; 2]> {
|
||||
match self {
|
||||
AvxOpcode::Vfmadd213ps => smallvec![InstructionSet::FMA],
|
||||
AvxOpcode::Vfmadd213pd => smallvec![InstructionSet::FMA],
|
||||
AvxOpcode::Vfmadd213ss
|
||||
| AvxOpcode::Vfmadd213sd
|
||||
| AvxOpcode::Vfmadd213ps
|
||||
| AvxOpcode::Vfmadd213pd => smallvec![InstructionSet::FMA],
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1400,6 +1404,8 @@ impl AvxOpcode {
|
||||
impl fmt::Debug for AvxOpcode {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
let name = match self {
|
||||
AvxOpcode::Vfmadd213ss => "vfmadd213ss",
|
||||
AvxOpcode::Vfmadd213sd => "vfmadd213sd",
|
||||
AvxOpcode::Vfmadd213ps => "vfmadd213ps",
|
||||
AvxOpcode::Vfmadd213pd => "vfmadd213pd",
|
||||
};
|
||||
|
||||
@@ -1742,6 +1742,8 @@ pub(crate) fn emit(
|
||||
let src3 = src3.clone().to_reg_mem().with_allocs(allocs);
|
||||
|
||||
let (w, opcode) = match op {
|
||||
AvxOpcode::Vfmadd213ss => (false, 0xA9),
|
||||
AvxOpcode::Vfmadd213sd => (true, 0xA9),
|
||||
AvxOpcode::Vfmadd213ps => (false, 0xA8),
|
||||
AvxOpcode::Vfmadd213pd => (true, 0xA8),
|
||||
};
|
||||
|
||||
@@ -3531,6 +3531,18 @@ fn test_x64_emit() {
|
||||
// ========================================================
|
||||
// XMM FMA
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r_vex(AvxOpcode::Vfmadd213ss, RegMem::reg(xmm2), xmm1, w_xmm0),
|
||||
"C4E271A9C2",
|
||||
"vfmadd213ss %xmm0, %xmm1, %xmm2, %xmm0",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r_vex(AvxOpcode::Vfmadd213sd, RegMem::reg(xmm5), xmm4, w_xmm3),
|
||||
"C4E2D9A9DD",
|
||||
"vfmadd213sd %xmm3, %xmm4, %xmm5, %xmm3",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r_vex(AvxOpcode::Vfmadd213ps, RegMem::reg(xmm2), xmm1, w_xmm0),
|
||||
"C4E271A8C2",
|
||||
|
||||
@@ -1847,7 +1847,12 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
|
||||
// Vfmadd uses and defs the dst reg, that is not the case with all
|
||||
// AVX's ops, if you're adding a new op, make sure to correctly define
|
||||
// register uses.
|
||||
assert!(*op == AvxOpcode::Vfmadd213ps || *op == AvxOpcode::Vfmadd213pd);
|
||||
assert!(
|
||||
*op == AvxOpcode::Vfmadd213ss
|
||||
|| *op == AvxOpcode::Vfmadd213sd
|
||||
|| *op == AvxOpcode::Vfmadd213ps
|
||||
|| *op == AvxOpcode::Vfmadd213pd
|
||||
);
|
||||
|
||||
collector.reg_use(src1.to_reg());
|
||||
collector.reg_reuse_def(dst.to_writable_reg(), 0);
|
||||
|
||||
Reference in New Issue
Block a user