x64: Add native lowering for scalar fma (#4539)

Use `vfmadd213{ss,sd}` for these lowerings.
This commit is contained in:
Afonso Bordado
2022-08-11 23:48:16 +01:00
committed by GitHub
parent 755cd4311e
commit 3ea1813173
10 changed files with 124 additions and 6 deletions

View File

@@ -1383,6 +1383,8 @@ impl fmt::Display for SseOpcode {
#[derive(Clone, PartialEq)]
pub enum AvxOpcode {
Vfmadd213ss,
Vfmadd213sd,
Vfmadd213ps,
Vfmadd213pd,
}
@@ -1391,8 +1393,10 @@ impl AvxOpcode {
/// Which `InstructionSet`s support the opcode?
pub(crate) fn available_from(&self) -> SmallVec<[InstructionSet; 2]> {
match self {
AvxOpcode::Vfmadd213ps => smallvec![InstructionSet::FMA],
AvxOpcode::Vfmadd213pd => smallvec![InstructionSet::FMA],
AvxOpcode::Vfmadd213ss
| AvxOpcode::Vfmadd213sd
| AvxOpcode::Vfmadd213ps
| AvxOpcode::Vfmadd213pd => smallvec![InstructionSet::FMA],
}
}
}
@@ -1400,6 +1404,8 @@ impl AvxOpcode {
impl fmt::Debug for AvxOpcode {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
let name = match self {
AvxOpcode::Vfmadd213ss => "vfmadd213ss",
AvxOpcode::Vfmadd213sd => "vfmadd213sd",
AvxOpcode::Vfmadd213ps => "vfmadd213ps",
AvxOpcode::Vfmadd213pd => "vfmadd213pd",
};

View File

@@ -1742,6 +1742,8 @@ pub(crate) fn emit(
let src3 = src3.clone().to_reg_mem().with_allocs(allocs);
let (w, opcode) = match op {
AvxOpcode::Vfmadd213ss => (false, 0xA9),
AvxOpcode::Vfmadd213sd => (true, 0xA9),
AvxOpcode::Vfmadd213ps => (false, 0xA8),
AvxOpcode::Vfmadd213pd => (true, 0xA8),
};

View File

@@ -3531,6 +3531,18 @@ fn test_x64_emit() {
// ========================================================
// XMM FMA
insns.push((
Inst::xmm_rm_r_vex(AvxOpcode::Vfmadd213ss, RegMem::reg(xmm2), xmm1, w_xmm0),
"C4E271A9C2",
"vfmadd213ss %xmm0, %xmm1, %xmm2, %xmm0",
));
insns.push((
Inst::xmm_rm_r_vex(AvxOpcode::Vfmadd213sd, RegMem::reg(xmm5), xmm4, w_xmm3),
"C4E2D9A9DD",
"vfmadd213sd %xmm3, %xmm4, %xmm5, %xmm3",
));
insns.push((
Inst::xmm_rm_r_vex(AvxOpcode::Vfmadd213ps, RegMem::reg(xmm2), xmm1, w_xmm0),
"C4E271A8C2",

View File

@@ -1847,7 +1847,12 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
// Vfmadd both uses and defs the dst reg. That is not the case for all
// AVX ops, so if you're adding a new op, make sure to define its
// register uses correctly.
assert!(*op == AvxOpcode::Vfmadd213ps || *op == AvxOpcode::Vfmadd213pd);
assert!(
*op == AvxOpcode::Vfmadd213ss
|| *op == AvxOpcode::Vfmadd213sd
|| *op == AvxOpcode::Vfmadd213ps
|| *op == AvxOpcode::Vfmadd213pd
);
collector.reg_use(src1.to_reg());
collector.reg_reuse_def(dst.to_writable_reg(), 0);