x64: Add more fma instruction lowerings (#5846)
The relaxed-simd proposal for WebAssembly adds a fused-multiply-add operation for `v128` types so I was poking around at Cranelift's existing support for its `fma` instruction. I was also poking around at the x86_64 ISA's offerings for the FMA operation and ended up with this PR that improves the lowering of the `fma` instruction on the x64 backend in a number of ways: * A libcall-based fallback is now provided for `f32x4` and `f64x2` types in preparation for eventual support of the relaxed-simd proposal. These encodings are horribly slow, but it's expected that if FMA semantics must be guaranteed then it's the best that can be done without the `fma` feature. Otherwise it'll be up to producers (e.g. Wasmtime embedders) whether wasm-level FMA operations should be FMA or multiply-then-add. * In addition to the existing `vfmadd213*` instructions opcodes were added for `vfmadd132*`. The `132` variant is selected based on which argument can have a sinkable load. * Any argument in the `fma` CLIF instruction can now have a `sinkable_load` and it'll generate a single FMA instruction. * All `vfnmadd*` opcodes were added as well. These are pattern-matched where one of the arguments to the CLIF instruction is an `fneg`. I opted to not add a new CLIF instruction here since it seemed like pattern matching was easy enough but I'm also not intimately familiar with the semantics here so if that's the preferred approach I can do that too.
This commit is contained in:
@@ -2281,32 +2281,46 @@ pub(crate) fn emit(
|
||||
let dst = allocs.next(dst.to_reg().to_reg());
|
||||
debug_assert_eq!(src1, dst);
|
||||
let src2 = allocs.next(src2.to_reg());
|
||||
let src3 = src3.clone().to_reg_mem().with_allocs(allocs);
|
||||
let src3 = match src3.clone().to_reg_mem().with_allocs(allocs) {
|
||||
RegMem::Reg { reg } => {
|
||||
RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into())
|
||||
}
|
||||
RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)),
|
||||
};
|
||||
|
||||
let (w, map, opcode) = match op {
|
||||
AvxOpcode::Vfmadd132ss => (false, OpcodeMap::_0F38, 0x99),
|
||||
AvxOpcode::Vfmadd213ss => (false, OpcodeMap::_0F38, 0xA9),
|
||||
AvxOpcode::Vfnmadd132ss => (false, OpcodeMap::_0F38, 0x9D),
|
||||
AvxOpcode::Vfnmadd213ss => (false, OpcodeMap::_0F38, 0xAD),
|
||||
AvxOpcode::Vfmadd132sd => (true, OpcodeMap::_0F38, 0x99),
|
||||
AvxOpcode::Vfmadd213sd => (true, OpcodeMap::_0F38, 0xA9),
|
||||
AvxOpcode::Vfnmadd132sd => (true, OpcodeMap::_0F38, 0x9D),
|
||||
AvxOpcode::Vfnmadd213sd => (true, OpcodeMap::_0F38, 0xAD),
|
||||
AvxOpcode::Vfmadd132ps => (false, OpcodeMap::_0F38, 0x98),
|
||||
AvxOpcode::Vfmadd213ps => (false, OpcodeMap::_0F38, 0xA8),
|
||||
AvxOpcode::Vfnmadd132ps => (false, OpcodeMap::_0F38, 0x9C),
|
||||
AvxOpcode::Vfnmadd213ps => (false, OpcodeMap::_0F38, 0xAC),
|
||||
AvxOpcode::Vfmadd132pd => (true, OpcodeMap::_0F38, 0x98),
|
||||
AvxOpcode::Vfmadd213pd => (true, OpcodeMap::_0F38, 0xA8),
|
||||
AvxOpcode::Vfnmadd132pd => (true, OpcodeMap::_0F38, 0x9C),
|
||||
AvxOpcode::Vfnmadd213pd => (true, OpcodeMap::_0F38, 0xAC),
|
||||
AvxOpcode::Vblendvps => (false, OpcodeMap::_0F3A, 0x4A),
|
||||
AvxOpcode::Vblendvpd => (false, OpcodeMap::_0F3A, 0x4B),
|
||||
AvxOpcode::Vpblendvb => (false, OpcodeMap::_0F3A, 0x4C),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
match src3 {
|
||||
RegMem::Reg { reg: src } => VexInstruction::new()
|
||||
.length(VexVectorLength::V128)
|
||||
.prefix(LegacyPrefixes::_66)
|
||||
.map(map)
|
||||
.w(w)
|
||||
.opcode(opcode)
|
||||
.reg(dst.to_real_reg().unwrap().hw_enc())
|
||||
.rm(src.to_real_reg().unwrap().hw_enc())
|
||||
.vvvv(src2.to_real_reg().unwrap().hw_enc())
|
||||
.encode(sink),
|
||||
_ => todo!(),
|
||||
};
|
||||
VexInstruction::new()
|
||||
.length(VexVectorLength::V128)
|
||||
.prefix(LegacyPrefixes::_66)
|
||||
.map(map)
|
||||
.w(w)
|
||||
.opcode(opcode)
|
||||
.reg(dst.to_real_reg().unwrap().hw_enc())
|
||||
.rm(src3)
|
||||
.vvvv(src2.to_real_reg().unwrap().hw_enc())
|
||||
.encode(sink);
|
||||
}
|
||||
|
||||
Inst::XmmRmRBlendVex {
|
||||
|
||||
Reference in New Issue
Block a user