x64: lower fcvt_from_uint to VCVTUDQ2PS when possible
When AVX512VL and AVX512F are available, use a single instruction (`VCVTUDQ2PS`) instead of a lengthy 9-instruction sequence. This optimization is a port from the legacy x86 backend.
This commit is contained in:
@@ -1000,6 +1000,7 @@ impl fmt::Display for SseOpcode {
|
||||
|
||||
/// Opcodes for AVX-512 instructions.
///
/// Each variant is named after one instruction mnemonic; the instruction
/// sets that support a given opcode are reported by `available_from`.
#[derive(Clone)]
|
||||
pub enum Avx512Opcode {
|
||||
/// `vcvtudq2ps`: used to lower `fcvt_from_uint` with a single instruction;
/// listed as available from AVX512F and AVX512VL.
Vcvtudq2ps,
|
||||
/// `vpabsq`: listed as available from AVX512F and AVX512VL.
Vpabsq,
|
||||
/// `vpmullq`: listed as available from AVX512VL and AVX512DQ.
Vpmullq,
|
||||
}
|
||||
@@ -1008,6 +1009,9 @@ impl Avx512Opcode {
|
||||
/// Which `InstructionSet`s support the opcode?
|
||||
pub(crate) fn available_from(&self) -> SmallVec<[InstructionSet; 2]> {
|
||||
match self {
|
||||
Avx512Opcode::Vcvtudq2ps => {
|
||||
smallvec![InstructionSet::AVX512F, InstructionSet::AVX512VL]
|
||||
}
|
||||
Avx512Opcode::Vpabsq => smallvec![InstructionSet::AVX512F, InstructionSet::AVX512VL],
|
||||
Avx512Opcode::Vpmullq => smallvec![InstructionSet::AVX512VL, InstructionSet::AVX512DQ],
|
||||
}
|
||||
@@ -1017,6 +1021,7 @@ impl Avx512Opcode {
|
||||
impl fmt::Debug for Avx512Opcode {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
let name = match self {
|
||||
Avx512Opcode::Vcvtudq2ps => "vcvtudq2ps",
|
||||
Avx512Opcode::Vpabsq => "vpabsq",
|
||||
Avx512Opcode::Vpmullq => "vpmullq",
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user