x64: lower fcvt_from_uint to VCVTUDQ2PS when possible

When AVX512VL and AVX512F are available, use a single instruction
(`VCVTUDQ2PS`) instead of a lengthy 9-instruction sequence. This
optimization is a port from the legacy x86 backend.
This commit is contained in:
Andrew Brown
2021-05-19 12:20:11 -07:00
parent 3b3b126fe2
commit 54b45d28a3
5 changed files with 93 additions and 63 deletions

View File

@@ -1000,6 +1000,7 @@ impl fmt::Display for SseOpcode {
/// Opcodes for AVX-512 instructions. Each variant is named after the
/// instruction mnemonic that the `fmt::Debug` impl prints for it, and
/// `available_from` reports which `InstructionSet`s support it.
#[derive(Clone)]
pub enum Avx512Opcode {
/// `vcvtudq2ps`; listed by `available_from` as AVX512F + AVX512VL.
Vcvtudq2ps,
/// `vpabsq`; listed by `available_from` as AVX512F + AVX512VL.
Vpabsq,
/// `vpmullq`; listed by `available_from` as AVX512VL + AVX512DQ.
Vpmullq,
}
@@ -1008,6 +1009,9 @@ impl Avx512Opcode {
/// Which `InstructionSet`s support the opcode?
pub(crate) fn available_from(&self) -> SmallVec<[InstructionSet; 2]> {
match self {
Avx512Opcode::Vcvtudq2ps => {
smallvec![InstructionSet::AVX512F, InstructionSet::AVX512VL]
}
Avx512Opcode::Vpabsq => smallvec![InstructionSet::AVX512F, InstructionSet::AVX512VL],
Avx512Opcode::Vpmullq => smallvec![InstructionSet::AVX512VL, InstructionSet::AVX512DQ],
}
@@ -1017,6 +1021,7 @@ impl Avx512Opcode {
impl fmt::Debug for Avx512Opcode {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
let name = match self {
Avx512Opcode::Vcvtudq2ps => "vcvtudq2ps",
Avx512Opcode::Vpabsq => "vpabsq",
Avx512Opcode::Vpmullq => "vpmullq",
};

View File

@@ -1408,16 +1408,17 @@ pub(crate) fn emit(
}
Inst::XmmUnaryRmREvex { op, src, dst } => {
let opcode = match op {
Avx512Opcode::Vpabsq => 0x1f,
let (prefix, map, w, opcode) = match op {
Avx512Opcode::Vpabsq => (LegacyPrefixes::_66, OpcodeMap::_0F38, true, 0x1f),
Avx512Opcode::Vcvtudq2ps => (LegacyPrefixes::_F2, OpcodeMap::_0F, false, 0x7a),
_ => unimplemented!("Opcode {:?} not implemented", op),
};
match src {
RegMem::Reg { reg: src } => EvexInstruction::new()
.length(EvexVectorLength::V128)
.prefix(LegacyPrefixes::_66)
.map(OpcodeMap::_0F38)
.w(true)
.prefix(prefix)
.map(map)
.w(w)
.opcode(opcode)
.reg(dst.to_reg().get_hw_encoding())
.rm(src.get_hw_encoding())

View File

@@ -3889,6 +3889,12 @@ fn test_x64_emit() {
"vpabsq %xmm2, %xmm8",
));
insns.push((
Inst::xmm_unary_rm_r_evex(Avx512Opcode::Vcvtudq2ps, RegMem::reg(xmm2), w_xmm8),
"62717F087AC2",
"vcvtudq2ps %xmm2, %xmm8",
));
// Xmm to int conversions, and conversely.
insns.push((