x64: lower i8x16.shuffle to VPERMI2B when possible
When shuffling values from two different registers, the SSE-based x64 lowering for `i8x16.shuffle` must first shuffle each register separately and then OR the two results together. With `VPERMI2B`, which requires both AVX512VL and AVX512VBMI, the same shuffle can be done in a single instruction once the shuffle mask has been moved into the destination register. This change uses `VPERMI2B` for that case when the CPU supports it.
This commit is contained in:
@@ -463,6 +463,7 @@ pub(crate) enum InstructionSet {
|
||||
AVX512BITALG,
|
||||
AVX512DQ,
|
||||
AVX512F,
|
||||
AVX512VBMI,
|
||||
AVX512VL,
|
||||
}
|
||||
|
||||
@@ -999,10 +1000,11 @@ impl fmt::Display for SseOpcode {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub enum Avx512Opcode {
|
||||
Vcvtudq2ps,
|
||||
Vpabsq,
|
||||
Vpermi2b,
|
||||
Vpmullq,
|
||||
Vpopcntb,
|
||||
}
|
||||
@@ -1015,6 +1017,9 @@ impl Avx512Opcode {
|
||||
smallvec![InstructionSet::AVX512F, InstructionSet::AVX512VL]
|
||||
}
|
||||
Avx512Opcode::Vpabsq => smallvec![InstructionSet::AVX512F, InstructionSet::AVX512VL],
|
||||
Avx512Opcode::Vpermi2b => {
|
||||
smallvec![InstructionSet::AVX512VL, InstructionSet::AVX512VBMI]
|
||||
}
|
||||
Avx512Opcode::Vpmullq => smallvec![InstructionSet::AVX512VL, InstructionSet::AVX512DQ],
|
||||
Avx512Opcode::Vpopcntb => {
|
||||
smallvec![InstructionSet::AVX512VL, InstructionSet::AVX512BITALG]
|
||||
@@ -1028,6 +1033,7 @@ impl fmt::Debug for Avx512Opcode {
|
||||
let name = match self {
|
||||
Avx512Opcode::Vcvtudq2ps => "vcvtudq2ps",
|
||||
Avx512Opcode::Vpabsq => "vpabsq",
|
||||
Avx512Opcode::Vpermi2b => "vpermi2b",
|
||||
Avx512Opcode::Vpmullq => "vpmullq",
|
||||
Avx512Opcode::Vpopcntb => "vpopcntb",
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user