x64: lower i8x16.popcnt to VPOPCNTB when possible
When either AVX512VL or AVX512BITALG is available, Wasm SIMD's `i8x16.popcnt` instruction can be lowered to a single x64 instruction, `VPOPCNTB`, instead of a sequence of 8+ instructions.
This commit is contained in:
@@ -460,9 +460,10 @@ pub(crate) enum InstructionSet {
|
||||
BMI1,
|
||||
#[allow(dead_code)] // never constructed (yet).
|
||||
BMI2,
|
||||
AVX512BITALG,
|
||||
AVX512DQ,
|
||||
AVX512F,
|
||||
AVX512VL,
|
||||
AVX512DQ,
|
||||
}
|
||||
|
||||
/// Some SSE operations requiring 2 operands r/m and r.
|
||||
@@ -1003,6 +1004,7 @@ pub enum Avx512Opcode {
|
||||
Vcvtudq2ps,
|
||||
Vpabsq,
|
||||
Vpmullq,
|
||||
Vpopcntb,
|
||||
}
|
||||
|
||||
impl Avx512Opcode {
|
||||
@@ -1014,6 +1016,9 @@ impl Avx512Opcode {
|
||||
}
|
||||
Avx512Opcode::Vpabsq => smallvec![InstructionSet::AVX512F, InstructionSet::AVX512VL],
|
||||
Avx512Opcode::Vpmullq => smallvec![InstructionSet::AVX512VL, InstructionSet::AVX512DQ],
|
||||
Avx512Opcode::Vpopcntb => {
|
||||
smallvec![InstructionSet::AVX512VL, InstructionSet::AVX512BITALG]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1024,6 +1029,7 @@ impl fmt::Debug for Avx512Opcode {
|
||||
Avx512Opcode::Vcvtudq2ps => "vcvtudq2ps",
|
||||
Avx512Opcode::Vpabsq => "vpabsq",
|
||||
Avx512Opcode::Vpmullq => "vpmullq",
|
||||
Avx512Opcode::Vpopcntb => "vpopcntb",
|
||||
};
|
||||
write!(fmt, "{}", name)
|
||||
}
|
||||
|
||||
@@ -126,9 +126,10 @@ pub(crate) fn emit(
|
||||
InstructionSet::Lzcnt => info.isa_flags.use_lzcnt(),
|
||||
InstructionSet::BMI1 => info.isa_flags.use_bmi1(),
|
||||
InstructionSet::BMI2 => info.isa_flags.has_bmi2(),
|
||||
InstructionSet::AVX512BITALG => info.isa_flags.has_avx512bitalg(),
|
||||
InstructionSet::AVX512F => info.isa_flags.has_avx512f(),
|
||||
InstructionSet::AVX512VL => info.isa_flags.has_avx512vl(),
|
||||
InstructionSet::AVX512DQ => info.isa_flags.has_avx512dq(),
|
||||
InstructionSet::AVX512VL => info.isa_flags.has_avx512vl(),
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1409,8 +1410,9 @@ pub(crate) fn emit(
|
||||
|
||||
Inst::XmmUnaryRmREvex { op, src, dst } => {
|
||||
let (prefix, map, w, opcode) = match op {
|
||||
Avx512Opcode::Vpabsq => (LegacyPrefixes::_66, OpcodeMap::_0F38, true, 0x1f),
|
||||
Avx512Opcode::Vcvtudq2ps => (LegacyPrefixes::_F2, OpcodeMap::_0F, false, 0x7a),
|
||||
Avx512Opcode::Vpabsq => (LegacyPrefixes::_66, OpcodeMap::_0F38, true, 0x1f),
|
||||
Avx512Opcode::Vpopcntb => (LegacyPrefixes::_66, OpcodeMap::_0F38, false, 0x54),
|
||||
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||
};
|
||||
match src {
|
||||
|
||||
@@ -3895,6 +3895,12 @@ fn test_x64_emit() {
|
||||
"vcvtudq2ps %xmm2, %xmm8",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_unary_rm_r_evex(Avx512Opcode::Vpopcntb, RegMem::reg(xmm2), w_xmm8),
|
||||
"62727D0854C2",
|
||||
"vpopcntb %xmm2, %xmm8",
|
||||
));
|
||||
|
||||
// Xmm to int conversions, and conversely.
|
||||
|
||||
insns.push((
|
||||
@@ -4308,6 +4314,7 @@ fn test_x64_emit() {
|
||||
isa_flag_builder.enable("has_sse41").unwrap();
|
||||
isa_flag_builder.enable("has_avx512f").unwrap();
|
||||
isa_flag_builder.enable("has_avx512dq").unwrap();
|
||||
isa_flag_builder.enable("has_avx512vl").unwrap();
|
||||
let isa_flags = x64::settings::Flags::new(&flags, isa_flag_builder);
|
||||
|
||||
let rru = regs::create_reg_universe_systemv(&flags);
|
||||
|
||||
Reference in New Issue
Block a user