x64: lower i8x16.popcnt to VPOPCNTB when possible

When either AVX512VL or AVX512BITALG is available, Wasm SIMD's
`i8x16.popcnt` instruction can be lowered to a single x64 instruction,
`VPOPCNTB`, instead of a sequence of 8 or more instructions.
This commit is contained in:
Andrew Brown
2021-05-24 11:21:07 -07:00
parent 2b0649c74c
commit 459fce3467
6 changed files with 107 additions and 68 deletions

View File

@@ -126,9 +126,10 @@ pub(crate) fn emit(
InstructionSet::Lzcnt => info.isa_flags.use_lzcnt(),
InstructionSet::BMI1 => info.isa_flags.use_bmi1(),
InstructionSet::BMI2 => info.isa_flags.has_bmi2(),
InstructionSet::AVX512BITALG => info.isa_flags.has_avx512bitalg(),
InstructionSet::AVX512F => info.isa_flags.has_avx512f(),
InstructionSet::AVX512VL => info.isa_flags.has_avx512vl(),
InstructionSet::AVX512DQ => info.isa_flags.has_avx512dq(),
InstructionSet::AVX512VL => info.isa_flags.has_avx512vl(),
}
};
@@ -1409,8 +1410,9 @@ pub(crate) fn emit(
Inst::XmmUnaryRmREvex { op, src, dst } => {
let (prefix, map, w, opcode) = match op {
Avx512Opcode::Vpabsq => (LegacyPrefixes::_66, OpcodeMap::_0F38, true, 0x1f),
Avx512Opcode::Vcvtudq2ps => (LegacyPrefixes::_F2, OpcodeMap::_0F, false, 0x7a),
Avx512Opcode::Vpabsq => (LegacyPrefixes::_66, OpcodeMap::_0F38, true, 0x1f),
Avx512Opcode::Vpopcntb => (LegacyPrefixes::_66, OpcodeMap::_0F38, false, 0x54),
_ => unimplemented!("Opcode {:?} not implemented", op),
};
match src {