x64: lower i8x16.popcnt to VPOPCNTB when possible

When AVX512VL or AVX512BITALG are available, Wasm SIMD's `popcnt`
instruction can be lowered to a single x64 instruction, `VPOPCNTB`,
instead of 8+ instructions.
This commit is contained in:
Andrew Brown
2021-05-24 11:21:07 -07:00
parent 2b0649c74c
commit 459fce3467
6 changed files with 107 additions and 68 deletions

View File

@@ -3895,6 +3895,12 @@ fn test_x64_emit() {
"vcvtudq2ps %xmm2, %xmm8",
));
insns.push((
Inst::xmm_unary_rm_r_evex(Avx512Opcode::Vpopcntb, RegMem::reg(xmm2), w_xmm8),
"62727D0854C2",
"vpopcntb %xmm2, %xmm8",
));
// Xmm to int conversions, and conversely.
insns.push((
@@ -4308,6 +4314,7 @@ fn test_x64_emit() {
isa_flag_builder.enable("has_sse41").unwrap();
isa_flag_builder.enable("has_avx512f").unwrap();
isa_flag_builder.enable("has_avx512dq").unwrap();
isa_flag_builder.enable("has_avx512vl").unwrap();
let isa_flags = x64::settings::Flags::new(&flags, isa_flag_builder);
let rru = regs::create_reg_universe_systemv(&flags);