x64: lower i8x16.popcnt to VPOPCNTB when possible

When AVX512VL or AVX512BITALG are available, Wasm SIMD's `popcnt`
instruction can be lowered to a single x64 instruction, `VPOPCNTB`,
instead of 8+ instructions.
This commit is contained in:
Andrew Brown
2021-05-24 11:21:07 -07:00
parent 2b0649c74c
commit 459fce3467
6 changed files with 107 additions and 68 deletions

View File

@@ -40,6 +40,12 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
"AVX2: CPUID.07H:EBX.AVX2[bit 5]",
false,
);
let has_avx512bitalg = settings.add_bool(
"has_avx512bitalg",
"Has support for AVX512BITALG.",
"AVX512BITALG: CPUID.07H:ECX.AVX512BITALG[bit 12]",
false,
);
let has_avx512dq = settings.add_bool(
"has_avx512dq",
"Has support for AVX512DQ.",
@@ -108,6 +114,10 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
settings.add_predicate("use_avx_simd", predicate!(shared_enable_simd && has_avx));
settings.add_predicate("use_avx2_simd", predicate!(shared_enable_simd && has_avx2));
settings.add_predicate(
"use_avx512bitalg_simd",
predicate!(shared_enable_simd && has_avx512bitalg),
);
settings.add_predicate(
"use_avx512dq_simd",
predicate!(shared_enable_simd && has_avx512dq),