x64: lower i8x16.shuffle to VPERMI2B when possible
When shuffling values from two different registers, the x64 lowering for `i8x16.shuffle` must first shuffle each register separately and then OR the results with SSE instructions. With `VPERMI2B`, available in AVX512VL + AVX512VBMI, this can be done in a single instruction after the shuffle mask has been moved into the destination register. This change uses `VPERMI2B` for that case when the CPU supports it.
This commit is contained in:
@@ -58,6 +58,12 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
|
||||
"AVX512VL: CPUID.07H:EBX.AVX512VL[bit 31]",
|
||||
false,
|
||||
);
|
||||
let has_avx512vbmi = settings.add_bool(
|
||||
"has_avx512vbmi",
|
||||
"Has support for AVX512VBMI.",
|
||||
"AVX512VBMI: CPUID.07H:ECX.AVX512VBMI[bit 1]",
|
||||
false,
|
||||
);
|
||||
let has_avx512f = settings.add_bool(
|
||||
"has_avx512f",
|
||||
"Has support for AVX512F.",
|
||||
@@ -126,6 +132,10 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
|
||||
"use_avx512vl_simd",
|
||||
predicate!(shared_enable_simd && has_avx512vl),
|
||||
);
|
||||
settings.add_predicate(
|
||||
"use_avx512vbmi_simd",
|
||||
predicate!(shared_enable_simd && has_avx512vbmi),
|
||||
);
|
||||
settings.add_predicate(
|
||||
"use_avx512f_simd",
|
||||
predicate!(shared_enable_simd && has_avx512f),
|
||||
|
||||
Reference in New Issue
Block a user