x64: lower i8x16.shuffle to VPERMI2B when possible
When shuffling values from two different registers, the SSE-based x64 lowering for `i8x16.shuffle` must first shuffle each register separately and then OR the two results together. With `VPERMI2B`, available when both AVX512VL and AVX512VBMI are present, the same shuffle can be done in a single instruction once the shuffle mask has been moved into the destination register. This change uses `VPERMI2B` for that case when the CPU supports it.
This commit is contained in:
@@ -97,11 +97,14 @@ pub fn builder_with_options(
|
||||
if std::is_x86_feature_detected!("avx512dq") {
|
||||
isa_builder.enable("has_avx512dq").unwrap();
|
||||
}
|
||||
if std::is_x86_feature_detected!("avx512f") {
|
||||
isa_builder.enable("has_avx512f").unwrap();
|
||||
}
|
||||
if std::is_x86_feature_detected!("avx512vl") {
|
||||
isa_builder.enable("has_avx512vl").unwrap();
|
||||
}
|
||||
if std::is_x86_feature_detected!("avx512f") {
|
||||
isa_builder.enable("has_avx512f").unwrap();
|
||||
if std::is_x86_feature_detected!("avx512vbmi") {
|
||||
isa_builder.enable("has_avx512vbmi").unwrap();
|
||||
}
|
||||
if std::is_x86_feature_detected!("lzcnt") {
|
||||
isa_builder.enable("has_lzcnt").unwrap();
|
||||
|
||||
Reference in New Issue
Block a user