Add x86-specific SIMD settings, e.g. SSE2

Also tie the SIMD ISA predicates to the shared `enable_simd` setting.
This commit is contained in:
Andrew Brown
2019-07-11 10:21:12 -07:00
committed by Dan Gohman
parent f2c48009e8
commit 659725b465

View File

@@ -3,6 +3,9 @@ use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder};
pub fn define(shared: &SettingGroup) -> SettingGroup {
let mut settings = SettingGroupBuilder::new("x86");
// CPUID.01H:EDX
let has_sse2 = settings.add_bool("has_sse2", "SSE2: CPUID.01H:EDX.SSE2[bit 26]", false);
// CPUID.01H:ECX
let has_sse3 = settings.add_bool("has_sse3", "SSE3: CPUID.01H:ECX.SSE3[bit 0]", false);
let has_ssse3 = settings.add_bool("has_ssse3", "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", false);
@@ -30,8 +33,15 @@ pub fn define(shared: &SettingGroup) -> SettingGroup {
false,
);
settings.add_predicate("use_sse41", predicate!(has_sse41));
settings.add_predicate("use_sse42", predicate!(has_sse41 && has_sse42));
let shared_enable_simd = shared.get_bool("enable_simd");
settings.add_predicate("use_sse2", predicate!(shared_enable_simd && has_sse2));
settings.add_predicate("use_ssse3", predicate!(shared_enable_simd && has_ssse3));
settings.add_predicate("use_sse41", predicate!(shared_enable_simd && has_sse41));
settings.add_predicate(
"use_sse42",
predicate!(shared_enable_simd && has_sse41 && has_sse42),
);
settings.add_predicate("use_popcnt", predicate!(has_popcnt && has_sse42));
settings.add_predicate("use_bmi1", predicate!(has_bmi1));
settings.add_predicate("use_lzcnt", predicate!(has_lzcnt));
@@ -59,7 +69,7 @@ pub fn define(shared: &SettingGroup) -> SettingGroup {
settings.add_preset("baseline", preset!());
let nehalem = settings.add_preset(
"nehalem",
preset!(has_sse3 && has_ssse3 && has_sse41 && has_sse42 && has_popcnt),
preset!(has_sse2 && has_sse3 && has_ssse3 && has_sse41 && has_sse42 && has_popcnt),
);
let haswell = settings.add_preset(
"haswell",
@@ -72,7 +82,8 @@ pub fn define(shared: &SettingGroup) -> SettingGroup {
settings.add_preset(
"znver1",
preset!(
has_sse3
has_sse2
&& has_sse3
&& has_ssse3
&& has_sse41
&& has_sse42