From b4ef90cfcdae28122902717ce248cb5c34b4ab88 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Wed, 17 Jul 2019 09:45:58 -0700 Subject: [PATCH] Remove SSE2 setting for x86 In talking to @sunfishcode, he preferred to avoid the confusion of more ISA predicates by eliminating SSE2. SSE2 was released with the Pentium 4 in 2000 so it is unlikely that current CPUs would have SIMD enabled and not have this feature. I tried to note the SSE2-specific instructions with comments in the code. --- .../codegen/meta/src/isa/x86/encodings.rs | 71 +++++++++++++------ .../codegen/meta/src/isa/x86/settings.rs | 9 +-- .../filetests/filetests/isa/x86/pshufb.clif | 2 +- .../filetests/filetests/isa/x86/pshufd.clif | 2 +- .../filetests/isa/x86/scalar_to_vector.clif | 2 +- 5 files changed, 53 insertions(+), 33 deletions(-) diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index 3b4be51a66..c5bbfe1a83 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -268,14 +268,38 @@ impl PerCpuModeEncodings { } /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. 
REX, operand binding) has already happened - fn enc_32_64_isap( + fn enc_32_64_maybe_isap( &mut self, inst: BoundInstruction, template: Template, - isap: SettingPredicateNumber, + isap: Option<SettingPredicateNumber>, ) { - self.enc32_isap(inst.clone(), template.clone(), isap); - self.enc64_isap(inst, template, isap); + self.enc32_maybe_isap(inst.clone(), template.clone(), isap); + self.enc64_maybe_isap(inst, template, isap); + } + + fn enc32_maybe_isap( + &mut self, + inst: BoundInstruction, + template: Template, + isap: Option<SettingPredicateNumber>, + ) { + match isap { + None => self.enc32(inst, template), + Some(isap) => self.enc32_isap(inst, template, isap), + } + } + + fn enc64_maybe_isap( + &mut self, + inst: BoundInstruction, + template: Template, + isap: Option<SettingPredicateNumber>, + ) { + match isap { + None => self.enc64(inst, template), + Some(isap) => self.enc64_isap(inst, template, isap), + } } } @@ -559,7 +583,6 @@ pub fn define( let use_popcnt = settings.predicate_by_name("use_popcnt"); let use_lzcnt = settings.predicate_by_name("use_lzcnt"); let use_bmi1 = settings.predicate_by_name("use_bmi1"); - let use_sse2 = settings.predicate_by_name("use_sse2"); let use_ssse3 = settings.predicate_by_name("use_ssse3"); let use_sse41 = settings.predicate_by_name("use_sse41"); @@ -1648,8 +1671,8 @@ pub fn define( let template = rec_r_ib_unsigned_fpr .nonrex() .opcodes(vec![0x66, 0x0f, 0x70]); - e.enc32_isap(instruction.clone(), template.clone(), use_sse2); - e.enc64_isap(instruction, template, use_sse2); + e.enc32(instruction.clone(), template.clone()); + e.enc64(instruction, template); } // SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according @@ -1662,47 +1685,49 @@ pub fn define( let template = rec_frurm.opcodes(vec![0x66, 0x0f, 0x6e]); // MOVD/MOVQ if ty.lane_bits() < 64 { // no 32-bit encodings for 64-bit widths - e.enc32_isap(instruction.clone(), template.clone(), use_sse2); + e.enc32(instruction.clone(), template.clone()); } - e.enc_x86_64_isap(instruction, template, use_sse2); + 
e.enc_x86_64(instruction, template); } // SIMD insertlane - let mut insertlane_mapping: HashMap, SettingPredicateNumber)> = HashMap::new(); - insertlane_mapping.insert(8, (vec![0x66, 0x0f, 0x3a, 0x20], use_sse41)); // PINSRB - insertlane_mapping.insert(16, (vec![0x66, 0x0f, 0xc4], use_sse2)); // PINSRW - insertlane_mapping.insert(32, (vec![0x66, 0x0f, 0x3a, 0x22], use_sse41)); // PINSRD - insertlane_mapping.insert(64, (vec![0x66, 0x0f, 0x3a, 0x22], use_sse41)); // PINSRQ, only x86_64 + let mut insertlane_mapping: HashMap, Option)> = + HashMap::new(); + insertlane_mapping.insert(8, (vec![0x66, 0x0f, 0x3a, 0x20], Some(use_sse41))); // PINSRB + insertlane_mapping.insert(16, (vec![0x66, 0x0f, 0xc4], None)); // PINSRW from SSE2 + insertlane_mapping.insert(32, (vec![0x66, 0x0f, 0x3a, 0x22], Some(use_sse41))); // PINSRD + insertlane_mapping.insert(64, (vec![0x66, 0x0f, 0x3a, 0x22], Some(use_sse41))); // PINSRQ, only x86_64 for ty in ValueType::all_lane_types() { if let Some((opcode, isap)) = insertlane_mapping.get(&ty.lane_bits()) { let instruction = insertlane.bind_vector_from_lane(ty, sse_vector_size); let template = rec_r_ib_unsigned_r.opcodes(opcode.clone()); if ty.lane_bits() < 64 { - e.enc_32_64_isap(instruction, template.nonrex(), isap.clone()); + e.enc_32_64_maybe_isap(instruction, template.nonrex(), isap.clone()); } else { // turns out the 64-bit widths have REX/W encodings and only are available on x86_64 - e.enc64_isap(instruction, template.rex().w(), isap.clone()); + e.enc64_maybe_isap(instruction, template.rex().w(), isap.clone()); } } } // SIMD extractlane - let mut extractlane_mapping: HashMap, SettingPredicateNumber)> = HashMap::new(); - extractlane_mapping.insert(8, (vec![0x66, 0x0f, 0x3a, 0x14], use_sse41)); // PEXTRB - extractlane_mapping.insert(16, (vec![0x66, 0x0f, 0xc5], use_sse2)); // PEXTRW, SSE4.1 has a PEXTRW that can move to reg/m16 but the opcode is four bytes - extractlane_mapping.insert(32, (vec![0x66, 0x0f, 0x3a, 0x16], use_sse41)); // 
PEXTRD - extractlane_mapping.insert(64, (vec![0x66, 0x0f, 0x3a, 0x16], use_sse41)); // PEXTRQ, only x86_64 + let mut extractlane_mapping: HashMap, Option)> = + HashMap::new(); + extractlane_mapping.insert(8, (vec![0x66, 0x0f, 0x3a, 0x14], Some(use_sse41))); // PEXTRB + extractlane_mapping.insert(16, (vec![0x66, 0x0f, 0xc5], None)); // PEXTRW from SSE2, SSE4.1 has a PEXTRW that can move to reg/m16 but the opcode is four bytes + extractlane_mapping.insert(32, (vec![0x66, 0x0f, 0x3a, 0x16], Some(use_sse41))); // PEXTRD + extractlane_mapping.insert(64, (vec![0x66, 0x0f, 0x3a, 0x16], Some(use_sse41))); // PEXTRQ, only x86_64 for ty in ValueType::all_lane_types() { if let Some((opcode, isap)) = extractlane_mapping.get(&ty.lane_bits()) { let instruction = extractlane.bind_vector_from_lane(ty, sse_vector_size); let template = rec_r_ib_unsigned_gpr.opcodes(opcode.clone()); if ty.lane_bits() < 64 { - e.enc_32_64_isap(instruction, template.nonrex(), isap.clone()); + e.enc_32_64_maybe_isap(instruction, template.nonrex(), isap.clone()); } else { // turns out the 64-bit widths have REX/W encodings and only are available on x86_64 - e.enc64_isap(instruction, template.rex().w(), isap.clone()); + e.enc64_maybe_isap(instruction, template.rex().w(), isap.clone()); } } } diff --git a/cranelift/codegen/meta/src/isa/x86/settings.rs b/cranelift/codegen/meta/src/isa/x86/settings.rs index bc8c81f484..3a42553386 100644 --- a/cranelift/codegen/meta/src/isa/x86/settings.rs +++ b/cranelift/codegen/meta/src/isa/x86/settings.rs @@ -3,9 +3,6 @@ use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder}; pub fn define(shared: &SettingGroup) -> SettingGroup { let mut settings = SettingGroupBuilder::new("x86"); - // CPUID.01H:EDX - let has_sse2 = settings.add_bool("has_sse2", "SSE2: CPUID.01H:EDX.SSE2[bit 26]", false); - // CPUID.01H:ECX let has_sse3 = settings.add_bool("has_sse3", "SSE3: CPUID.01H:ECX.SSE3[bit 0]", false); let has_ssse3 = settings.add_bool("has_ssse3", "SSSE3: 
CPUID.01H:ECX.SSSE3[bit 9]", false); @@ -35,7 +32,6 @@ pub fn define(shared: &SettingGroup) -> SettingGroup { let shared_enable_simd = shared.get_bool("enable_simd"); - settings.add_predicate("use_sse2", predicate!(shared_enable_simd && has_sse2)); settings.add_predicate("use_ssse3", predicate!(shared_enable_simd && has_ssse3)); settings.add_predicate("use_sse41", predicate!(shared_enable_simd && has_sse41)); settings.add_predicate( @@ -69,7 +65,7 @@ pub fn define(shared: &SettingGroup) -> SettingGroup { settings.add_preset("baseline", preset!()); let nehalem = settings.add_preset( "nehalem", - preset!(has_sse2 && has_sse3 && has_ssse3 && has_sse41 && has_sse42 && has_popcnt), + preset!(has_sse3 && has_ssse3 && has_sse41 && has_sse42 && has_popcnt), ); let haswell = settings.add_preset( "haswell", @@ -82,8 +78,7 @@ pub fn define(shared: &SettingGroup) -> SettingGroup { settings.add_preset( "znver1", preset!( - has_sse2 - && has_sse3 + has_sse3 && has_ssse3 && has_sse41 && has_sse42 diff --git a/cranelift/filetests/filetests/isa/x86/pshufb.clif b/cranelift/filetests/filetests/isa/x86/pshufb.clif index 7c23c5ab61..6fb31b198c 100644 --- a/cranelift/filetests/filetests/isa/x86/pshufb.clif +++ b/cranelift/filetests/filetests/isa/x86/pshufb.clif @@ -1,6 +1,6 @@ test binemit set enable_simd -target x86_64 has_sse2=true has_ssse3=true +target x86_64 has_ssse3=true function %test_pshufb() { ebb0: diff --git a/cranelift/filetests/filetests/isa/x86/pshufd.clif b/cranelift/filetests/filetests/isa/x86/pshufd.clif index 183af4fc0e..6f4896d0d9 100644 --- a/cranelift/filetests/filetests/isa/x86/pshufd.clif +++ b/cranelift/filetests/filetests/isa/x86/pshufd.clif @@ -1,6 +1,6 @@ test binemit set enable_simd -target x86_64 has_sse2=true +target x86_64 function %test_pshuf() { ebb0: diff --git a/cranelift/filetests/filetests/isa/x86/scalar_to_vector.clif b/cranelift/filetests/filetests/isa/x86/scalar_to_vector.clif index 6c77dfafdb..51ddea3e7e 100644 --- 
a/cranelift/filetests/filetests/isa/x86/scalar_to_vector.clif +++ b/cranelift/filetests/filetests/isa/x86/scalar_to_vector.clif @@ -1,7 +1,7 @@ test binemit set opt_level=best set enable_simd -target x86_64 has_sse2=true +target x86_64 function %test_scalar_to_vector_b8() { ebb0: