diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index 818e07bd0b..9ce33817c7 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -589,8 +589,9 @@ pub fn define( let use_popcnt = settings.predicate_by_name("use_popcnt"); let use_lzcnt = settings.predicate_by_name("use_lzcnt"); let use_bmi1 = settings.predicate_by_name("use_bmi1"); - let use_ssse3 = settings.predicate_by_name("use_ssse3"); let use_sse41 = settings.predicate_by_name("use_sse41"); + let use_ssse3_simd = settings.predicate_by_name("use_ssse3_simd"); + let use_sse41_simd = settings.predicate_by_name("use_sse41_simd"); // Definitions. let mut e = PerCpuModeEncodings::new(); @@ -1694,8 +1695,8 @@ pub fn define( for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) { let instruction = x86_pshufb.bind_vector_from_lane(ty, sse_vector_size); let template = rec_fa.nonrex().opcodes(vec![0x66, 0x0f, 0x38, 00]); - e.enc32_isap(instruction.clone(), template.clone(), use_ssse3); - e.enc64_isap(instruction, template, use_ssse3); + e.enc32_isap(instruction.clone(), template.clone(), use_ssse3_simd); + e.enc64_isap(instruction, template, use_ssse3_simd); } // PSHUFD, 32-bit shuffle using one XMM register and a u8 immediate @@ -1726,10 +1727,10 @@ pub fn define( // SIMD insertlane let mut insertlane_mapping: HashMap, Option)> = HashMap::new(); - insertlane_mapping.insert(8, (vec![0x66, 0x0f, 0x3a, 0x20], Some(use_sse41))); // PINSRB + insertlane_mapping.insert(8, (vec![0x66, 0x0f, 0x3a, 0x20], Some(use_sse41_simd))); // PINSRB insertlane_mapping.insert(16, (vec![0x66, 0x0f, 0xc4], None)); // PINSRW from SSE2 - insertlane_mapping.insert(32, (vec![0x66, 0x0f, 0x3a, 0x22], Some(use_sse41))); // PINSRD - insertlane_mapping.insert(64, (vec![0x66, 0x0f, 0x3a, 0x22], Some(use_sse41))); // PINSRQ, only x86_64 + insertlane_mapping.insert(32, (vec![0x66, 0x0f, 0x3a, 0x22], Some(use_sse41_simd))); // PINSRD + insertlane_mapping.insert(64, (vec![0x66, 0x0f, 0x3a, 0x22], Some(use_sse41_simd))); // PINSRQ, only x86_64 for ty in ValueType::all_lane_types() { if let Some((opcode, isap)) = insertlane_mapping.get(&ty.lane_bits()) { @@ -1747,10 +1748,10 @@ pub fn define( // SIMD extractlane let mut extractlane_mapping: HashMap, Option)> = HashMap::new(); - extractlane_mapping.insert(8, (vec![0x66, 0x0f, 0x3a, 0x14], Some(use_sse41))); // PEXTRB + extractlane_mapping.insert(8, (vec![0x66, 0x0f, 0x3a, 0x14], Some(use_sse41_simd))); // PEXTRB extractlane_mapping.insert(16, (vec![0x66, 0x0f, 0xc5], None)); // PEXTRW from zSSE2, SSE4.1 has a PEXTRW that can move to reg/m16 but the opcode is four bytes - extractlane_mapping.insert(32, (vec![0x66, 0x0f, 0x3a, 0x16], Some(use_sse41))); // PEXTRD - extractlane_mapping.insert(64, (vec![0x66, 0x0f, 0x3a, 0x16], Some(use_sse41))); // PEXTRQ, only x86_64 + extractlane_mapping.insert(32, (vec![0x66, 0x0f, 0x3a, 0x16], Some(use_sse41_simd))); // PEXTRD + extractlane_mapping.insert(64, (vec![0x66, 0x0f, 0x3a, 0x16], Some(use_sse41_simd))); // PEXTRQ, only x86_64 for ty in ValueType::all_lane_types() { if let Some((opcode, isap)) = extractlane_mapping.get(&ty.lane_bits()) { diff --git a/cranelift/codegen/meta/src/isa/x86/settings.rs b/cranelift/codegen/meta/src/isa/x86/settings.rs index 3a42553386..10cb516207 100644 --- a/cranelift/codegen/meta/src/isa/x86/settings.rs +++ b/cranelift/codegen/meta/src/isa/x86/settings.rs @@ -32,12 +32,23 @@ pub fn define(shared: &SettingGroup) -> SettingGroup { let shared_enable_simd = shared.get_bool("enable_simd"); - settings.add_predicate("use_ssse3", predicate!(shared_enable_simd && has_ssse3)); - settings.add_predicate("use_sse41", predicate!(shared_enable_simd && has_sse41)); + settings.add_predicate("use_ssse3", predicate!(has_ssse3)); + settings.add_predicate("use_sse41", predicate!(has_sse41)); + settings.add_predicate("use_sse42", predicate!(has_sse41 && has_sse42)); + settings.add_predicate( - "use_sse42", + "use_ssse3_simd", + predicate!(shared_enable_simd && has_ssse3), + ); + settings.add_predicate( + "use_sse41_simd", + predicate!(shared_enable_simd && has_sse41), + ); + settings.add_predicate( + "use_sse42_simd", predicate!(shared_enable_simd && has_sse41 && has_sse42), ); + settings.add_predicate("use_popcnt", predicate!(has_popcnt && has_sse42)); settings.add_predicate("use_bmi1", predicate!(has_bmi1)); settings.add_predicate("use_lzcnt", predicate!(has_lzcnt)); diff --git a/cranelift/filetests/filetests/isa/x86/binary32-float.clif b/cranelift/filetests/filetests/isa/x86/binary32-float.clif index c6092e2e22..8a4ae5fe7e 100644 --- a/cranelift/filetests/filetests/isa/x86/binary32-float.clif +++ b/cranelift/filetests/filetests/isa/x86/binary32-float.clif @@ -1,6 +1,5 @@ ; Binary emission of 32-bit floating point code. test binemit -set enable_simd target i686 haswell ; The binary encodings can be verified with the command: diff --git a/cranelift/filetests/filetests/isa/x86/binary64-float.clif b/cranelift/filetests/filetests/isa/x86/binary64-float.clif index 8c34299705..59e024a042 100644 --- a/cranelift/filetests/filetests/isa/x86/binary64-float.clif +++ b/cranelift/filetests/filetests/isa/x86/binary64-float.clif @@ -1,7 +1,6 @@ ; Binary emission of 64-bit floating point code. test binemit set opt_level=best -set enable_simd target x86_64 haswell ; The binary encodings can be verified with the command: