diff --git a/cranelift/codegen/meta/src/cdsl/instructions.rs b/cranelift/codegen/meta/src/cdsl/instructions.rs
index fdc5ad8bf8..0ad5c1397a 100644
--- a/cranelift/codegen/meta/src/cdsl/instructions.rs
+++ b/cranelift/codegen/meta/src/cdsl/instructions.rs
@@ -628,6 +628,9 @@ pub enum FormatPredicateKind {
 
     /// Is the referenced data object colocated?
     IsColocatedData,
+
+    /// Does the operation have a specific condition code?
+    HasConditionCode(&'static str),
 }
 
 #[derive(Clone, Hash, PartialEq, Eq)]
@@ -714,6 +717,10 @@ impl FormatPredicateNode {
             FormatPredicateKind::IsColocatedData => {
                 format!("predicates::is_colocated_data({}, func)", self.member_name)
             }
+            FormatPredicateKind::HasConditionCode(code) => format!(
+                "predicates::match_condition_code_to_str({}, \"{}\")",
+                self.member_name, code
+            ),
         }
     }
 }
@@ -997,6 +1004,19 @@ impl InstructionPredicate {
         ))
     }
 
+    /// Build a format predicate testing the `field_name` field of `format` for `condition_code`.
+    pub fn new_has_condition_code(
+        format: &InstructionFormat,
+        condition_code: &'static str,
+        field_name: &'static str,
+    ) -> InstructionPredicateNode {
+        InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new(
+            format,
+            field_name,
+            FormatPredicateKind::HasConditionCode(condition_code),
+        ))
+    }
+
     pub fn and(mut self, new_node: InstructionPredicateNode) -> Self {
         let node = self.node;
         let mut and_nodes = match node {
diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs
index 6c578c458a..453ecc37dc 100644
--- a/cranelift/codegen/meta/src/isa/x86/encodings.rs
+++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs
@@ -573,6 +573,7 @@ pub(crate) fn define(
     let rec_gvaddr4 = r.template("gvaddr4");
     let rec_gvaddr8 = r.template("gvaddr8");
     let rec_icscc = r.template("icscc");
+    let rec_icscc_fpr = r.template("icscc_fpr");
     let rec_icscc_ib = r.template("icscc_ib");
     let rec_icscc_id = r.template("icscc_id");
     let rec_indirect_jmp = r.template("indirect_jmp");
@@ -2058,6 +2059,31 @@ pub(crate) fn define(
         e.enc_32_64(iadd, rec_fa.opcodes(opcodes.to_vec()));
     }
 
+    // SIMD icmp using PCMPEQ*
+    let mut pcmpeq_mapping: HashMap<u64, (Vec<u8>, Option<SettingPredicateNumber>)> =
+        HashMap::new();
+    pcmpeq_mapping.insert(8, (vec![0x66, 0x0f, 0x74], None)); // PCMPEQB from SSE2
+    pcmpeq_mapping.insert(16, (vec![0x66, 0x0f, 0x75], None)); // PCMPEQW from SSE2
+    pcmpeq_mapping.insert(32, (vec![0x66, 0x0f, 0x76], None)); // PCMPEQD from SSE2
+    pcmpeq_mapping.insert(64, (vec![0x66, 0x0f, 0x38, 0x29], Some(use_sse41_simd))); // PCMPEQQ from SSE4.1
+    for ty in ValueType::all_lane_types().filter(|t| t.is_int() && allowed_simd_type(t)) {
+        if let Some((opcodes, isa_predicate)) = pcmpeq_mapping.get(&ty.lane_bits()) {
+            let instruction = icmp.bind_vector_from_lane(ty, sse_vector_size);
+            let f_int_compare = formats.get(formats.by_name("IntCompare"));
+            let has_eq_condition_code =
+                InstructionPredicate::new_has_condition_code(f_int_compare, "eq", "cond");
+            let template = rec_icscc_fpr.nonrex().opcodes(opcodes.clone());
+            e.enc_32_64_func(instruction, template, |builder| {
+                let builder = builder.inst_predicate(has_eq_condition_code);
+                if let Some(p) = isa_predicate {
+                    builder.isa_predicate(*p)
+                } else {
+                    builder
+                }
+            });
+        }
+    }
+
     // Reference type instructions
 
     // Null references implemented as iconst 0.
diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs
index e37759e892..57057ce6bb 100644
--- a/cranelift/codegen/meta/src/isa/x86/legalize.rs
+++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs
@@ -329,6 +329,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
             def!(y = splat_any8x16(x)),
             vec![
                 def!(a = scalar_to_vector(x)), // move into the lowest 8 bits of an XMM register
+                // TODO replace the following two instructions with `vconst(0)` when this is possible; see https://github.com/CraneStation/cranelift/issues/1052
                 def!(b = f64const(ieee64_zero)), // zero out a different XMM register; the shuffle mask for moving the lowest byte to all other byte lanes is 0x0
                 def!(c = bitcast_f64_to_any8x16(b)), // no instruction emitted; informs the SSA that the 0 in b can be used as a vector of this type
                 def!(y = x86_pshufb(a, c)), // PSHUFB takes two XMM operands, one of which is a shuffle mask (i.e. b)
diff --git a/cranelift/codegen/meta/src/isa/x86/recipes.rs b/cranelift/codegen/meta/src/isa/x86/recipes.rs
index b9a10c86f6..da602b9973 100644
--- a/cranelift/codegen/meta/src/isa/x86/recipes.rs
+++ b/cranelift/codegen/meta/src/isa/x86/recipes.rs
@@ -2940,6 +2940,19 @@ pub(crate) fn define<'shared>(
             ),
     );
 
+    recipes.add_template_recipe(
+        EncodingRecipeBuilder::new("icscc_fpr", f_int_compare, 1)
+            .operands_in(vec![fpr, fpr])
+            .operands_out(vec![0])
+            .emit(
+                r#"
+                    // Comparison instruction.
+                    {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+                    modrm_rr(in_reg1, in_reg0, sink);
+                "#,
+            ),
+    );
+
     {
         let format = formats.get(f_int_compare_imm);
 
diff --git a/cranelift/codegen/src/predicates.rs b/cranelift/codegen/src/predicates.rs
index 16672b145b..cb56fb0100 100644
--- a/cranelift/codegen/src/predicates.rs
+++ b/cranelift/codegen/src/predicates.rs
@@ -10,6 +10,8 @@
 //! dead code warning.
 
 use crate::ir;
+use crate::ir::condcodes::IntCC;
+use std::string::ToString;
 
 /// Check that an integer value is zero.
 #[allow(dead_code)]
@@ -83,6 +85,15 @@ pub fn has_length_of(value_list: &ir::ValueList, num: usize, func: &ir::Function
     value_list.len(&func.dfg.value_lists) == num
 }
 
+/// Check that the string form of a condition code (e.g. `eq` for `IntCC::Equal`) equals a string.
+#[allow(dead_code)]
+pub fn match_condition_code_to_str(
+    condition_code: IntCC,
+    stringified_condition_code: &str,
+) -> bool {
+    condition_code.to_string() == stringified_condition_code
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -136,4 +147,10 @@ mod tests {
         assert!(!is_all_ones_128_bit(&[0; 16]));
         assert!(is_all_ones_128_bit(&[0xff; 16]));
     }
+
+    #[test]
+    fn condition_code() {
+        assert!(match_condition_code_to_str(IntCC::Equal, "eq"));
+        assert!(!match_condition_code_to_str(IntCC::Equal, "ne"));
+    }
 }
diff --git a/cranelift/filetests/filetests/isa/x86/icmp-compile.clif b/cranelift/filetests/filetests/isa/x86/icmp-compile.clif
new file mode 100644
index 0000000000..cf9cb3ff07
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x86/icmp-compile.clif
@@ -0,0 +1,35 @@
+test binemit
+set enable_simd
+target x86_64 skylake
+
+function %icmp_i8x16() {
+ebb0:
+[-, %xmm3] v0 = vconst.i8x16 0x00 ; bin: 66 0f ef db
+[-, %xmm4] v1 = vconst.i8x16 0xffffffffffffffffffffffffffffffff ; bin: 66 0f 74 e4
+[-, %xmm3] v2 = icmp eq v0, v1 ; bin: 66 0f 74 dc
+    return
+}
+
+function %icmp_i16x8() {
+ebb0:
+[-, %xmm0] v0 = vconst.i16x8 0x00
+[-, %xmm7] v1 = vconst.i16x8 0xffffffffffffffffffffffffffffffff
+[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 0f 75 c7
+    return
+}
+
+function %icmp_i32x4() {
+ebb0:
+[-, %xmm0] v0 = vconst.i32x4 0x00
+[-, %xmm4] v1 = vconst.i32x4 0xffffffffffffffffffffffffffffffff
+[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 0f 76 c4
+    return
+}
+
+function %icmp_i64x2() {
+ebb0:
+[-, %xmm0] v0 = vconst.i64x2 0x00
+[-, %xmm1] v1 = vconst.i64x2 0xffffffffffffffffffffffffffffffff
+[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 0f 38 29 c1
+    return
+}
diff --git a/cranelift/filetests/filetests/isa/x86/icmp-run.clif b/cranelift/filetests/filetests/isa/x86/icmp-run.clif
new file mode 100644
index 0000000000..c470af662a
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x86/icmp-run.clif
@@ -0,0 +1,24 @@
+test run
+set enable_simd
+
+function %run_icmp_i8x16() -> b8 {
+ebb0:
+    v0 = vconst.i8x16 0x00
+    v1 = vconst.i8x16 0x00
+    v2 = icmp eq v0, v1
+    v3 = extractlane v2, 0
+    return v3
+}
+
+; run
+
+function %run_icmp_i64x2() -> b64 {
+ebb0:
+    v0 = vconst.i64x2 0xffffffffffffffffffffffffffffffff
+    v1 = vconst.i64x2 0xffffffffffffffffffffffffffffffff
+    v2 = icmp eq v0, v1
+    v3 = extractlane v2, 1
+    return v3
+}
+
+; run