diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs index f62019367b..c6161cf43b 100644 --- a/cranelift/codegen/meta/src/isa/x86/legalize.rs +++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs @@ -552,6 +552,26 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct narrow.legalize(def!(c = icmp_(ule, a, b)), vec![def!(c = icmp(uge, b, a))]); } + // SIMD fcmp greater-/less-than + let gt = Literal::enumerator_for(&imm.floatcc, "gt"); + let lt = Literal::enumerator_for(&imm.floatcc, "lt"); + let ge = Literal::enumerator_for(&imm.floatcc, "ge"); + let le = Literal::enumerator_for(&imm.floatcc, "le"); + let ugt = Literal::enumerator_for(&imm.floatcc, "ugt"); + let ult = Literal::enumerator_for(&imm.floatcc, "ult"); + let uge = Literal::enumerator_for(&imm.floatcc, "uge"); + let ule = Literal::enumerator_for(&imm.floatcc, "ule"); + for ty in &[F32, F64] { + let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = fcmp_(gt, a, b)), vec![def!(c = fcmp(lt, b, a))]); + let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = fcmp_(ge, a, b)), vec![def!(c = fcmp(le, b, a))]); + let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = fcmp_(ult, a, b)), vec![def!(c = fcmp(ugt, b, a))]); + let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = fcmp_(ule, a, b)), vec![def!(c = fcmp(uge, b, a))]); + } + for ty in &[F32, F64] { let fneg = fneg.bind(vector(*ty, sse_vector_size)); let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16); diff --git a/cranelift/codegen/meta/src/isa/x86/recipes.rs b/cranelift/codegen/meta/src/isa/x86/recipes.rs index 521248082b..b2ce9e628e 100644 --- a/cranelift/codegen/meta/src/isa/x86/recipes.rs +++ b/cranelift/codegen/meta/src/isa/x86/recipes.rs @@ -3169,7 +3169,7 @@ pub(crate) fn define<'shared>( ); { - let supported_floatccs: Vec<Literal> = ["eq", 
"lt", "le", "uno", "ne", "gt", "ge", "ord"] + let supported_floatccs: Vec<Literal> = ["eq", "lt", "le", "uno", "ne", "uge", "ugt", "ord"] .iter() .map(|name| Literal::enumerator_for(floatcc, name)) .collect(); @@ -3189,14 +3189,14 @@ pub(crate) fn define<'shared>( // Add immediate byte indicating what type of comparison. use crate::ir::condcodes::FloatCC::*; let imm = match cond { - Equal => 0x00, - LessThan => 0x01, - LessThanOrEqual => 0x02, - Unordered => 0x03, - NotEqual => 0x04, - GreaterThanOrEqual => 0x05, - GreaterThan => 0x06, - Ordered => 0x07, + Equal => 0x00, + LessThan => 0x01, + LessThanOrEqual => 0x02, + Unordered => 0x03, + NotEqual => 0x04, + UnorderedOrGreaterThanOrEqual => 0x05, + UnorderedOrGreaterThan => 0x06, + Ordered => 0x07, _ => panic!("{} not supported by pfcmp", cond), }; sink.put1(imm); diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif index be8e7d4e8e..722e705a85 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif @@ -60,8 +60,8 @@ ebb0(v0: f32x4 [%xmm2], v1: f32x4 [%xmm4]): [-, %xmm2] v4 = fcmp le v0, v1 ; bin: 40 0f c2 d4 02 [-, %xmm2] v5 = fcmp uno v0, v1 ; bin: 40 0f c2 d4 03 [-, %xmm2] v6 = fcmp ne v0, v1 ; bin: 40 0f c2 d4 04 -[-, %xmm2] v7 = fcmp ge v0, v1 ; bin: 40 0f c2 d4 05 -[-, %xmm2] v8 = fcmp gt v0, v1 ; bin: 40 0f c2 d4 06 +[-, %xmm2] v7 = fcmp uge v0, v1 ; bin: 40 0f c2 d4 05 +[-, %xmm2] v8 = fcmp ugt v0, v1 ; bin: 40 0f c2 d4 06 [-, %xmm2] v9 = fcmp ord v0, v1 ; bin: 40 0f c2 d4 07 return } @@ -73,8 +73,8 @@ ebb0(v0: f64x2 [%xmm2], v1: f64x2 [%xmm0]): [-, %xmm2] v4 = fcmp le v0, v1 ; bin: 66 40 0f c2 d0 02 [-, %xmm2] v5 = fcmp uno v0, v1 ; bin: 66 40 0f c2 d0 03 [-, %xmm2] v6 = fcmp ne v0, v1 ; bin: 66 40 0f c2 d0 04 -[-, %xmm2] v7 = fcmp ge v0, v1 ; bin: 66 40 0f c2 d0 05 -[-, %xmm2] v8 = fcmp gt v0, v1 ; bin: 66 40 0f c2 d0 06 
+[-, %xmm2] v7 = fcmp uge v0, v1 ; bin: 66 40 0f c2 d0 05 +[-, %xmm2] v8 = fcmp ugt v0, v1 ; bin: 66 40 0f c2 d0 06 [-, %xmm2] v9 = fcmp ord v0, v1 ; bin: 66 40 0f c2 d0 07 return } diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif index e4a5e6fea7..444d4e28bd 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif @@ -231,3 +231,17 @@ ebb0: return v8 } ; run + +function %fcmp_gt_nans_f32x4() -> b1 { +ebb0: + v0 = vconst.f32x4 [NaN 0x42.0 -NaN NaN] + v1 = vconst.f32x4 [NaN NaN 0x42.0 Inf] + v2 = fcmp gt v0, v1 + ; now check that the result v2 is all zeroes + v3 = vconst.i32x4 0x00 + v4 = raw_bitcast.i32x4 v2 + v5 = icmp eq v3, v4 + v8 = vall_true v5 + return v8 +} +; run