diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs
index f9e140eead..3d57444cce 100644
--- a/cranelift/codegen/meta/src/isa/x86/encodings.rs
+++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs
@@ -610,6 +610,7 @@ pub(crate) fn define(
     let rec_null_fpr = r.recipe("null_fpr");
     let rec_pcrel_fnaddr8 = r.template("pcrel_fnaddr8");
     let rec_pcrel_gvaddr8 = r.template("pcrel_gvaddr8");
+    let rec_pfcmp = r.template("pfcmp");
     let rec_popq = r.template("popq");
     let rec_pu_id = r.template("pu_id");
     let rec_pu_id_bool = r.template("pu_id_bool");
@@ -2070,6 +2071,16 @@ pub(crate) fn define(
         e.enc_32_64_maybe_isap(inst_, rec_fa.opcodes(opcodes), *isa_predicate);
     }
 
+    // SIMD float comparisons
+    e.enc_both(
+        fcmp.bind(vector(F32, sse_vector_size)),
+        rec_pfcmp.opcodes(&CMPPS),
+    );
+    e.enc_both(
+        fcmp.bind(vector(F64, sse_vector_size)),
+        rec_pfcmp.opcodes(&CMPPD),
+    );
+
     // Reference type instructions
 
     // Null references implemented as iconst 0.
diff --git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs
index 8187283778..fde15899e7 100644
--- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs
+++ b/cranelift/codegen/meta/src/isa/x86/opcodes.rs
@@ -61,6 +61,14 @@ pub static CMP_IMM8: [u8; 1] = [0x83];
 /// Compare r{16,32,64} with r/m of the same size.
 pub static CMP_REG: [u8; 1] = [0x39];
 
+/// Compare packed double-precision floating-point values in xmm2/m128 and xmm1 using bits 2:0 of
+/// imm8 as comparison predicate (SSE2).
+pub static CMPPD: [u8; 3] = [0x66, 0x0f, 0xc2];
+
+/// Compare packed single-precision floating-point values in xmm2/m128 and xmm1 using bits 2:0 of
+/// imm8 as comparison predicate (SSE).
+pub static CMPPS: [u8; 2] = [0x0f, 0xc2];
+
 /// Convert scalar double-precision floating-point value to scalar single-precision
 /// floating-point value.
 pub static CVTSD2SS: [u8; 3] = [0xf2, 0x0f, 0x5a];
diff --git a/cranelift/codegen/meta/src/isa/x86/recipes.rs b/cranelift/codegen/meta/src/isa/x86/recipes.rs
index 83d30a37c2..6b5367a50e 100644
--- a/cranelift/codegen/meta/src/isa/x86/recipes.rs
+++ b/cranelift/codegen/meta/src/isa/x86/recipes.rs
@@ -3015,6 +3015,47 @@ pub(crate) fn define<'shared>(
             ),
     );
 
+    {
+        // Packed float comparison (CMPPS/CMPPD): the condition code is emitted as a trailing
+        // immediate byte, and the instruction predicate is built from the codes listed here.
+        // NOTE(review): per the Intel SDM, imm8 values 0x05/0x06 are NLT/NLE, which are also
+        // true for unordered (NaN) lanes -- confirm this matches the intended `ge`/`gt`.
+        let supported_floatccs: Vec<Literal> = ["eq", "lt", "le", "uno", "ne", "gt", "ge", "ord"]
+            .iter()
+            .map(|name| Literal::enumerator_for(floatcc, name))
+            .collect();
+        recipes.add_template_recipe(
+            EncodingRecipeBuilder::new("pfcmp", &formats.float_compare, 2)
+                .operands_in(vec![fpr, fpr])
+                .operands_out(vec![0])
+                .inst_predicate(supported_floatccs_predicate(
+                    &supported_floatccs[..],
+                    &*formats.float_compare,
+                ))
+                .emit(
+                    r#"
+                    // Comparison instruction.
+                    {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+                    modrm_rr(in_reg1, in_reg0, sink);
+                    // Add immediate byte indicating what type of comparison.
+                    use crate::ir::condcodes::FloatCC::*;
+                    let imm = match cond {
+                        Equal => 0x00,
+                        LessThan => 0x01,
+                        LessThanOrEqual => 0x02,
+                        Unordered => 0x03,
+                        NotEqual => 0x04,
+                        GreaterThanOrEqual => 0x05,
+                        GreaterThan => 0x06,
+                        Ordered => 0x07,
+                        _ => panic!("{} not supported by pfcmp", cond),
+                    };
+                    sink.put1(imm);
+                "#,
+                ),
+        );
+    }
+
     recipes.add_template_recipe(
         EncodingRecipeBuilder::new("is_zero", &formats.unary, 2 + 2)
             .operands_in(vec![gpr])
diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif
index aecfbe1ad7..be8e7d4e8e 100644
--- a/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif
@@ -52,3 +52,29 @@ ebb0(v0: i32x4 [%xmm2], v1: i32x4 [%xmm4]):
 [-, %xmm2] v5 = x86_pminu v0, v1 ; bin: 66 0f 38 3b d4
     return
 }
+
+function %fcmp_f32x4(f32x4, f32x4) {
+ebb0(v0: f32x4 [%xmm2], v1: f32x4 [%xmm4]):
+[-, %xmm2] v2 = fcmp eq v0, v1 ; bin: 40 0f c2 d4 00
+[-, %xmm2] v3 = fcmp lt v0, v1 ; bin: 40 0f c2 d4 01
+[-, %xmm2] v4 = fcmp le v0, v1 ; bin: 40 0f c2 d4 02
+[-, %xmm2] v5 = fcmp uno v0, v1 ; bin: 40 0f c2 d4 03
+[-, %xmm2] v6 = fcmp ne v0, v1 ; bin: 40 0f c2 d4 04
+[-, %xmm2] v7 = fcmp ge v0, v1 ; bin: 40 0f c2 d4 05
+[-, %xmm2] v8 = fcmp gt v0, v1 ; bin: 40 0f c2 d4 06
+[-, %xmm2] v9 = fcmp ord v0, v1 ; bin: 40 0f c2 d4 07
+    return
+}
+
+function %fcmp_f64x2(f64x2, f64x2) {
+ebb0(v0: f64x2 [%xmm2], v1: f64x2 [%xmm0]):
+[-, %xmm2] v2 = fcmp eq v0, v1 ; bin: 66 40 0f c2 d0 00
+[-, %xmm2] v3 = fcmp lt v0, v1 ; bin: 66 40 0f c2 d0 01
+[-, %xmm2] v4 = fcmp le v0, v1 ; bin: 66 40 0f c2 d0 02
+[-, %xmm2] v5 = fcmp uno v0, v1 ; bin: 66 40 0f c2 d0 03
+[-, %xmm2] v6 = fcmp ne v0, v1 ; bin: 66 40 0f c2 d0 04
+[-, %xmm2] v7 = fcmp ge v0, v1 ; bin: 66 40 0f c2 d0 05
+[-, %xmm2] v8 = fcmp gt v0, v1 ; bin: 66 40 0f c2 d0 06
+[-, %xmm2] v9 = fcmp ord v0, v1 ; bin: 66 40 0f c2 d0 07
+    return
+}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif
index 5d96585be0..4b9da6e4a2 100644
--- a/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif
@@ -177,3 +177,43 @@ ebb0:
     return v8
 }
 ; run
+
+function %fcmp_eq_f32x4() -> b1 {
+ebb0:
+    v0 = vconst.f32x4 [0.0 -0x4.2 0x0.33333 -0.0]
+    v1 = vconst.f32x4 [0.0 -0x4.2 0x0.33333 -0.0]
+    v2 = fcmp eq v0, v1
+    v8 = vall_true v2
+    return v8
+}
+; run
+
+function %fcmp_lt_f32x4() -> b1 {
+ebb0:
+    v0 = vconst.f32x4 [0.0 -0x4.2 0x0.0 -0.0]
+    v1 = vconst.f32x4 [0x0.001 0x4.2 0x0.33333 0x1.0]
+    v2 = fcmp lt v0, v1
+    v8 = vall_true v2
+    return v8
+}
+; run
+
+function %fcmp_ge_f64x2() -> b1 {
+ebb0:
+    v0 = vconst.f64x2 [0x0.0 0x4.2]
+    v1 = vconst.f64x2 [0.0 0x4.1]
+    v2 = fcmp ge v0, v1
+    v8 = vall_true v2
+    return v8
+}
+; run
+
+function %fcmp_uno_f64x2() -> b1 {
+ebb0:
+    v0 = vconst.f64x2 [0.0 NaN]
+    v1 = vconst.f64x2 [NaN 0x4.1]
+    v2 = fcmp uno v0, v1
+    v8 = vall_true v2
+    return v8
+}
+; run