diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs
index 6148b2ffad..2f21ddf044 100644
--- a/cranelift/codegen/meta/src/isa/x86/encodings.rs
+++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs
@@ -685,6 +685,7 @@ pub(crate) fn define(
     let use_sse41 = settings.predicate_by_name("use_sse41");
     let use_ssse3_simd = settings.predicate_by_name("use_ssse3_simd");
     let use_sse41_simd = settings.predicate_by_name("use_sse41_simd");
+    let use_sse42_simd = settings.predicate_by_name("use_sse42_simd");
 
     // Definitions.
     let mut e = PerCpuModeEncodings::new();
@@ -2025,21 +2026,26 @@ pub(crate) fn define(
         e.enc_32_64(x86_psra, rec_fa.opcodes(*opcodes));
     }
 
-    // SIMD icmp using PCMPEQ*
-    for ty in ValueType::all_lane_types().filter(|t| t.is_int() && allowed_simd_type(t)) {
-        let (opcodes, isa_predicate): (&[_], _) = match ty.lane_bits() {
-            8 => (&PCMPEQB, None),
-            16 => (&PCMPEQW, None),
-            32 => (&PCMPEQD, None),
-            64 => (&PCMPEQQ, Some(use_sse41_simd)),
-            _ => panic!("invalid size for SIMD icmp"),
-        };
-
-        let instruction = icmp
-            .bind(Immediate::IntCC(IntCC::Equal))
-            .bind(vector(ty, sse_vector_size));
-        let template = rec_icscc_fpr.nonrex().opcodes(opcodes);
-        e.enc_32_64_maybe_isap(instruction, template, isa_predicate);
+    // SIMD integer comparisons
+    {
+        use IntCC::*;
+        // Each row: (lane type, condition code, opcode bytes, optional ISA predicate).
+        for (ty, cc, opcodes, isa_predicate) in &[
+            (I8, Equal, &PCMPEQB[..], None),
+            (I16, Equal, &PCMPEQW[..], None),
+            (I32, Equal, &PCMPEQD[..], None),
+            (I64, Equal, &PCMPEQQ[..], Some(use_sse41_simd)),
+            (I8, SignedGreaterThan, &PCMPGTB[..], None),
+            (I16, SignedGreaterThan, &PCMPGTW[..], None),
+            (I32, SignedGreaterThan, &PCMPGTD[..], None),
+            (I64, SignedGreaterThan, &PCMPGTQ[..], Some(use_sse42_simd)),
+        ] {
+            let instruction = icmp
+                .bind(Immediate::IntCC(*cc))
+                .bind(vector(*ty, sse_vector_size));
+            let template = rec_icscc_fpr.nonrex().opcodes(opcodes);
+            e.enc_32_64_maybe_isap(instruction, template, *isa_predicate);
+        }
     }
 
     // Reference type instructions
diff --git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs
index 0491028810..ecc64f560d 100644
--- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs
+++ b/cranelift/codegen/meta/src/isa/x86/opcodes.rs
@@ -281,6 +281,18 @@ pub static PCMPEQQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x29];
 /// Compare packed data for equal (SSE2).
 pub static PCMPEQW: [u8; 3] = [0x66, 0x0f, 0x75];
 
+/// Compare packed signed byte integers for greater than (SSE2).
+pub static PCMPGTB: [u8; 3] = [0x66, 0x0f, 0x64];
+
+/// Compare packed signed doubleword integers for greater than (SSE2).
+pub static PCMPGTD: [u8; 3] = [0x66, 0x0f, 0x66];
+
+/// Compare packed signed quadword integers for greater than (SSE4.2).
+pub static PCMPGTQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x37];
+
+/// Compare packed signed word integers for greater than (SSE2).
+pub static PCMPGTW: [u8; 3] = [0x66, 0x0f, 0x65];
+
 /// Extract doubleword or quadword, depending on REX.W (SSE4.1).
 pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16];
 
diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif
new file mode 100644
index 0000000000..a60b0eaf4c
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif
@@ -0,0 +1,27 @@
+test binemit
+set enable_simd
+target x86_64 skylake
+
+function %icmp_sgt_i8x16(i8x16, i8x16) -> b8x16 {
+ebb0(v0: i8x16 [%xmm2], v1: i8x16 [%xmm1]):
+[-, %xmm2] v2 = icmp sgt v0, v1 ; bin: 66 0f 64 d1
+    return v2
+}
+
+function %icmp_sgt_i16x8(i16x8, i16x8) -> b16x8 {
+ebb0(v0: i16x8 [%xmm4], v1: i16x8 [%xmm3]):
+[-, %xmm4] v2 = icmp sgt v0, v1 ; bin: 66 0f 65 e3
+    return v2
+}
+
+function %icmp_sgt_i32x4(i32x4, i32x4) -> b32x4 {
+ebb0(v0: i32x4 [%xmm6], v1: i32x4 [%xmm5]):
+[-, %xmm6] v2 = icmp sgt v0, v1 ; bin: 66 0f 66 f5
+    return v2
+}
+
+function %icmp_sgt_i64x2(i64x2, i64x2) -> b64x2 {
+ebb0(v0: i64x2 [%xmm0], v1: i64x2 [%xmm7]):
+[-, %xmm0] v2 = icmp sgt v0, v1 ; bin: 66 0f 38 37 c7
+    return v2
+}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif
index 6afbf418d6..d3f2abe304 100644
--- a/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif
@@ -43,3 +43,26 @@ ebb0:
     return v5
 }
 ; run
+
+function %icmp_sgt_i8x16() -> b1 {
+ebb0:
+    v0 = vconst.i8x16 [0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0]
+    v1 = vconst.i8x16 [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
+    v2 = icmp sgt v0, v1
+    v3 = raw_bitcast.i8x16 v2
+    v4 = vconst.i8x16 [0 0 0xff 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
+    v7 = icmp eq v3, v4
+    v8 = vall_true v7
+    return v8
+}
+; run
+
+function %icmp_sgt_i64x2() -> b1 {
+ebb0:
+    v0 = vconst.i64x2 [0 -42]
+    v1 = vconst.i64x2 [-1 -43]
+    v2 = icmp sgt v0, v1
+    v8 = vall_true v2
+    return v8
+}
+; run