diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index 2f21ddf044..f9e140eead 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -517,6 +517,10 @@ pub(crate) fn define( let x86_pop = x86.by_name("x86_pop"); let x86_pextr = x86.by_name("x86_pextr"); let x86_pinsr = x86.by_name("x86_pinsr"); + let x86_pmaxs = x86.by_name("x86_pmaxs"); + let x86_pmaxu = x86.by_name("x86_pmaxu"); + let x86_pmins = x86.by_name("x86_pmins"); + let x86_pminu = x86.by_name("x86_pminu"); let x86_pshufd = x86.by_name("x86_pshufd"); let x86_pshufb = x86.by_name("x86_pshufb"); let x86_psll = x86.by_name("x86_psll"); @@ -2047,6 +2051,25 @@ pub(crate) fn define( } } + // SIMD min/max + for (ty, inst, opcodes, isa_predicate) in &[ + (I8, x86_pmaxs, &PMAXSB[..], Some(use_sse41_simd)), + (I16, x86_pmaxs, &PMAXSW[..], None), + (I32, x86_pmaxs, &PMAXSD[..], Some(use_sse41_simd)), + (I8, x86_pmaxu, &PMAXUB[..], None), + (I16, x86_pmaxu, &PMAXUW[..], Some(use_sse41_simd)), + (I32, x86_pmaxu, &PMAXUD[..], Some(use_sse41_simd)), + (I8, x86_pmins, &PMINSB[..], Some(use_sse41_simd)), + (I16, x86_pmins, &PMINSW[..], None), + (I32, x86_pmins, &PMINSD[..], Some(use_sse41_simd)), + (I8, x86_pminu, &PMINUB[..], None), + (I16, x86_pminu, &PMINUW[..], Some(use_sse41_simd)), + (I32, x86_pminu, &PMINUD[..], Some(use_sse41_simd)), + ] { + let inst_ = inst.bind(vector(*ty, sse_vector_size)); + e.enc_32_64_maybe_isap(inst_, rec_fa.opcodes(opcodes), *isa_predicate); + } + // Reference type instructions // Null references implemented as iconst 0. 
diff --git a/cranelift/codegen/meta/src/isa/x86/instructions.rs b/cranelift/codegen/meta/src/isa/x86/instructions.rs index 7b77ceaf42..04dc6cfe12 100644 --- a/cranelift/codegen/meta/src/isa/x86/instructions.rs +++ b/cranelift/codegen/meta/src/isa/x86/instructions.rs @@ -487,5 +487,60 @@ pub(crate) fn define( .operands_out(vec![f]), ); + let x = &Operand::new("x", IxN); + let y = &Operand::new("y", IxN); + let a = &Operand::new("a", IxN); + ig.push( + Inst::new( + "x86_pmaxs", + r#" + Maximum of Packed Signed Integers -- Compare signed integers in the first and second + operand and return the maximum values. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_pmaxu", + r#" + Maximum of Packed Unsigned Integers -- Compare unsigned integers in the first and second + operand and return the maximum values. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_pmins", + r#" + Minimum of Packed Signed Integers -- Compare signed integers in the first and second + operand and return the minimum values. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_pminu", + r#" + Minimum of Packed Unsigned Integers -- Compare unsigned integers in the first and second + operand and return the minimum values. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + ig.build() } diff --git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs index ecc64f560d..8187283778 100644 --- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs +++ b/cranelift/codegen/meta/src/isa/x86/opcodes.rs @@ -311,6 +311,54 @@ pub static PINSRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x20]; /// Insert word (SSE2). 
pub static PINSRW: [u8; 3] = [0x66, 0x0f, 0xc4]; +/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed maximum values in +/// xmm1 (SSE4.1). +pub static PMAXSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x3c]; + +/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed maximum +/// values in xmm1 (SSE4.1). +pub static PMAXSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3d]; + +/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed maximum values in +/// xmm1 (SSE2). +pub static PMAXSW: [u8; 3] = [0x66, 0x0f, 0xee]; + +/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed maximum values in +/// xmm1 (SSE2). +pub static PMAXUB: [u8; 3] = [0x66, 0x0f, 0xde]; + +/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed maximum +/// values in xmm1 (SSE4.1). +pub static PMAXUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3f]; + +/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed maximum values in +/// xmm1 (SSE4.1). +pub static PMAXUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3e]; + +/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed minimum values in +/// xmm1 (SSE4.1). +pub static PMINSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x38]; + +/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed minimum +/// values in xmm1 (SSE4.1). +pub static PMINSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x39]; + +/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed minimum values in +/// xmm1 (SSE2). +pub static PMINSW: [u8; 3] = [0x66, 0x0f, 0xea]; + +/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed minimum values in +/// xmm1 (SSE2). +pub static PMINUB: [u8; 3] = [0x66, 0x0f, 0xda]; + +/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed minimum +/// values in xmm1 (SSE4.1). 
+pub static PMINUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3b]; + +/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed minimum values in +/// xmm1 (SSE4.1). +pub static PMINUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3a]; + /// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of /// the results in xmm1 (SSE2). pub static PMULLW: [u8; 3] = [0x66, 0x0f, 0xd5]; diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif index a60b0eaf4c..aecfbe1ad7 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif @@ -25,3 +25,30 @@ ebb0(v0: i64x2 [%xmm0], v1: i64x2 [%xmm7]): [-, %xmm0] v2 = icmp sgt v0, v1 ; bin: 66 0f 38 37 c7 return v2 } + +function %min_max_i8x16(i8x16, i8x16) { +ebb0(v0: i8x16 [%xmm3], v1: i8x16 [%xmm1]): +[-, %xmm3] v2 = x86_pmaxs v0, v1 ; bin: 66 0f 38 3c d9 +[-, %xmm3] v3 = x86_pmaxu v0, v1 ; bin: 66 0f de d9 +[-, %xmm3] v4 = x86_pmins v0, v1 ; bin: 66 0f 38 38 d9 +[-, %xmm3] v5 = x86_pminu v0, v1 ; bin: 66 0f da d9 + return +} + +function %min_max_i16x8(i16x8, i16x8) { +ebb0(v0: i16x8 [%xmm2], v1: i16x8 [%xmm5]): +[-, %xmm2] v2 = x86_pmaxs v0, v1 ; bin: 66 0f ee d5 +[-, %xmm2] v3 = x86_pmaxu v0, v1 ; bin: 66 0f 38 3e d5 +[-, %xmm2] v4 = x86_pmins v0, v1 ; bin: 66 0f ea d5 +[-, %xmm2] v5 = x86_pminu v0, v1 ; bin: 66 0f 38 3a d5 + return +} + +function %min_max_i32x4(i32x4, i32x4) { +ebb0(v0: i32x4 [%xmm2], v1: i32x4 [%xmm4]): +[-, %xmm2] v2 = x86_pmaxs v0, v1 ; bin: 66 0f 38 3d d4 +[-, %xmm2] v3 = x86_pmaxu v0, v1 ; bin: 66 0f 38 3f d4 +[-, %xmm2] v4 = x86_pmins v0, v1 ; bin: 66 0f 38 39 d4 +[-, %xmm2] v5 = x86_pminu v0, v1 ; bin: 66 0f 38 3b d4 + return +} diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif index 
d3f2abe304..ab3a525243 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif @@ -66,3 +66,44 @@ ebb0: return v8 } ; run + +function %maxs_i8x16() -> b1 { +ebb0: + v0 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] ; 1 will be greater than -1 == 0xff with + ; signed max + v1 = vconst.i8x16 [0xff 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] + v2 = x86_pmaxs v0, v1 + v8 = vall_true v2 + return v8 +} +; run + +function %maxu_i16x8() -> b1 { +ebb0: + v0 = vconst.i16x8 [0 1 1 1 1 1 1 1] + v1 = vconst.i16x8 [-1 1 1 1 1 1 1 1] ; -1 == 0xffff will be greater with unsigned max + v2 = x86_pmaxu v0, v1 + v8 = vall_true v2 + return v8 +} +; run + +function %mins_i32x4() -> b1 { +ebb0: + v0 = vconst.i32x4 [0 1 1 1] + v1 = vconst.i32x4 [-1 1 1 1] ; -1 == 0xffffffff will be less with signed min + v2 = x86_pmins v0, v1 + v8 = vall_true v2 + return v8 +} +; run + +function %minu_i8x16() -> b1 { +ebb0: + v0 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] ; 1 < 2 with unsigned min + v1 = vconst.i8x16 [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] + v2 = x86_pminu v0, v1 + v8 = vall_true v2 + return v8 +} +; run