diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs
index 1aa76798ea..e30ea592a9 100644
--- a/cranelift/codegen/meta/src/isa/x86/legalize.rs
+++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs
@@ -3,7 +3,7 @@ use crate::cdsl::instructions::{vector, Bindable, InstructionGroup};
 use crate::cdsl::types::{LaneType, ValueType};
 use crate::cdsl::xform::TransformGroupBuilder;
 use crate::shared::types::Float::F64;
-use crate::shared::types::Int::{I16, I32, I64};
+use crate::shared::types::Int::{I16, I32, I64, I8};
 use crate::shared::Definitions as SharedDefinitions;
 
 #[allow(clippy::many_single_char_names)]
@@ -69,6 +69,9 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
 
     let x86_bsf = x86_instructions.by_name("x86_bsf");
     let x86_bsr = x86_instructions.by_name("x86_bsr");
+    let x86_pmaxu = x86_instructions.by_name("x86_pmaxu");
+    let x86_pmins = x86_instructions.by_name("x86_pmins");
+    let x86_pminu = x86_instructions.by_name("x86_pminu");
     let x86_pshufb = x86_instructions.by_name("x86_pshufb");
     let x86_pshufd = x86_instructions.by_name("x86_pshufd");
     let x86_psll = x86_instructions.by_name("x86_psll");
@@ -506,6 +509,39 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
         );
     }
 
+    // SIMD icmp ugt: `a > b` (unsigned) holds exactly when `max(a, b) != b`. Testing
+    // `max(a, b) == a` instead would also accept equal lanes, i.e. compute `uge`.
+    // NOTE(review): assumes vector `icmp ne` is itself legalized for these types - confirm.
+    let ugt = Literal::enumerator_for(&imm.intcc, "ugt");
+    let ne = Literal::enumerator_for(&imm.intcc, "ne");
+    for ty in &[I8, I16, I32] {
+        let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
+        narrow.legalize(
+            def!(c = icmp_(ugt, a, b)),
+            vec![def!(x = x86_pmaxu(a, b)), def!(c = icmp(ne, x, b))],
+        );
+    }
+
+    // SIMD icmp sge: `a >= b` (signed) holds exactly when `min(a, b) == b`.
+    let sge = Literal::enumerator_for(&imm.intcc, "sge");
+    for ty in &[I8, I16, I32] {
+        let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
+        narrow.legalize(
+            def!(c = icmp_(sge, a, b)),
+            vec![def!(x = x86_pmins(a, b)), def!(c = icmp(eq, x, b))],
+        );
+    }
+
+    // SIMD icmp uge: `a >= b` (unsigned) holds exactly when `min(a, b) == b`.
+    let uge = Literal::enumerator_for(&imm.intcc, "uge");
+    for ty in &[I8, I16, I32] {
+        let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
+        narrow.legalize(
+            def!(c = icmp_(uge, a, b)),
+            vec![def!(x = x86_pminu(a, b)), def!(c = icmp(eq, x, b))],
+        );
+    }
+
     narrow.custom_legalize(shuffle, "convert_shuffle");
     narrow.custom_legalize(extractlane, "convert_extractlane");
     narrow.custom_legalize(insertlane, "convert_insertlane");
diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif
index 4555a25b42..b4d9681285 100644
--- a/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif
@@ -10,3 +10,28 @@ ebb0(v0: i32x4, v1: i32x4):
     ; nextln: v2 = bxor v4, v3
     return v2
 }
+
+function %icmp_ugt_i32x4(i32x4, i32x4) -> b32x4 {
+ebb0(v0: i32x4, v1: i32x4):
+    v2 = icmp ugt v0, v1
+    ; check: v3 = x86_pmaxu v0, v1
+    ; nextln: v4 = icmp eq v3, v1
+    ; check: v2 = bxor v5, v4
+    return v2
+}
+
+function %icmp_sge_i16x8(i16x8, i16x8) -> b16x8 {
+ebb0(v0: i16x8, v1: i16x8):
+    v2 = icmp sge v0, v1
+    ; check: v3 = x86_pmins v0, v1
+    ; nextln: v2 = icmp eq v3, v1
+    return v2
+}
+
+function %icmp_uge_i8x16(i8x16, i8x16) -> b8x16 {
+ebb0(v0: i8x16, v1: i8x16):
+    v2 = icmp uge v0, v1
+    ; check: v3 = x86_pminu v0, v1
+    ; nextln: v2 = icmp eq v3, v1
+    return v2
+}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif
index ab3a525243..0cffbc6708 100644
--- a/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif
@@ -107,3 +107,33 @@ ebb0:
     return v8
 }
 ; run
+
+function %icmp_ugt_i8x16() -> b1 {
+ebb0:
+    v0 = vconst.i8x16 [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]
+    v1 = vconst.i8x16 [0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
+    v2 = icmp ugt v0, v1
+    v8 = vall_true v2
+    return v8
+}
+; run
+
+function %icmp_sge_i16x8() -> b1 {
+ebb0:
+    v0 = vconst.i16x8 [-1 1 2 3 4 5 6 7]
+    v1 = vconst.i16x8 [-1 1 1 1 1 1 1 1]
+    v2 = icmp sge v0, v1
+    v8 = vall_true v2
+    return v8
+}
+; run
+
+function %icmp_uge_i32x4() -> b1 {
+ebb0:
+    v0 = vconst.i32x4 [1 2 3 4]
+    v1 = vconst.i32x4 [1 1 1 1]
+    v2 = icmp uge v0, v1
+    v8 = vall_true v2
+    return v8
+}
+; run