Add x86 SIMD legalizations for integer greater-than

This includes `icmp ugt`, `icmp sge`, and `icmp uge` for vectors with lanes of I8, I16, and I32.
2019-10-25 10:43:21 -07:00
parent 0ab5760fd7
commit feffed85d2
3 changed files with 88 additions and 1 deletions
--- a/cranelift/codegen/meta/src/isa/x86/legalize.rs
+++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs
@@ -3,7 +3,7 @@ use crate::cdsl::instructions::{vector, Bindable, InstructionGroup};
 use crate::cdsl::types::{LaneType, ValueType};
 use crate::cdsl::xform::TransformGroupBuilder;
 use crate::shared::types::Float::F64;
-use crate::shared::types::Int::{I16, I32, I64};
+use crate::shared::types::Int::{I16, I32, I64, I8};
 use crate::shared::Definitions as SharedDefinitions;
 #[allow(clippy::many_single_char_names)]
@@ -69,6 +69,9 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
    let x86_bsf = x86_instructions.by_name("x86_bsf");
    let x86_bsr = x86_instructions.by_name("x86_bsr");
    let x86_pmaxu = x86_instructions.by_name("x86_pmaxu");
    let x86_pmins = x86_instructions.by_name("x86_pmins");
    let x86_pminu = x86_instructions.by_name("x86_pminu");
    let x86_pshufb = x86_instructions.by_name("x86_pshufb");
    let x86_pshufd = x86_instructions.by_name("x86_pshufd");
    let x86_psll = x86_instructions.by_name("x86_psll");
@@ -506,6 +509,36 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
        );
    }
    // SIMD icmp ugt
    //
    // Unsigned greater-than is expressed through the unsigned lane-wise maximum:
    // `max_u(a, b) == b` holds exactly when `a <= b`, so inverting that mask leaves the
    // lanes where `a > b`. Comparing the maximum against `a` instead would also accept
    // equal lanes, i.e. it would compute `uge` rather than `ugt`.
    // NOTE(review): relies on the pattern variable `y` and the `bnot` instruction being
    // bound earlier in this function, the same way `x` and `icmp` are — confirm.
    let ugt = Literal::enumerator_for(&imm.intcc, "ugt");
    for ty in &[I8, I16, I32] {
        let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
        narrow.legalize(
            def!(c = icmp_(ugt, a, b)),
            vec![
                def!(x = x86_pmaxu(a, b)),
                def!(y = icmp(eq, x, b)),
                def!(c = bnot(y)),
            ],
        );
    }
    // SIMD icmp sge
    //
    // Signed `a >= b` is rewritten as `min(a, b) == b`: the minimum of the two lanes is
    // `b` exactly when `b` does not exceed `a`, which also covers the equal case.
    // Assumes `x86_pmins` is the lane-wise signed minimum — TODO confirm against its
    // instruction definition.
    let sge = Literal::enumerator_for(&imm.intcc, "sge");
    for ty in &[I8, I16, I32] {
        // Bind `icmp` to the concrete SSE vector type so the pattern only matches it.
        let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
        narrow.legalize(
            def!(c = icmp_(sge, a, b)),
            vec![def!(x = x86_pmins(a, b)), def!(c = icmp(eq, x, b))],
        );
    }
    // SIMD icmp uge
    //
    // Unsigned `a >= b` is rewritten as `min(a, b) == b`: the minimum of the two lanes is
    // `b` exactly when `b` does not exceed `a`, which also covers the equal case.
    // Assumes `x86_pminu` is the lane-wise unsigned minimum — TODO confirm against its
    // instruction definition.
    let uge = Literal::enumerator_for(&imm.intcc, "uge");
    for ty in &[I8, I16, I32] {
        // Bind `icmp` to the concrete SSE vector type so the pattern only matches it.
        let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
        narrow.legalize(
            def!(c = icmp_(uge, a, b)),
            vec![def!(x = x86_pminu(a, b)), def!(c = icmp(eq, x, b))],
        );
    }
    narrow.custom_legalize(shuffle, "convert_shuffle");
    narrow.custom_legalize(extractlane, "convert_extractlane");
    narrow.custom_legalize(insertlane, "convert_insertlane");
--- a/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif
@@ -10,3 +10,27 @@ ebb0(v0: i32x4, v1: i32x4):
    ; nextln: v2 = bxor v4, v3
    return v2
 }
 function %icmp_ugt_i32x4(i32x4, i32x4) -> b32x4 {
 ebb0(v0: i32x4, v1: i32x4):
    ; Unsigned greater-than has to reject equal lanes, so the expected expansion tests
    ; max_u(v0, v1) == v1 (true when v0 <= v1) and then inverts that mask. The inversion
    ; is expected to lower to a bxor against an all-ones constant; the constant's own
    ; line is skipped so its printed form is not pinned here.
    v2 = icmp ugt v0, v1
    ; check: v3 = x86_pmaxu v0, v1
    ; nextln: v4 = icmp eq v3, v1
    ; check: v2 = bxor v5, v4
    return v2
 }
 function %icmp_sge_i16x8(i16x8, i16x8) -> b16x8 {
 ebb0(v0: i16x8, v1: i16x8):
    ; Signed greater-or-equal is expected to expand to min(v0, v1) == v1, which holds
    ; exactly when v1 does not exceed v0.
    v2 = icmp sge v0, v1
    ; check: v3 = x86_pmins v0, v1
    ; nextln: v2 = icmp eq v3, v1
    return v2
 }
 function %icmp_uge_i8x16(i8x16, i8x16) -> b8x16 {
 ebb0(v0: i8x16, v1: i8x16):
    ; Unsigned greater-or-equal is expected to expand to min(v0, v1) == v1, which holds
    ; exactly when v1 does not exceed v0.
    v2 = icmp uge v0, v1
    ; check: v3 = x86_pminu v0, v1
    ; nextln: v2 = icmp eq v3, v1
    return v2
 }
--- a/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif
@@ -107,3 +107,33 @@ ebb0:
    return v8
 }
 ; run
 function %icmp_ugt_i8x16() -> b1 {
 ebb0:
    ; Each lane of v0 is strictly greater than the matching lane of v1, so the compare
    ; should be true in all 16 lanes and vall_true should yield true.
    ; NOTE(review): no lane pair is equal here, so this case cannot tell ugt apart from
    ; uge; a case with equal lanes would be needed to cover that.
    v0 = vconst.i8x16 [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]
    v1 = vconst.i8x16 [0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
    v2 = icmp ugt v0, v1
    v8 = vall_true v2
    return v8
 }
 ; run
 function %icmp_sge_i16x8() -> b1 {
 ebb0:
    ; Lanes 0 and 1 hold equal values (-1 vs -1, 1 vs 1) and the other lanes of v0 are
    ; greater, so a signed greater-or-equal compare should be true in all 8 lanes.
    v0 = vconst.i16x8 [-1 1 2 3 4 5 6 7]
    v1 = vconst.i16x8 [-1 1 1 1 1 1 1 1]
    v2 = icmp sge v0, v1
    v8 = vall_true v2
    return v8
 }
 ; run
 function %icmp_uge_i32x4() -> b1 {
 ebb0:
    ; Lane 0 holds equal values (1 vs 1) and lanes 1-3 of v0 are greater, so an unsigned
    ; greater-or-equal compare should be true in all 4 lanes.
    ; NOTE(review): all inputs are small positive values, so a signed compare would give
    ; the same answer; this case does not separate uge from sge.
    v0 = vconst.i32x4 [1 2 3 4]
    v1 = vconst.i32x4 [1 1 1 1]
    v2 = icmp uge v0, v1
    v8 = vall_true v2
    return v8
 }
 ; run