Add x86 SIMD legalizations for integer greater-than

This includes `icmp ugt`, `icmp sge`, and `icmp uge` for vectors with lanes of I8, I16, and I32.
This commit is contained in:
Andrew Brown
2019-10-25 10:43:21 -07:00
parent 0ab5760fd7
commit feffed85d2
3 changed files with 88 additions and 1 deletions

View File

@@ -3,7 +3,7 @@ use crate::cdsl::instructions::{vector, Bindable, InstructionGroup};
use crate::cdsl::types::{LaneType, ValueType}; use crate::cdsl::types::{LaneType, ValueType};
use crate::cdsl::xform::TransformGroupBuilder; use crate::cdsl::xform::TransformGroupBuilder;
use crate::shared::types::Float::F64; use crate::shared::types::Float::F64;
use crate::shared::types::Int::{I16, I32, I64}; use crate::shared::types::Int::{I16, I32, I64, I8};
use crate::shared::Definitions as SharedDefinitions; use crate::shared::Definitions as SharedDefinitions;
#[allow(clippy::many_single_char_names)] #[allow(clippy::many_single_char_names)]
@@ -69,6 +69,9 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let x86_bsf = x86_instructions.by_name("x86_bsf"); let x86_bsf = x86_instructions.by_name("x86_bsf");
let x86_bsr = x86_instructions.by_name("x86_bsr"); let x86_bsr = x86_instructions.by_name("x86_bsr");
let x86_pmaxu = x86_instructions.by_name("x86_pmaxu");
let x86_pmins = x86_instructions.by_name("x86_pmins");
let x86_pminu = x86_instructions.by_name("x86_pminu");
let x86_pshufb = x86_instructions.by_name("x86_pshufb"); let x86_pshufb = x86_instructions.by_name("x86_pshufb");
let x86_pshufd = x86_instructions.by_name("x86_pshufd"); let x86_pshufd = x86_instructions.by_name("x86_pshufd");
let x86_psll = x86_instructions.by_name("x86_psll"); let x86_psll = x86_instructions.by_name("x86_psll");
@@ -506,6 +509,36 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
); );
} }
// SIMD icmp ugt
// Unsigned `a > b` is expanded to `max(a, b) != b`: the lane-wise unsigned maximum differs
// from `b` only in lanes where `a` is strictly larger. Testing `a == max(a, b)` instead is
// also true for equal lanes, which is `uge`, not `ugt`.
// `ne` is bound here so the block does not depend on a binding made elsewhere in `define`.
// NOTE(review): the emitted vector `icmp ne` is assumed to be expanded further by the existing
// SIMD `icmp ne` legalization -- confirm that it covers I8/I16/I32 lanes.
let ugt = Literal::enumerator_for(&imm.intcc, "ugt");
let ne = Literal::enumerator_for(&imm.intcc, "ne");
for ty in &[I8, I16, I32] {
    let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
    narrow.legalize(
        def!(c = icmp_(ugt, a, b)),
        vec![def!(x = x86_pmaxu(a, b)), def!(c = icmp(ne, x, b))],
    );
}
// SIMD icmp sge
// Signed `a >= b` is expanded to `min(a, b) == b`: the minimum equals `b` exactly when `b`
// does not exceed `a`.
let sge = Literal::enumerator_for(&imm.intcc, "sge");
for ty in &[I8, I16, I32] {
    let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
    narrow.legalize(
        def!(c = icmp_(sge, a, b)),
        vec![def!(x = x86_pmins(a, b)), def!(c = icmp(eq, x, b))],
    );
}
// SIMD icmp uge
// Unsigned `a >= b` uses the same `min(a, b) == b` identity with the unsigned minimum.
let uge = Literal::enumerator_for(&imm.intcc, "uge");
for ty in &[I8, I16, I32] {
    let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
    narrow.legalize(
        def!(c = icmp_(uge, a, b)),
        vec![def!(x = x86_pminu(a, b)), def!(c = icmp(eq, x, b))],
    );
}
narrow.custom_legalize(shuffle, "convert_shuffle"); narrow.custom_legalize(shuffle, "convert_shuffle");
narrow.custom_legalize(extractlane, "convert_extractlane"); narrow.custom_legalize(extractlane, "convert_extractlane");
narrow.custom_legalize(insertlane, "convert_insertlane"); narrow.custom_legalize(insertlane, "convert_insertlane");

View File

@@ -10,3 +10,27 @@ ebb0(v0: i32x4, v1: i32x4):
; nextln: v2 = bxor v4, v3 ; nextln: v2 = bxor v4, v3
return v2 return v2
}
function %icmp_ugt_i32x4(i32x4, i32x4) -> b32x4 {
ebb0(v0: i32x4, v1: i32x4):
; Unsigned greater-than is expected to become max(v0, v1) != v1, with the inequality itself
; lowered to an equality compare followed by an inversion of every lane.
; NOTE(review): the value numbers below assume the inequality is lowered through the same
; eq/vconst/bxor sequence as the existing inequality test; regenerate them if they differ.
v2 = icmp ugt v0, v1
; check: v3 = x86_pmaxu v0, v1
; nextln: v4 = icmp eq v3, v1
; check: v2 = bxor v5, v4
return v2
}
function %icmp_sge_i16x8(i16x8, i16x8) -> b16x8 {
ebb0(v0: i16x8, v1: i16x8):
; Legalizer expectation for signed >= on i16x8: the compare is replaced by x86_pmins
; (presumably the packed signed minimum) followed by an equality compare of that result
; with v1, relying on a >= b holding exactly when min(a, b) == b.
v2 = icmp sge v0, v1
; check: v3 = x86_pmins v0, v1
; nextln: v2 = icmp eq v3, v1
return v2
}
function %icmp_uge_i8x16(i8x16, i8x16) -> b8x16 {
ebb0(v0: i8x16, v1: i8x16):
; Legalizer expectation for unsigned >= on i8x16: the compare is replaced by x86_pminu
; (presumably the packed unsigned minimum) followed by an equality compare of that result
; with v1, relying on a >= b holding exactly when min(a, b) == b.
v2 = icmp uge v0, v1
; check: v3 = x86_pminu v0, v1
; nextln: v2 = icmp eq v3, v1
return v2
}

View File

@@ -107,3 +107,33 @@ ebb0:
return v8 return v8
} }
; run ; run
function %icmp_ugt_i8x16() -> b1 {
ebb0:
; All sixteen lanes of v0 are strictly greater (unsigned) than the matching lanes of v1,
; so every lane of the comparison should be set and the function should yield true.
; NOTE(review): no lane pair is equal, so this data cannot tell `ugt` apart from `uge`;
; consider a companion case with equal lanes.
v0 = vconst.i8x16 [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]
v1 = vconst.i8x16 [0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
v2 = icmp ugt v0, v1
v8 = vall_true v2
return v8
}
; run
function %icmp_sge_i16x8() -> b1 {
ebb0:
; Lane 0 compares -1 with -1 and lane 1 compares 1 with 1 (the equality side of >=);
; the remaining lanes have v0 strictly greater than v1, so all lanes should be set.
; NOTE(review): no lane pairs a negative value with a positive one, so signed and
; unsigned ordering agree on this data.
v0 = vconst.i16x8 [-1 1 2 3 4 5 6 7]
v1 = vconst.i16x8 [-1 1 1 1 1 1 1 1]
v2 = icmp sge v0, v1
v8 = vall_true v2
return v8
}
; run
function %icmp_uge_i32x4() -> b1 {
ebb0:
; Lane 0 is equal (1 vs 1); lanes 1-3 have v0 strictly greater than v1, so all lanes
; of the unsigned >= comparison should be set.
; NOTE(review): no lane has its top bit set, so signed and unsigned ordering agree on
; this data.
v0 = vconst.i32x4 [1 2 3 4]
v1 = vconst.i32x4 [1 1 1 1]
v2 = icmp uge v0, v1
v8 = vall_true v2
return v8
}
; run