Add x86 SIMD legalizations for integer greater-than
This commit adds legalizations for `icmp ugt`, `icmp sge`, and `icmp uge` on vectors whose lanes are I8, I16, or I32.
This commit is contained in:
@@ -3,7 +3,7 @@ use crate::cdsl::instructions::{vector, Bindable, InstructionGroup};
|
||||
use crate::cdsl::types::{LaneType, ValueType};
|
||||
use crate::cdsl::xform::TransformGroupBuilder;
|
||||
use crate::shared::types::Float::F64;
|
||||
use crate::shared::types::Int::{I16, I32, I64};
|
||||
use crate::shared::types::Int::{I16, I32, I64, I8};
|
||||
use crate::shared::Definitions as SharedDefinitions;
|
||||
|
||||
#[allow(clippy::many_single_char_names)]
|
||||
@@ -69,6 +69,9 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
||||
|
||||
// Bind x86-specific instructions by name so the legalization patterns can refer to them.
// `x86_pmaxu`, `x86_pmins` and `x86_pminu` are the ones used by the SIMD
// `icmp ugt` / `icmp sge` / `icmp uge` legalizations.
let x86_bsf = x86_instructions.by_name("x86_bsf");
|
||||
let x86_bsr = x86_instructions.by_name("x86_bsr");
|
||||
let x86_pmaxu = x86_instructions.by_name("x86_pmaxu");
|
||||
let x86_pmins = x86_instructions.by_name("x86_pmins");
|
||||
let x86_pminu = x86_instructions.by_name("x86_pminu");
|
||||
let x86_pshufb = x86_instructions.by_name("x86_pshufb");
|
||||
let x86_pshufd = x86_instructions.by_name("x86_pshufd");
|
||||
let x86_psll = x86_instructions.by_name("x86_psll");
|
||||
@@ -506,6 +509,36 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD icmp ugt
//
// Unsigned `a > b` holds exactly when `max(a, b) != b`. Testing `max(a, b) == a` instead
// would also accept lanes where `a == b`, which is `uge`, not `ugt`. The strict comparison
// is therefore expressed as an equality test against `b` followed by an inversion of the
// resulting mask. Assumes `x86_pmaxu` yields the lane-wise unsigned maximum.
// NOTE(review): relies on `y` and `bnot` being bound earlier in `define`, outside the lines
// shown here -- confirm.
let ugt = Literal::enumerator_for(&imm.intcc, "ugt");
for ty in &[I8, I16, I32] {
    let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
    narrow.legalize(
        def!(c = icmp_(ugt, a, b)),
        vec![
            def!(x = x86_pmaxu(a, b)),
            def!(y = icmp(eq, x, b)),
            def!(c = bnot(y)),
        ],
    );
}

// SIMD icmp sge
//
// Signed `a >= b` is rewritten as `x86_pmins(a, b) == b`.
let sge = Literal::enumerator_for(&imm.intcc, "sge");
for ty in &[I8, I16, I32] {
    let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
    narrow.legalize(
        def!(c = icmp_(sge, a, b)),
        vec![def!(x = x86_pmins(a, b)), def!(c = icmp(eq, x, b))],
    );
}

// SIMD icmp uge
//
// Unsigned `a >= b` is rewritten as `x86_pminu(a, b) == b`.
let uge = Literal::enumerator_for(&imm.intcc, "uge");
for ty in &[I8, I16, I32] {
    let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
    narrow.legalize(
        def!(c = icmp_(uge, a, b)),
        vec![def!(x = x86_pminu(a, b)), def!(c = icmp(eq, x, b))],
    );
}

narrow.custom_legalize(shuffle, "convert_shuffle");
narrow.custom_legalize(extractlane, "convert_extractlane");
narrow.custom_legalize(insertlane, "convert_insertlane");

@@ -10,3 +10,27 @@ ebb0(v0: i32x4, v1: i32x4):
; nextln: v2 = bxor v4, v3
return v2
}

; Lanes where v0 equals v1 must compare false for ugt, so the maximum is compared against v1
; and the resulting mask is inverted (the inversion shows up as a bxor).
function %icmp_ugt_i32x4(i32x4, i32x4) -> b32x4 {
ebb0(v0: i32x4, v1: i32x4):
    v2 = icmp ugt v0, v1
    ; check: v3 = x86_pmaxu v0, v1
    ; nextln: v4 = icmp eq v3, v1
    ; check: v2 = bxor v5, v4
    return v2
}
|
||||
|
||||
; Legalization of a signed greater-or-equal on i16x8: the expected output computes
; x86_pmins(v0, v1) and tests that result for equality with v1.
function %icmp_sge_i16x8(i16x8, i16x8) -> b16x8 {
|
||||
ebb0(v0: i16x8, v1: i16x8):
|
||||
v2 = icmp sge v0, v1
|
||||
; check: v3 = x86_pmins v0, v1
|
||||
; nextln: v2 = icmp eq v3, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; Legalization of an unsigned greater-or-equal on i8x16: the expected output computes
; x86_pminu(v0, v1) and tests that result for equality with v1.
function %icmp_uge_i8x16(i8x16, i8x16) -> b8x16 {
|
||||
ebb0(v0: i8x16, v1: i8x16):
|
||||
v2 = icmp uge v0, v1
|
||||
; check: v3 = x86_pminu v0, v1
|
||||
; nextln: v2 = icmp eq v3, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
@@ -107,3 +107,33 @@ ebb0:
|
||||
return v8
|
||||
}
|
||||
; run
|
||||
|
||||
; Every lane of v0 is strictly greater than its v1 counterpart, so the mask is all true.
; NOTE(review): no lane has equal operands, so this input would pass for uge as well.
function %icmp_ugt_i8x16() -> b1 {
|
||||
ebb0:
|
||||
v0 = vconst.i8x16 [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]
|
||||
v1 = vconst.i8x16 [0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
|
||||
v2 = icmp ugt v0, v1
|
||||
v8 = vall_true v2
|
||||
return v8
|
||||
}
|
||||
; run
|
||||
|
||||
; Covers a negative lane and lanes with equal operands; every lane satisfies v0 >= v1 (signed),
; so the mask is all true.
function %icmp_sge_i16x8() -> b1 {
|
||||
ebb0:
|
||||
v0 = vconst.i16x8 [-1 1 2 3 4 5 6 7]
|
||||
v1 = vconst.i16x8 [-1 1 1 1 1 1 1 1]
|
||||
v2 = icmp sge v0, v1
|
||||
v8 = vall_true v2
|
||||
return v8
|
||||
}
|
||||
; run
|
||||
|
||||
; Lane 0 has equal operands and the other lanes have v0 > v1, so every lane satisfies uge
; and the mask is all true.
function %icmp_uge_i32x4() -> b1 {
|
||||
ebb0:
|
||||
v0 = vconst.i32x4 [1 2 3 4]
|
||||
v1 = vconst.i32x4 [1 1 1 1]
|
||||
v2 = icmp uge v0, v1
|
||||
v8 = vall_true v2
|
||||
return v8
|
||||
}
|
||||
; run
|
||||
|
||||
Reference in New Issue
Block a user