Fix legalization of icmp ugt (#1278)
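Previously, `ugt` was legalized with the same pattern (pmax + pcmpeq) as `uge`, but that logic is incorrect when the operands are equal: `pmaxu(a, b) == a` is true for `a == b`, whereas `ugt` must be false. `ugt` is now legalized as the negation of `pmaxu(a, b) == b`.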
This commit is contained in:
committed by
Sean Stangl
parent
6181f20326
commit
4433ad2858
@@ -335,6 +335,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||||||
let uimm8_zero = Literal::constant(&imm.uimm8, 0x00);
|
let uimm8_zero = Literal::constant(&imm.uimm8, 0x00);
|
||||||
let uimm8_one = Literal::constant(&imm.uimm8, 0x01);
|
let uimm8_one = Literal::constant(&imm.uimm8, 0x01);
|
||||||
let u128_zeroes = constant(vec![0x00; 16]);
|
let u128_zeroes = constant(vec![0x00; 16]);
|
||||||
|
let u128_ones = constant(vec![0xff; 16]);
|
||||||
let b = var("b");
|
let b = var("b");
|
||||||
let c = var("c");
|
let c = var("c");
|
||||||
let d = var("d");
|
let d = var("d");
|
||||||
@@ -405,12 +406,11 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||||||
}
|
}
|
||||||
|
|
||||||
// SIMD bnot
|
// SIMD bnot
|
||||||
let ones = constant(vec![0xff; 16]);
|
|
||||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||||
let bnot = bnot.bind(vector(ty, sse_vector_size));
|
let bnot = bnot.bind(vector(ty, sse_vector_size));
|
||||||
narrow.legalize(
|
narrow.legalize(
|
||||||
def!(y = bnot(x)),
|
def!(y = bnot(x)),
|
||||||
vec![def!(a = vconst(ones)), def!(y = bxor(a, x))],
|
vec![def!(a = vconst(u128_ones)), def!(y = bxor(a, x))],
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -524,7 +524,11 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||||||
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
|
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
|
||||||
narrow.legalize(
|
narrow.legalize(
|
||||||
def!(c = icmp_(ugt, a, b)),
|
def!(c = icmp_(ugt, a, b)),
|
||||||
vec![def!(x = x86_pmaxu(a, b)), def!(c = icmp(eq, a, x))],
|
vec![
|
||||||
|
def!(x = x86_pmaxu(a, b)),
|
||||||
|
def!(y = icmp(eq, x, b)),
|
||||||
|
def!(c = bnot(y)),
|
||||||
|
],
|
||||||
);
|
);
|
||||||
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
|
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
|
||||||
narrow.legalize(
|
narrow.legalize(
|
||||||
@@ -574,7 +578,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||||||
narrow.legalize(
|
narrow.legalize(
|
||||||
def!(b = fabs(a)),
|
def!(b = fabs(a)),
|
||||||
vec![
|
vec![
|
||||||
def!(c = vconst(ones)),
|
def!(c = vconst(u128_ones)),
|
||||||
def!(d = ushr_imm(c, uimm8_one)), // Create a mask of all 1s except the MSB.
|
def!(d = ushr_imm(c, uimm8_one)), // Create a mask of all 1s except the MSB.
|
||||||
def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type.
|
def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type.
|
||||||
def!(b = band(a, e)), // Unset the MSB.
|
def!(b = band(a, e)), // Unset the MSB.
|
||||||
|
|||||||
@@ -15,7 +15,9 @@ function %icmp_ugt_i32x4(i32x4, i32x4) -> b32x4 {
|
|||||||
ebb0(v0: i32x4, v1: i32x4):
|
ebb0(v0: i32x4, v1: i32x4):
|
||||||
v2 = icmp ugt v0, v1
|
v2 = icmp ugt v0, v1
|
||||||
; check: v3 = x86_pmaxu v0, v1
|
; check: v3 = x86_pmaxu v0, v1
|
||||||
; nextln: v2 = icmp eq v0, v3
|
; nextln: v4 = icmp eq v3, v1
|
||||||
|
; nextln: v5 = vconst.b32x4 0xffffffffffffffffffffffffffffffff
|
||||||
|
; nextln: v2 = bxor v5, v4
|
||||||
return v2
|
return v2
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -158,6 +158,20 @@ ebb0:
|
|||||||
}
|
}
|
||||||
; run
|
; run
|
||||||
|
|
||||||
|
|
||||||
|
function %icmp_ult_i16x8() -> b1 {
|
||||||
|
ebb0:
|
||||||
|
v0 = vconst.i16x8 [-1 -1 -1 -1 -1 -1 -1 -1]
|
||||||
|
v1 = vconst.i16x8 [-1 -1 -1 -1 -1 -1 -1 -1]
|
||||||
|
v2 = icmp ult v0, v1
|
||||||
|
v3 = vconst.i16x8 0x00
|
||||||
|
v4 = raw_bitcast.i16x8 v2
|
||||||
|
v5 = icmp eq v3, v4
|
||||||
|
v8 = vall_true v5
|
||||||
|
return v8
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|
||||||
function %icmp_sle_i16x8() -> b1 {
|
function %icmp_sle_i16x8() -> b1 {
|
||||||
ebb0:
|
ebb0:
|
||||||
v0 = vconst.i16x8 [-1 -1 0 0 0 0 0 0]
|
v0 = vconst.i16x8 [-1 -1 0 0 0 0 0 0]
|
||||||
|
|||||||
Reference in New Issue
Block a user