Fix legalization of icmp ugt (#1278)

Previously, `ugt` was legalized with the same pattern (pmax + pcmpeq) as `uge`, but this logic was incorrect for operands with equal values: lanes holding equal operands were reported as greater-than.
2019-12-16 13:14:51 -08:00
parent 6181f20326
commit 4433ad2858
3 changed files with 25 additions and 5 deletions
--- a/cranelift/codegen/meta/src/isa/x86/legalize.rs
+++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs
@@ -335,6 +335,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
    let uimm8_zero = Literal::constant(&imm.uimm8, 0x00);
    let uimm8_one = Literal::constant(&imm.uimm8, 0x01);
    let u128_zeroes = constant(vec![0x00; 16]);
+    let u128_ones = constant(vec![0xff; 16]);
    let b = var("b");
    let c = var("c");
    let d = var("d");
@@ -405,12 +406,11 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
    }

    // SIMD bnot
-    let ones = constant(vec![0xff; 16]);
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
        let bnot = bnot.bind(vector(ty, sse_vector_size));
        narrow.legalize(
            def!(y = bnot(x)),
-            vec![def!(a = vconst(ones)), def!(y = bxor(a, x))],
+            vec![def!(a = vconst(u128_ones)), def!(y = bxor(a, x))],
        );
    }

@@ -524,7 +524,11 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
        let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
        narrow.legalize(
            def!(c = icmp_(ugt, a, b)),
-            vec![def!(x = x86_pmaxu(a, b)), def!(c = icmp(eq, a, x))],
+            vec![
+                def!(x = x86_pmaxu(a, b)),
+                def!(y = icmp(eq, x, b)),
+                def!(c = bnot(y)),
+            ],
        );
        let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
        narrow.legalize(
@@ -574,7 +578,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
        narrow.legalize(
            def!(b = fabs(a)),
            vec![
-                def!(c = vconst(ones)),
+                def!(c = vconst(u128_ones)),
                def!(d = ushr_imm(c, uimm8_one)), // Create a mask of all 1s except the MSB.
                def!(e = bitcast_to_float(d)),    // Cast mask to the floating-point type.
                def!(b = band(a, e)),             // Unset the MSB.
--- a/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif
@@ -15,7 +15,9 @@ function %icmp_ugt_i32x4(i32x4, i32x4) -> b32x4 {
 ebb0(v0: i32x4, v1: i32x4):
    v2 = icmp ugt v0, v1
    ; check: v3 = x86_pmaxu v0, v1
-    ; nextln: v2 = icmp eq v0, v3
+    ; nextln: v4 = icmp eq v3, v1
+    ; nextln: v5 = vconst.b32x4 0xffffffffffffffffffffffffffffffff
+    ; nextln: v2 = bxor v5, v4
    return v2
 }

--- a/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif
@@ -158,6 +158,20 @@ ebb0:
 }
 ; run

+
+function %icmp_ult_i16x8() -> b1 {
+ebb0:
+    v0 = vconst.i16x8 [-1 -1 -1 -1 -1 -1 -1 -1]
+    v1 = vconst.i16x8 [-1 -1 -1 -1 -1 -1 -1 -1]
+    v2 = icmp ult v0, v1
+    v3 = vconst.i16x8 0x00
+    v4 = raw_bitcast.i16x8 v2
+    v5 = icmp eq v3, v4
+    v8 = vall_true v5
+    return v8
+}
+; run
+
 function %icmp_sle_i16x8() -> b1 {
 ebb0:
    v0 = vconst.i16x8 [-1 -1 0 0 0 0 0 0]