# Patch for cranelift/codegen/meta/src/isa/x86/legalize.rs (all hunks are inside `define`).
#
# What the hunks below do:
#   * Introduce `u128_ones` (sixteen 0xff bytes) next to `u128_zeroes` and drop the
#     later, locally named `ones` constant; the SIMD `bnot` and `fabs` legalizations
#     are switched over to the new name.
#   * Rewrite the SIMD `icmp ugt` legalization. The old expansion was
#         x = x86_pmaxu(a, b); c = icmp(eq, a, x)
#     and the new one is
#         x = x86_pmaxu(a, b); y = icmp(eq, x, b); c = bnot(y)
#     NOTE(review): presumably `x86_pmaxu` is a lane-wise unsigned maximum; if so the
#     old form is also true for lanes where a == b (i.e. it behaves like `uge`),
#     while the new form is true only where max(a, b) != b -- confirm against the
#     instruction definition.
#
# NOTE(review): this copy of the patch had its line breaks and indentation collapsed.
# Line structure was restored from the hunk line counts (including the blank context
# lines in the second hunk); the indentation is a best-effort reconstruction and should
# be verified against the target file before applying.
diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs
index 6644bae161..8e7c79d814 100644
--- a/cranelift/codegen/meta/src/isa/x86/legalize.rs
+++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs
@@ -335,6 +335,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
     let uimm8_zero = Literal::constant(&imm.uimm8, 0x00);
     let uimm8_one = Literal::constant(&imm.uimm8, 0x01);
     let u128_zeroes = constant(vec![0x00; 16]);
+    let u128_ones = constant(vec![0xff; 16]);
     let b = var("b");
     let c = var("c");
     let d = var("d");
@@ -405,12 +406,11 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
     }
 
     // SIMD bnot
-    let ones = constant(vec![0xff; 16]);
     for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
         let bnot = bnot.bind(vector(ty, sse_vector_size));
         narrow.legalize(
             def!(y = bnot(x)),
-            vec![def!(a = vconst(ones)), def!(y = bxor(a, x))],
+            vec![def!(a = vconst(u128_ones)), def!(y = bxor(a, x))],
         );
     }
 
@@ -524,7 +524,11 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
         let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
         narrow.legalize(
             def!(c = icmp_(ugt, a, b)),
-            vec![def!(x = x86_pmaxu(a, b)), def!(c = icmp(eq, a, x))],
+            vec![
+                def!(x = x86_pmaxu(a, b)),
+                def!(y = icmp(eq, x, b)),
+                def!(c = bnot(y)),
+            ],
         );
         let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
         narrow.legalize(
@@ -574,7 +578,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
         narrow.legalize(
             def!(b = fabs(a)),
             vec![
-                def!(c = vconst(ones)),
+                def!(c = vconst(u128_ones)),
                 def!(d = ushr_imm(c, uimm8_one)), // Create a mask of all 1s except the MSB.
                 def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type.
                 def!(b = band(a, e)), // Unset the MSB.
# Patch for cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif.
#
# Updates the expected legalization of `icmp ugt` on i32x4 in %icmp_ugt_i32x4: after
# `x86_pmaxu v0, v1` the checks now expect `icmp eq v3, v1`, then an all-ones
# `vconst.b32x4`, then `bxor` of the two producing v2 (previously a single
# `icmp eq v0, v3` producing v2).
#
# NOTE(review): this copy of the patch had its line breaks and indentation collapsed.
# Line structure was restored from the hunk line counts (including blank context and
# blank added lines); the indentation is a best-effort reconstruction and should be
# verified against the target files before applying.
diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif
index b4d9681285..acbff943eb 100644
--- a/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif
@@ -15,7 +15,9 @@ function %icmp_ugt_i32x4(i32x4, i32x4) -> b32x4 {
 ebb0(v0: i32x4, v1: i32x4):
     v2 = icmp ugt v0, v1
     ; check: v3 = x86_pmaxu v0, v1
-    ; nextln: v2 = icmp eq v0, v3
+    ; nextln: v4 = icmp eq v3, v1
+    ; nextln: v5 = vconst.b32x4 0xffffffffffffffffffffffffffffffff
+    ; nextln: v2 = bxor v5, v4
     return v2
 }
 
# Patch for cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif.
#
# Adds the run test %icmp_ult_i16x8. Both operands are the same i16x8 vector (every
# lane -1); the `icmp ult` result is raw-bitcast to i16x8, compared for equality with
# an all-zero vector, and the function returns `vall_true` of that comparison.
# NOTE(review): presumably this asserts that every lane of `ult` is false when the
# operands are equal -- confirm against the `; run` harness semantics.
diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif
index 4b9da6e4a2..e4a5e6fea7 100644
--- a/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif
@@ -158,6 +158,20 @@ ebb0:
 }
 ; run
 
+
+function %icmp_ult_i16x8() -> b1 {
+ebb0:
+    v0 = vconst.i16x8 [-1 -1 -1 -1 -1 -1 -1 -1]
+    v1 = vconst.i16x8 [-1 -1 -1 -1 -1 -1 -1 -1]
+    v2 = icmp ult v0, v1
+    v3 = vconst.i16x8 0x00
+    v4 = raw_bitcast.i16x8 v2
+    v5 = icmp eq v3, v4
+    v8 = vall_true v5
+    return v8
+}
+; run
+
 function %icmp_sle_i16x8() -> b1 {
 ebb0:
     v0 = vconst.i16x8 [-1 -1 0 0 0 0 0 0]