diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs
index 8af7f3447a..68ca67685f 100644
--- a/cranelift/codegen/meta/src/isa/x86/legalize.rs
+++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs
@@ -39,6 +39,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
     let fmax = insts.by_name("fmax");
     let fmin = insts.by_name("fmin");
     let iadd = insts.by_name("iadd");
+    let icmp = insts.by_name("icmp");
     let iconst = insts.by_name("iconst");
     let imul = insts.by_name("imul");
     let ineg = insts.by_name("ineg");
@@ -62,6 +63,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
     let urem = insts.by_name("urem");
     let ushr = insts.by_name("ushr");
     let vconst = insts.by_name("vconst");
+    let vall_true = insts.by_name("vall_true");
     let vany_true = insts.by_name("vany_true");
 
     let x86_bsf = x86_instructions.by_name("x86_bsf");
@@ -459,6 +461,44 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
         );
     }
 
+    // SIMD vall_true
+    //
+    // `icmp eq` against an all-zero vector marks every lane of the input that is zero.
+    // PTEST of that mask with itself sets ZF only when the mask is entirely clear, so
+    // `trueif eq` yields true exactly when no lane was zero (all lanes are non-zero).
+    let zeroes = constant(vec![0x00; 16]);
+    let eq = Literal::enumerator_for(&imm.intcc, "eq");
+    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+        let vall_true = vall_true.bind(vector(ty, sse_vector_size));
+        if ty.is_int() {
+            // In the common case (Wasm's integer-only all_true), we do not require a bitcast.
+            narrow.legalize(
+                def!(y = vall_true(x)),
+                vec![
+                    def!(a = vconst(zeroes)),
+                    def!(c = icmp(eq, x, a)),
+                    def!(d = x86_ptest(c, c)),
+                    def!(y = trueif(eq, d)),
+                ],
+            );
+        } else {
+            // However, to support other types we must bitcast them to an integer vector to use
+            // icmp.
+            let lane_type_as_int = LaneType::int_from_bits(ty.lane_bits() as u16);
+            let raw_bitcast_to_int = raw_bitcast.bind(vector(lane_type_as_int, sse_vector_size));
+            narrow.legalize(
+                def!(y = vall_true(x)),
+                vec![
+                    def!(a = vconst(zeroes)),
+                    def!(b = raw_bitcast_to_int(x)),
+                    def!(c = icmp(eq, b, a)),
+                    def!(d = x86_ptest(c, c)),
+                    def!(y = trueif(eq, d)),
+                ],
+            );
+        }
+    }
+
     narrow.custom_legalize(shuffle, "convert_shuffle");
     narrow.custom_legalize(extractlane, "convert_extractlane");
     narrow.custom_legalize(insertlane, "convert_insertlane");
diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs
index 6d0b79ccfa..532fee6c87 100644
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -1639,6 +1639,20 @@ pub(crate) fn define(
         .operands_out(vec![s]),
     );
 
+    ig.push(
+        Inst::new(
+            "vall_true",
+            r#"
+        Reduce a vector to a scalar boolean.
+
+        Return a scalar boolean true if all lanes in ``a`` are non-zero, false otherwise.
+        "#,
+            &formats.unary,
+        )
+        .operands_in(vec![a])
+        .operands_out(vec![s]),
+    );
+
     let x = &operand("x", &TxN.lane_of());
 
     ig.push(
diff --git a/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif
index 925e18573c..2e13f79b9b 100644
--- a/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif
@@ -17,3 +17,13 @@ ebb0(v0: b32x4):
     ; nextln: v1 = trueif ne v2
     return v1
 }
+
+function %vall_true_i64x2(i64x2) -> b1 {
+ebb0(v0: i64x2):
+    v1 = vall_true v0
+    ; check: v2 = vconst.i64x2 0x00
+    ; nextln: v3 = icmp eq v0, v2
+    ; nextln: v4 = x86_ptest v3, v3
+    ; nextln: v1 = trueif eq v4
+    return v1
+}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-logical-run.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-run.clif
index 2a85c44b04..9b525f2e10 100644
--- a/cranelift/filetests/filetests/isa/x86/simd-logical-run.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-logical-run.clif
@@ -39,3 +39,21 @@ ebb0:
     return v3
 }
 ; run
+
+function %vall_true_i16x8() -> b1 {
+ebb0:
+    v0 = vconst.i16x8 [1 0 0 0 0 0 0 0]
+    v1 = vall_true v0
+    v2 = bint.i32 v1
+    v3 = icmp_imm eq v2, 0
+    return v3
+}
+; run
+
+function %vall_true_b32x4() -> b1 {
+ebb0:
+    v0 = vconst.b32x4 [true true true true]
+    v1 = vall_true v0
+    return v1
+}
+; run