diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index 94ade711a1..3a8f8b696c 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -521,6 +521,7 @@ pub(crate) fn define( let x86_psll = x86.by_name("x86_psll"); let x86_psra = x86.by_name("x86_psra"); let x86_psrl = x86.by_name("x86_psrl"); + let x86_ptest = x86.by_name("x86_ptest"); let x86_push = x86.by_name("x86_push"); let x86_sdivmodx = x86.by_name("x86_sdivmodx"); let x86_smulx = x86.by_name("x86_smulx"); @@ -1988,6 +1989,10 @@ pub(crate) fn define( // xor let bxor = bxor.bind(vector(ty, sse_vector_size)); e.enc_32_64(bxor, rec_fa.opcodes(&PXOR)); + + // ptest + let x86_ptest = x86_ptest.bind(vector(ty, sse_vector_size)); + e.enc_32_64_maybe_isap(x86_ptest, rec_fcmp.opcodes(&PTEST), Some(use_sse41_simd)); } // SIMD bitcast from I32/I64 to the low bits of a vector (e.g. I64x2); this register movement diff --git a/cranelift/codegen/meta/src/isa/x86/instructions.rs b/cranelift/codegen/meta/src/isa/x86/instructions.rs index 21e51982ce..366df755b9 100644 --- a/cranelift/codegen/meta/src/isa/x86/instructions.rs +++ b/cranelift/codegen/meta/src/isa/x86/instructions.rs @@ -467,5 +467,24 @@ pub(crate) fn define( .operands_out(vec![a]), ); + let x = &operand("x", TxN); + let y = &operand("y", TxN); + let f = &operand("f", iflags); + ig.push( + Inst::new( + "x86_ptest", + r#" + Logical Compare -- PTEST sets the ZF flag if every bit of the bitwise AND of the first + source operand (``x``) and the second source operand (``y``) is 0. PTEST sets the CF flag + if every bit of the bitwise AND of the second source operand (``y``) and the logical NOT + of the first source operand (``x``, PTEST's destination operand) is 0. Both flags are + returned in ``f``. 
+ "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![f]), + ); + ig.build() } diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs index df0b88dc04..8af7f3447a 100644 --- a/cranelift/codegen/meta/src/isa/x86/legalize.rs +++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs @@ -55,12 +55,14 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct let shuffle = insts.by_name("shuffle"); let srem = insts.by_name("srem"); let sshr = insts.by_name("sshr"); + let trueif = insts.by_name("trueif"); let udiv = insts.by_name("udiv"); let umulhi = insts.by_name("umulhi"); let ushr_imm = insts.by_name("ushr_imm"); let urem = insts.by_name("urem"); let ushr = insts.by_name("ushr"); let vconst = insts.by_name("vconst"); + let vany_true = insts.by_name("vany_true"); let x86_bsf = x86_instructions.by_name("x86_bsf"); let x86_bsr = x86_instructions.by_name("x86_bsr"); @@ -69,6 +71,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct let x86_psll = x86_instructions.by_name("x86_psll"); let x86_psra = x86_instructions.by_name("x86_psra"); let x86_psrl = x86_instructions.by_name("x86_psrl"); + let x86_ptest = x86_instructions.by_name("x86_ptest"); let x86_umulx = x86_instructions.by_name("x86_umulx"); let x86_smulx = x86_instructions.by_name("x86_smulx"); @@ -446,6 +449,16 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct ); } + // SIMD vany_true + let ne = Literal::enumerator_for(&imm.intcc, "ne"); + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let vany_true = vany_true.bind(vector(ty, sse_vector_size)); + narrow.legalize( + def!(y = vany_true(x)), + vec![def!(a = x86_ptest(x, x)), def!(y = trueif(ne, a))], + ); + } + narrow.custom_legalize(shuffle, "convert_shuffle"); narrow.custom_legalize(extractlane, "convert_extractlane"); narrow.custom_legalize(insertlane, "convert_insertlane"); diff 
--git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs index 2df259f37e..0491028810 100644 --- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs +++ b/cranelift/codegen/meta/src/isa/x86/opcodes.rs @@ -375,6 +375,10 @@ pub static PSUBUSB: [u8; 3] = [0x66, 0x0f, 0xd8]; /// and saturate results (SSE2). pub static PSUBUSW: [u8; 3] = [0x66, 0x0f, 0xd9]; +/// Set ZF if xmm2/m128 AND xmm1 result is all 0s; set CF if xmm2/m128 AND NOT xmm1 result is all +/// 0s (SSE4.1). +pub static PTEST: [u8; 4] = [0x66, 0x0f, 0x38, 0x17]; + /// Push r{16,32,64}. pub static PUSH_REG: [u8; 1] = [0x50]; diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index 1980730d95..6d0b79ccfa 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -1623,6 +1623,22 @@ pub(crate) fn define( .operands_out(vec![a]), ); + let s = &operand("s", b1); + + ig.push( + Inst::new( + "vany_true", + r#" + Reduce a vector to a scalar boolean. + + Return a scalar boolean true if any lane in ``a`` is non-zero, false otherwise. 
+ "#, + &formats.unary, + ) + .operands_in(vec![a]) + .operands_out(vec![s]), + ); + let x = &operand("x", &TxN.lane_of()); ig.push( diff --git a/cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif index 835eb8ca2f..6d6a3fac31 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif @@ -25,3 +25,9 @@ ebb0(v0: b64x2 [%xmm6], v1: b64x2 [%xmm3]): [-, %xmm3] v2 = band_not v0, v1 ; bin: 66 0f df de return v2 } + +function %x86_ptest_f64x2(f64x2, f64x2) { +ebb0(v0: f64x2 [%xmm0], v1: f64x2 [%xmm2]): +[-, %rflags] v2 = x86_ptest v0, v1 ; bin: 66 0f 38 17 c2 + return +} diff --git a/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif index be00fe7278..925e18573c 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif @@ -9,3 +9,11 @@ ebb0(v0: b32x4): ; nextln: v1 = bxor v2, v0 return v1 } + +function %vany_true_b32x4(b32x4) -> b1 { +ebb0(v0: b32x4): + v1 = vany_true v0 + ; check: v2 = x86_ptest v0, v0 + ; nextln: v1 = trueif ne v2 + return v1 +} diff --git a/cranelift/filetests/filetests/isa/x86/simd-logical-run.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-run.clif index 6ab5db0c49..2a85c44b04 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-logical-run.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-logical-run.clif @@ -21,3 +21,21 @@ ebb0: return v4 } ; run + +function %vany_true_i16x8() -> b1 { +ebb0: + v0 = vconst.i16x8 [1 0 0 0 0 0 0 0] + v1 = vany_true v0 + return v1 +} +; run + +function %vany_true_b32x4() -> b1 { +ebb0: + v0 = vconst.b32x4 [false false false false] + v1 = vany_true v0 + v2 = bint.i32 v1 + v3 = icmp_imm eq v2, 0 + return v3 +} +; run