Add x86 SIMD vany_true and x86_ptest

In order to implement Wasm SIMD's `any_true` (https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md#any-lane-true) on x86, some CLIF instruction (I chose `vany_true`) must be legalized to a `PTEST` followed by a `SETNZ`. To emit `PTEST` I added the new x86-specific CLIF instruction `x86_ptest`; for `SETNZ` I used CLIF's `trueif ne`.
This commit is contained in:
Andrew Brown
2019-10-18 15:35:27 -07:00
parent 873465e7a9
commit 186effc420
8 changed files with 89 additions and 0 deletions

View File

@@ -521,6 +521,7 @@ pub(crate) fn define(
let x86_psll = x86.by_name("x86_psll");
let x86_psra = x86.by_name("x86_psra");
let x86_psrl = x86.by_name("x86_psrl");
let x86_ptest = x86.by_name("x86_ptest");
let x86_push = x86.by_name("x86_push");
let x86_sdivmodx = x86.by_name("x86_sdivmodx");
let x86_smulx = x86.by_name("x86_smulx");
@@ -1988,6 +1989,10 @@ pub(crate) fn define(
// xor
let bxor = bxor.bind(vector(ty, sse_vector_size));
e.enc_32_64(bxor, rec_fa.opcodes(&PXOR));
// ptest (SSE4.1 only, hence the ISA predicate on the encoding)
let x86_ptest = x86_ptest.bind(vector(ty, sse_vector_size));
e.enc_32_64_maybe_isap(x86_ptest, rec_fcmp.opcodes(&PTEST), Some(use_sse41_simd));
}
// SIMD bitcast from I32/I64 to the low bits of a vector (e.g. I64x2); this register movement

View File

@@ -467,5 +467,24 @@ pub(crate) fn define(
.operands_out(vec![a]),
);
let x = &operand("x", TxN);
let y = &operand("y", TxN);
let f = &operand("f", iflags);
ig.push(
Inst::new(
"x86_ptest",
r#"
Logical Compare -- PTEST will set the ZF flag if all bits in the result are 0 of the
bitwise AND of the first source operand (first operand) and the second source operand
(second operand). PTEST sets the CF flag if all bits in the result are 0 of the bitwise
AND of the second source operand (second operand) and the logical NOT of the destination
operand (first operand).
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![f]),
);
ig.build()
}

View File

@@ -55,12 +55,14 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let shuffle = insts.by_name("shuffle");
let srem = insts.by_name("srem");
let sshr = insts.by_name("sshr");
let trueif = insts.by_name("trueif");
let udiv = insts.by_name("udiv");
let umulhi = insts.by_name("umulhi");
let ushr_imm = insts.by_name("ushr_imm");
let urem = insts.by_name("urem");
let ushr = insts.by_name("ushr");
let vconst = insts.by_name("vconst");
let vany_true = insts.by_name("vany_true");
let x86_bsf = x86_instructions.by_name("x86_bsf");
let x86_bsr = x86_instructions.by_name("x86_bsr");
@@ -69,6 +71,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let x86_psll = x86_instructions.by_name("x86_psll");
let x86_psra = x86_instructions.by_name("x86_psra");
let x86_psrl = x86_instructions.by_name("x86_psrl");
let x86_ptest = x86_instructions.by_name("x86_ptest");
let x86_umulx = x86_instructions.by_name("x86_umulx");
let x86_smulx = x86_instructions.by_name("x86_smulx");
@@ -446,6 +449,16 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
);
}
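// SIMD vany_true: x86_ptest(x, x) sets ZF only when every bit of x is 0, so `trueif ne` (ZF
// clear) yields true exactly when some lane of x is non-zero.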
let ne = Literal::enumerator_for(&imm.intcc, "ne");
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
let vany_true = vany_true.bind(vector(ty, sse_vector_size));
narrow.legalize(
def!(y = vany_true(x)),
vec![def!(a = x86_ptest(x, x)), def!(y = trueif(ne, a))],
);
}
narrow.custom_legalize(shuffle, "convert_shuffle");
narrow.custom_legalize(extractlane, "convert_extractlane");
narrow.custom_legalize(insertlane, "convert_insertlane");

View File

@@ -375,6 +375,10 @@ pub static PSUBUSB: [u8; 3] = [0x66, 0x0f, 0xd8];
/// and saturate results (SSE2).
pub static PSUBUSW: [u8; 3] = [0x66, 0x0f, 0xd9];
/// Set ZF if xmm2/m128 AND xmm1 result is all 0s; set CF if xmm2/m128 AND NOT xmm1 result is all
/// 0s (SSE4.1).
///
/// The four bytes are the mandatory `0x66` prefix, the two-byte `0x0f 0x38` escape, and the
/// `0x17` opcode byte.
pub static PTEST: [u8; 4] = [0x66, 0x0f, 0x38, 0x17];
/// Push r{16,32,64}.
///
/// NOTE(review): `0x50` looks like the base byte of the short `50+r` form, where the register
/// number is folded into the low bits of the opcode -- confirm against the recipe that uses it.
pub static PUSH_REG: [u8; 1] = [0x50];