Add x86 SIMD vany_true and x86_ptest

To implement SIMD's any_true (https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md#any-lane-true), some CLIF instruction must be legalized to a `PTEST` followed by a `SETNZ`; I chose to add `vany_true` for this purpose. To emit `PTEST`, this commit adds the new CLIF instruction `x86_ptest`; `SETNZ` is emitted via CLIF's existing `trueif ne`.
This commit is contained in:
Andrew Brown
2019-10-18 15:35:27 -07:00
parent 873465e7a9
commit 186effc420
8 changed files with 89 additions and 0 deletions

View File

@@ -521,6 +521,7 @@ pub(crate) fn define(
let x86_psll = x86.by_name("x86_psll");
let x86_psra = x86.by_name("x86_psra");
let x86_psrl = x86.by_name("x86_psrl");
let x86_ptest = x86.by_name("x86_ptest");
let x86_push = x86.by_name("x86_push");
let x86_sdivmodx = x86.by_name("x86_sdivmodx");
let x86_smulx = x86.by_name("x86_smulx");
@@ -1988,6 +1989,10 @@ pub(crate) fn define(
// xor
let bxor = bxor.bind(vector(ty, sse_vector_size));
e.enc_32_64(bxor, rec_fa.opcodes(&PXOR));
// ptest
let x86_ptest = x86_ptest.bind(vector(ty, sse_vector_size));
e.enc_32_64_maybe_isap(x86_ptest, rec_fcmp.opcodes(&PTEST), Some(use_sse41_simd));
}
// SIMD bitcast from I32/I64 to the low bits of a vector (e.g. I64x2); this register movement

View File

@@ -467,5 +467,24 @@ pub(crate) fn define(
.operands_out(vec![a]),
);
let x = &operand("x", TxN);
let y = &operand("y", TxN);
let f = &operand("f", iflags);
ig.push(
Inst::new(
"x86_ptest",
r#"
Logical Compare -- PTEST will set the ZF flag if all bits in the result are 0 of the
bitwise AND of the first source operand (first operand) and the second source operand
(second operand). PTEST sets the CF flag if all bits in the result are 0 of the bitwise
AND of the second source operand (second operand) and the logical NOT of the destination
operand (first operand).
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![f]),
);
ig.build()
}

View File

@@ -55,12 +55,14 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let shuffle = insts.by_name("shuffle");
let srem = insts.by_name("srem");
let sshr = insts.by_name("sshr");
let trueif = insts.by_name("trueif");
let udiv = insts.by_name("udiv");
let umulhi = insts.by_name("umulhi");
let ushr_imm = insts.by_name("ushr_imm");
let urem = insts.by_name("urem");
let ushr = insts.by_name("ushr");
let vconst = insts.by_name("vconst");
let vany_true = insts.by_name("vany_true");
let x86_bsf = x86_instructions.by_name("x86_bsf");
let x86_bsr = x86_instructions.by_name("x86_bsr");
@@ -69,6 +71,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let x86_psll = x86_instructions.by_name("x86_psll");
let x86_psra = x86_instructions.by_name("x86_psra");
let x86_psrl = x86_instructions.by_name("x86_psrl");
let x86_ptest = x86_instructions.by_name("x86_ptest");
let x86_umulx = x86_instructions.by_name("x86_umulx");
let x86_smulx = x86_instructions.by_name("x86_smulx");
@@ -446,6 +449,16 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
);
}
// SIMD vany_true
let ne = Literal::enumerator_for(&imm.intcc, "ne");
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
let vany_true = vany_true.bind(vector(ty, sse_vector_size));
narrow.legalize(
def!(y = vany_true(x)),
vec![def!(a = x86_ptest(x, x)), def!(y = trueif(ne, a))],
);
}
narrow.custom_legalize(shuffle, "convert_shuffle");
narrow.custom_legalize(extractlane, "convert_extractlane");
narrow.custom_legalize(insertlane, "convert_insertlane");

View File

@@ -375,6 +375,10 @@ pub static PSUBUSB: [u8; 3] = [0x66, 0x0f, 0xd8];
/// and saturate results (SSE2).
pub static PSUBUSW: [u8; 3] = [0x66, 0x0f, 0xd9];
/// Set ZF if xmm2/m128 AND xmm1 result is all 0s; set CF if xmm2/m128 AND NOT xmm1 result is all
/// 0s (SSE4.1).
pub static PTEST: [u8; 4] = [0x66, 0x0f, 0x38, 0x17];
/// Push r{16,32,64}.
pub static PUSH_REG: [u8; 1] = [0x50];

View File

@@ -1623,6 +1623,22 @@ pub(crate) fn define(
.operands_out(vec![a]),
);
let s = &operand("s", b1);
ig.push(
Inst::new(
"vany_true",
r#"
Reduce a vector to a scalar boolean.
Return a scalar boolean true if any lane in ``a`` is non-zero, false otherwise.
"#,
&formats.unary,
)
.operands_in(vec![a])
.operands_out(vec![s]),
);
let x = &operand("x", &TxN.lane_of());
ig.push(

View File

@@ -25,3 +25,9 @@ ebb0(v0: b64x2 [%xmm6], v1: b64x2 [%xmm3]):
[-, %xmm3] v2 = band_not v0, v1 ; bin: 66 0f df de
return v2
}
; Binary-encoding test for x86_ptest on f64x2 operands. With v0 in %xmm0 and v1 in %xmm2 and
; the flags result in %rflags, the expected bytes are the PTEST opcode (66 0f 38 17) followed
; by c2 (presumably the ModRM byte selecting the two registers -- confirm against the recipe).
function %x86_ptest_f64x2(f64x2, f64x2) {
ebb0(v0: f64x2 [%xmm0], v1: f64x2 [%xmm2]):
[-, %rflags] v2 = x86_ptest v0, v1 ; bin: 66 0f 38 17 c2
return
}

View File

@@ -9,3 +9,11 @@ ebb0(v0: b32x4):
; nextln: v1 = bxor v2, v0
return v1
}
; Legalization test for vany_true on b32x4. The expected output replaces vany_true with an
; x86_ptest of the input vector against itself, immediately followed by a trueif ne on the
; resulting flags value, which defines the original result v1.
function %vany_true_b32x4(b32x4) -> b1 {
ebb0(v0: b32x4):
v1 = vany_true v0
; check: v2 = x86_ptest v0, v0
; nextln: v1 = trueif ne v2
return v1
}

View File

@@ -21,3 +21,21 @@ ebb0:
return v4
}
; run
; Executes vany_true on an i16x8 constant whose first lane is 1 and whose other lanes are 0,
; and returns the b1 result directly (presumably the harness treats a true return value as
; success -- confirm).
function %vany_true_i16x8() -> b1 {
ebb0:
v0 = vconst.i16x8 [1 0 0 0 0 0 0 0]
v1 = vany_true v0
return v1
}
; run
; Executes vany_true on an all-false b32x4 constant. The b1 result is converted to i32 with
; bint and compared against 0, so the function returns true exactly when vany_true produced
; false.
function %vany_true_b32x4() -> b1 {
ebb0:
v0 = vconst.b32x4 [false false false false]
v1 = vany_true v0
v2 = bint.i32 v1
v3 = icmp_imm eq v2, 0
return v3
}
; run