Add x86 SIMD vany_true and x86_ptest
To implement WebAssembly SIMD's any_true (https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md#any-lane-true) on x86, some CLIF instruction must be legalized to a `PTEST` followed by a `SETNZ`; I chose to add `vany_true` for that purpose. To emit `PTEST` I added the new x86-specific CLIF instruction `x86_ptest`, and `SETNZ` is produced by CLIF's `trueif ne` applied to the flags that `x86_ptest` returns.
This commit is contained in:
@@ -521,6 +521,7 @@ pub(crate) fn define(
|
||||
let x86_psll = x86.by_name("x86_psll");
|
||||
let x86_psra = x86.by_name("x86_psra");
|
||||
let x86_psrl = x86.by_name("x86_psrl");
|
||||
let x86_ptest = x86.by_name("x86_ptest");
|
||||
let x86_push = x86.by_name("x86_push");
|
||||
let x86_sdivmodx = x86.by_name("x86_sdivmodx");
|
||||
let x86_smulx = x86.by_name("x86_smulx");
|
||||
@@ -1988,6 +1989,10 @@ pub(crate) fn define(
|
||||
// xor
|
||||
let bxor = bxor.bind(vector(ty, sse_vector_size));
|
||||
e.enc_32_64(bxor, rec_fa.opcodes(&PXOR));
|
||||
|
||||
// ptest
|
||||
let x86_ptest = x86_ptest.bind(vector(ty, sse_vector_size));
|
||||
e.enc_32_64_maybe_isap(x86_ptest, rec_fcmp.opcodes(&PTEST), Some(use_sse41_simd));
|
||||
}
|
||||
|
||||
// SIMD bitcast from I32/I64 to the low bits of a vector (e.g. I64x2); this register movement
|
||||
|
||||
@@ -467,5 +467,24 @@ pub(crate) fn define(
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let x = &operand("x", TxN);
|
||||
let y = &operand("y", TxN);
|
||||
let f = &operand("f", iflags);
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_ptest",
|
||||
r#"
|
||||
Logical Compare -- PTEST will set the ZF flag if all bits in the result are 0 of the
|
||||
bitwise AND of the first source operand (first operand) and the second source operand
|
||||
(second operand). PTEST sets the CF flag if all bits in the result are 0 of the bitwise
|
||||
AND of the second source operand (second operand) and the logical NOT of the destination
|
||||
operand (first operand).
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![f]),
|
||||
);
|
||||
|
||||
ig.build()
|
||||
}
|
||||
|
||||
@@ -55,12 +55,14 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
||||
let shuffle = insts.by_name("shuffle");
|
||||
let srem = insts.by_name("srem");
|
||||
let sshr = insts.by_name("sshr");
|
||||
let trueif = insts.by_name("trueif");
|
||||
let udiv = insts.by_name("udiv");
|
||||
let umulhi = insts.by_name("umulhi");
|
||||
let ushr_imm = insts.by_name("ushr_imm");
|
||||
let urem = insts.by_name("urem");
|
||||
let ushr = insts.by_name("ushr");
|
||||
let vconst = insts.by_name("vconst");
|
||||
let vany_true = insts.by_name("vany_true");
|
||||
|
||||
let x86_bsf = x86_instructions.by_name("x86_bsf");
|
||||
let x86_bsr = x86_instructions.by_name("x86_bsr");
|
||||
@@ -69,6 +71,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
||||
let x86_psll = x86_instructions.by_name("x86_psll");
|
||||
let x86_psra = x86_instructions.by_name("x86_psra");
|
||||
let x86_psrl = x86_instructions.by_name("x86_psrl");
|
||||
let x86_ptest = x86_instructions.by_name("x86_ptest");
|
||||
let x86_umulx = x86_instructions.by_name("x86_umulx");
|
||||
let x86_smulx = x86_instructions.by_name("x86_smulx");
|
||||
|
||||
@@ -446,6 +449,16 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD vany_true
|
||||
let ne = Literal::enumerator_for(&imm.intcc, "ne");
|
||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||
let vany_true = vany_true.bind(vector(ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(y = vany_true(x)),
|
||||
vec![def!(a = x86_ptest(x, x)), def!(y = trueif(ne, a))],
|
||||
);
|
||||
}
|
||||
|
||||
narrow.custom_legalize(shuffle, "convert_shuffle");
|
||||
narrow.custom_legalize(extractlane, "convert_extractlane");
|
||||
narrow.custom_legalize(insertlane, "convert_insertlane");
|
||||
|
||||
@@ -375,6 +375,10 @@ pub static PSUBUSB: [u8; 3] = [0x66, 0x0f, 0xd8];
|
||||
/// and saturate results (SSE2).
|
||||
pub static PSUBUSW: [u8; 3] = [0x66, 0x0f, 0xd9];
|
||||
|
||||
/// Set ZF if xmm2/m128 AND xmm1 result is all 0s; set CF if xmm2/m128 AND NOT xmm1 result is all
|
||||
/// 0s (SSE4.1).
|
||||
pub static PTEST: [u8; 4] = [0x66, 0x0f, 0x38, 0x17];
|
||||
|
||||
/// Push r{16,32,64}.
|
||||
pub static PUSH_REG: [u8; 1] = [0x50];
|
||||
|
||||
|
||||
@@ -1623,6 +1623,22 @@ pub(crate) fn define(
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let s = &operand("s", b1);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"vany_true",
|
||||
r#"
|
||||
Reduce a vector to a scalar boolean.
|
||||
|
||||
Return a scalar boolean true if any lane in ``a`` is non-zero, false otherwise.
|
||||
"#,
|
||||
&formats.unary,
|
||||
)
|
||||
.operands_in(vec![a])
|
||||
.operands_out(vec![s]),
|
||||
);
|
||||
|
||||
let x = &operand("x", &TxN.lane_of());
|
||||
|
||||
ig.push(
|
||||
|
||||
@@ -25,3 +25,9 @@ ebb0(v0: b64x2 [%xmm6], v1: b64x2 [%xmm3]):
|
||||
[-, %xmm3] v2 = band_not v0, v1 ; bin: 66 0f df de
|
||||
return v2
|
||||
}
|
||||
|
||||
function %x86_ptest_f64x2(f64x2, f64x2) {
|
||||
ebb0(v0: f64x2 [%xmm0], v1: f64x2 [%xmm2]):
|
||||
[-, %rflags] v2 = x86_ptest v0, v1 ; bin: 66 0f 38 17 c2
|
||||
return
|
||||
}
|
||||
|
||||
@@ -9,3 +9,11 @@ ebb0(v0: b32x4):
|
||||
; nextln: v1 = bxor v2, v0
|
||||
return v1
|
||||
}
|
||||
|
||||
function %vany_true_b32x4(b32x4) -> b1 {
|
||||
ebb0(v0: b32x4):
|
||||
v1 = vany_true v0
|
||||
; check: v2 = x86_ptest v0, v0
|
||||
; nextln: v1 = trueif ne v2
|
||||
return v1
|
||||
}
|
||||
|
||||
@@ -21,3 +21,21 @@ ebb0:
|
||||
return v4
|
||||
}
|
||||
; run
|
||||
|
||||
function %vany_true_i16x8() -> b1 {
|
||||
ebb0:
|
||||
v0 = vconst.i16x8 [1 0 0 0 0 0 0 0]
|
||||
v1 = vany_true v0
|
||||
return v1
|
||||
}
|
||||
; run
|
||||
|
||||
function %vany_true_b32x4() -> b1 {
|
||||
ebb0:
|
||||
v0 = vconst.b32x4 [false false false false]
|
||||
v1 = vany_true v0
|
||||
v2 = bint.i32 v1
|
||||
v3 = icmp_imm eq v2, 0
|
||||
return v3
|
||||
}
|
||||
; run
|
||||
|
||||
Reference in New Issue
Block a user