Add x86 SIMD vany_true and x86_ptest
To implement WebAssembly SIMD's `any_true` (https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md#any-lane-true), some CLIF instruction must be legalized to a `PTEST` + `SETNZ` sequence; I chose to add `vany_true` for this. To emit `PTEST` I added the new x86-specific CLIF instruction `x86_ptest`, and `SETNZ` is emitted by CLIF's existing `trueif ne`.
This commit is contained in:
@@ -521,6 +521,7 @@ pub(crate) fn define(
|
|||||||
let x86_psll = x86.by_name("x86_psll");
|
let x86_psll = x86.by_name("x86_psll");
|
||||||
let x86_psra = x86.by_name("x86_psra");
|
let x86_psra = x86.by_name("x86_psra");
|
||||||
let x86_psrl = x86.by_name("x86_psrl");
|
let x86_psrl = x86.by_name("x86_psrl");
|
||||||
|
let x86_ptest = x86.by_name("x86_ptest");
|
||||||
let x86_push = x86.by_name("x86_push");
|
let x86_push = x86.by_name("x86_push");
|
||||||
let x86_sdivmodx = x86.by_name("x86_sdivmodx");
|
let x86_sdivmodx = x86.by_name("x86_sdivmodx");
|
||||||
let x86_smulx = x86.by_name("x86_smulx");
|
let x86_smulx = x86.by_name("x86_smulx");
|
||||||
@@ -1988,6 +1989,10 @@ pub(crate) fn define(
|
|||||||
// xor
|
// xor
|
||||||
let bxor = bxor.bind(vector(ty, sse_vector_size));
|
let bxor = bxor.bind(vector(ty, sse_vector_size));
|
||||||
e.enc_32_64(bxor, rec_fa.opcodes(&PXOR));
|
e.enc_32_64(bxor, rec_fa.opcodes(&PXOR));
|
||||||
|
|
||||||
|
// ptest
|
||||||
|
let x86_ptest = x86_ptest.bind(vector(ty, sse_vector_size));
|
||||||
|
e.enc_32_64_maybe_isap(x86_ptest, rec_fcmp.opcodes(&PTEST), Some(use_sse41_simd));
|
||||||
}
|
}
|
||||||
|
|
||||||
// SIMD bitcast from I32/I64 to the low bits of a vector (e.g. I64x2); this register movement
|
// SIMD bitcast from I32/I64 to the low bits of a vector (e.g. I64x2); this register movement
|
||||||
|
|||||||
@@ -467,5 +467,24 @@ pub(crate) fn define(
|
|||||||
.operands_out(vec![a]),
|
.operands_out(vec![a]),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let x = &operand("x", TxN);
|
||||||
|
let y = &operand("y", TxN);
|
||||||
|
let f = &operand("f", iflags);
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"x86_ptest",
|
||||||
|
r#"
|
||||||
|
Logical Compare -- PTEST will set the ZF flag if all bits in the result are 0 of the
|
||||||
|
bitwise AND of the first source operand (first operand) and the second source operand
|
||||||
|
(second operand). PTEST sets the CF flag if all bits in the result are 0 of the bitwise
|
||||||
|
AND of the second source operand (second operand) and the logical NOT of the destination
|
||||||
|
operand (first operand).
|
||||||
|
"#,
|
||||||
|
&formats.binary,
|
||||||
|
)
|
||||||
|
.operands_in(vec![x, y])
|
||||||
|
.operands_out(vec![f]),
|
||||||
|
);
|
||||||
|
|
||||||
ig.build()
|
ig.build()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -55,12 +55,14 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||||||
let shuffle = insts.by_name("shuffle");
|
let shuffle = insts.by_name("shuffle");
|
||||||
let srem = insts.by_name("srem");
|
let srem = insts.by_name("srem");
|
||||||
let sshr = insts.by_name("sshr");
|
let sshr = insts.by_name("sshr");
|
||||||
|
let trueif = insts.by_name("trueif");
|
||||||
let udiv = insts.by_name("udiv");
|
let udiv = insts.by_name("udiv");
|
||||||
let umulhi = insts.by_name("umulhi");
|
let umulhi = insts.by_name("umulhi");
|
||||||
let ushr_imm = insts.by_name("ushr_imm");
|
let ushr_imm = insts.by_name("ushr_imm");
|
||||||
let urem = insts.by_name("urem");
|
let urem = insts.by_name("urem");
|
||||||
let ushr = insts.by_name("ushr");
|
let ushr = insts.by_name("ushr");
|
||||||
let vconst = insts.by_name("vconst");
|
let vconst = insts.by_name("vconst");
|
||||||
|
let vany_true = insts.by_name("vany_true");
|
||||||
|
|
||||||
let x86_bsf = x86_instructions.by_name("x86_bsf");
|
let x86_bsf = x86_instructions.by_name("x86_bsf");
|
||||||
let x86_bsr = x86_instructions.by_name("x86_bsr");
|
let x86_bsr = x86_instructions.by_name("x86_bsr");
|
||||||
@@ -69,6 +71,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||||||
let x86_psll = x86_instructions.by_name("x86_psll");
|
let x86_psll = x86_instructions.by_name("x86_psll");
|
||||||
let x86_psra = x86_instructions.by_name("x86_psra");
|
let x86_psra = x86_instructions.by_name("x86_psra");
|
||||||
let x86_psrl = x86_instructions.by_name("x86_psrl");
|
let x86_psrl = x86_instructions.by_name("x86_psrl");
|
||||||
|
let x86_ptest = x86_instructions.by_name("x86_ptest");
|
||||||
let x86_umulx = x86_instructions.by_name("x86_umulx");
|
let x86_umulx = x86_instructions.by_name("x86_umulx");
|
||||||
let x86_smulx = x86_instructions.by_name("x86_smulx");
|
let x86_smulx = x86_instructions.by_name("x86_smulx");
|
||||||
|
|
||||||
@@ -446,6 +449,16 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SIMD vany_true
|
||||||
|
let ne = Literal::enumerator_for(&imm.intcc, "ne");
|
||||||
|
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||||
|
let vany_true = vany_true.bind(vector(ty, sse_vector_size));
|
||||||
|
narrow.legalize(
|
||||||
|
def!(y = vany_true(x)),
|
||||||
|
vec![def!(a = x86_ptest(x, x)), def!(y = trueif(ne, a))],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
narrow.custom_legalize(shuffle, "convert_shuffle");
|
narrow.custom_legalize(shuffle, "convert_shuffle");
|
||||||
narrow.custom_legalize(extractlane, "convert_extractlane");
|
narrow.custom_legalize(extractlane, "convert_extractlane");
|
||||||
narrow.custom_legalize(insertlane, "convert_insertlane");
|
narrow.custom_legalize(insertlane, "convert_insertlane");
|
||||||
|
|||||||
@@ -375,6 +375,10 @@ pub static PSUBUSB: [u8; 3] = [0x66, 0x0f, 0xd8];
|
|||||||
/// and saturate results (SSE2).
|
/// and saturate results (SSE2).
|
||||||
pub static PSUBUSW: [u8; 3] = [0x66, 0x0f, 0xd9];
|
pub static PSUBUSW: [u8; 3] = [0x66, 0x0f, 0xd9];
|
||||||
|
|
||||||
|
/// Set ZF if xmm2/m128 AND xmm1 result is all 0s; set CF if xmm2/m128 AND NOT xmm1 result is all
|
||||||
|
/// 0s (SSE4.1).
|
||||||
|
pub static PTEST: [u8; 4] = [0x66, 0x0f, 0x38, 0x17];
|
||||||
|
|
||||||
/// Push r{16,32,64}.
|
/// Push r{16,32,64}.
|
||||||
pub static PUSH_REG: [u8; 1] = [0x50];
|
pub static PUSH_REG: [u8; 1] = [0x50];
|
||||||
|
|
||||||
|
|||||||
@@ -1623,6 +1623,22 @@ pub(crate) fn define(
|
|||||||
.operands_out(vec![a]),
|
.operands_out(vec![a]),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let s = &operand("s", b1);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"vany_true",
|
||||||
|
r#"
|
||||||
|
Reduce a vector to a scalar boolean.
|
||||||
|
|
||||||
|
Return a scalar boolean true if any lane in ``a`` is non-zero, false otherwise.
|
||||||
|
"#,
|
||||||
|
&formats.unary,
|
||||||
|
)
|
||||||
|
.operands_in(vec![a])
|
||||||
|
.operands_out(vec![s]),
|
||||||
|
);
|
||||||
|
|
||||||
let x = &operand("x", &TxN.lane_of());
|
let x = &operand("x", &TxN.lane_of());
|
||||||
|
|
||||||
ig.push(
|
ig.push(
|
||||||
|
|||||||
@@ -25,3 +25,9 @@ ebb0(v0: b64x2 [%xmm6], v1: b64x2 [%xmm3]):
|
|||||||
[-, %xmm3] v2 = band_not v0, v1 ; bin: 66 0f df de
|
[-, %xmm3] v2 = band_not v0, v1 ; bin: 66 0f df de
|
||||||
return v2
|
return v2
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function %x86_ptest_f64x2(f64x2, f64x2) {
|
||||||
|
ebb0(v0: f64x2 [%xmm0], v1: f64x2 [%xmm2]):
|
||||||
|
[-, %rflags] v2 = x86_ptest v0, v1 ; bin: 66 0f 38 17 c2
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|||||||
@@ -9,3 +9,11 @@ ebb0(v0: b32x4):
|
|||||||
; nextln: v1 = bxor v2, v0
|
; nextln: v1 = bxor v2, v0
|
||||||
return v1
|
return v1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function %vany_true_b32x4(b32x4) -> b1 {
|
||||||
|
ebb0(v0: b32x4):
|
||||||
|
v1 = vany_true v0
|
||||||
|
; check: v2 = x86_ptest v0, v0
|
||||||
|
; nextln: v1 = trueif ne v2
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|||||||
@@ -21,3 +21,21 @@ ebb0:
|
|||||||
return v4
|
return v4
|
||||||
}
|
}
|
||||||
; run
|
; run
|
||||||
|
|
||||||
|
function %vany_true_i16x8() -> b1 {
|
||||||
|
ebb0:
|
||||||
|
v0 = vconst.i16x8 [1 0 0 0 0 0 0 0]
|
||||||
|
v1 = vany_true v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|
||||||
|
function %vany_true_b32x4() -> b1 {
|
||||||
|
ebb0:
|
||||||
|
v0 = vconst.b32x4 [false false false false]
|
||||||
|
v1 = vany_true v0
|
||||||
|
v2 = bint.i32 v1
|
||||||
|
v3 = icmp_imm eq v2, 0
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|||||||
Reference in New Issue
Block a user