Add x86 SIMD vall_true
In order to implement SIMD's all_true (https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md#all-lanes-true), we must legalize some instruction (I chose `vall_true`) to a comparison against 0 followed by a reduction similar to the one used for vany_true, using `PTEST` and `SETNZ`. Since `icmp` only accepts integer types but `vall_true` may be applied to other vector types, `raw_bitcast` is used to convert the lane types into integers, e.g. b32x4 to i32x4. To do so without runtime type-checking, the `raw_bitcast` instruction (which emits no machine instruction) can now bitcast from any vector type to the same type, e.g. i32x4 to i32x4.
This commit is contained in:
@@ -39,6 +39,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||||||
let fmax = insts.by_name("fmax");
|
let fmax = insts.by_name("fmax");
|
||||||
let fmin = insts.by_name("fmin");
|
let fmin = insts.by_name("fmin");
|
||||||
let iadd = insts.by_name("iadd");
|
let iadd = insts.by_name("iadd");
|
||||||
|
let icmp = insts.by_name("icmp");
|
||||||
let iconst = insts.by_name("iconst");
|
let iconst = insts.by_name("iconst");
|
||||||
let imul = insts.by_name("imul");
|
let imul = insts.by_name("imul");
|
||||||
let ineg = insts.by_name("ineg");
|
let ineg = insts.by_name("ineg");
|
||||||
@@ -62,6 +63,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||||||
let urem = insts.by_name("urem");
|
let urem = insts.by_name("urem");
|
||||||
let ushr = insts.by_name("ushr");
|
let ushr = insts.by_name("ushr");
|
||||||
let vconst = insts.by_name("vconst");
|
let vconst = insts.by_name("vconst");
|
||||||
|
let vall_true = insts.by_name("vall_true");
|
||||||
let vany_true = insts.by_name("vany_true");
|
let vany_true = insts.by_name("vany_true");
|
||||||
|
|
||||||
let x86_bsf = x86_instructions.by_name("x86_bsf");
|
let x86_bsf = x86_instructions.by_name("x86_bsf");
|
||||||
@@ -459,6 +461,40 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SIMD vall_true
|
||||||
|
let zeroes = constant(vec![0x00; 16]);
|
||||||
|
let eq = Literal::enumerator_for(&imm.intcc, "eq");
|
||||||
|
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||||
|
let vall_true = vall_true.bind(vector(ty, sse_vector_size));
|
||||||
|
if ty.is_int() {
|
||||||
|
// In the common case (Wasm's integer-only all_true), we do not require a bitcast.
|
||||||
|
narrow.legalize(
|
||||||
|
def!(y = vall_true(x)),
|
||||||
|
vec![
|
||||||
|
def!(a = vconst(zeroes)),
|
||||||
|
def!(c = icmp(eq, x, a)),
|
||||||
|
def!(d = x86_ptest(c, c)),
|
||||||
|
def!(y = trueif(eq, d)),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
// However, to support other types we must bitcast them to an integer vector to use
|
||||||
|
// icmp.
|
||||||
|
let lane_type_as_int = LaneType::int_from_bits(ty.lane_bits() as u16);
|
||||||
|
let raw_bitcast_to_int = raw_bitcast.bind(vector(lane_type_as_int, sse_vector_size));
|
||||||
|
narrow.legalize(
|
||||||
|
def!(y = vall_true(x)),
|
||||||
|
vec![
|
||||||
|
def!(a = vconst(zeroes)),
|
||||||
|
def!(b = raw_bitcast_to_int(x)),
|
||||||
|
def!(c = icmp(eq, b, a)),
|
||||||
|
def!(d = x86_ptest(c, c)),
|
||||||
|
def!(y = trueif(eq, d)),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
narrow.custom_legalize(shuffle, "convert_shuffle");
|
narrow.custom_legalize(shuffle, "convert_shuffle");
|
||||||
narrow.custom_legalize(extractlane, "convert_extractlane");
|
narrow.custom_legalize(extractlane, "convert_extractlane");
|
||||||
narrow.custom_legalize(insertlane, "convert_insertlane");
|
narrow.custom_legalize(insertlane, "convert_insertlane");
|
||||||
|
|||||||
@@ -1639,6 +1639,20 @@ pub(crate) fn define(
|
|||||||
.operands_out(vec![s]),
|
.operands_out(vec![s]),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"vall_true",
|
||||||
|
r#"
|
||||||
|
Reduce a vector to a scalar boolean.
|
||||||
|
|
||||||
|
Return a scalar boolean true if all lanes in ``a`` are non-zero, false otherwise.
|
||||||
|
"#,
|
||||||
|
&formats.unary,
|
||||||
|
)
|
||||||
|
.operands_in(vec![a])
|
||||||
|
.operands_out(vec![s]),
|
||||||
|
);
|
||||||
|
|
||||||
let x = &operand("x", &TxN.lane_of());
|
let x = &operand("x", &TxN.lane_of());
|
||||||
|
|
||||||
ig.push(
|
ig.push(
|
||||||
|
|||||||
@@ -17,3 +17,13 @@ ebb0(v0: b32x4):
|
|||||||
; nextln: v1 = trueif ne v2
|
; nextln: v1 = trueif ne v2
|
||||||
return v1
|
return v1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function %vall_true_i64x2(i64x2) -> b1 {
|
||||||
|
ebb0(v0: i64x2):
|
||||||
|
v1 = vall_true v0
|
||||||
|
; check: v2 = vconst.i64x2 0x00
|
||||||
|
; nextln: v3 = icmp eq v0, v2
|
||||||
|
; nextln: v4 = x86_ptest v3, v3
|
||||||
|
; nextln: v1 = trueif eq v4
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|||||||
@@ -39,3 +39,21 @@ ebb0:
|
|||||||
return v3
|
return v3
|
||||||
}
|
}
|
||||||
; run
|
; run
|
||||||
|
|
||||||
|
function %vall_true_i16x8() -> b1 {
|
||||||
|
ebb0:
|
||||||
|
v0 = vconst.i16x8 [1 0 0 0 0 0 0 0]
|
||||||
|
v1 = vall_true v0
|
||||||
|
v2 = bint.i32 v1
|
||||||
|
v3 = icmp_imm eq v2, 0
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|
||||||
|
function %vall_true_b32x4() -> b1 {
|
||||||
|
ebb0:
|
||||||
|
v0 = vconst.b32x4 [true true true true]
|
||||||
|
v1 = vall_true v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|||||||
Reference in New Issue
Block a user