Add x86 SIMD vall_true

In order to implement SIMD's all_true (https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md#all-lanes-true), we must legalize some instruction (I chose `vall_true`) to a comparison against 0 followed by a reduction similar to the one `vany_true` performs with `PTEST` and `SETNZ`; because the lanes are compared for equality with 0, the reduction here tests the `eq` condition rather than `ne`. Since `icmp` only allows integers but `vall_true` could allow more vector types, `raw_bitcast` is used to convert the lane types into integers, e.g. b32x4 to i32x4. To do so without runtime type-checking, the `raw_bitcast` instruction (which emits no instruction) can now bitcast from any vector type to the same type, e.g. i32x4 to i32x4.
This commit is contained in:
Andrew Brown
2019-10-18 16:21:06 -07:00
parent 65e18df12f
commit 879ccf871a
4 changed files with 78 additions and 0 deletions

View File

@@ -39,6 +39,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let fmax = insts.by_name("fmax"); let fmax = insts.by_name("fmax");
let fmin = insts.by_name("fmin"); let fmin = insts.by_name("fmin");
let iadd = insts.by_name("iadd"); let iadd = insts.by_name("iadd");
let icmp = insts.by_name("icmp");
let iconst = insts.by_name("iconst"); let iconst = insts.by_name("iconst");
let imul = insts.by_name("imul"); let imul = insts.by_name("imul");
let ineg = insts.by_name("ineg"); let ineg = insts.by_name("ineg");
@@ -62,6 +63,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let urem = insts.by_name("urem"); let urem = insts.by_name("urem");
let ushr = insts.by_name("ushr"); let ushr = insts.by_name("ushr");
let vconst = insts.by_name("vconst"); let vconst = insts.by_name("vconst");
let vall_true = insts.by_name("vall_true");
let vany_true = insts.by_name("vany_true"); let vany_true = insts.by_name("vany_true");
let x86_bsf = x86_instructions.by_name("x86_bsf"); let x86_bsf = x86_instructions.by_name("x86_bsf");
@@ -459,6 +461,40 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
); );
} }
// SIMD vall_true
//
// `vall_true` is expanded into a lane-wise comparison against zero followed by a PTEST-based
// reduction:
//   1. `icmp eq` against an all-zero vector flags every lane of the input that equals zero;
//   2. `x86_ptest c, c` tests that comparison result against itself;
//   3. `trueif eq` turns the resulting flags into the scalar boolean result, so `y` is true
//      only when no lane compared equal to zero.
// 16 zero bytes back the `vconst`; presumably this matches `sse_vector_size` (128 bits) --
// TODO confirm.
let zeroes = constant(vec![0x00; 16]);
// The `eq` condition code is used both for the lane comparison and for the final `trueif`.
let eq = Literal::enumerator_for(&imm.intcc, "eq");
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
// Bind the instruction to the concrete vector type built from this lane type.
let vall_true = vall_true.bind(vector(ty, sse_vector_size));
if ty.is_int() {
// In the common case (Wasm's integer-only all_true), we do not require a bitcast.
narrow.legalize(
def!(y = vall_true(x)),
vec![
def!(a = vconst(zeroes)),
def!(c = icmp(eq, x, a)),
def!(d = x86_ptest(c, c)),
def!(y = trueif(eq, d)),
],
);
} else {
// However, to support other types we must bitcast them to an integer vector to use
// icmp.
// The integer lane type has the same bit width as `ty`, so the vector shape is unchanged.
let lane_type_as_int = LaneType::int_from_bits(ty.lane_bits() as u16);
let raw_bitcast_to_int = raw_bitcast.bind(vector(lane_type_as_int, sse_vector_size));
narrow.legalize(
def!(y = vall_true(x)),
vec![
def!(a = vconst(zeroes)),
def!(b = raw_bitcast_to_int(x)),
def!(c = icmp(eq, b, a)),
def!(d = x86_ptest(c, c)),
def!(y = trueif(eq, d)),
],
);
}
}
narrow.custom_legalize(shuffle, "convert_shuffle"); narrow.custom_legalize(shuffle, "convert_shuffle");
narrow.custom_legalize(extractlane, "convert_extractlane"); narrow.custom_legalize(extractlane, "convert_extractlane");
narrow.custom_legalize(insertlane, "convert_insertlane"); narrow.custom_legalize(insertlane, "convert_insertlane");

View File

@@ -1639,6 +1639,20 @@ pub(crate) fn define(
.operands_out(vec![s]), .operands_out(vec![s]),
); );
// `vall_true`: unary reduction of the vector operand `a` to the scalar boolean `s`.
ig.push(
Inst::new(
"vall_true",
r#"
Reduce a vector to a scalar boolean.
Return a scalar boolean true if all lanes in ``a`` are non-zero, false otherwise.
"#,
&formats.unary,
)
.operands_in(vec![a])
.operands_out(vec![s]),
);
let x = &operand("x", &TxN.lane_of()); let x = &operand("x", &TxN.lane_of());
ig.push( ig.push(

View File

@@ -17,3 +17,13 @@ ebb0(v0: b32x4):
; nextln: v1 = trueif ne v2 ; nextln: v1 = trueif ne v2
return v1 return v1
} }
; Verifies the legalization of vall_true on an integer vector (no bitcast needed): a zero
; vconst, a lane-wise icmp eq against it, an x86_ptest of the comparison result with itself,
; and a trueif eq producing the scalar boolean.
function %vall_true_i64x2(i64x2) -> b1 {
ebb0(v0: i64x2):
v1 = vall_true v0
; check: v2 = vconst.i64x2 0x00
; nextln: v3 = icmp eq v0, v2
; nextln: v4 = x86_ptest v3, v3
; nextln: v1 = trueif eq v4
return v1
}

View File

@@ -39,3 +39,21 @@ ebb0:
return v3 return v3
} }
; run ; run
; Only lane 0 is non-zero, so vall_true is expected to produce false. The result is converted
; to an integer and compared with 0 so that the function returns true in that case
; (presumably what the test harness treats as a pass -- confirm against the runner).
function %vall_true_i16x8() -> b1 {
ebb0:
v0 = vconst.i16x8 [1 0 0 0 0 0 0 0]
v1 = vall_true v0
v2 = bint.i32 v1
v3 = icmp_imm eq v2, 0
return v3
}
; run
; Every lane is true, so vall_true is expected to return true directly. The b32x4 input is a
; non-integer vector, which exercises the legalization path that bitcasts to i32x4 before icmp.
function %vall_true_b32x4() -> b1 {
ebb0:
v0 = vconst.b32x4 [true true true true]
v1 = vall_true v0
return v1
}
}
; run