Add x86 implementation of SIMD swizzle instruction
This commit is contained in:
@@ -338,7 +338,9 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
|
|||||||
let splat = insts.by_name("splat");
|
let splat = insts.by_name("splat");
|
||||||
let shuffle = insts.by_name("shuffle");
|
let shuffle = insts.by_name("shuffle");
|
||||||
let sshr = insts.by_name("sshr");
|
let sshr = insts.by_name("sshr");
|
||||||
|
let swizzle = insts.by_name("swizzle");
|
||||||
let trueif = insts.by_name("trueif");
|
let trueif = insts.by_name("trueif");
|
||||||
|
let uadd_sat = insts.by_name("uadd_sat");
|
||||||
let umax = insts.by_name("umax");
|
let umax = insts.by_name("umax");
|
||||||
let umin = insts.by_name("umin");
|
let umin = insts.by_name("umin");
|
||||||
let ushr_imm = insts.by_name("ushr_imm");
|
let ushr_imm = insts.by_name("ushr_imm");
|
||||||
@@ -375,6 +377,7 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
|
|||||||
let uimm8_one = Literal::constant(&imm.uimm8, 0x01);
|
let uimm8_one = Literal::constant(&imm.uimm8, 0x01);
|
||||||
let u128_zeroes = constant(vec![0x00; 16]);
|
let u128_zeroes = constant(vec![0x00; 16]);
|
||||||
let u128_ones = constant(vec![0xff; 16]);
|
let u128_ones = constant(vec![0xff; 16]);
|
||||||
|
let u128_seventies = constant(vec![0x70; 16]);
|
||||||
let a = var("a");
|
let a = var("a");
|
||||||
let b = var("b");
|
let b = var("b");
|
||||||
let c = var("c");
|
let c = var("c");
|
||||||
@@ -459,6 +462,21 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SIMD swizzle; the following inefficient implementation is due to the Wasm SIMD spec requiring
|
||||||
|
// mask indexes greater than 15 to produce a 0 in the resulting lane. For the spec discussion,
|
||||||
|
// see https://github.com/WebAssembly/simd/issues/93.
|
||||||
|
{
|
||||||
|
let swizzle = swizzle.bind(vector(I8, sse_vector_size));
|
||||||
|
narrow.legalize(
|
||||||
|
def!(a = swizzle(x, y)),
|
||||||
|
vec![
|
||||||
|
def!(b = vconst(u128_seventies)),
|
||||||
|
def!(c = uadd_sat(y, b)),
|
||||||
|
def!(a = x86_pshufb(x, c)),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// SIMD bnot
|
// SIMD bnot
|
||||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||||
let bnot = bnot.bind(vector(ty, sse_vector_size));
|
let bnot = bnot.bind(vector(ty, sse_vector_size));
|
||||||
|
|||||||
@@ -517,7 +517,36 @@ fn define_simd_lane_access(
|
|||||||
.operands_out(vec![a]),
|
.operands_out(vec![a]),
|
||||||
);
|
);
|
||||||
|
|
||||||
let x = &Operand::new("x", TxN).with_doc("SIMD vector to modify");
|
let I8x16 = &TypeVar::new(
|
||||||
|
"I8x16",
|
||||||
|
"A SIMD vector type consisting of 16 lanes of 8-bit integers",
|
||||||
|
TypeSetBuilder::new()
|
||||||
|
.ints(8..8)
|
||||||
|
.simd_lanes(16..16)
|
||||||
|
.includes_scalars(false)
|
||||||
|
.build(),
|
||||||
|
);
|
||||||
|
let x = &Operand::new("x", I8x16).with_doc("Vector to modify by re-arranging lanes");
|
||||||
|
let y = &Operand::new("y", I8x16).with_doc("Mask for re-arranging lanes");
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"swizzle",
|
||||||
|
r#"
|
||||||
|
Vector swizzle.
|
||||||
|
|
||||||
|
Returns a new vector with byte-width lanes selected from the lanes of the first input
|
||||||
|
vector ``x`` specified in the second input vector ``y``. The indices ``i`` in range
|
||||||
|
``[0, 15]`` select the ``i``-th element of ``x``. For indices outside of the range the
|
||||||
|
resulting lane is 0. Note that this operates on byte-width lanes.
|
||||||
|
"#,
|
||||||
|
&formats.binary,
|
||||||
|
)
|
||||||
|
.operands_in(vec![x, y])
|
||||||
|
.operands_out(vec![a]),
|
||||||
|
);
|
||||||
|
|
||||||
|
let x = &Operand::new("x", TxN).with_doc("The vector to modify");
|
||||||
let y = &Operand::new("y", &TxN.lane_of()).with_doc("New lane value");
|
let y = &Operand::new("y", &TxN.lane_of()).with_doc("New lane value");
|
||||||
let Idx = &Operand::new("Idx", &imm.uimm8).with_doc("Lane index");
|
let Idx = &Operand::new("Idx", &imm.uimm8).with_doc("Lane index");
|
||||||
|
|
||||||
|
|||||||
@@ -83,3 +83,14 @@ block0:
|
|||||||
; nextln: v4 = vconst.i8x16 0x00
|
; nextln: v4 = vconst.i8x16 0x00
|
||||||
; nextln: v1 = x86_pshufb v3, v4
|
; nextln: v1 = x86_pshufb v3, v4
|
||||||
; nextln: return v1
|
; nextln: return v1
|
||||||
|
|
||||||
|
; Verifies that swizzle on i8x16 is legalized into a 0x70-filled constant, a saturating add on the mask, and x86_pshufb.
function %swizzle() -> i8x16 {
|
||||||
|
block0:
|
||||||
|
v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
|
||||||
|
v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
|
||||||
|
v2 = swizzle.i8x16 v0, v1
|
||||||
|
; check: v3 = vconst.i8x16 0x70707070707070707070707070707070
|
||||||
|
; nextln: v4 = uadd_sat v1, v3
|
||||||
|
; nextln: v2 = x86_pshufb v0, v4
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|||||||
@@ -165,3 +165,29 @@ block0:
|
|||||||
return v8
|
return v8
|
||||||
}
|
}
|
||||||
; run
|
; run
|
||||||
|
|
||||||
|
function %swizzle() -> b1 {
|
||||||
|
block0:
|
||||||
|
v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
|
||||||
|
v1 = vconst.i8x16 [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 42]
|
||||||
|
v2 = swizzle.i8x16 v0, v1 ; reverse the lanes; the over-large index 42 yields a 0 lane (which happens to equal lane 0 of v0)
|
||||||
|

||||||
|
v3 = vconst.i8x16 [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0]
|
||||||
|
v4 = icmp eq v2, v3
|
||||||
|
v5 = vall_true v4
|
||||||
|
return v5
|
||||||
|
}
|
||||||
|
; run:
|
||||||
|
|
||||||
|
function %swizzle_with_overflow() -> b1 {
|
||||||
|
block0:
|
||||||
|
v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
|
||||||
|
v1 = vconst.i8x16 [16 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
|
||||||
|
v2 = swizzle.i8x16 v0, v1 ; after adding 0x70, 16 becomes 0x80 and 250 saturates to 0xff instead of wrapping; both have the MSB set, so PSHUFB zeroes those lanes
|
||||||
|

||||||
|
v3 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
|
||||||
|
v4 = icmp eq v2, v3
|
||||||
|
v5 = vall_true v4
|
||||||
|
return v5
|
||||||
|
}
|
||||||
|
; run:
|
||||||
|
|||||||
Reference in New Issue
Block a user