Add unarrow instruction with x86 implementation
Adds a shared `unarrow` instruction in order to lower the Wasm SIMD specification's unsigned narrowing (see https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md#integer-to-integer-narrowing). Additionally, this commit implements the instruction for x86 using PACKUSWB and PACKUSDW for the applicable encodings.
This commit is contained in:
@@ -1677,6 +1677,7 @@ fn define_simd(
|
|||||||
let uload32x2 = shared.by_name("uload32x2");
|
let uload32x2 = shared.by_name("uload32x2");
|
||||||
let uload32x2_complex = shared.by_name("uload32x2_complex");
|
let uload32x2_complex = shared.by_name("uload32x2_complex");
|
||||||
let snarrow = shared.by_name("snarrow");
|
let snarrow = shared.by_name("snarrow");
|
||||||
|
let unarrow = shared.by_name("unarrow");
|
||||||
let ushr_imm = shared.by_name("ushr_imm");
|
let ushr_imm = shared.by_name("ushr_imm");
|
||||||
let usub_sat = shared.by_name("usub_sat");
|
let usub_sat = shared.by_name("usub_sat");
|
||||||
let vconst = shared.by_name("vconst");
|
let vconst = shared.by_name("vconst");
|
||||||
@@ -1904,6 +1905,13 @@ fn define_simd(
|
|||||||
let snarrow = snarrow.bind(vector(*ty, sse_vector_size));
|
let snarrow = snarrow.bind(vector(*ty, sse_vector_size));
|
||||||
e.enc_both_inferred(snarrow, rec_fa.opcodes(*opcodes));
|
e.enc_both_inferred(snarrow, rec_fa.opcodes(*opcodes));
|
||||||
}
|
}
|
||||||
|
for (ty, opcodes, isap) in &[
|
||||||
|
(I16, &PACKUSWB[..], None),
|
||||||
|
(I32, &PACKUSDW[..], Some(use_sse41_simd)),
|
||||||
|
] {
|
||||||
|
let unarrow = unarrow.bind(vector(*ty, sse_vector_size));
|
||||||
|
e.enc_both_inferred_maybe_isap(unarrow, rec_fa.opcodes(*opcodes), *isap);
|
||||||
|
}
|
||||||
|
|
||||||
// SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8).
|
// SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8).
|
||||||
for from_type in ValueType::all_lane_types().filter(allowed_simd_type) {
|
for from_type in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||||
|
|||||||
@@ -314,7 +314,7 @@ pub static PABSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x1e];
|
|||||||
/// xmm1 (SSSE3).
|
/// xmm1 (SSSE3).
|
||||||
pub static PABSW: [u8; 4] = [0x66, 0x0f, 0x38, 0x1d];
|
pub static PABSW: [u8; 4] = [0x66, 0x0f, 0x38, 0x1d];
|
||||||
|
|
||||||
/// Converts 8 packed signed word integers from xmm1 and from xxm2/m128 into 16 packed signed byte
|
/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed signed byte
|
||||||
/// integers in xmm1 using signed saturation (SSE2).
|
/// integers in xmm1 using signed saturation (SSE2).
|
||||||
pub static PACKSSWB: [u8; 3] = [0x66, 0x0f, 0x63];
|
pub static PACKSSWB: [u8; 3] = [0x66, 0x0f, 0x63];
|
||||||
|
|
||||||
@@ -322,6 +322,14 @@ pub static PACKSSWB: [u8; 3] = [0x66, 0x0f, 0x63];
|
|||||||
/// word integers in xmm1 using signed saturation (SSE2).
|
/// word integers in xmm1 using signed saturation (SSE2).
|
||||||
pub static PACKSSDW: [u8; 3] = [0x66, 0x0f, 0x6b];
|
pub static PACKSSDW: [u8; 3] = [0x66, 0x0f, 0x6b];
|
||||||
|
|
||||||
|
/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed unsigned byte
|
||||||
|
/// integers in xmm1 using unsigned saturation (SSE2).
|
||||||
|
pub static PACKUSWB: [u8; 3] = [0x66, 0x0f, 0x67];
|
||||||
|
|
||||||
|
/// Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 packed unsigned
|
||||||
|
/// word integers in xmm1 using unsigned saturation (SSE4.1).
|
||||||
|
pub static PACKUSDW: [u8; 4] = [0x66, 0x0f, 0x38, 0x2b];
|
||||||
|
|
||||||
/// Add packed byte integers from xmm2/m128 and xmm1 (SSE2).
|
/// Add packed byte integers from xmm2/m128 and xmm1 (SSE2).
|
||||||
pub static PADDB: [u8; 3] = [0x66, 0x0f, 0xfc];
|
pub static PADDB: [u8; 3] = [0x66, 0x0f, 0xfc];
|
||||||
|
|
||||||
|
|||||||
@@ -3914,6 +3914,26 @@ pub(crate) fn define(
|
|||||||
.operands_out(vec![a]),
|
.operands_out(vec![a]),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"unarrow",
|
||||||
|
r#"
|
||||||
|
Combine `x` and `y` into a vector with twice the lanes but half the integer width while
|
||||||
|
saturating overflowing values to the unsigned maximum and minimum.
|
||||||
|
|
||||||
|
Note that all input lanes are considered signed: any negative lanes will overflow and be
|
||||||
|
replaced with the unsigned minimum, `0x00`.
|
||||||
|
|
||||||
|
The lanes will be concatenated after narrowing. For example, when `x` and `y` are `i32x4`
|
||||||
|
and `x = [x3, x2, x1, x0]` and `y = [y3, y2, y1, y0]`, then after narrowing the value
|
||||||
|
returned is an `i16x8`: `a = [y3', y2', y1', y0', x3', x2', x1', x0']`.
|
||||||
|
"#,
|
||||||
|
&formats.binary,
|
||||||
|
)
|
||||||
|
.operands_in(vec![x, y])
|
||||||
|
.operands_out(vec![a]),
|
||||||
|
);
|
||||||
|
|
||||||
let IntTo = &TypeVar::new(
|
let IntTo = &TypeVar::new(
|
||||||
"IntTo",
|
"IntTo",
|
||||||
"A larger integer type with the same number of lanes",
|
"A larger integer type with the same number of lanes",
|
||||||
|
|||||||
@@ -2070,7 +2070,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
|
|
||||||
Opcode::AvgRound => unimplemented!(),
|
Opcode::AvgRound => unimplemented!(),
|
||||||
Opcode::Iabs => unimplemented!(),
|
Opcode::Iabs => unimplemented!(),
|
||||||
Opcode::Snarrow => unimplemented!(),
|
Opcode::Snarrow | Opcode::Unarrow => unimplemented!(),
|
||||||
Opcode::TlsValue => unimplemented!(),
|
Opcode::TlsValue => unimplemented!(),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -118,8 +118,9 @@ block0(v0: i32x4 [%xmm7], v1: i32x4 [%xmm6]):
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
function %snarrow_i16x8(i16x8, i16x8) {
|
function %narrowing_i16x8(i16x8, i16x8) {
|
||||||
block0(v0: i16x8 [%xmm7], v1: i16x8 [%xmm8]):
|
block0(v0: i16x8 [%xmm7], v1: i16x8 [%xmm8]):
|
||||||
[-, %xmm7] v2 = snarrow v0, v1 ; bin: 66 41 0f 63 f8
|
[-, %xmm7] v2 = snarrow v0, v1 ; bin: 66 41 0f 63 f8
|
||||||
|
[-, %xmm7] v3 = unarrow v0, v1 ; bin: 66 41 0f 67 f8
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -212,3 +212,10 @@ block0(v0: i32x4, v1: i32x4):
|
|||||||
return v2
|
return v2
|
||||||
}
|
}
|
||||||
; run: %snarrow([0 1 -1 0x0001ffff], [4 5 -6 0xffffffff]) == [0 1 -1 0x7fff 4 5 -6 0xffff]
|
; run: %snarrow([0 1 -1 0x0001ffff], [4 5 -6 0xffffffff]) == [0 1 -1 0x7fff 4 5 -6 0xffff]
|
||||||
|
|
||||||
|
function %unarrow(i32x4, i32x4) -> i16x8 {
|
||||||
|
block0(v0: i32x4, v1: i32x4):
|
||||||
|
v2 = unarrow v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
; run: %unarrow([0 1 -1 0x0001ffff], [4 5 -6 0xffffffff]) == [0 1 0 0xffff 4 5 0 0]
|
||||||
|
|||||||
Reference in New Issue
Block a user