Add saturating addition with a SIMD encoding
This includes the new instructions `sadd_sat` and `uadd_sat` and only encodes the i8x16 and i16x8 types; these are the types needed to implement the SIMD spec (see https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md#saturating-integer-addition).
This commit is contained in:
@@ -464,6 +464,7 @@ pub(crate) fn define(
|
||||
let rotl_imm = shared.by_name("rotl_imm");
|
||||
let rotr = shared.by_name("rotr");
|
||||
let rotr_imm = shared.by_name("rotr_imm");
|
||||
let sadd_sat = shared.by_name("sadd_sat");
|
||||
let safepoint = shared.by_name("safepoint");
|
||||
let scalar_to_vector = shared.by_name("scalar_to_vector");
|
||||
let selectif = shared.by_name("selectif");
|
||||
@@ -490,6 +491,7 @@ pub(crate) fn define(
|
||||
let trueff = shared.by_name("trueff");
|
||||
let trueif = shared.by_name("trueif");
|
||||
let trunc = shared.by_name("trunc");
|
||||
let uadd_sat = shared.by_name("uadd_sat");
|
||||
let uextend = shared.by_name("uextend");
|
||||
let uload16 = shared.by_name("uload16");
|
||||
let uload16_complex = shared.by_name("uload16_complex");
|
||||
@@ -1939,6 +1941,24 @@ pub(crate) fn define(
|
||||
e.enc_32_64(iadd, rec_fa.opcodes(*opcodes));
|
||||
}
|
||||
|
||||
// SIMD integer saturating addition
|
||||
e.enc_32_64(
|
||||
sadd_sat.bind_vector_from_lane(I8, sse_vector_size),
|
||||
rec_fa.opcodes(&PADDSB),
|
||||
);
|
||||
e.enc_32_64(
|
||||
sadd_sat.bind_vector_from_lane(I16, sse_vector_size),
|
||||
rec_fa.opcodes(&PADDSW),
|
||||
);
|
||||
e.enc_32_64(
|
||||
uadd_sat.bind_vector_from_lane(I8, sse_vector_size),
|
||||
rec_fa.opcodes(&PADDUSB),
|
||||
);
|
||||
e.enc_32_64(
|
||||
uadd_sat.bind_vector_from_lane(I16, sse_vector_size),
|
||||
rec_fa.opcodes(&PADDUSW),
|
||||
);
|
||||
|
||||
// SIMD integer subtraction
|
||||
for (ty, opcodes) in &[(I8, &PSUBB), (I16, &PSUBW), (I32, &PSUBD), (I64, &PSUBQ)] {
|
||||
let isub = isub.bind_vector_from_lane(ty.clone(), sse_vector_size);
|
||||
|
||||
@@ -251,6 +251,18 @@ pub static PADDQ: [u8; 3] = [0x66, 0x0f, 0xd4];
|
||||
/// Add packed word integers from xmm2/m128 and xmm1 (SSE2).
|
||||
pub static PADDW: [u8; 3] = [0x66, 0x0f, 0xfd];
|
||||
|
||||
/// Add packed signed byte integers from xmm2/m128 and xmm1, saturating the results (SSE2).
|
||||
pub static PADDSB: [u8; 3] = [0x66, 0x0f, 0xec];
|
||||
|
||||
/// Add packed signed word integers from xmm2/m128 and xmm1, saturating the results (SSE2).
|
||||
pub static PADDSW: [u8; 3] = [0x66, 0x0f, 0xed];
|
||||
|
||||
/// Add packed unsigned byte integers from xmm2/m128 and xmm1, saturating the results (SSE2).
|
||||
pub static PADDUSB: [u8; 3] = [0x66, 0x0f, 0xdc];
|
||||
|
||||
/// Add packed unsigned word integers from xmm2/m128 and xmm1, saturating the results (SSE2).
|
||||
pub static PADDUSW: [u8; 3] = [0x66, 0x0f, 0xdd];
|
||||
|
||||
/// Compare packed data for equal (SSE2).
|
||||
pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74];
|
||||
|
||||
|
||||
Reference in New Issue
Block a user