Add saturating addition with a SIMD encoding
This adds the new instructions `sadd_sat` and `uadd_sat` and encodes them only for the i8x16 and i16x8 types, which are the types needed to implement the SIMD spec (see https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md#saturating-integer-addition).
This commit is contained in:
@@ -464,6 +464,7 @@ pub(crate) fn define(
|
|||||||
let rotl_imm = shared.by_name("rotl_imm");
|
let rotl_imm = shared.by_name("rotl_imm");
|
||||||
let rotr = shared.by_name("rotr");
|
let rotr = shared.by_name("rotr");
|
||||||
let rotr_imm = shared.by_name("rotr_imm");
|
let rotr_imm = shared.by_name("rotr_imm");
|
||||||
|
let sadd_sat = shared.by_name("sadd_sat");
|
||||||
let safepoint = shared.by_name("safepoint");
|
let safepoint = shared.by_name("safepoint");
|
||||||
let scalar_to_vector = shared.by_name("scalar_to_vector");
|
let scalar_to_vector = shared.by_name("scalar_to_vector");
|
||||||
let selectif = shared.by_name("selectif");
|
let selectif = shared.by_name("selectif");
|
||||||
@@ -490,6 +491,7 @@ pub(crate) fn define(
|
|||||||
let trueff = shared.by_name("trueff");
|
let trueff = shared.by_name("trueff");
|
||||||
let trueif = shared.by_name("trueif");
|
let trueif = shared.by_name("trueif");
|
||||||
let trunc = shared.by_name("trunc");
|
let trunc = shared.by_name("trunc");
|
||||||
|
let uadd_sat = shared.by_name("uadd_sat");
|
||||||
let uextend = shared.by_name("uextend");
|
let uextend = shared.by_name("uextend");
|
||||||
let uload16 = shared.by_name("uload16");
|
let uload16 = shared.by_name("uload16");
|
||||||
let uload16_complex = shared.by_name("uload16_complex");
|
let uload16_complex = shared.by_name("uload16_complex");
|
||||||
@@ -1939,6 +1941,24 @@ pub(crate) fn define(
|
|||||||
e.enc_32_64(iadd, rec_fa.opcodes(*opcodes));
|
e.enc_32_64(iadd, rec_fa.opcodes(*opcodes));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SIMD integer saturating addition
|
||||||
|
e.enc_32_64(
|
||||||
|
sadd_sat.bind_vector_from_lane(I8, sse_vector_size),
|
||||||
|
rec_fa.opcodes(&PADDSB),
|
||||||
|
);
|
||||||
|
e.enc_32_64(
|
||||||
|
sadd_sat.bind_vector_from_lane(I16, sse_vector_size),
|
||||||
|
rec_fa.opcodes(&PADDSW),
|
||||||
|
);
|
||||||
|
e.enc_32_64(
|
||||||
|
uadd_sat.bind_vector_from_lane(I8, sse_vector_size),
|
||||||
|
rec_fa.opcodes(&PADDUSB),
|
||||||
|
);
|
||||||
|
e.enc_32_64(
|
||||||
|
uadd_sat.bind_vector_from_lane(I16, sse_vector_size),
|
||||||
|
rec_fa.opcodes(&PADDUSW),
|
||||||
|
);
|
||||||
|
|
||||||
// SIMD integer subtraction
|
// SIMD integer subtraction
|
||||||
for (ty, opcodes) in &[(I8, &PSUBB), (I16, &PSUBW), (I32, &PSUBD), (I64, &PSUBQ)] {
|
for (ty, opcodes) in &[(I8, &PSUBB), (I16, &PSUBW), (I32, &PSUBD), (I64, &PSUBQ)] {
|
||||||
let isub = isub.bind_vector_from_lane(ty.clone(), sse_vector_size);
|
let isub = isub.bind_vector_from_lane(ty.clone(), sse_vector_size);
|
||||||
|
|||||||
@@ -251,6 +251,18 @@ pub static PADDQ: [u8; 3] = [0x66, 0x0f, 0xd4];
|
|||||||
/// Add packed word integers from xmm2/m128 and xmm1 (SSE2).
|
/// Add packed word integers from xmm2/m128 and xmm1 (SSE2).
|
||||||
pub static PADDW: [u8; 3] = [0x66, 0x0f, 0xfd];
|
pub static PADDW: [u8; 3] = [0x66, 0x0f, 0xfd];
|
||||||
|
|
||||||
|
/// Add packed signed byte integers from xmm2/m128 and xmm1 and saturate the results (SSE2).
|
||||||
|
pub static PADDSB: [u8; 3] = [0x66, 0x0f, 0xec];
|
||||||
|
|
||||||
|
/// Add packed signed word integers from xmm2/m128 and xmm1 and saturate the results (SSE2).
|
||||||
|
pub static PADDSW: [u8; 3] = [0x66, 0x0f, 0xed];
|
||||||
|
|
||||||
|
/// Add packed unsigned byte integers from xmm2/m128 and xmm1 and saturate the results (SSE2).
|
||||||
|
pub static PADDUSB: [u8; 3] = [0x66, 0x0f, 0xdc];
|
||||||
|
|
||||||
|
/// Add packed unsigned word integers from xmm2/m128 and xmm1 and saturate the results (SSE2).
|
||||||
|
pub static PADDUSW: [u8; 3] = [0x66, 0x0f, 0xdd];
|
||||||
|
|
||||||
/// Compare packed data for equal (SSE2).
|
/// Compare packed data for equal (SSE2).
|
||||||
pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74];
|
pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74];
|
||||||
|
|
||||||
|
|||||||
@@ -1690,6 +1690,38 @@ pub(crate) fn define(
|
|||||||
.operands_out(vec![a]),
|
.operands_out(vec![a]),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"uadd_sat",
|
||||||
|
r#"
|
||||||
|
Add with unsigned saturation.
|
||||||
|
|
||||||
|
This is similar to `iadd` but the operands are interpreted as unsigned integers and their
|
||||||
|
summed result, instead of wrapping, will be saturated to the highest unsigned integer for
|
||||||
|
the controlling type (e.g. `0xFF` for i8).
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.operands_in(vec![x, y])
|
||||||
|
.operands_out(vec![a]),
|
||||||
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"sadd_sat",
|
||||||
|
r#"
|
||||||
|
Add with signed saturation.
|
||||||
|
|
||||||
|
This is similar to `iadd` but the operands are interpreted as signed integers and their
|
||||||
|
summed result, instead of wrapping, will be saturated to the lowest or highest
|
||||||
|
signed integer for the controlling type (e.g. `0x80` or `0x7F` for i8). For example,
|
||||||
|
since an `sadd_sat.i8` of `0x70` and `0x70` is greater than `0x7F`, the result will be
|
||||||
|
clamped to `0x7F`.
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.operands_in(vec![x, y])
|
||||||
|
.operands_out(vec![a]),
|
||||||
|
);
|
||||||
|
|
||||||
ig.push(
|
ig.push(
|
||||||
Inst::new(
|
Inst::new(
|
||||||
"isub",
|
"isub",
|
||||||
|
|||||||
@@ -164,3 +164,29 @@ ebb0:
|
|||||||
return v4
|
return v4
|
||||||
}
|
}
|
||||||
; run
|
; run
|
||||||
|
|
||||||
|
function %sadd_sat_i8x16() -> b1 {
|
||||||
|
ebb0:
|
||||||
|
[-, %xmm2] v0 = vconst.i8x16 [127 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
|
||||||
|
[-, %xmm3] v1 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
|
||||||
|
|
||||||
|
[-, %xmm2] v2 = sadd_sat v0, v1 ; bin: 66 0f ec d3
|
||||||
|
v3 = extractlane v2, 0
|
||||||
|
v4 = icmp_imm eq v3, 127
|
||||||
|
|
||||||
|
return v4
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|
||||||
|
function %uadd_sat_i16x8() -> b1 {
|
||||||
|
ebb0:
|
||||||
|
[-, %xmm2] v0 = vconst.i16x8 [-1 0 0 0 0 0 0 0]
|
||||||
|
[-, %xmm3] v1 = vconst.i16x8 [-1 1 1 1 1 1 1 1]
|
||||||
|
|
||||||
|
[-, %xmm2] v2 = uadd_sat v0, v1 ; bin: 66 0f dd d3
|
||||||
|
v3 = extractlane v2, 0
|
||||||
|
v4 = icmp_imm eq v3, 65535
|
||||||
|
|
||||||
|
return v4
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|||||||
@@ -1000,6 +1000,14 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
|
|||||||
let (a, b) = state.pop2();
|
let (a, b) = state.pop2();
|
||||||
state.push1(builder.ins().iadd(a, b))
|
state.push1(builder.ins().iadd(a, b))
|
||||||
}
|
}
|
||||||
|
Operator::I8x16AddSaturateS | Operator::I16x8AddSaturateS => {
|
||||||
|
let (a, b) = state.pop2();
|
||||||
|
state.push1(builder.ins().sadd_sat(a, b))
|
||||||
|
}
|
||||||
|
Operator::I8x16AddSaturateU | Operator::I16x8AddSaturateU => {
|
||||||
|
let (a, b) = state.pop2();
|
||||||
|
state.push1(builder.ins().uadd_sat(a, b))
|
||||||
|
}
|
||||||
Operator::I8x16Sub | Operator::I16x8Sub | Operator::I32x4Sub | Operator::I64x2Sub => {
|
Operator::I8x16Sub | Operator::I16x8Sub | Operator::I32x4Sub | Operator::I64x2Sub => {
|
||||||
let (a, b) = state.pop2();
|
let (a, b) = state.pop2();
|
||||||
state.push1(builder.ins().isub(a, b))
|
state.push1(builder.ins().isub(a, b))
|
||||||
@@ -1064,8 +1072,6 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
|
|||||||
| Operator::I8x16Shl
|
| Operator::I8x16Shl
|
||||||
| Operator::I8x16ShrS
|
| Operator::I8x16ShrS
|
||||||
| Operator::I8x16ShrU
|
| Operator::I8x16ShrU
|
||||||
| Operator::I8x16AddSaturateS
|
|
||||||
| Operator::I8x16AddSaturateU
|
|
||||||
| Operator::I8x16SubSaturateS
|
| Operator::I8x16SubSaturateS
|
||||||
| Operator::I8x16SubSaturateU
|
| Operator::I8x16SubSaturateU
|
||||||
| Operator::I8x16Mul
|
| Operator::I8x16Mul
|
||||||
@@ -1074,8 +1080,6 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
|
|||||||
| Operator::I16x8Shl
|
| Operator::I16x8Shl
|
||||||
| Operator::I16x8ShrS
|
| Operator::I16x8ShrS
|
||||||
| Operator::I16x8ShrU
|
| Operator::I16x8ShrU
|
||||||
| Operator::I16x8AddSaturateS
|
|
||||||
| Operator::I16x8AddSaturateU
|
|
||||||
| Operator::I16x8SubSaturateS
|
| Operator::I16x8SubSaturateS
|
||||||
| Operator::I16x8SubSaturateU
|
| Operator::I16x8SubSaturateU
|
||||||
| Operator::I32x4AnyTrue
|
| Operator::I32x4AnyTrue
|
||||||
|
|||||||
Reference in New Issue
Block a user