Add saturating subtraction with a SIMD encoding
This adds the new instructions `ssub_sat` and `usub_sat` and encodes them only for the i8x16 and i16x8 types, which are the only types needed to implement the SIMD spec (see https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md#saturating-integer-subtraction).
This commit is contained in:
@@ -480,6 +480,7 @@ pub(crate) fn define(
|
|||||||
let sqrt = shared.by_name("sqrt");
|
let sqrt = shared.by_name("sqrt");
|
||||||
let sshr = shared.by_name("sshr");
|
let sshr = shared.by_name("sshr");
|
||||||
let sshr_imm = shared.by_name("sshr_imm");
|
let sshr_imm = shared.by_name("sshr_imm");
|
||||||
|
let ssub_sat = shared.by_name("ssub_sat");
|
||||||
let stack_addr = shared.by_name("stack_addr");
|
let stack_addr = shared.by_name("stack_addr");
|
||||||
let store = shared.by_name("store");
|
let store = shared.by_name("store");
|
||||||
let store_complex = shared.by_name("store_complex");
|
let store_complex = shared.by_name("store_complex");
|
||||||
@@ -501,6 +502,7 @@ pub(crate) fn define(
|
|||||||
let uload8_complex = shared.by_name("uload8_complex");
|
let uload8_complex = shared.by_name("uload8_complex");
|
||||||
let ushr = shared.by_name("ushr");
|
let ushr = shared.by_name("ushr");
|
||||||
let ushr_imm = shared.by_name("ushr_imm");
|
let ushr_imm = shared.by_name("ushr_imm");
|
||||||
|
let usub_sat = shared.by_name("usub_sat");
|
||||||
let vconst = shared.by_name("vconst");
|
let vconst = shared.by_name("vconst");
|
||||||
let x86_bsf = x86.by_name("x86_bsf");
|
let x86_bsf = x86.by_name("x86_bsf");
|
||||||
let x86_bsr = x86.by_name("x86_bsr");
|
let x86_bsr = x86.by_name("x86_bsr");
|
||||||
@@ -1965,6 +1967,24 @@ pub(crate) fn define(
|
|||||||
e.enc_32_64(isub, rec_fa.opcodes(*opcodes));
|
e.enc_32_64(isub, rec_fa.opcodes(*opcodes));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SIMD integer saturating subtraction
|
||||||
|
e.enc_32_64(
|
||||||
|
ssub_sat.bind_vector_from_lane(I8, sse_vector_size),
|
||||||
|
rec_fa.opcodes(&PSUBSB),
|
||||||
|
);
|
||||||
|
e.enc_32_64(
|
||||||
|
ssub_sat.bind_vector_from_lane(I16, sse_vector_size),
|
||||||
|
rec_fa.opcodes(&PSUBSW),
|
||||||
|
);
|
||||||
|
e.enc_32_64(
|
||||||
|
usub_sat.bind_vector_from_lane(I8, sse_vector_size),
|
||||||
|
rec_fa.opcodes(&PSUBUSB),
|
||||||
|
);
|
||||||
|
e.enc_32_64(
|
||||||
|
usub_sat.bind_vector_from_lane(I16, sse_vector_size),
|
||||||
|
rec_fa.opcodes(&PSUBUSW),
|
||||||
|
);
|
||||||
|
|
||||||
// SIMD integer multiplication: the x86 ISA does not have instructions for multiplying I8x16
|
// SIMD integer multiplication: the x86 ISA does not have instructions for multiplying I8x16
|
||||||
// and I64x2 and these are (at the time of writing) not necessary for WASM SIMD.
|
// and I64x2 and these are (at the time of writing) not necessary for WASM SIMD.
|
||||||
for (ty, opcodes, isap) in &[
|
for (ty, opcodes, isap) in &[
|
||||||
|
|||||||
@@ -326,6 +326,22 @@ pub static PSUBD: [u8; 3] = [0x66, 0x0f, 0xfa];
|
|||||||
/// Subtract packed quadword integers in xmm2/m128 from xmm1 (SSE2).
|
/// Subtract packed quadword integers in xmm2/m128 from xmm1 (SSE2).
|
||||||
pub static PSUBQ: [u8; 3] = [0x66, 0x0f, 0xfb];
|
pub static PSUBQ: [u8; 3] = [0x66, 0x0f, 0xfb];
|
||||||
|
|
||||||
|
/// Subtract packed signed byte integers in xmm2/m128 from packed signed byte integers in xmm1
|
||||||
|
/// and saturate results (SSE2).
|
||||||
|
pub static PSUBSB: [u8; 3] = [0x66, 0x0f, 0xe8];
|
||||||
|
|
||||||
|
/// Subtract packed signed word integers in xmm2/m128 from packed signed word integers in xmm1
|
||||||
|
/// and saturate results (SSE2).
|
||||||
|
pub static PSUBSW: [u8; 3] = [0x66, 0x0f, 0xe9];
|
||||||
|
|
||||||
|
/// Subtract packed unsigned byte integers in xmm2/m128 from packed unsigned byte integers in xmm1
|
||||||
|
/// and saturate results (SSE2).
|
||||||
|
pub static PSUBUSB: [u8; 3] = [0x66, 0x0f, 0xd8];
|
||||||
|
|
||||||
|
/// Subtract packed unsigned word integers in xmm2/m128 from packed unsigned word integers in xmm1
|
||||||
|
/// and saturate results (SSE2).
|
||||||
|
pub static PSUBUSW: [u8; 3] = [0x66, 0x0f, 0xd9];
|
||||||
|
|
||||||
/// Push r{16,32,64}.
|
/// Push r{16,32,64}.
|
||||||
pub static PUSH_REG: [u8; 1] = [0x50];
|
pub static PUSH_REG: [u8; 1] = [0x50];
|
||||||
|
|
||||||
|
|||||||
@@ -1736,6 +1736,36 @@ pub(crate) fn define(
|
|||||||
.operands_out(vec![a]),
|
.operands_out(vec![a]),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"usub_sat",
|
||||||
|
r#"
|
||||||
|
Subtract with unsigned saturation.
|
||||||
|
|
||||||
|
This is similar to `isub` but the operands are interpreted as unsigned integers and their
|
||||||
|
difference, instead of wrapping, will be saturated to the lowest unsigned integer for
|
||||||
|
the controlling type (e.g. `0x00` for i8).
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.operands_in(vec![x, y])
|
||||||
|
.operands_out(vec![a]),
|
||||||
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"ssub_sat",
|
||||||
|
r#"
|
||||||
|
Subtract with signed saturation.
|
||||||
|
|
||||||
|
This is similar to `isub` but the operands are interpreted as signed integers and their
|
||||||
|
difference, instead of wrapping, will be saturated to the lowest or highest
|
||||||
|
signed integer for the controlling type (e.g. `0x80` or `0x7F` for i8).
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.operands_in(vec![x, y])
|
||||||
|
.operands_out(vec![a]),
|
||||||
|
);
|
||||||
|
|
||||||
ig.push(
|
ig.push(
|
||||||
Inst::new(
|
Inst::new(
|
||||||
"ineg",
|
"ineg",
|
||||||
|
|||||||
@@ -190,3 +190,31 @@ ebb0:
|
|||||||
return v4
|
return v4
|
||||||
}
|
}
|
||||||
; run
|
; run
|
||||||
|
|
||||||
|
function %sub_sat_i8x16() -> b1 {
|
||||||
|
ebb0:
|
||||||
|
[-, %xmm2] v0 = vconst.i8x16 [128 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] ; 128 == 0x80 == -128
|
||||||
|
[-, %xmm3] v1 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
|
||||||
|
|
||||||
|
[-, %xmm2] v2 = ssub_sat v0, v1 ; bin: 66 0f e8 d3
|
||||||
|
v3 = extractlane v2, 0
|
||||||
|
v4 = icmp_imm eq v3, 0x80 ; still -128, TODO it's unclear why I can't use -128 here
|
||||||
|
|
||||||
|
; now re-use 0x80 as an unsigned 128
|
||||||
|
[-, %xmm2] v5 = usub_sat v0, v2 ; bin: 66 0f d8 d2
|
||||||
|
v6 = extractlane v5, 0
|
||||||
|
v7 = icmp_imm eq v6, 0
|
||||||
|
|
||||||
|
v8 = band v4, v7
|
||||||
|
return v8
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|
||||||
|
function %sub_sat_i16x8() {
|
||||||
|
ebb0:
|
||||||
|
[-, %xmm3] v0 = vconst.i16x8 [0 0 0 0 0 0 0 0]
|
||||||
|
[-, %xmm5] v1 = vconst.i16x8 [1 1 1 1 1 1 1 1]
|
||||||
|
[-, %xmm3] v2 = ssub_sat v0, v1 ; bin: 66 0f e9 dd
|
||||||
|
[-, %xmm3] v3 = usub_sat v0, v1 ; bin: 66 0f d9 dd
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|||||||
@@ -1012,6 +1012,14 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
|
|||||||
let (a, b) = state.pop2();
|
let (a, b) = state.pop2();
|
||||||
state.push1(builder.ins().isub(a, b))
|
state.push1(builder.ins().isub(a, b))
|
||||||
}
|
}
|
||||||
|
Operator::I8x16SubSaturateS | Operator::I16x8SubSaturateS => {
|
||||||
|
let (a, b) = state.pop2();
|
||||||
|
state.push1(builder.ins().ssub_sat(a, b))
|
||||||
|
}
|
||||||
|
Operator::I8x16SubSaturateU | Operator::I16x8SubSaturateU => {
|
||||||
|
let (a, b) = state.pop2();
|
||||||
|
state.push1(builder.ins().usub_sat(a, b))
|
||||||
|
}
|
||||||
Operator::I8x16Neg | Operator::I16x8Neg | Operator::I32x4Neg | Operator::I64x2Neg => {
|
Operator::I8x16Neg | Operator::I16x8Neg | Operator::I32x4Neg | Operator::I64x2Neg => {
|
||||||
let a = state.pop1();
|
let a = state.pop1();
|
||||||
state.push1(builder.ins().ineg(a))
|
state.push1(builder.ins().ineg(a))
|
||||||
@@ -1072,16 +1080,12 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
|
|||||||
| Operator::I8x16Shl
|
| Operator::I8x16Shl
|
||||||
| Operator::I8x16ShrS
|
| Operator::I8x16ShrS
|
||||||
| Operator::I8x16ShrU
|
| Operator::I8x16ShrU
|
||||||
| Operator::I8x16SubSaturateS
|
|
||||||
| Operator::I8x16SubSaturateU
|
|
||||||
| Operator::I8x16Mul
|
| Operator::I8x16Mul
|
||||||
| Operator::I16x8AnyTrue
|
| Operator::I16x8AnyTrue
|
||||||
| Operator::I16x8AllTrue
|
| Operator::I16x8AllTrue
|
||||||
| Operator::I16x8Shl
|
| Operator::I16x8Shl
|
||||||
| Operator::I16x8ShrS
|
| Operator::I16x8ShrS
|
||||||
| Operator::I16x8ShrU
|
| Operator::I16x8ShrU
|
||||||
| Operator::I16x8SubSaturateS
|
|
||||||
| Operator::I16x8SubSaturateU
|
|
||||||
| Operator::I32x4AnyTrue
|
| Operator::I32x4AnyTrue
|
||||||
| Operator::I32x4AllTrue
|
| Operator::I32x4AllTrue
|
||||||
| Operator::I32x4Shl
|
| Operator::I32x4Shl
|
||||||
|
|||||||
Reference in New Issue
Block a user