diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index 680e0f0764..7a2d725030 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -464,6 +464,7 @@ pub(crate) fn define( let rotl_imm = shared.by_name("rotl_imm"); let rotr = shared.by_name("rotr"); let rotr_imm = shared.by_name("rotr_imm"); + let sadd_sat = shared.by_name("sadd_sat"); let safepoint = shared.by_name("safepoint"); let scalar_to_vector = shared.by_name("scalar_to_vector"); let selectif = shared.by_name("selectif"); @@ -490,6 +491,7 @@ pub(crate) fn define( let trueff = shared.by_name("trueff"); let trueif = shared.by_name("trueif"); let trunc = shared.by_name("trunc"); + let uadd_sat = shared.by_name("uadd_sat"); let uextend = shared.by_name("uextend"); let uload16 = shared.by_name("uload16"); let uload16_complex = shared.by_name("uload16_complex"); @@ -1939,6 +1941,24 @@ pub(crate) fn define( e.enc_32_64(iadd, rec_fa.opcodes(*opcodes)); } + // SIMD integer saturating addition + e.enc_32_64( + sadd_sat.bind_vector_from_lane(I8, sse_vector_size), + rec_fa.opcodes(&PADDSB), + ); + e.enc_32_64( + sadd_sat.bind_vector_from_lane(I16, sse_vector_size), + rec_fa.opcodes(&PADDSW), + ); + e.enc_32_64( + uadd_sat.bind_vector_from_lane(I8, sse_vector_size), + rec_fa.opcodes(&PADDUSB), + ); + e.enc_32_64( + uadd_sat.bind_vector_from_lane(I16, sse_vector_size), + rec_fa.opcodes(&PADDUSW), + ); + // SIMD integer subtraction for (ty, opcodes) in &[(I8, &PSUBB), (I16, &PSUBW), (I32, &PSUBD), (I64, &PSUBQ)] { let isub = isub.bind_vector_from_lane(ty.clone(), sse_vector_size); diff --git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs index 33b7d71c38..9d93f0cf14 100644 --- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs +++ b/cranelift/codegen/meta/src/isa/x86/opcodes.rs @@ -251,6 +251,18 @@ pub static PADDQ: [u8; 3] = [0x66, 0x0f, 0xd4]; /// Add 
packed word integers from xmm2/m128 and xmm1 (SSE2). pub static PADDW: [u8; 3] = [0x66, 0x0f, 0xfd]; +/// Add packed signed byte integers from xmm2/m128 and xmm1 and saturate the results (SSE2). +pub static PADDSB: [u8; 3] = [0x66, 0x0f, 0xec]; + +/// Add packed signed word integers from xmm2/m128 and xmm1 and saturate the results (SSE2). +pub static PADDSW: [u8; 3] = [0x66, 0x0f, 0xed]; + +/// Add packed unsigned byte integers from xmm2/m128 and xmm1 and saturate the results (SSE2). +pub static PADDUSB: [u8; 3] = [0x66, 0x0f, 0xdc]; + +/// Add packed unsigned word integers from xmm2/m128 and xmm1 and saturate the results (SSE2). +pub static PADDUSW: [u8; 3] = [0x66, 0x0f, 0xdd]; + /// Compare packed data for equal (SSE2). pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74]; diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index 49c7900e9b..27eb3a3498 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -1690,6 +1690,38 @@ pub(crate) fn define( .operands_out(vec![a]), ); + ig.push( + Inst::new( + "uadd_sat", + r#" + Add with unsigned saturation. + + This is similar to `iadd` but the operands are interpreted as unsigned integers and their + summed result, instead of wrapping, will be saturated to the highest unsigned integer for + the controlling type (e.g. `0xFF` for i8). + "#, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "sadd_sat", + r#" + Add with signed saturation. + + This is similar to `iadd` but the operands are interpreted as signed integers and their + summed result, instead of wrapping, will be saturated to the lowest or highest + signed integer for the controlling type (e.g. `0x80` or `0x7F` for i8). For example, + since a `sadd_sat.i8` of `0x70` and `0x70` is greater than `0x7F`, the result will be + clamped to `0x7F`. 
+ "#, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + ig.push( Inst::new( "isub", diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic.clif index e2714a91dc..c82c5deb68 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic.clif @@ -164,3 +164,29 @@ ebb0: return v4 } ; run + +function %sadd_sat_i8x16() -> b1 { +ebb0: +[-, %xmm2] v0 = vconst.i8x16 [127 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] +[-, %xmm3] v1 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] + +[-, %xmm2] v2 = sadd_sat v0, v1 ; bin: 66 0f ec d3 + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 127 + + return v4 +} +; run + +function %uadd_sat_i16x8() -> b1 { +ebb0: +[-, %xmm2] v0 = vconst.i16x8 [-1 0 0 0 0 0 0 0] +[-, %xmm3] v1 = vconst.i16x8 [-1 1 1 1 1 1 1 1] + +[-, %xmm2] v2 = uadd_sat v0, v1 ; bin: 66 0f dd d3 + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 65535 + + return v4 +} +; run diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs index 8313212af9..85623f95fb 100644 --- a/cranelift/wasm/src/code_translator.rs +++ b/cranelift/wasm/src/code_translator.rs @@ -1000,6 +1000,14 @@ pub fn translate_operator( let (a, b) = state.pop2(); state.push1(builder.ins().iadd(a, b)) } + Operator::I8x16AddSaturateS | Operator::I16x8AddSaturateS => { + let (a, b) = state.pop2(); + state.push1(builder.ins().sadd_sat(a, b)) + } + Operator::I8x16AddSaturateU | Operator::I16x8AddSaturateU => { + let (a, b) = state.pop2(); + state.push1(builder.ins().uadd_sat(a, b)) + } Operator::I8x16Sub | Operator::I16x8Sub | Operator::I32x4Sub | Operator::I64x2Sub => { let (a, b) = state.pop2(); state.push1(builder.ins().isub(a, b)) @@ -1064,8 +1072,6 @@ pub fn translate_operator( | Operator::I8x16Shl | Operator::I8x16ShrS | Operator::I8x16ShrU - | Operator::I8x16AddSaturateS - | Operator::I8x16AddSaturateU | Operator::I8x16SubSaturateS | 
Operator::I8x16SubSaturateU | Operator::I8x16Mul @@ -1074,8 +1080,6 @@ pub fn translate_operator( | Operator::I16x8Shl | Operator::I16x8ShrS | Operator::I16x8ShrU - | Operator::I16x8AddSaturateS - | Operator::I16x8AddSaturateU | Operator::I16x8SubSaturateS | Operator::I16x8SubSaturateU | Operator::I32x4AnyTrue