diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index 01f25d9d7f..b752b6bfae 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -1940,6 +1940,12 @@ pub(crate) fn define( e.enc_32_64(iadd, rec_fa.opcodes(*opcodes)); } + // SIMD integer subtraction + for (ty, opcodes) in &[(I8, &PSUBB), (I16, &PSUBW), (I32, &PSUBD), (I64, &PSUBQ)] { + let isub = isub.bind_vector_from_lane(ty.clone(), sse_vector_size); + e.enc_32_64(isub, rec_fa.opcodes(*opcodes)); + } + // SIMD icmp using PCMPEQ* let mut pcmpeq_mapping: HashMap<u64, (&[u8], Option<SettingPredicateNumber>)> = HashMap::new(); pcmpeq_mapping.insert(8, (&PCMPEQB, None)); diff --git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs index 12b60e532b..6ae740883c 100644 --- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs +++ b/cranelift/codegen/meta/src/isa/x86/opcodes.rs @@ -294,6 +294,18 @@ pub static PSHUFB: [u8; 4] = [0x66, 0x0f, 0x38, 0x00]; /// store the result in xmm1 (SSE2). pub static PSHUFD: [u8; 3] = [0x66, 0x0f, 0x70]; +/// Subtract packed byte integers in xmm2/m128 from packed byte integers in xmm1 (SSE2). +pub static PSUBB: [u8; 3] = [0x66, 0x0f, 0xf8]; + +/// Subtract packed word integers in xmm2/m128 from packed word integers in xmm1 (SSE2). +pub static PSUBW: [u8; 3] = [0x66, 0x0f, 0xf9]; + +/// Subtract packed doubleword integers in xmm2/m128 from packed doubleword integers in xmm1 (SSE2). +pub static PSUBD: [u8; 3] = [0x66, 0x0f, 0xfa]; + +/// Subtract packed quadword integers in xmm2/m128 from xmm1 (SSE2). +pub static PSUBQ: [u8; 3] = [0x66, 0x0f, 0xfb]; + /// Push r{16,32,64}. 
pub static PUSH_REG: [u8; 1] = [0x50]; diff --git a/cranelift/filetests/filetests/isa/x86/iadd-simd.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic.clif similarity index 57% rename from cranelift/filetests/filetests/isa/x86/iadd-simd.clif rename to cranelift/filetests/filetests/isa/x86/simd-arithmetic.clif index 0884a97f55..64b6e854e7 100644 --- a/cranelift/filetests/filetests/isa/x86/iadd-simd.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic.clif @@ -48,3 +48,40 @@ ebb0(v0: i64x2 [%xmm3], v1: i64x2 [%xmm4]): [-, %xmm3] v2 = iadd v0, v1 ; bin: 66 0f d4 dc return v2 } + +function %isub_i32x4() -> b1 { +ebb0: +[-, %xmm3] v0 = vconst.i32x4 [1 1 1 1] +[-, %xmm5] v1 = vconst.i32x4 [1 2 3 4] +[-, %xmm3] v2 = isub v0, v1 ; bin: 66 0f fa dd + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 0 + + v5 = extractlane v2, 1 + v6 = icmp_imm eq v5, 0xffffffff + ; TODO replace extractlanes with vector comparison + + v7 = band v4, v6 + return v7 +} + +; run + +function %isub_i64x2(i64x2, i64x2) -> i64x2 { +ebb0(v0: i64x2 [%xmm0], v1: i64x2 [%xmm1]): +[-, %xmm0] v2 = isub v0, v1 ; bin: 66 0f fb c1 + return v2 +} + +function %isub_i16x8(i16x8, i16x8) -> i16x8 { +ebb0(v0: i16x8 [%xmm3], v1: i16x8 [%xmm4]): +[-, %xmm3] v2 = isub v0, v1 ; bin: 66 0f f9 dc + return v2 +} + +function %isub_i8x16(i8x16, i8x16) -> i8x16 { +ebb0(v0: i8x16 [%xmm3], v1: i8x16 [%xmm4]): +[-, %xmm3] v2 = isub v0, v1 ; bin: 66 0f f8 dc + return v2 +} diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs index 3c004e363c..9dbd9d7463 100644 --- a/cranelift/wasm/src/code_translator.rs +++ b/cranelift/wasm/src/code_translator.rs @@ -1000,6 +1000,10 @@ pub fn translate_operator( let (a, b) = state.pop2(); state.push1(builder.ins().iadd(a, b)) } + Operator::I8x16Sub | Operator::I16x8Sub | Operator::I32x4Sub | Operator::I64x2Sub => { + let (a, b) = state.pop2(); + state.push1(builder.ins().isub(a, b)) + } Operator::I8x16Eq | Operator::I8x16Ne | 
Operator::I8x16LtS @@ -1055,7 +1059,6 @@ pub fn translate_operator( | Operator::I8x16ShrU | Operator::I8x16AddSaturateS | Operator::I8x16AddSaturateU - | Operator::I8x16Sub | Operator::I8x16SubSaturateS | Operator::I8x16SubSaturateU | Operator::I8x16Mul @@ -1067,7 +1070,6 @@ pub fn translate_operator( | Operator::I16x8ShrU | Operator::I16x8AddSaturateS | Operator::I16x8AddSaturateU - | Operator::I16x8Sub | Operator::I16x8SubSaturateS | Operator::I16x8SubSaturateU | Operator::I16x8Mul @@ -1077,7 +1079,6 @@ pub fn translate_operator( | Operator::I32x4Shl | Operator::I32x4ShrS | Operator::I32x4ShrU - | Operator::I32x4Sub | Operator::I32x4Mul | Operator::I64x2Neg | Operator::I64x2AnyTrue @@ -1085,7 +1086,6 @@ pub fn translate_operator( | Operator::I64x2Shl | Operator::I64x2ShrS | Operator::I64x2ShrU - | Operator::I64x2Sub | Operator::F32x4Abs | Operator::F32x4Neg | Operator::F32x4Sqrt