diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index f24232f39a..81e2e48c58 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -1560,6 +1560,7 @@ fn define_simd( let formats = &shared_defs.formats; // Shorthands for instructions. + let avg_round = shared.by_name("avg_round"); let bitcast = shared.by_name("bitcast"); let bor = shared.by_name("bor"); let bxor = shared.by_name("bxor"); @@ -1926,6 +1927,12 @@ fn define_simd( e.enc_32_64_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap); } + // SIMD integer average with rounding. + for (ty, opcodes) in &[(I8, &PAVGB[..]), (I16, &PAVGW[..])] { + let avgr = avg_round.bind(vector(*ty, sse_vector_size)); + e.enc_32_64(avgr, rec_fa.opcodes(opcodes)); + } + // SIMD logical operations let band = shared.by_name("band"); let band_not = shared.by_name("band_not"); diff --git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs index 9006ce92cf..5bd4153414 100644 --- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs +++ b/cranelift/codegen/meta/src/isa/x86/opcodes.rs @@ -317,6 +317,12 @@ pub static PAND: [u8; 3] = [0x66, 0x0f, 0xdb]; /// Bitwise AND NOT of xmm2/m128 and xmm1 (SSE2). pub static PANDN: [u8; 3] = [0x66, 0x0f, 0xdf]; +/// Average packed unsigned byte integers from xmm2/m128 and xmm1 with rounding (SSE2). +pub static PAVGB: [u8; 3] = [0x66, 0x0f, 0xE0]; + +/// Average packed unsigned word integers from xmm2/m128 and xmm1 with rounding (SSE2). +pub static PAVGW: [u8; 3] = [0x66, 0x0f, 0xE3]; + /// Compare packed data for equal (SSE2). 
pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74]; diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index d9ff3f04e3..471ad85a56 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -548,6 +548,32 @@ fn define_simd_arithmetic( .operands_in(vec![x, y]) .operands_out(vec![a]), ); + + let IxN = &TypeVar::new( + "IxN", + "A SIMD vector type containing integers", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + + let a = &Operand::new("a", IxN); + let x = &Operand::new("x", IxN); + let y = &Operand::new("y", IxN); + + ig.push( + Inst::new( + "avg_round", + r#" + Unsigned average with rounding: `a := (x + y + 1) // 2` + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); } #[allow(clippy::many_single_char_names)] @@ -627,7 +653,6 @@ pub(crate) fn define( .includes_scalars(false) .build(), ); - let Any = &TypeVar::new( "Any", "Any integer, float, boolean, or reference scalar or vector type", diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif index 9f5b4f0080..85797d9a4b 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif @@ -197,3 +197,15 @@ block0(v0: f64x2 [%xmm3], v1: f64x2 [%xmm5]): [-, %xmm3] v8 = sqrt v0 ; bin: 66 40 0f 51 db return } + +function %average_rounding_i8x16(i8x16, i8x16) { +block0(v0: i8x16 [%xmm6], v1: i8x16 [%xmm2]): +[-, %xmm6] v2 = avg_round v0, v1 ; bin: 66 0f e0 f2 + return +} + +function %average_rounding_i16x8(i16x8, i16x8) { +block0(v0: i16x8 [%xmm6], v1: i16x8 [%xmm2]): +[-, %xmm6] v2 = avg_round v0, v1 ; bin: 66 0f e3 f2 + return +} diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif 
b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif index 971f5c9bdb..3403815154 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif @@ -265,3 +265,17 @@ block0: return v4 } ; run + +function %average_rounding_i16x8() -> b1 { +block0: + v0 = vconst.i16x8 [0 0 0 1 42 19 -1 -1] + v1 = vconst.i16x8 [0 1 2 4 42 18 -1 0] + v2 = vconst.i16x8 [0 1 1 3 42 19 -1 -32768] ; lanes are averaged as unsigned: (0xffff + 0 + 1) / 2 == 0x8000, which reads back as -32768 + + v3 = avg_round v0, v1 + v4 = icmp eq v2, v3 + v5 = vall_true v4 + + return v5 +} +; run