x86 SIMD: legalize `sshr.i64x2` lane by lane.

The i64x2 arithmetic right shift is removed from the `x86_psra` legalization
loop (which now covers only i16x8 and i32x4) and is instead expanded into
extractlane / scalar `sshr` / insertlane for lane 0 and then lane 1.
A legalization filetest and a run filetest are added for the new expansion.

NOTE(review): line breaks and leading indentation were reconstructed to match
the hunk line counts; apply with whitespace tolerance if a context line
does not match exactly.

diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs
index 6bcc2b94f4..f38e4249bf 100644
--- a/cranelift/codegen/meta/src/isa/x86/legalize.rs
+++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs
@@ -493,8 +493,8 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
         );
     }
 
-    // SIMD shift right (arithmetic)
-    for ty in &[I16, I32, I64] {
+    // SIMD shift right (arithmetic, i16x8 and i32x4)
+    for ty in &[I16, I32] {
         let sshr = sshr.bind(vector(*ty, sse_vector_size));
         let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size));
         narrow.legalize(
@@ -502,6 +502,7 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
             vec![def!(b = bitcast_i64x2(y)), def!(a = x86_psra(x, b))],
         );
     }
+    // SIMD shift right (arithmetic, i8x16)
     {
         let sshr = sshr.bind(vector(I8, sse_vector_size));
         let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size));
@@ -526,6 +527,25 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
             ],
         );
     }
+    // SIMD shift right (arithmetic, i64x2)
+    {
+        let sshr_vector = sshr.bind(vector(I64, sse_vector_size));
+        let sshr_scalar_lane0 = sshr.bind(I64);
+        let sshr_scalar_lane1 = sshr.bind(I64);
+        narrow.legalize(
+            def!(z = sshr_vector(x, y)),
+            vec![
+                // Use scalar operations to shift the first lane.
+                def!(a = extractlane(x, uimm8_zero)),
+                def!(b = sshr_scalar_lane0(a, y)),
+                def!(c = insertlane(x, uimm8_zero, b)),
+                // Do the same for the second lane.
+                def!(d = extractlane(x, uimm8_one)),
+                def!(e = sshr_scalar_lane1(d, y)),
+                def!(z = insertlane(c, uimm8_one, e)),
+            ],
+        );
+    }
 
     // SIMD select
     for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif
index 0b14984ed6..102719351b 100644
--- a/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif
@@ -84,6 +84,20 @@ block0:
     return v2
 }
 
+function %sshr_i64x2() -> i64x2 {
+block0:
+    v0 = iconst.i32 1
+    v1 = vconst.i64x2 [1 2]
+    v2 = sshr v1, v0
+    ; check: v3 = x86_pextr v1, 0
+    ; nextln: v4 = sshr v3, v0
+    ; nextln: v5 = x86_pinsr v1, 0, v4
+    ; nextln: v6 = x86_pextr v1, 1
+    ; nextln: v7 = sshr v6, v0
+    ; nextln: v2 = x86_pinsr v5, 1, v7
+    return v2
+}
+
 function %bitselect_i16x8() -> i16x8 {
 block0:
     v0 = vconst.i16x8 [0 0 0 0 0 0 0 0]
diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-run.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-run.clif
index cceb63cddf..0f6ba31ed8 100644
--- a/cranelift/filetests/filetests/isa/x86/simd-bitwise-run.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-bitwise-run.clif
@@ -145,6 +145,16 @@ block0:
 }
 ; run
 
+function %sshr_i64x2(i64x2, i32) -> i64x2 {
+block0(v0:i64x2, v1:i32):
+    v2 = sshr v0, v1
+    return v2
+}
+; run: %sshr_i64x2([1 -1], 0) == [1 -1]
+; run: %sshr_i64x2([1 -1], 1) == [0 -1] ; note the -1 shift result
+; run: %sshr_i64x2([2 -2], 1) == [1 -1]
+; run: %sshr_i64x2([0x80000000_00000000 0x7FFFFFFF_FFFFFFFF], 63) == [0xFFFFFFFF_FFFFFFFF 0]
+
 function %bitselect_i8x16() -> b1 {
 block0:
     v0 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 255] ; the selector vector