diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs
index 0f380ccbfe..002963035f 100644
--- a/cranelift/codegen/meta/src/isa/x86/encodings.rs
+++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs
@@ -553,6 +553,7 @@ pub(crate) fn define(
     let rec_copysp = r.template("copysp");
     let rec_div = r.template("div");
     let rec_debugtrap = r.recipe("debugtrap");
+    let rec_f_ib = r.template("f_ib");
     let rec_f32imm_z = r.template("f32imm_z");
     let rec_f64imm_z = r.template("f64imm_z");
     let rec_fa = r.template("fa");
@@ -2033,6 +2034,22 @@ pub(crate) fn define(
         e.enc_32_64(x86_psra, rec_fa.opcodes(*opcodes));
     }
 
+    // SIMD immediate shift
+    for (ty, opcodes) in &[(I16, &PS_W_IMM), (I32, &PS_D_IMM), (I64, &PS_Q_IMM)] {
+        let ishl_imm = ishl_imm.bind(vector(*ty, sse_vector_size));
+        e.enc_32_64(ishl_imm, rec_f_ib.opcodes(*opcodes).rrr(6));
+
+        let ushr_imm = ushr_imm.bind(vector(*ty, sse_vector_size));
+        e.enc_32_64(ushr_imm, rec_f_ib.opcodes(*opcodes).rrr(2));
+
+        // SSE2 has no arithmetic right shift for quadwords (66 0F 73 /4 is not a valid
+        // instruction), so `sshr_imm` is only encodable for 16- and 32-bit lanes.
+        if *ty != I64 {
+            let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size));
+            e.enc_32_64(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4));
+        }
+    }
+
     // SIMD integer comparisons
     {
         use IntCC::*;
diff --git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs
index a0d9c8d9c6..9006ce92cf 100644
--- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs
+++ b/cranelift/codegen/meta/src/isa/x86/opcodes.rs
@@ -431,6 +431,18 @@ pub static PSHUFB: [u8; 4] = [0x66, 0x0f, 0x38, 0x00];
 /// store the result in xmm1 (SSE2).
 pub static PSHUFD: [u8; 3] = [0x66, 0x0f, 0x70];
 
+/// Shift words in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR
+/// digit used in the ModR/M byte (SSE2).
+pub static PS_W_IMM: [u8; 3] = [0x66, 0x0f, 0x71];
+
+/// Shift doublewords in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR
+/// digit used in the ModR/M byte (SSE2).
+pub static PS_D_IMM: [u8; 3] = [0x66, 0x0f, 0x72];
+
+/// Shift quadwords in xmm1 by imm8; the direction is controlled by the RRR digit used in the
+/// ModR/M byte. Only the logical shifts (/2 right, /6 left) exist for quadwords (SSE2).
+pub static PS_Q_IMM: [u8; 3] = [0x66, 0x0f, 0x73];
+
 /// Shift words in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
 pub static PSLLW: [u8; 3] = [0x66, 0x0f, 0xf1];
 
diff --git a/cranelift/codegen/meta/src/isa/x86/recipes.rs b/cranelift/codegen/meta/src/isa/x86/recipes.rs
index 6fcd0b4564..8d271238ea 100644
--- a/cranelift/codegen/meta/src/isa/x86/recipes.rs
+++ b/cranelift/codegen/meta/src/isa/x86/recipes.rs
@@ -792,6 +792,27 @@ pub(crate) fn define<'shared>(
         ),
     );
 
+    // XX /n ib with 8-bit immediate, operating in place on an XMM register (e.g. SIMD shifts).
+    recipes.add_template_recipe(
+        EncodingRecipeBuilder::new("f_ib", &formats.binary_imm, 2)
+            .operands_in(vec![fpr])
+            .operands_out(vec![0])
+            .inst_predicate(InstructionPredicate::new_is_signed_int(
+                &*formats.binary_imm,
+                "imm",
+                8,
+                0,
+            ))
+            .emit(
+                r#"
+                    {{PUT_OP}}(bits, rex1(in_reg0), sink);
+                    modrm_r_bits(in_reg0, bits, sink);
+                    let imm: i64 = imm.into();
+                    sink.put1(imm as u8);
+                "#,
+            ),
+    );
+
     // XX /n id with 32-bit immediate sign-extended.
     recipes.add_template_recipe(
         EncodingRecipeBuilder::new("r_id", &formats.binary_imm, 5)
diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif
index 2a6530f7b5..af8796863c 100644
--- a/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif
@@ -49,3 +49,51 @@ ebb0(v0: i32x4 [%xmm4], v1: i64x2 [%xmm0]):
 [-, %xmm4] v2 = x86_psra v0, v1 ; bin: 66 0f e2 e0
     return v2
 }
+
+function %ishl_imm_i16x8(i16x8) -> i16x8 {
+ebb0(v0: i16x8 [%xmm2]):
+[-, %xmm2] v2 = ishl_imm v0, 3 ; bin: 66 0f 71 f2 03
+    return v2
+}
+
+function %ishl_imm_i32x4(i32x4) -> i32x4 {
+ebb0(v0: i32x4 [%xmm4]):
+[-, %xmm4] v2 = ishl_imm v0, 10 ; bin: 66 0f 72 f4 0a
+    return v2
+}
+
+function %ishl_imm_i64x2(i64x2) -> i64x2 {
+ebb0(v0: i64x2 [%xmm6]):
+[-, %xmm6] v2 = ishl_imm v0, 42 ; bin: 66 0f 73 f6 2a
+    return v2
+}
+
+function %ushr_imm_i16x8(i16x8) -> i16x8 {
+ebb0(v0: i16x8 [%xmm2]):
+[-, %xmm2] v2 = ushr_imm v0, 3 ; bin: 66 0f 71 d2 03
+    return v2
+}
+
+function %ushr_imm_i32x4(i32x4) -> i32x4 {
+ebb0(v0: i32x4 [%xmm4]):
+[-, %xmm4] v2 = ushr_imm v0, 10 ; bin: 66 0f 72 d4 0a
+    return v2
+}
+
+function %ushr_imm_i64x2(i64x2) -> i64x2 {
+ebb0(v0: i64x2 [%xmm6]):
+[-, %xmm6] v2 = ushr_imm v0, 42 ; bin: 66 0f 73 d6 2a
+    return v2
+}
+
+function %sshr_imm_i16x8(i16x8) -> i16x8 {
+ebb0(v0: i16x8 [%xmm2]):
+[-, %xmm2] v2 = sshr_imm v0, 3 ; bin: 66 0f 71 e2 03
+    return v2
+}
+
+function %sshr_imm_i32x4(i32x4) -> i32x4 {
+ebb0(v0: i32x4 [%xmm4]):
+[-, %xmm4] v2 = sshr_imm v0, 10 ; bin: 66 0f 72 e4 0a
+    return v2
+}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-run.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-run.clif
index 0c6eac6a10..8ca92a756f 100644
--- 
a/cranelift/filetests/filetests/isa/x86/simd-bitwise-run.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-bitwise-run.clif
@@ -127,3 +127,39 @@ ebb0:
     return v11
 }
 ; run
+
+function %sshr_imm_i32x4() -> b1 {
+ebb0:
+    v1 = vconst.i32x4 [1 2 4 -8]
+    v2 = sshr_imm v1, 1
+
+    v3 = vconst.i32x4 [0 1 2 -4]
+    v4 = icmp eq v2, v3
+    v5 = vall_true v4
+    return v5
+}
+; run
+
+function %ushr_imm_i16x8() -> b1 {
+ebb0:
+    v1 = vconst.i16x8 [1 2 4 -8 0 0 0 0]
+    v2 = ushr_imm v1, 1
+
+    v3 = vconst.i16x8 [0 1 2 32764 0 0 0 0] ; -4 with MSB unset == 32764
+    v4 = icmp eq v2, v3
+    v5 = vall_true v4
+    return v5
+}
+; run
+
+function %ishl_imm_i64x2() -> b1 {
+ebb0:
+    v1 = vconst.i64x2 [1 0]
+    v2 = ishl_imm v1, 1
+
+    v3 = vconst.i64x2 [2 0]
+    v4 = icmp eq v2, v3
+    v5 = vall_true v4
+    return v5
+}
+; run