Add x86 SIMD immediate shifts

2019-11-11 11:55:04 -08:00
parent 6519a43b08
commit 1f17e35e95
5 changed files with 135 additions and 0 deletions
--- a/cranelift/codegen/meta/src/isa/x86/encodings.rs
+++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs
@@ -553,6 +553,7 @@ pub(crate) fn define(
    let rec_copysp = r.template("copysp");
    let rec_div = r.template("div");
    let rec_debugtrap = r.recipe("debugtrap");
+    let rec_f_ib = r.template("f_ib");
    let rec_f32imm_z = r.template("f32imm_z");
    let rec_f64imm_z = r.template("f64imm_z");
    let rec_fa = r.template("fa");
@@ -2033,6 +2034,18 @@ pub(crate) fn define(
        e.enc_32_64(x86_psra, rec_fa.opcodes(*opcodes));
    }

+    // SIMD immediate shift: all three operations share one opcode per lane width and are
+    // distinguished by the RRR digit (/6 = shift left, /2 = logical shift right).
+    for (ty, opcodes) in &[(I16, &PS_W_IMM), (I32, &PS_D_IMM), (I64, &PS_Q_IMM)] {
+        let ishl_imm = ishl_imm.bind(vector(*ty, sse_vector_size));
+        e.enc_32_64(ishl_imm, rec_f_ib.opcodes(*opcodes).rrr(6));
+
+        let ushr_imm = ushr_imm.bind(vector(*ty, sse_vector_size));
+        e.enc_32_64(ushr_imm, rec_f_ib.opcodes(*opcodes).rrr(2));
+    }
+
+    // Arithmetic shift right (/4) only exists for 16- and 32-bit lanes: SSE2 has no PSRAQ, so
+    // `66 0f 73 /4` is not a valid instruction and i64x2 must not get an `sshr_imm` encoding.
+    for (ty, opcodes) in &[(I16, &PS_W_IMM), (I32, &PS_D_IMM)] {
+        let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size));
+        e.enc_32_64(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4));
+    }
+
    // SIMD integer comparisons
    {
        use IntCC::*;
--- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs
+++ b/cranelift/codegen/meta/src/isa/x86/opcodes.rs
@@ -431,6 +431,18 @@ pub static PSHUFB: [u8; 4] = [0x66, 0x0f, 0x38, 0x00];
 /// store the result in xmm1 (SSE2).
 pub static PSHUFD: [u8; 3] = [0x66, 0x0f, 0x70];

+/// Shift words in xmm1 by imm8; the RRR digit in the ModR/M byte selects the operation:
+/// /2 = logical right (PSRLW), /4 = arithmetic right (PSRAW), /6 = left (PSLLW) (SSE2).
+pub static PS_W_IMM: [u8; 3] = [0x66, 0x0f, 0x71];
+
+/// Shift doublewords in xmm1 by imm8; the RRR digit in the ModR/M byte selects the operation:
+/// /2 = logical right (PSRLD), /4 = arithmetic right (PSRAD), /6 = left (PSLLD) (SSE2).
+pub static PS_D_IMM: [u8; 3] = [0x66, 0x0f, 0x72];
+
+/// Shift quadwords in xmm1 by imm8; the RRR digit in the ModR/M byte selects the operation:
+/// /2 = logical right (PSRLQ), /6 = left (PSLLQ). There is no /4 arithmetic right form (SSE2).
+pub static PS_Q_IMM: [u8; 3] = [0x66, 0x0f, 0x73];
+
 /// Shift words in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
 pub static PSLLW: [u8; 3] = [0x66, 0x0f, 0xf1];

--- a/cranelift/codegen/meta/src/isa/x86/recipes.rs
+++ b/cranelift/codegen/meta/src/isa/x86/recipes.rs
@@ -792,6 +792,26 @@ pub(crate) fn define<'shared>(
                ),
        );

+        // XX /n ib on a single `fpr` register with an 8-bit immediate. The `/n` digit is not
+        // fixed by the recipe; it comes from the encoding bits passed to `modrm_r_bits`.
+        recipes.add_template_recipe(
+            EncodingRecipeBuilder::new("f_ib", &formats.binary_imm, 2)
+                .operands_in(vec![fpr])
+                // Output constraint `0`: presumably ties the result to input operand 0, so the
+                // register is shifted in place -- TODO confirm against the constraint definition.
+                .operands_out(vec![0])
+                // Only match when `imm` passes the signed-int check with arguments (8, 0);
+                // presumably "fits in 8 bits, unscaled" -- TODO confirm.
+                .inst_predicate(InstructionPredicate::new_is_signed_int(
+                    &*formats.binary_imm,
+                    "imm",
+                    8,
+                    0,
+                ))
+                // Emission order: opcode bytes via `PUT_OP`, one ModR/M byte built from `in_reg0`
+                // and the encoding bits, then the immediate truncated to a single byte.
+                .emit(
+                    r#"
+                        {{PUT_OP}}(bits, rex1(in_reg0), sink);
+                        modrm_r_bits(in_reg0, bits, sink);
+                        let imm: i64 = imm.into();
+                        sink.put1(imm as u8);
+                    "#,
+                ),
+        );
+
        // XX /n id with 32-bit immediate sign-extended.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("r_id", &formats.binary_imm, 5)
--- a/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif
@@ -49,3 +49,57 @@ ebb0(v0: i32x4 [%xmm4], v1: i64x2 [%xmm0]):
 [-, %xmm4]  v2 = x86_psra v0, v1      ; bin: 66 0f e2 e0
            return v2
 }
+
+; PSLLW xmm2, 3 -- ModR/M f2 = mod 11, reg /6, rm 2 (%xmm2); the last byte is the immediate.
+function %ishl_imm_i16x8(i16x8) -> i16x8 {
+ebb0(v0: i16x8 [%xmm2]):
+[-, %xmm2]  v2 = ishl_imm v0, 3      ; bin: 66 0f 71 f2 03
+            return v2
+}
+
+; PSLLD xmm4, 10 -- ModR/M f4 = mod 11, reg /6, rm 4 (%xmm4); the last byte is the immediate.
+function %ishl_imm_i32x4(i32x4) -> i32x4 {
+ebb0(v0: i32x4 [%xmm4]):
+[-, %xmm4]  v2 = ishl_imm v0, 10     ; bin: 66 0f 72 f4 0a
+            return v2
+}
+
+; PSLLQ xmm6, 42 -- ModR/M f6 = mod 11, reg /6, rm 6 (%xmm6); the last byte is the immediate.
+function %ishl_imm_i64x2(i64x2) -> i64x2 {
+ebb0(v0: i64x2 [%xmm6]):
+[-, %xmm6]  v2 = ishl_imm v0, 42     ; bin: 66 0f 73 f6 2a
+            return v2
+}
+
+; PSRLW xmm2, 3 -- ModR/M d2 = mod 11, reg /2, rm 2 (%xmm2); the last byte is the immediate.
+function %ushr_imm_i16x8(i16x8) -> i16x8 {
+ebb0(v0: i16x8 [%xmm2]):
+[-, %xmm2]  v2 = ushr_imm v0, 3      ; bin: 66 0f 71 d2 03
+            return v2
+}
+
+; PSRLD xmm4, 10 -- ModR/M d4 = mod 11, reg /2, rm 4 (%xmm4); the last byte is the immediate.
+function %ushr_imm_i32x4(i32x4) -> i32x4 {
+ebb0(v0: i32x4 [%xmm4]):
+[-, %xmm4]  v2 = ushr_imm v0, 10     ; bin: 66 0f 72 d4 0a
+            return v2
+}
+
+; PSRLQ xmm6, 42 -- ModR/M d6 = mod 11, reg /2, rm 6 (%xmm6); the last byte is the immediate.
+function %ushr_imm_i64x2(i64x2) -> i64x2 {
+ebb0(v0: i64x2 [%xmm6]):
+[-, %xmm6]  v2 = ushr_imm v0, 42     ; bin: 66 0f 73 d6 2a
+            return v2
+}
+
+; PSRAW xmm2, 3 -- ModR/M e2 = mod 11, reg /4, rm 2 (%xmm2); the last byte is the immediate.
+function %sshr_imm_i16x8(i16x8) -> i16x8 {
+ebb0(v0: i16x8 [%xmm2]):
+[-, %xmm2]  v2 = sshr_imm v0, 3      ; bin: 66 0f 71 e2 03
+            return v2
+}
+
+; PSRAD xmm4, 10 -- ModR/M e4 = mod 11, reg /4, rm 4 (%xmm4); the last byte is the immediate.
+function %sshr_imm_i32x4(i32x4) -> i32x4 {
+ebb0(v0: i32x4 [%xmm4]):
+[-, %xmm4]  v2 = sshr_imm v0, 10     ; bin: 66 0f 72 e4 0a
+            return v2
+}
+
+; There is intentionally no `sshr_imm` case for i64x2: SSE2 defines `66 0f 73 /2` (PSRLQ) and
+; `66 0f 73 /6` (PSLLQ) but no `/4` form, so the byte sequence `66 0f 73 e6 2a` is not a valid
+; instruction and must not be pinned as an expected encoding.
--- a/cranelift/filetests/filetests/isa/x86/simd-bitwise-run.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-bitwise-run.clif
@@ -127,3 +127,39 @@ ebb0:
    return v11
 }
 ; run
+
+function %sshr_imm_i32x4() -> b1 {
+ebb0:
+    ; Arithmetic shift right by one halves every lane and keeps the sign: -8 becomes -4.
+    v1 = vconst.i32x4 [1 2 4 -8]
+    v2 = sshr_imm v1, 1
+
+    v3 = vconst.i32x4 [0 1 2 -4]
+    v4 = icmp eq v2, v3
+    v5 = vall_true v4
+    return v5
+}
+; run
+
+function %ushr_imm_i16x8() -> b1 {
+ebb0:
+    ; Logical shift right by one shifts a zero into the top bit, so -8 (0xfff8) becomes 0x7ffc.
+    v1 = vconst.i16x8 [1 2 4 -8 0 0 0 0]
+    v2 = ushr_imm v1, 1
+
+    v3 = vconst.i16x8 [0 1 2 32764 0 0 0 0] ; -4 with MSB unset == 32764
+    v4 = icmp eq v2, v3
+    v5 = vall_true v4
+    return v5
+}
+; run
+
+function %ishl_imm_i64x2() -> b1 {
+ebb0:
+    ; Shifting left by one doubles the first lane (1 becomes 2); the zero lane stays zero.
+    v1 = vconst.i64x2 [1 0]
+    v2 = ishl_imm v1, 1
+
+    v3 = vconst.i64x2 [2 0]
+    v4 = icmp eq v2, v3
+    v5 = vall_true v4
+    return v5
+}
+; run