Fix scalar_to_vector: move not wide enough for 64-bit values (#1287)

Previously, `enc_x86_64` emitted two 64-bit mode encodings for `scalar_to_vector.i64`, neither of which set the REX.W bit that tells `MOVD`/`MOVQ` to move 64 bits of data instead of 32 bits. Now, `scalar_to_vector.i64` always uses a single 64-bit mode encoding with REX.W set, while `scalar_to_vector` with other widths has three encodings: a 32-bit mode move, a 64-bit mode move with no REX prefix, and a 64-bit mode move with a REX prefix (but without REX.W).
2019-12-16 10:17:08 -08:00
parent fcb0593796
commit 0604ec480c
3 changed files with 20 additions and 3 deletions
--- a/cranelift/codegen/meta/src/isa/x86/encodings.rs
+++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs
@@ -1746,10 +1746,13 @@ pub(crate) fn define(
        } else {
            let template = rec_frurm.opcodes(&MOVD_LOAD_XMM);
            if ty.lane_bits() < 64 {
-                // no 32-bit encodings for 64-bit widths
                e.enc32(instruction.clone(), template.clone());
+                e.enc_x86_64(instruction, template);
+            } else {
+                // No 32-bit encodings for 64-bit widths.
+                assert_eq!(ty.lane_bits(), 64);
+                e.enc64(instruction, template.rex().w());
            }
-            e.enc_x86_64(instruction, template);
        }
    }

--- a/cranelift/filetests/filetests/isa/x86/scalar_to_vector-binemit.clif
+++ b/cranelift/filetests/filetests/isa/x86/scalar_to_vector-binemit.clif
@@ -27,6 +27,6 @@ ebb0:
 function %test_scalar_to_vector_i64() {
 ebb0:
 [-, %rdx]   v0 = iconst.i64 42
-[-, %xmm7]  v1 = scalar_to_vector.i64x2 v0    ; bin: 66 0f 6e fa
+[-, %xmm7]  v1 = scalar_to_vector.i64x2 v0    ; bin: 66 48 0f 6e fa
            return
 }
--- a/cranelift/filetests/filetests/isa/x86/simd-construction-run.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-construction-run.clif
@@ -0,0 +1,14 @@
+test run
+set enable_simd
+target x86_64 skylake
+
+function %splat_i64x2() -> b1 {
+ebb0:
+    v0 = iconst.i64 -1
+    v1 = splat.i64x2 v0
+    v2 = vconst.i64x2 [-1 -1]
+    v3 = icmp eq v1, v2
+    v8 = vall_true v3
+    return v8
+}
+; run