Don't incorrectly omit a REX prefix for some encodings of copy_to_ssa. Mozilla bug #1576969.

Also, as a ridealong fix, removes R32 encodings for x86_64 in `enc_r32_r64`, since the type `rXX` by definition only exists for targets with word size `XX` bits.
2019-09-03 05:33:18 +02:00
parent d09fc93fef
commit 98056aa05d
3 changed files with 75 additions and 21 deletions
--- a/cranelift/codegen/meta/src/isa/x86/encodings.rs
+++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs
@@ -150,6 +150,16 @@ impl PerCpuModeEncodings {
        self.enc64(inst.bind(I64), template.rex().w());
    }
    /// Add encodings for `inst.i32` to X86_32.
    /// Add encodings for `inst.i32` to X86_64 with a REX prefix.
    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
    fn enc_i32_i64_rex_only(&mut self, inst: impl Into<InstSpec>, template: Template) {
        let inst: InstSpec = inst.into();
        self.enc32(inst.bind(I32), template.nonrex());
        self.enc64(inst.bind(I32), template.rex());
        self.enc64(inst.bind(I64), template.rex().w());
    }
    /// Add encodings for `inst.i32` to X86_32.
    /// Add encodings for `inst.i32` to X86_64 with and without REX.
    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
@@ -177,16 +187,10 @@ impl PerCpuModeEncodings {
    }
    /// Add encodings for `inst.r32` to X86_32.
    /// Add encodings for `inst.r32` to X86_64 with and without REX.
    /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix.
-    fn enc_r32_r64(&mut self, inst: impl Into<InstSpec>, template: Template) {
+    fn enc_r32_r64_rex_only(&mut self, inst: impl Into<InstSpec>, template: Template) {
        let inst: InstSpec = inst.into();
        self.enc32(inst.bind_ref(R32), template.nonrex());
        // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise
        // reg-alloc would never use r8 and up.
        self.enc64(inst.bind_ref(R32), template.rex());
        self.enc64(inst.bind_ref(R32), template.nonrex());
        self.enc64(inst.bind_ref(R64), template.rex().w());
    }
@@ -247,6 +251,14 @@ impl PerCpuModeEncodings {
        self.enc_x86_64_instp(inst, template, instp);
    }
    /// Add two encodings for `inst`:
    /// - X86_32
    /// - X86_64 with the REX prefix.
    fn enc_both_rex_only(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
        self.enc32(inst.clone(), template.clone());
        self.enc64(inst, template.rex());
    }
    /// Add encodings for `inst.i32` to X86_32.
    /// Add encodings for `inst.i32` to X86_64 with and without REX.
    /// Add encodings for `inst.i64` to X86_64 with a REX prefix, using the `w_bit`
@@ -622,7 +634,7 @@ pub fn define(
    e.enc_i32_i64(x86_umulx, rec_mulx.opcodes(vec![0xf7]).rrr(4));
    e.enc_i32_i64(copy, rec_umr.opcodes(vec![0x89]));
-    e.enc_r32_r64(copy, rec_umr.opcodes(vec![0x89]));
+    e.enc_r32_r64_rex_only(copy, rec_umr.opcodes(vec![0x89]));
    e.enc_both(copy.bind(B1), rec_umr.opcodes(vec![0x89]));
    e.enc_both(copy.bind(I8), rec_umr.opcodes(vec![0x89]));
    e.enc_both(copy.bind(I16), rec_umr.opcodes(vec![0x89]));
@@ -901,8 +913,8 @@ pub fn define(
    e.enc_i32_i64(spill, rec_spillSib32.opcodes(vec![0x89]));
    e.enc_i32_i64(regspill, rec_regspill32.opcodes(vec![0x89]));
-    e.enc_r32_r64(spill, rec_spillSib32.opcodes(vec![0x89]));
+    e.enc_r32_r64_rex_only(spill, rec_spillSib32.opcodes(vec![0x89]));
-    e.enc_r32_r64(regspill, rec_regspill32.opcodes(vec![0x89]));
+    e.enc_r32_r64_rex_only(regspill, rec_regspill32.opcodes(vec![0x89]));
    // Use a 32-bit write for spilling `b1`, `i8` and `i16` to avoid
    // constraining the permitted registers.
@@ -927,8 +939,8 @@ pub fn define(
    e.enc_i32_i64(fill, rec_fillSib32.opcodes(vec![0x8b]));
    e.enc_i32_i64(regfill, rec_regfill32.opcodes(vec![0x8b]));
-    e.enc_r32_r64(fill, rec_fillSib32.opcodes(vec![0x8b]));
+    e.enc_r32_r64_rex_only(fill, rec_fillSib32.opcodes(vec![0x8b]));
-    e.enc_r32_r64(regfill, rec_regfill32.opcodes(vec![0x8b]));
+    e.enc_r32_r64_rex_only(regfill, rec_regfill32.opcodes(vec![0x8b]));
    // No-op fills, created by late-stage redundant-fill removal.
    for &ty in &[I64, I32, I16, I8] {
@@ -964,20 +976,22 @@ pub fn define(
    e.enc64(copy_special, rec_copysp.opcodes(vec![0x89]).rex().w());
    e.enc32(copy_special, rec_copysp.opcodes(vec![0x89]));
-    // Copy to SSA
+    // Copy to SSA.  These have to be done with special _rex_only encoders, because the standard
-    e.enc_i32_i64(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(vec![0x89]));
+    // machinery for deciding whether a REX.{RXB} prefix is needed doesn't take into account
-    e.enc_r32_r64(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(vec![0x89]));
+    // the source register, which is specified directly in the instruction.
-    e.enc_both(copy_to_ssa.bind(B1), rec_umr_reg_to_ssa.opcodes(vec![0x89]));
+    e.enc_i32_i64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(vec![0x89]));
-    e.enc_both(copy_to_ssa.bind(I8), rec_umr_reg_to_ssa.opcodes(vec![0x89]));
+    e.enc_r32_r64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(vec![0x89]));
-    e.enc_both(
+    e.enc_both_rex_only(copy_to_ssa.bind(B1), rec_umr_reg_to_ssa.opcodes(vec![0x89]));
    e.enc_both_rex_only(copy_to_ssa.bind(I8), rec_umr_reg_to_ssa.opcodes(vec![0x89]));
    e.enc_both_rex_only(
        copy_to_ssa.bind(I16),
        rec_umr_reg_to_ssa.opcodes(vec![0x89]),
    );
-    e.enc_both(
+    e.enc_both_rex_only(
        copy_to_ssa.bind(F64),
        rec_furm_reg_to_ssa.opcodes(vec![0xf2, 0x0f, 0x10]),
    );
-    e.enc_both(
+    e.enc_both_rex_only(
        copy_to_ssa.bind(F32),
        rec_furm_reg_to_ssa.opcodes(vec![0xf3, 0x0f, 0x10]),
    );
@@ -1808,7 +1822,7 @@ pub fn define(
    e.enc64(null.bind_ref(R64), rec_pu_id_ref.opcodes(vec![0xb8]));
    // is_null, implemented by testing whether the value is 0.
-    e.enc_r32_r64(is_null, rec_is_zero.opcodes(vec![0x85]));
+    e.enc_r32_r64_rex_only(is_null, rec_is_zero.opcodes(vec![0x85]));
    // safepoint instruction calls sink, no actual encoding.
    e.enc32_rec(safepoint, rec_safepoint, 0);
--- a/cranelift/filetests/filetests/isa/x86/binary64-float.clif
+++ b/cranelift/filetests/filetests/isa/x86/binary64-float.clif
@@ -96,6 +96,26 @@ ebb0:
    ; asm: movaps %xmm5, %xmm10
    [-,%xmm10]          v39 = copy v10                          ; bin: 44 0f 28 d5
    ; Copy to SSA
    ; asm: movsd %xmm0, %xmm15
    [-,%xmm15]          v400 = copy_to_ssa.f64 %xmm0            ; bin: f2 44 0f 10 f8
    ; asm: movsd %xmm15, %xmm0
    [-,%xmm0]           v401 = copy_to_ssa.f64 %xmm15           ; bin: f2 41 0f 10 c7
    ; asm: movsd %xmm7, %xmm6.  Unfortunately we get a redundant REX prefix.
    [-,%xmm6]           v402 = copy_to_ssa.f64 %xmm7            ; bin: f2 40 0f 10 f7
    ; asm: movsd %xmm11, %xmm14
    [-,%xmm14]          v403 = copy_to_ssa.f64 %xmm11           ; bin: f2 45 0f 10 f3
    ; asm: movss %xmm0, %xmm15
    [-,%xmm15]          v404 = copy_to_ssa.f32 %xmm0            ; bin: f3 44 0f 10 f8
    ; asm: movss %xmm15, %xmm0
    [-,%xmm0]           v405 = copy_to_ssa.f32 %xmm15           ; bin: f3 41 0f 10 c7
    ; asm: movss %xmm7, %xmm6.  Unfortunately we get a redundant REX prefix.
    [-,%xmm6]           v406 = copy_to_ssa.f32 %xmm7            ; bin: f3 40 0f 10 f7
    ; asm: movss %xmm11, %xmm14
    [-,%xmm14]          v407 = copy_to_ssa.f32 %xmm11           ; bin: f3 45 0f 10 f3
    ; Convert float to int.
    ; asm: cvttss2si %xmm5, %ecx
--- a/cranelift/filetests/filetests/isa/x86/binary64.clif
+++ b/cranelift/filetests/filetests/isa/x86/binary64.clif
@@ -179,6 +179,26 @@ ebb0:
    ; asm: movq %r10, %rsp
    copy_special %r10 -> %rsp                   ; bin: 4c 89 d4
    ; Copy to SSA
    ; asm: movq %rax, %r15
    [-,%r15]      v700 = copy_to_ssa.i64 %rax    ; bin: 49 89 c7
    ; asm: movq %r15, %rax
    [-,%rax]      v701 = copy_to_ssa.i64 %r15    ; bin: 4c 89 f8
    ; asm: movq %rdi, %rsi
    [-,%rsi]      v702 = copy_to_ssa.i64 %rdi    ; bin: 48 89 fe
    ; asm: movq %r11, %r14
    [-,%r14]      v703 = copy_to_ssa.i64 %r11    ; bin: 4d 89 de
    ; asm: movl %eax, %r15d
    [-,%r15]      v704 = copy_to_ssa.i32 %rax    ; bin: 41 89 c7
    ; asm: movl %r15d, %eax
    [-,%rax]      v705 = copy_to_ssa.i32 %r15    ; bin: 44 89 f8
    ; asm: movl %edi, %esi.  Unfortunately we get a redundant REX prefix.
    [-,%rsi]      v706 = copy_to_ssa.i32 %rdi    ; bin: 40 89 fe
    ; asm: movl %r11, %r14
    [-,%r14]      v707 = copy_to_ssa.i32 %r11    ; bin: 45 89 de
    ; Load/Store instructions.
    ; Register indirect addressing with no displacement.