From 98056aa05df22a9d518e3ec8741c509580179c70 Mon Sep 17 00:00:00 2001 From: Julian Seward Date: Tue, 3 Sep 2019 05:33:18 +0200 Subject: [PATCH] Don't incorrectly omit a REX prefix for some encodings of `copy_to_ssa`. Mozilla bug #1576969. Also, as a ridealong fix, removes R32 encodings for x86_64 in `enc_r32_r64`, since the type `rXX` by definition only exists for targets with word size `XX` bits. --- .../codegen/meta/src/isa/x86/encodings.rs | 56 ++++++++++++------- .../filetests/isa/x86/binary64-float.clif | 20 +++++++ .../filetests/filetests/isa/x86/binary64.clif | 20 +++++++ 3 files changed, 75 insertions(+), 21 deletions(-) diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index a5df9298e8..ac8e5e725f 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -150,6 +150,16 @@ impl PerCpuModeEncodings { self.enc64(inst.bind(I64), template.rex().w()); } + /// Add encodings for `inst.i32` to X86_32. + /// Add encodings for `inst.i32` to X86_64 with a REX prefix. + /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix. + fn enc_i32_i64_rex_only(&mut self, inst: impl Into, template: Template) { + let inst: InstSpec = inst.into(); + self.enc32(inst.bind(I32), template.nonrex()); + self.enc64(inst.bind(I32), template.rex()); + self.enc64(inst.bind(I64), template.rex().w()); + } + /// Add encodings for `inst.i32` to X86_32. /// Add encodings for `inst.i32` to X86_64 with and without REX. /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix. @@ -177,16 +187,10 @@ impl PerCpuModeEncodings { } /// Add encodings for `inst.r32` to X86_32. - /// Add encodings for `inst.r32` to X86_64 with and without REX. /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix. - fn enc_r32_r64(&mut self, inst: impl Into, template: Template) { + fn enc_r32_r64_rex_only(&mut self, inst: impl Into, template: Template) { let inst: InstSpec = inst.into(); self.enc32(inst.bind_ref(R32), template.nonrex()); - - // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise - // reg-alloc would never use r8 and up. - self.enc64(inst.bind_ref(R32), template.rex()); - self.enc64(inst.bind_ref(R32), template.nonrex()); self.enc64(inst.bind_ref(R64), template.rex().w()); } @@ -247,6 +251,14 @@ impl PerCpuModeEncodings { self.enc_x86_64_instp(inst, template, instp); } + /// Add two encodings for `inst`: + /// - X86_32 + /// - X86_64 with the REX prefix. + fn enc_both_rex_only(&mut self, inst: impl Clone + Into, template: Template) { + self.enc32(inst.clone(), template.clone()); + self.enc64(inst, template.rex()); + } + /// Add encodings for `inst.i32` to X86_32. /// Add encodings for `inst.i32` to X86_64 with and without REX. /// Add encodings for `inst.i64` to X86_64 with a REX prefix, using the `w_bit` @@ -622,7 +634,7 @@ pub fn define( e.enc_i32_i64(x86_umulx, rec_mulx.opcodes(vec![0xf7]).rrr(4)); e.enc_i32_i64(copy, rec_umr.opcodes(vec![0x89])); - e.enc_r32_r64(copy, rec_umr.opcodes(vec![0x89])); + e.enc_r32_r64_rex_only(copy, rec_umr.opcodes(vec![0x89])); e.enc_both(copy.bind(B1), rec_umr.opcodes(vec![0x89])); e.enc_both(copy.bind(I8), rec_umr.opcodes(vec![0x89])); e.enc_both(copy.bind(I16), rec_umr.opcodes(vec![0x89])); @@ -901,8 +913,8 @@ pub fn define( e.enc_i32_i64(spill, rec_spillSib32.opcodes(vec![0x89])); e.enc_i32_i64(regspill, rec_regspill32.opcodes(vec![0x89])); - e.enc_r32_r64(spill, rec_spillSib32.opcodes(vec![0x89])); - e.enc_r32_r64(regspill, rec_regspill32.opcodes(vec![0x89])); + e.enc_r32_r64_rex_only(spill, rec_spillSib32.opcodes(vec![0x89])); + e.enc_r32_r64_rex_only(regspill, rec_regspill32.opcodes(vec![0x89])); // Use a 32-bit write for spilling `b1`, `i8` and `i16` to avoid // constraining the permitted registers. @@ -927,8 +939,8 @@ pub fn define( e.enc_i32_i64(fill, rec_fillSib32.opcodes(vec![0x8b])); e.enc_i32_i64(regfill, rec_regfill32.opcodes(vec![0x8b])); - e.enc_r32_r64(fill, rec_fillSib32.opcodes(vec![0x8b])); - e.enc_r32_r64(regfill, rec_regfill32.opcodes(vec![0x8b])); + e.enc_r32_r64_rex_only(fill, rec_fillSib32.opcodes(vec![0x8b])); + e.enc_r32_r64_rex_only(regfill, rec_regfill32.opcodes(vec![0x8b])); // No-op fills, created by late-stage redundant-fill removal. for &ty in &[I64, I32, I16, I8] { @@ -964,20 +976,22 @@ pub fn define( e.enc64(copy_special, rec_copysp.opcodes(vec![0x89]).rex().w()); e.enc32(copy_special, rec_copysp.opcodes(vec![0x89])); - // Copy to SSA - e.enc_i32_i64(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(vec![0x89])); - e.enc_r32_r64(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(vec![0x89])); - e.enc_both(copy_to_ssa.bind(B1), rec_umr_reg_to_ssa.opcodes(vec![0x89])); - e.enc_both(copy_to_ssa.bind(I8), rec_umr_reg_to_ssa.opcodes(vec![0x89])); - e.enc_both( + // Copy to SSA. These have to be done with special _rex_only encoders, because the standard + // machinery for deciding whether a REX.{RXB} prefix is needed doesn't take into account + // the source register, which is specified directly in the instruction. + e.enc_i32_i64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(vec![0x89])); + e.enc_r32_r64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(vec![0x89])); + e.enc_both_rex_only(copy_to_ssa.bind(B1), rec_umr_reg_to_ssa.opcodes(vec![0x89])); + e.enc_both_rex_only(copy_to_ssa.bind(I8), rec_umr_reg_to_ssa.opcodes(vec![0x89])); + e.enc_both_rex_only( copy_to_ssa.bind(I16), rec_umr_reg_to_ssa.opcodes(vec![0x89]), ); - e.enc_both( + e.enc_both_rex_only( copy_to_ssa.bind(F64), rec_furm_reg_to_ssa.opcodes(vec![0xf2, 0x0f, 0x10]), ); - e.enc_both( + e.enc_both_rex_only( copy_to_ssa.bind(F32), rec_furm_reg_to_ssa.opcodes(vec![0xf3, 0x0f, 0x10]), ); @@ -1808,7 +1822,7 @@ pub fn define( e.enc64(null.bind_ref(R64), rec_pu_id_ref.opcodes(vec![0xb8])); // is_null, implemented by testing whether the value is 0. - e.enc_r32_r64(is_null, rec_is_zero.opcodes(vec![0x85])); + e.enc_r32_r64_rex_only(is_null, rec_is_zero.opcodes(vec![0x85])); // safepoint instruction calls sink, no actual encoding. e.enc32_rec(safepoint, rec_safepoint, 0); diff --git a/cranelift/filetests/filetests/isa/x86/binary64-float.clif b/cranelift/filetests/filetests/isa/x86/binary64-float.clif index 59e024a042..831079070d 100644 --- a/cranelift/filetests/filetests/isa/x86/binary64-float.clif +++ b/cranelift/filetests/filetests/isa/x86/binary64-float.clif @@ -96,6 +96,26 @@ ebb0: ; asm: movaps %xmm5, %xmm10 [-,%xmm10] v39 = copy v10 ; bin: 44 0f 28 d5 + ; Copy to SSA + + ; asm: movsd %xmm0, %xmm15 + [-,%xmm15] v400 = copy_to_ssa.f64 %xmm0 ; bin: f2 44 0f 10 f8 + ; asm: movsd %xmm15, %xmm0 + [-,%xmm0] v401 = copy_to_ssa.f64 %xmm15 ; bin: f2 41 0f 10 c7 + ; asm: movsd %xmm7, %xmm6. Unfortunately we get a redundant REX prefix. + [-,%xmm6] v402 = copy_to_ssa.f64 %xmm7 ; bin: f2 40 0f 10 f7 + ; asm: movsd %xmm11, %xmm14 + [-,%xmm14] v403 = copy_to_ssa.f64 %xmm11 ; bin: f2 45 0f 10 f3 + + ; asm: movss %xmm0, %xmm15 + [-,%xmm15] v404 = copy_to_ssa.f32 %xmm0 ; bin: f3 44 0f 10 f8 + ; asm: movss %xmm15, %xmm0 + [-,%xmm0] v405 = copy_to_ssa.f32 %xmm15 ; bin: f3 41 0f 10 c7 + ; asm: movss %xmm7, %xmm6. Unfortunately we get a redundant REX prefix. + [-,%xmm6] v406 = copy_to_ssa.f32 %xmm7 ; bin: f3 40 0f 10 f7 + ; asm: movss %xmm11, %xmm14 + [-,%xmm14] v407 = copy_to_ssa.f32 %xmm11 ; bin: f3 45 0f 10 f3 + ; Convert float to int. ; asm: cvttss2si %xmm5, %ecx diff --git a/cranelift/filetests/filetests/isa/x86/binary64.clif b/cranelift/filetests/filetests/isa/x86/binary64.clif index 7742a24ee3..d1b862c85a 100644 --- a/cranelift/filetests/filetests/isa/x86/binary64.clif +++ b/cranelift/filetests/filetests/isa/x86/binary64.clif @@ -179,6 +179,26 @@ ebb0: ; asm: movq %r10, %rsp copy_special %r10 -> %rsp ; bin: 4c 89 d4 + ; Copy to SSA + + ; asm: movq %rax, %r15 + [-,%r15] v700 = copy_to_ssa.i64 %rax ; bin: 49 89 c7 + ; asm: movq %r15, %rax + [-,%rax] v701 = copy_to_ssa.i64 %r15 ; bin: 4c 89 f8 + ; asm: movq %rdi, %rsi + [-,%rsi] v702 = copy_to_ssa.i64 %rdi ; bin: 48 89 fe + ; asm: movq %r11, %r14 + [-,%r14] v703 = copy_to_ssa.i64 %r11 ; bin: 4d 89 de + + ; asm: movl %eax, %r15d + [-,%r15] v704 = copy_to_ssa.i32 %rax ; bin: 41 89 c7 + ; asm: movl %r15d, %eax + [-,%rax] v705 = copy_to_ssa.i32 %r15 ; bin: 44 89 f8 + ; asm: movl %edi, %esi. Unfortunately we get a redundant REX prefix. + [-,%rsi] v706 = copy_to_ssa.i32 %rdi ; bin: 40 89 fe + ; asm: movl %r11, %r14 + [-,%r14] v707 = copy_to_ssa.i32 %r11 ; bin: 45 89 de + ; Load/Store instructions. ; Register indirect addressing with no displacement.