From b1488decc4be6ca31c1698261707bb51a92d0a6d Mon Sep 17 00:00:00 2001 From: Julian Seward Date: Wed, 29 May 2019 17:12:38 +0200 Subject: [PATCH] Only create copy_nop instructions for types for which an encoding exists. Issue #779. PR #773 detects, at reload time, `copy` instructions that copy a value from stack slot back to the same stack slot. It replaces them with `copy_nop` instructions that have a null encoding (hence producing no code). For x86_64, `copy_nop` encodings for the types I64, I32, F64 and F32 are provided. Unfortunately the code that detects the redundant copy doesn't check the type of the copied value, hence leaving itself open to the danger of creating a `copy_nop` instruction cannot be encoded (which is different from saying it has a null encoding). This patch: * Expands the x86_64 set of `copy_nop` encodings to: I64 I32 I16 I8 F64 and F32 * Adds encodings for the same for x86_32, rv64 and rv32. * In `visit_inst()` in `reload.rs`, checks the type of the copied value accordingly. * Adds comments explaining the above. --- .../meta-python/isa/riscv/encodings.py | 9 +++++++- .../codegen/meta-python/isa/riscv/recipes.py | 5 ++++ .../codegen/meta-python/isa/x86/encodings.py | 22 ++++++++++++++---- cranelift/codegen/src/regalloc/reload.rs | 17 +++++++++++--- .../filetests/regalloc/reload-779.clif | 23 +++++++++++++++++++ 5 files changed, 67 insertions(+), 9 deletions(-) create mode 100644 cranelift/filetests/filetests/regalloc/reload-779.clif diff --git a/cranelift/codegen/meta-python/isa/riscv/encodings.py b/cranelift/codegen/meta-python/isa/riscv/encodings.py index 801f5c6932..980a88ba00 100644 --- a/cranelift/codegen/meta-python/isa/riscv/encodings.py +++ b/cranelift/codegen/meta-python/isa/riscv/encodings.py @@ -3,12 +3,13 @@ RISC-V Encodings. """ from __future__ import absolute_import from base import instructions as base +from base import types from base.immediates import intcc from .defs import RV32, RV64 from .recipes import OPIMM, OPIMM32, OP, OP32, LUI, BRANCH, JALR, JAL from .recipes import LOAD, STORE from .recipes import R, Rshamt, Ricmp, Ii, Iz, Iicmp, Iret, Icall, Icopy -from .recipes import U, UJ, UJcall, SB, SBzero, GPsp, GPfi, Irmov +from .recipes import U, UJ, UJcall, SB, SBzero, GPsp, GPfi, Irmov, stacknull from .settings import use_m from cdsl.ast import Var from base.legalize import narrow, expand @@ -160,3 +161,9 @@ RV32.enc(base.copy.b1, Icopy, OPIMM(0b000)) RV64.enc(base.copy.b1, Icopy, OPIMM(0b000)) RV32.enc(base.regmove.b1, Irmov, OPIMM(0b000)) RV64.enc(base.regmove.b1, Irmov, OPIMM(0b000)) + +# Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn +# into a no-op. +for ty in [types.i64, types.i32, types.i16, types.i8, types.f64, types.f32]: + RV64.enc(base.copy_nop.bind(ty), stacknull, 0) + RV32.enc(base.copy_nop.bind(ty), stacknull, 0) diff --git a/cranelift/codegen/meta-python/isa/riscv/recipes.py b/cranelift/codegen/meta-python/isa/riscv/recipes.py index 9808892b3c..ff27c31f8f 100644 --- a/cranelift/codegen/meta-python/isa/riscv/recipes.py +++ b/cranelift/codegen/meta-python/isa/riscv/recipes.py @@ -223,3 +223,8 @@ GPfi = EncRecipe( 'GPfi', Unary, base_size=4, ins=Stack(GPR), outs=GPR, emit='unimplemented!();') + +# Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn +# into a no-op. +stacknull = EncRecipe('stacknull', Unary, base_size=0, + ins=Stack(GPR), outs=Stack(GPR), emit='') diff --git a/cranelift/codegen/meta-python/isa/x86/encodings.py b/cranelift/codegen/meta-python/isa/x86/encodings.py index 449e8b8e1b..8da4912e77 100644 --- a/cranelift/codegen/meta-python/isa/x86/encodings.py +++ b/cranelift/codegen/meta-python/isa/x86/encodings.py @@ -342,11 +342,23 @@ X86_64.enc(base.copy_special, *r.copysp.rex(0x89, w=1)) X86_32.enc(base.copy_special, *r.copysp(0x89)) # Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn -# into a no-op. -X86_64.enc(base.copy_nop.i64, r.stacknull, 0) -X86_64.enc(base.copy_nop.i32, r.stacknull, 0) -X86_64.enc(base.copy_nop.f64, r.stacknull, 0) -X86_64.enc(base.copy_nop.f32, r.stacknull, 0) +# into a no-op. Ideally we could to make this encoding available for +# all types, and write `base.copy_nop.any`, but it appears that the +# controlling type variable must not polymorphic. So we make do with +# the following limited set, and guard the generating transformation in +# regalloc/reload.rs accordingly. +# +# The same encoding is generated for both the 64- and 32-bit architectures. +# Note that we can't use `enc_both` here, because that attempts to create a +# variant with a REX prefix in the 64-bit-architecture case. But since +# there's no actual instruction for the REX prefix to modify the meaning of, +# it will modify the meaning of whatever instruction happens to follow this +# one, which is obviously wrong. Note also that we can and indeed *must* +# claim that there's a 64-bit encoding for the 32-bit arch case, even though +# no such single instruction actually exists for the 32-bit arch case. +for ty in [types.i64, types.i32, types.i16, types.i8, types.f64, types.f32]: + X86_64.enc(base.copy_nop.bind(ty), r.stacknull, 0) + X86_32.enc(base.copy_nop.bind(ty), r.stacknull, 0) # Adjust SP down by a dynamic value (or up, with a negative operand). X86_32.enc(base.adjust_sp_down.i32, *r.adjustsp(0x29)) diff --git a/cranelift/codegen/src/regalloc/reload.rs b/cranelift/codegen/src/regalloc/reload.rs index cc6aae59b8..11c1a5513d 100644 --- a/cranelift/codegen/src/regalloc/reload.rs +++ b/cranelift/codegen/src/regalloc/reload.rs @@ -222,18 +222,29 @@ impl<'a> Context<'a> { { let dst_vals = self.cur.func.dfg.inst_results(inst); if dst_vals.len() == 1 { + let dst_val = dst_vals[0]; let can_transform = match ( self.cur.func.locations[arg], - self.cur.func.locations[dst_vals[0]], + self.cur.func.locations[dst_val], ) { - (ValueLoc::Stack(src_slot), ValueLoc::Stack(dst_slot)) => src_slot == dst_slot, + (ValueLoc::Stack(src_slot), ValueLoc::Stack(dst_slot)) => { + src_slot == dst_slot && { + let src_ty = self.cur.func.dfg.value_type(arg); + let dst_ty = self.cur.func.dfg.value_type(dst_val); + debug_assert!(src_ty == dst_ty); + // This limits the transformation to copies of the + // types: I64 I32 I16 I8 F64 and F32, since that's + // the set of `copy_nop` encodings available. + src_ty.is_int() || src_ty.is_float() + } + } _ => false, }; if can_transform { // Convert the instruction into a `copy_nop`. self.cur.func.dfg.replace(inst).copy_nop(arg); let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok(); - debug_assert!(ok); + debug_assert!(ok, "copy_nop encoding missing for this type"); // And move on to the next insn. self.reloads.clear(); diff --git a/cranelift/filetests/filetests/regalloc/reload-779.clif b/cranelift/filetests/filetests/regalloc/reload-779.clif new file mode 100644 index 0000000000..c02464a11f --- /dev/null +++ b/cranelift/filetests/filetests/regalloc/reload-779.clif @@ -0,0 +1,23 @@ +test compile +target x86_64 + +; Filed as https://github.com/CraneStation/cranelift/issues/779 +; +; The copy_nop optimisation to reload (see Issue 773) was creating +; copy_nop instructions for types for which there were no encoding. + +function u0:0(i64, i64, i64) system_v { + sig0 = () system_v + sig1 = (i16) system_v + fn1 = u0:94 sig0 + fn2 = u0:95 sig1 + +ebb0(v0: i64, v1: i64, v2: i64): + v3 = iconst.i16 0 + jump ebb1(v3) + +ebb1(v4: i16): + call fn1() + call fn2(v4) + jump ebb1(v4) +}