From 03368895feebdfb219b61ae51908992dbf6cb045 Mon Sep 17 00:00:00 2001 From: Julian Seward Date: Mon, 27 May 2019 11:55:23 +0200 Subject: [PATCH] Cranelift: Redundant stack-slot-to-stack-slot copy removal. PR#773. This is also https://bugzilla.mozilla.org/show_bug.cgi?id=1552737. Cranelift currently has a tendency to create redundant copies (self-copies) of values from a stack slot back to the same stack slot. This generates a pointless load and store and an unnecessary register use. The copies are created by `visit_inst` in regalloc/reload.rs. They appear to occur mostly, but not exclusively, at loop heads. It's unclear why this happens. This patch adds a special case to `visit_inst` to find such copies. They are converted into a new instruction, `copy_nop`, which takes and produces the same SSA names, so as not to break any of the SSA invariants, but which has a zero-length encoding, hence removing the copy at emission time. `copy_nop`s source and destination operands must be stack slots and of course the *same* stack slot. The verifier has been enhanced to check this, since misuse of `copy_nop` will likely lead to hard-to-find incorrect-code bugs. Attempts were made to write a standalone .clif test case. But these failed because it appears the .clif parser accepts but ignores location hints that are stack slots. So it's impossible to write, in clif, the exact form of `copy` instruction that triggers the transformation. --- cranelift/codegen/meta-python/base/formats.py | 2 + .../codegen/meta-python/base/instructions.py | 10 +++++ .../codegen/meta-python/isa/x86/encodings.py | 7 +++ .../codegen/meta-python/isa/x86/recipes.py | 3 ++ .../codegen/meta/src/shared/instructions.rs | 15 +++++++ cranelift/codegen/src/regalloc/reload.rs | 38 +++++++++++++++- cranelift/codegen/src/verifier/mod.rs | 44 ++++++++++++++++++- 7 files changed, 116 insertions(+), 3 deletions(-) diff --git a/cranelift/codegen/meta-python/base/formats.py b/cranelift/codegen/meta-python/base/formats.py index b63863eed0..b45ac26f74 100644 --- a/cranelift/codegen/meta-python/base/formats.py +++ b/cranelift/codegen/meta-python/base/formats.py @@ -75,6 +75,8 @@ TableAddr = InstructionFormat(table, VALUE, offset32) RegMove = InstructionFormat(VALUE, ('src', regunit), ('dst', regunit)) CopySpecial = InstructionFormat(('src', regunit), ('dst', regunit)) +CopyNop = InstructionFormat( + ('src', entities.stack_slot), ('dst', entities.stack_slot)) RegSpill = InstructionFormat( VALUE, ('src', regunit), ('dst', entities.stack_slot)) RegFill = InstructionFormat( diff --git a/cranelift/codegen/meta-python/base/instructions.py b/cranelift/codegen/meta-python/base/instructions.py index ebb7257f65..48adc399ce 100644 --- a/cranelift/codegen/meta-python/base/instructions.py +++ b/cranelift/codegen/meta-python/base/instructions.py @@ -764,6 +764,16 @@ copy_special = Instruction( ins=(src, dst), other_side_effects=True) +copy_nop = Instruction( + 'copy_nop', r""" + Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn + into a no-op. This instruction is for use only within Cranelift + itself. + + This instruction copies its input, preserving the value type. + """, + ins=x, outs=a) + delta = Operand('delta', Int) adjust_sp_down = Instruction( 'adjust_sp_down', r""" diff --git a/cranelift/codegen/meta-python/isa/x86/encodings.py b/cranelift/codegen/meta-python/isa/x86/encodings.py index 8b5f9cf0bc..cf25d5b2dd 100644 --- a/cranelift/codegen/meta-python/isa/x86/encodings.py +++ b/cranelift/codegen/meta-python/isa/x86/encodings.py @@ -341,6 +341,13 @@ enc_x86_64(x86.pop.i64, r.popq, 0x58) X86_64.enc(base.copy_special, *r.copysp.rex(0x89, w=1)) X86_32.enc(base.copy_special, *r.copysp(0x89)) +# Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn +# into a no-op. +X86_64.enc(base.copy_nop.i64, r.stacknull, 0) +X86_64.enc(base.copy_nop.i32, r.stacknull, 0) +X86_64.enc(base.copy_nop.f64, r.stacknull, 0) +X86_64.enc(base.copy_nop.f32, r.stacknull, 0) + # Adjust SP down by a dynamic value (or up, with a negative operand). X86_32.enc(base.adjust_sp_down.i32, *r.adjustsp(0x29)) X86_64.enc(base.adjust_sp_down.i64, *r.adjustsp.rex(0x29, w=1)) diff --git a/cranelift/codegen/meta-python/isa/x86/recipes.py b/cranelift/codegen/meta-python/isa/x86/recipes.py index 76544b0e28..c596fcd108 100644 --- a/cranelift/codegen/meta-python/isa/x86/recipes.py +++ b/cranelift/codegen/meta-python/isa/x86/recipes.py @@ -295,6 +295,9 @@ def valid_scale(iform): # copies and no-op conversions. null = EncRecipe('null', Unary, base_size=0, ins=GPR, outs=0, emit='') +stacknull = EncRecipe('stacknull', Unary, base_size=0, ins=StackGPR32, + outs=StackGPR32, emit='') + debugtrap = EncRecipe('debugtrap', NullAry, base_size=1, ins=(), outs=(), emit=''' sink.put1(0xcc); diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index b0c89b510e..891dadd00a 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -1254,6 +1254,21 @@ pub fn define( .finish(format_registry), ); + ig.push( + Inst::new( + "copy_nop", + r#" + Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn + into a no-op. This instruction is for use only within Cranelift itself. + + This instruction copies its input, preserving the value type. + "#, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .finish(format_registry), + ); + let delta = &operand("delta", Int); ig.push( diff --git a/cranelift/codegen/src/regalloc/reload.rs b/cranelift/codegen/src/regalloc/reload.rs index efbe2204bd..cc6aae59b8 100644 --- a/cranelift/codegen/src/regalloc/reload.rs +++ b/cranelift/codegen/src/regalloc/reload.rs @@ -13,7 +13,7 @@ use crate::cursor::{Cursor, EncCursor}; use crate::dominator_tree::DominatorTree; use crate::entity::{SparseMap, SparseMapValue}; use crate::ir::{AbiParam, ArgumentLoc, InstBuilder}; -use crate::ir::{Ebb, Function, Inst, InstructionData, Opcode, Value}; +use crate::ir::{Ebb, Function, Inst, InstructionData, Opcode, Value, ValueLoc}; use crate::isa::RegClass; use crate::isa::{ConstraintKind, EncInfo, Encoding, RecipeConstraints, TargetIsa}; use crate::regalloc::affinity::Affinity; @@ -211,6 +211,42 @@ impl<'a> Context<'a> { debug_assert!(self.candidates.is_empty()); self.find_candidates(inst, constraints); + // If we find a copy from a stack slot to the same stack slot, replace + // it with a `copy_nop` but otherwise ignore it. In particular, don't + // generate a reload immediately followed by a spill. The `copy_nop` + // has a zero-length encoding, so will disappear at emission time. + if let InstructionData::Unary { + opcode: Opcode::Copy, + arg, + } = self.cur.func.dfg[inst] + { + let dst_vals = self.cur.func.dfg.inst_results(inst); + if dst_vals.len() == 1 { + let can_transform = match ( + self.cur.func.locations[arg], + self.cur.func.locations[dst_vals[0]], + ) { + (ValueLoc::Stack(src_slot), ValueLoc::Stack(dst_slot)) => src_slot == dst_slot, + _ => false, + }; + if can_transform { + // Convert the instruction into a `copy_nop`. + self.cur.func.dfg.replace(inst).copy_nop(arg); + let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok(); + debug_assert!(ok); + + // And move on to the next insn. + self.reloads.clear(); + let _ = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness); + self.cur.next_inst(); + self.candidates.clear(); + return; + } + } + } + + // Deal with all instructions not special-cased by the immediately + // preceding fragment. if let InstructionData::Unary { opcode: Opcode::Copy, .. diff --git a/cranelift/codegen/src/verifier/mod.rs b/cranelift/codegen/src/verifier/mod.rs index fd72315735..38a52f0934 100644 --- a/cranelift/codegen/src/verifier/mod.rs +++ b/cranelift/codegen/src/verifier/mod.rs @@ -65,8 +65,8 @@ use crate::ir; use crate::ir::entities::AnyEntity; use crate::ir::instructions::{BranchInfo, CallInfo, InstructionFormat, ResolvedConstraint}; use crate::ir::{ - types, ArgumentLoc, Ebb, FuncRef, Function, GlobalValue, Inst, JumpTable, Opcode, SigRef, - StackSlot, StackSlotKind, Type, Value, ValueDef, ValueList, ValueLoc, + types, ArgumentLoc, Ebb, FuncRef, Function, GlobalValue, Inst, InstructionData, JumpTable, + Opcode, SigRef, StackSlot, StackSlotKind, Type, Value, ValueDef, ValueList, ValueLoc, }; use crate::isa::TargetIsa; use crate::iterators::IteratorExtras; @@ -1079,6 +1079,9 @@ impl<'a> Verifier<'a> { self.typecheck_return(inst, errors).is_ok(); self.typecheck_special(inst, ctrl_type, errors).is_ok(); + // Misuses of copy_nop instructions are fatal + self.typecheck_copy_nop(inst, errors)?; + Ok(()) } @@ -1469,6 +1472,43 @@ impl<'a> Verifier<'a> { Ok(()) } + fn typecheck_copy_nop( + &self, + inst: Inst, + errors: &mut VerifierErrors, + ) -> VerifierStepResult<()> { + if let InstructionData::Unary { + opcode: Opcode::CopyNop, + arg, + } = self.func.dfg[inst] + { + let dst_vals = self.func.dfg.inst_results(inst); + if dst_vals.len() != 1 { + return fatal!(errors, inst, "copy_nop must produce exactly one result"); + } + let dst_val = dst_vals[0]; + if self.func.dfg.value_type(dst_val) != self.func.dfg.value_type(arg) { + return fatal!(errors, inst, "copy_nop src and dst types must be the same"); + } + let src_loc = self.func.locations[arg]; + let dst_loc = self.func.locations[dst_val]; + let locs_ok = match (src_loc, dst_loc) { + (ValueLoc::Stack(src_slot), ValueLoc::Stack(dst_slot)) => src_slot == dst_slot, + _ => false, + }; + if !locs_ok { + return fatal!( + errors, + inst, + "copy_nop must refer to identical stack slots, but found {:?} vs {:?}", + src_loc, + dst_loc + ); + } + } + Ok(()) + } + fn cfg_integrity( &self, cfg: &ControlFlowGraph,