Cranelift: Redundant stack-slot-to-stack-slot copy removal. PR#773.

This is also https://bugzilla.mozilla.org/show_bug.cgi?id=1552737.

Cranelift currently has a tendency to create redundant copies (self-copies) of
values from a stack slot back to the same stack slot.  This generates a
pointless load and store and an unnecessary register use.  The copies are
created by `visit_inst` in regalloc/reload.rs.  They appear to occur mostly,
but not exclusively, at loop heads.  It's unclear why this happens.

This patch adds a special case to `visit_inst` to find such copies.  They are
converted into a new instruction, `copy_nop`, which takes and produces the
same SSA names, so as not to break any of the SSA invariants, but which has a
zero-length encoding, hence removing the copy at emission time.

`copy_nop`s source and destination operands must be stack slots and of course
the *same* stack slot.  The verifier has been enhanced to check this, since
misuse of `copy_nop` will likely lead to hard-to-find incorrect-code bugs.

Attempts were made to write a standalone .clif test case.  But these failed
because it appears the .clif parser accepts but ignores location hints that
are stack slots.  So it's impossible to write, in clif, the exact form of
`copy` instruction that triggers the transformation.
This commit is contained in:
Julian Seward
2019-05-27 11:55:23 +02:00
committed by Benjamin Bouvier
parent 6935033c9e
commit 03368895fe
7 changed files with 116 additions and 3 deletions

View File

@@ -75,6 +75,8 @@ TableAddr = InstructionFormat(table, VALUE, offset32)
RegMove = InstructionFormat(VALUE, ('src', regunit), ('dst', regunit)) RegMove = InstructionFormat(VALUE, ('src', regunit), ('dst', regunit))
CopySpecial = InstructionFormat(('src', regunit), ('dst', regunit)) CopySpecial = InstructionFormat(('src', regunit), ('dst', regunit))
CopyNop = InstructionFormat(
('src', entities.stack_slot), ('dst', entities.stack_slot))
RegSpill = InstructionFormat( RegSpill = InstructionFormat(
VALUE, ('src', regunit), ('dst', entities.stack_slot)) VALUE, ('src', regunit), ('dst', entities.stack_slot))
RegFill = InstructionFormat( RegFill = InstructionFormat(

View File

@@ -764,6 +764,16 @@ copy_special = Instruction(
ins=(src, dst), ins=(src, dst),
other_side_effects=True) other_side_effects=True)
copy_nop = Instruction(
'copy_nop', r"""
Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn
into a no-op. This instruction is for use only within Cranelift
itself.
This instruction copies its input, preserving the value type.
""",
ins=x, outs=a)
delta = Operand('delta', Int) delta = Operand('delta', Int)
adjust_sp_down = Instruction( adjust_sp_down = Instruction(
'adjust_sp_down', r""" 'adjust_sp_down', r"""

View File

@@ -341,6 +341,13 @@ enc_x86_64(x86.pop.i64, r.popq, 0x58)
X86_64.enc(base.copy_special, *r.copysp.rex(0x89, w=1)) X86_64.enc(base.copy_special, *r.copysp.rex(0x89, w=1))
X86_32.enc(base.copy_special, *r.copysp(0x89)) X86_32.enc(base.copy_special, *r.copysp(0x89))
# Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn
# into a no-op.
X86_64.enc(base.copy_nop.i64, r.stacknull, 0)
X86_64.enc(base.copy_nop.i32, r.stacknull, 0)
X86_64.enc(base.copy_nop.f64, r.stacknull, 0)
X86_64.enc(base.copy_nop.f32, r.stacknull, 0)
# Adjust SP down by a dynamic value (or up, with a negative operand). # Adjust SP down by a dynamic value (or up, with a negative operand).
X86_32.enc(base.adjust_sp_down.i32, *r.adjustsp(0x29)) X86_32.enc(base.adjust_sp_down.i32, *r.adjustsp(0x29))
X86_64.enc(base.adjust_sp_down.i64, *r.adjustsp.rex(0x29, w=1)) X86_64.enc(base.adjust_sp_down.i64, *r.adjustsp.rex(0x29, w=1))

View File

@@ -295,6 +295,9 @@ def valid_scale(iform):
# copies and no-op conversions. # copies and no-op conversions.
null = EncRecipe('null', Unary, base_size=0, ins=GPR, outs=0, emit='') null = EncRecipe('null', Unary, base_size=0, ins=GPR, outs=0, emit='')
stacknull = EncRecipe('stacknull', Unary, base_size=0, ins=StackGPR32,
outs=StackGPR32, emit='')
debugtrap = EncRecipe('debugtrap', NullAry, base_size=1, ins=(), outs=(), debugtrap = EncRecipe('debugtrap', NullAry, base_size=1, ins=(), outs=(),
emit=''' emit='''
sink.put1(0xcc); sink.put1(0xcc);

View File

@@ -1254,6 +1254,21 @@ pub fn define(
.finish(format_registry), .finish(format_registry),
); );
ig.push(
Inst::new(
"copy_nop",
r#"
Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn
into a no-op. This instruction is for use only within Cranelift itself.
This instruction copies its input, preserving the value type.
"#,
)
.operands_in(vec![x])
.operands_out(vec![a])
.finish(format_registry),
);
let delta = &operand("delta", Int); let delta = &operand("delta", Int);
ig.push( ig.push(

View File

@@ -13,7 +13,7 @@ use crate::cursor::{Cursor, EncCursor};
use crate::dominator_tree::DominatorTree; use crate::dominator_tree::DominatorTree;
use crate::entity::{SparseMap, SparseMapValue}; use crate::entity::{SparseMap, SparseMapValue};
use crate::ir::{AbiParam, ArgumentLoc, InstBuilder}; use crate::ir::{AbiParam, ArgumentLoc, InstBuilder};
use crate::ir::{Ebb, Function, Inst, InstructionData, Opcode, Value}; use crate::ir::{Ebb, Function, Inst, InstructionData, Opcode, Value, ValueLoc};
use crate::isa::RegClass; use crate::isa::RegClass;
use crate::isa::{ConstraintKind, EncInfo, Encoding, RecipeConstraints, TargetIsa}; use crate::isa::{ConstraintKind, EncInfo, Encoding, RecipeConstraints, TargetIsa};
use crate::regalloc::affinity::Affinity; use crate::regalloc::affinity::Affinity;
@@ -211,6 +211,42 @@ impl<'a> Context<'a> {
debug_assert!(self.candidates.is_empty()); debug_assert!(self.candidates.is_empty());
self.find_candidates(inst, constraints); self.find_candidates(inst, constraints);
// If we find a copy from a stack slot to the same stack slot, replace
// it with a `copy_nop` but otherwise ignore it. In particular, don't
// generate a reload immediately followed by a spill. The `copy_nop`
// has a zero-length encoding, so will disappear at emission time.
if let InstructionData::Unary {
opcode: Opcode::Copy,
arg,
} = self.cur.func.dfg[inst]
{
let dst_vals = self.cur.func.dfg.inst_results(inst);
if dst_vals.len() == 1 {
let can_transform = match (
self.cur.func.locations[arg],
self.cur.func.locations[dst_vals[0]],
) {
(ValueLoc::Stack(src_slot), ValueLoc::Stack(dst_slot)) => src_slot == dst_slot,
_ => false,
};
if can_transform {
// Convert the instruction into a `copy_nop`.
self.cur.func.dfg.replace(inst).copy_nop(arg);
let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok();
debug_assert!(ok);
// And move on to the next insn.
self.reloads.clear();
let _ = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness);
self.cur.next_inst();
self.candidates.clear();
return;
}
}
}
// Deal with all instructions not special-cased by the immediately
// preceding fragment.
if let InstructionData::Unary { if let InstructionData::Unary {
opcode: Opcode::Copy, opcode: Opcode::Copy,
.. ..

View File

@@ -65,8 +65,8 @@ use crate::ir;
use crate::ir::entities::AnyEntity; use crate::ir::entities::AnyEntity;
use crate::ir::instructions::{BranchInfo, CallInfo, InstructionFormat, ResolvedConstraint}; use crate::ir::instructions::{BranchInfo, CallInfo, InstructionFormat, ResolvedConstraint};
use crate::ir::{ use crate::ir::{
types, ArgumentLoc, Ebb, FuncRef, Function, GlobalValue, Inst, JumpTable, Opcode, SigRef, types, ArgumentLoc, Ebb, FuncRef, Function, GlobalValue, Inst, InstructionData, JumpTable,
StackSlot, StackSlotKind, Type, Value, ValueDef, ValueList, ValueLoc, Opcode, SigRef, StackSlot, StackSlotKind, Type, Value, ValueDef, ValueList, ValueLoc,
}; };
use crate::isa::TargetIsa; use crate::isa::TargetIsa;
use crate::iterators::IteratorExtras; use crate::iterators::IteratorExtras;
@@ -1079,6 +1079,9 @@ impl<'a> Verifier<'a> {
self.typecheck_return(inst, errors).is_ok(); self.typecheck_return(inst, errors).is_ok();
self.typecheck_special(inst, ctrl_type, errors).is_ok(); self.typecheck_special(inst, ctrl_type, errors).is_ok();
// Misuses of copy_nop instructions are fatal
self.typecheck_copy_nop(inst, errors)?;
Ok(()) Ok(())
} }
@@ -1469,6 +1472,43 @@ impl<'a> Verifier<'a> {
Ok(()) Ok(())
} }
fn typecheck_copy_nop(
&self,
inst: Inst,
errors: &mut VerifierErrors,
) -> VerifierStepResult<()> {
if let InstructionData::Unary {
opcode: Opcode::CopyNop,
arg,
} = self.func.dfg[inst]
{
let dst_vals = self.func.dfg.inst_results(inst);
if dst_vals.len() != 1 {
return fatal!(errors, inst, "copy_nop must produce exactly one result");
}
let dst_val = dst_vals[0];
if self.func.dfg.value_type(dst_val) != self.func.dfg.value_type(arg) {
return fatal!(errors, inst, "copy_nop src and dst types must be the same");
}
let src_loc = self.func.locations[arg];
let dst_loc = self.func.locations[dst_val];
let locs_ok = match (src_loc, dst_loc) {
(ValueLoc::Stack(src_slot), ValueLoc::Stack(dst_slot)) => src_slot == dst_slot,
_ => false,
};
if !locs_ok {
return fatal!(
errors,
inst,
"copy_nop must refer to identical stack slots, but found {:?} vs {:?}",
src_loc,
dst_loc
);
}
}
Ok(())
}
fn cfg_integrity( fn cfg_integrity(
&self, &self,
cfg: &ControlFlowGraph, cfg: &ControlFlowGraph,