From bce8af97e32cbfdbcda6654949ace248f51efb88 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Thu, 19 Apr 2018 14:03:38 -0700 Subject: [PATCH] Add an instruction shrinking pass. When an instruction has multiple valid encodings, such as with and without a REX prefix on x86-64, Cretonne typically picks the encoding which gives the register allocator the most flexibility, which is typically the longest encoding. This patch adds a pass that runs after register allocation that picks the smallest encoding, working within the constraints of the register allocator's choices. The result is smaller and easier to read encodings. In the future, we may want to merge this pass into the relaxation pass, or possibly fold it into the final encoding step, however for now, a discrete pass will suffice. --- .../filetests/isa/x86/prologue-epilogue.cton | 2 +- cranelift/filetests/isa/x86/shrink.cton | 29 +++++++++++ lib/codegen/src/binemit/mod.rs | 2 + lib/codegen/src/binemit/shrink.rs | 49 +++++++++++++++++++ lib/codegen/src/context.rs | 13 ++++- 5 files changed, 93 insertions(+), 2 deletions(-) create mode 100644 cranelift/filetests/isa/x86/shrink.cton create mode 100644 lib/codegen/src/binemit/shrink.rs diff --git a/cranelift/filetests/isa/x86/prologue-epilogue.cton b/cranelift/filetests/isa/x86/prologue-epilogue.cton index 76aa459f94..07880f272b 100644 --- a/cranelift/filetests/isa/x86/prologue-epilogue.cton +++ b/cranelift/filetests/isa/x86/prologue-epilogue.cton @@ -228,4 +228,4 @@ ebb4: ; check: function %divert ; check: regmove v5, %rcx -> %rbx -; check: [RexOp1popq#58,%rbx] v15 = x86_pop.i64 +; check: [Op1popq#58,%rbx] v15 = x86_pop.i64 diff --git a/cranelift/filetests/isa/x86/shrink.cton b/cranelift/filetests/isa/x86/shrink.cton new file mode 100644 index 0000000000..1dc5fbcd42 --- /dev/null +++ b/cranelift/filetests/isa/x86/shrink.cton @@ -0,0 +1,29 @@ +test binemit +set is_64bit=1 +set opt_level=best +isa x86 + +; Test that instruction shrinking eliminates REX prefixes when 
possible. + +; The binary encodings can be verified with the command: +; +; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/shrink.cton | llvm-mc -show-encoding -triple=x86_64 +; + +function %test_shrinking(i32) -> i32 { +ebb0(v0: i32 [ %rdi ]): + ; asm: movl $0x2,%ecx +[-,%rcx] v1 = iconst.i32 2 ; bin: b9 00000002 + ; asm: subl %ecx,%edi +[-,%rdi] v2 = isub v0, v1 ; bin: 29 cf + return v2 +} + +function %test_not_shrinking(i32) -> i32 { +ebb0(v0: i32 [ %r8 ]): + ; asm: movl $0x2,%ecx +[-,%rcx] v1 = iconst.i32 2 ; bin: b9 00000002 + ; asm: subl %ecx,%r8d +[-,%r8] v2 = isub v0, v1 ; bin: 41 29 c8 + return v2 +} diff --git a/lib/codegen/src/binemit/mod.rs b/lib/codegen/src/binemit/mod.rs index 9752901ce9..653e38c9ed 100644 --- a/lib/codegen/src/binemit/mod.rs +++ b/lib/codegen/src/binemit/mod.rs @@ -5,9 +5,11 @@ mod memorysink; mod relaxation; +mod shrink; pub use self::memorysink::{MemoryCodeSink, RelocSink, TrapSink, NullTrapSink}; pub use self::relaxation::relax_branches; +pub use self::shrink::shrink_instructions; pub use regalloc::RegDiversions; use ir::{ExternalName, Function, Inst, JumpTable, SourceLoc, TrapCode}; diff --git a/lib/codegen/src/binemit/shrink.rs b/lib/codegen/src/binemit/shrink.rs new file mode 100644 index 0000000000..0ae15584cb --- /dev/null +++ b/lib/codegen/src/binemit/shrink.rs @@ -0,0 +1,49 @@ +//! Instruction shrinking. +//! +//! Sometimes there are multiple valid encodings for a given instruction. Cretonne often initially +//! chooses the largest one, because this typically provides the register allocator the most +//! flexibility. However, once register allocation is done, this is no longer important, and we +//! can switch to smaller encodings when possible. + +use ir::Function; +use isa::TargetIsa; +use regalloc::RegDiversions; + +/// Pick the smallest valid encodings for instructions. 
+pub fn shrink_instructions(func: &mut Function, isa: &TargetIsa) { + let encinfo = isa.encoding_info(); + let mut divert = RegDiversions::new(); + + for ebb in func.layout.ebbs() { + divert.clear(); + for inst in func.layout.ebb_insts(ebb) { + let enc = func.encodings[inst]; + if enc.is_legal() { + let ctrl_type = func.dfg.ctrl_typevar(inst); + + // Pick the smallest encoding whose constraints are satisfied. + let best_enc = isa.legal_encodings(func, &func.dfg[inst], ctrl_type) + .filter(|e| { + encinfo.constraints[e.recipe()].satisfied(inst, &divert, &func) + }) + .min_by_key(|e| encinfo.bytes(*e)) + .unwrap(); + + if best_enc != enc { + func.encodings[inst] = best_enc; + + dbg!( + "Shrunk [{}] to [{}] in {}, reducing the size from {} to {}", + encinfo.display(enc), + encinfo.display(best_enc), + func.dfg.display_inst(inst, isa), + encinfo.bytes(enc), + encinfo.bytes(best_enc) + ); + } + + } + divert.apply(&func.dfg[inst]); + } + } +} diff --git a/lib/codegen/src/context.rs b/lib/codegen/src/context.rs index 95e78ff128..42fde1cc4a 100644 --- a/lib/codegen/src/context.rs +++ b/lib/codegen/src/context.rs @@ -9,7 +9,7 @@ //! contexts concurrently. Typically, you would have one context per compilation thread and only a //! single ISA instance. -use binemit::{relax_branches, CodeOffset, MemoryCodeSink, RelocSink, TrapSink}; +use binemit::{relax_branches, shrink_instructions, CodeOffset, MemoryCodeSink, RelocSink, TrapSink}; use dce::do_dce; use dominator_tree::DominatorTree; use flowgraph::ControlFlowGraph; @@ -140,6 +140,9 @@ impl Context { } self.regalloc(isa)?; self.prologue_epilogue(isa)?; + if isa.flags().opt_level() == OptLevel::Best { + self.shrink_instructions(isa)?; + } self.relax_branches(isa) } @@ -294,6 +297,14 @@ impl Context { Ok(()) } + /// Run the instruction shrinking pass. 
+ pub fn shrink_instructions(&mut self, isa: &TargetIsa) -> CtonResult { + shrink_instructions(&mut self.func, isa); + self.verify_if(isa)?; + self.verify_locations_if(isa)?; + Ok(()) + } + /// Run the branch relaxation pass and return the final code size. pub fn relax_branches(&mut self, isa: &TargetIsa) -> Result { let code_size = relax_branches(&mut self.func, isa)?;