diff --git a/cranelift/filetests/isa/x86/prologue-epilogue.cton b/cranelift/filetests/isa/x86/prologue-epilogue.cton
index 76aa459f94..07880f272b 100644
--- a/cranelift/filetests/isa/x86/prologue-epilogue.cton
+++ b/cranelift/filetests/isa/x86/prologue-epilogue.cton
@@ -228,4 +228,4 @@ ebb4:
 
 ; check: function %divert
 ; check: regmove v5, %rcx -> %rbx
-; check: [RexOp1popq#58,%rbx] v15 = x86_pop.i64
+; check: [Op1popq#58,%rbx] v15 = x86_pop.i64
diff --git a/cranelift/filetests/isa/x86/shrink.cton b/cranelift/filetests/isa/x86/shrink.cton
new file mode 100644
index 0000000000..1dc5fbcd42
--- /dev/null
+++ b/cranelift/filetests/isa/x86/shrink.cton
@@ -0,0 +1,29 @@
+test binemit
+set is_64bit=1
+set opt_level=best
+isa x86
+
+; Test that instruction shrinking eliminates REX prefixes when possible.
+
+; The binary encodings can be verified with the command:
+;
+;   sed -ne 's/^ *; asm: *//p' filetests/isa/x86/shrink.cton | llvm-mc -show-encoding -triple=x86_64
+;
+
+function %test_shrinking(i32) -> i32 {
+ebb0(v0: i32 [ %rdi ]):
+           ; asm: movl $0x2,%ecx
+[-,%rcx]   v1 = iconst.i32 2        ; bin: b9 00000002
+           ; asm: subl %ecx,%edi
+[-,%rdi]   v2 = isub v0, v1         ; bin: 29 cf
+           return v2
+}
+
+function %test_not_shrinking(i32) -> i32 {
+ebb0(v0: i32 [ %r8 ]):
+           ; asm: movl $0x2,%ecx
+[-,%rcx]   v1 = iconst.i32 2        ; bin: b9 00000002
+           ; asm: subl %ecx,%r8d
+[-,%r8]    v2 = isub v0, v1         ; bin: 41 29 c8
+           return v2
+}
diff --git a/lib/codegen/src/binemit/mod.rs b/lib/codegen/src/binemit/mod.rs
index 9752901ce9..653e38c9ed 100644
--- a/lib/codegen/src/binemit/mod.rs
+++ b/lib/codegen/src/binemit/mod.rs
@@ -5,9 +5,11 @@
 
 mod memorysink;
 mod relaxation;
+mod shrink;
 
 pub use self::memorysink::{MemoryCodeSink, RelocSink, TrapSink, NullTrapSink};
 pub use self::relaxation::relax_branches;
+pub use self::shrink::shrink_instructions;
 pub use regalloc::RegDiversions;
 
 use ir::{ExternalName, Function, Inst, JumpTable, SourceLoc, TrapCode};
diff --git a/lib/codegen/src/binemit/shrink.rs
b/lib/codegen/src/binemit/shrink.rs
new file mode 100644
index 0000000000..0ae15584cb
--- /dev/null
+++ b/lib/codegen/src/binemit/shrink.rs
@@ -0,0 +1,59 @@
+//! Instruction shrinking.
+//!
+//! Sometimes there are multiple valid encodings for a given instruction. Cretonne often initially
+//! chooses the largest one, because this typically provides the register allocator the most
+//! flexibility. However, once register allocation is done, this is no longer important, and we
+//! can switch to smaller encodings when possible.
+
+use ir::Function;
+use isa::TargetIsa;
+use regalloc::RegDiversions;
+
+/// Pick the smallest valid encodings for instructions.
+///
+/// For every instruction in the layout that already has a legal encoding, this asks the ISA for
+/// all legal encodings, keeps those whose recipe constraints are satisfied given the register
+/// diversions seen so far in the EBB, and switches to the candidate with the smallest
+/// `encinfo.bytes` value.
+///
+/// # Panics
+///
+/// Panics if no candidate encoding has its constraints satisfied.
+pub fn shrink_instructions(func: &mut Function, isa: &TargetIsa) {
+    let encinfo = isa.encoding_info();
+    let mut divert = RegDiversions::new();
+
+    for ebb in func.layout.ebbs() {
+        // Reset the diversion tracker at the start of each EBB.
+        divert.clear();
+        for inst in func.layout.ebb_insts(ebb) {
+            let enc = func.encodings[inst];
+            if enc.is_legal() {
+                let ctrl_type = func.dfg.ctrl_typevar(inst);
+
+                // Pick the smallest encoding whose constraints are satisfied.
+                let best_enc = isa.legal_encodings(func, &func.dfg[inst], ctrl_type)
+                    .filter(|e| {
+                        encinfo.constraints[e.recipe()].satisfied(inst, &divert, &func)
+                    })
+                    .min_by_key(|e| encinfo.bytes(*e))
+                    .expect("no legal encoding satisfies the instruction's constraints");
+
+                if best_enc != enc {
+                    func.encodings[inst] = best_enc;
+
+                    dbg!(
+                        "Shrunk [{}] to [{}] in {}, reducing the size from {} to {}",
+                        encinfo.display(enc),
+                        encinfo.display(best_enc),
+                        func.dfg.display_inst(inst, isa),
+                        encinfo.bytes(enc),
+                        encinfo.bytes(best_enc)
+                    );
+                }
+            }
+            // Update the diversion tracker with this instruction before moving on.
+            divert.apply(&func.dfg[inst]);
+        }
+    }
+}
diff --git a/lib/codegen/src/context.rs b/lib/codegen/src/context.rs
index 95e78ff128..42fde1cc4a 100644
--- a/lib/codegen/src/context.rs
+++ b/lib/codegen/src/context.rs
@@ -9,7 +9,7 @@
 //! contexts concurrently. Typically, you would have one context per compilation thread and only a
 //! single ISA instance.
 
-use binemit::{relax_branches, CodeOffset, MemoryCodeSink, RelocSink, TrapSink};
+use binemit::{relax_branches, shrink_instructions, CodeOffset, MemoryCodeSink, RelocSink, TrapSink};
 use dce::do_dce;
 use dominator_tree::DominatorTree;
 use flowgraph::ControlFlowGraph;
@@ -140,6 +140,9 @@ impl Context {
         }
         self.regalloc(isa)?;
         self.prologue_epilogue(isa)?;
+        if isa.flags().opt_level() == OptLevel::Best {
+            self.shrink_instructions(isa)?;
+        }
         self.relax_branches(isa)
     }
 
@@ -294,6 +297,14 @@ impl Context {
         Ok(())
     }
 
+    /// Run the instruction shrinking pass, followed by `verify_if` and `verify_locations_if`.
+    pub fn shrink_instructions(&mut self, isa: &TargetIsa) -> CtonResult {
+        shrink_instructions(&mut self.func, isa);
+        self.verify_if(isa)?;
+        self.verify_locations_if(isa)?;
+        Ok(())
+    }
+
     /// Run the branch relaxation pass and return the final code size.
     pub fn relax_branches(&mut self, isa: &TargetIsa) -> Result {
         let code_size = relax_branches(&mut self.func, isa)?;