From de8d44d0e580d6fb90cdf893882f7b8ffb82314e Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 9 Aug 2022 10:38:48 -0700 Subject: [PATCH] Cranelift: MachBuffer: apply branch peephole opts one last time at buffer tail. (#4652) The `MachBuffer` applies a set of peephole-optimization rules to do branch threading, leverage fallthrough paths, eliminate empty blocks, and flip conditional branches where needed to make branches more efficient starting from naive always-branch-at-end-of-BB code. This works by applying the rules at every label-bind, which is equivalent to applying them at the end of every basic block, where branches are usually inserted. However, this misses one case: the end of the buffer! Currently we don't optimize any redundant or foldable branches at the very end of the machine code. This usually doesn't matter when the function ends in an epilogue with `ret` as the last instruction. However, when cold blocks exist, it can actually matter. Thanks to @mchesser for pointing out this issue in #4636. --- cranelift/codegen/src/isa/x64/mod.rs | 7 +++---- cranelift/codegen/src/machinst/buffer.rs | 4 ++++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs index 9a16a757a6..8c7415d88a 100644 --- a/cranelift/codegen/src/isa/x64/mod.rs +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -329,7 +329,7 @@ mod test { // 00000018 81E834120000 sub eax,0x1234 // 0000001E 4401C0 add eax,r8d // 00000021 85FF test edi,edi - // 00000023 0F851D000000 jnz near 0x46 + // 00000023 0F8505000000 jnz near 0x2e // 00000029 4889EC mov rsp,rbp // 0000002C 5D pop rbp // 0000002D C3 ret @@ -338,13 +338,12 @@ mod test { // 00000038 4585C0 test r8d,r8d // 0000003B 0F85EDFFFFFF jnz near 0x2e // 00000041 E9CFFFFFFF jmp 0x15 - // 00000046 E9E3FFFFFF jmp 0x2e let golden = vec![ 85, 72, 137, 229, 129, 199, 52, 18, 0, 0, 133, 255, 15, 132, 28, 0, 0, 0, 73, 137, 248, - 72, 137, 248, 129, 232, 52, 18, 0, 0, 68, 1, 192, 133, 255, 15, 133, 29, 0, 0, 0, 72, + 72, 137, 248, 129, 232, 52, 18, 0, 0, 68, 1, 192, 133, 255, 15, 133, 5, 0, 0, 0, 72, 137, 236, 93, 195, 73, 137, 248, 65, 129, 192, 52, 18, 0, 0, 69, 133, 192, 15, 133, - 237, 255, 255, 255, 233, 207, 255, 255, 255, 233, 227, 255, 255, 255, + 237, 255, 255, 255, 233, 207, 255, 255, 255, ]; assert_eq!(code, &golden[..]); diff --git a/cranelift/codegen/src/machinst/buffer.rs b/cranelift/codegen/src/machinst/buffer.rs index 0f915a96dd..084346cc4e 100644 --- a/cranelift/codegen/src/machinst/buffer.rs +++ b/cranelift/codegen/src/machinst/buffer.rs @@ -1214,6 +1214,10 @@ impl MachBuffer { pub fn finish(mut self) -> MachBufferFinalized { let _tt = timing::vcode_emit_finish(); + // Do any optimizations on branches at tail of buffer, as if we + // had bound one last label. + self.optimize_branches(); + self.finish_emission_maybe_forcing_veneers(false); let mut srclocs = self.srclocs;