Cranelift: MachBuffer: apply branch peephole opts one last time at buffer tail. (#4652)
The `MachBuffer` applies a set of peephole-optimization rules to do branch threading, leverage fallthrough paths, eliminate empty blocks, and flip conditional branches where needed to make branches more efficient starting from naive always-branch-at-end-of-BB code. This works by applying the rules at every label-bind, which is equivalent to applying them at the end of every basic block, where branches are usually inserted. However, this misses one case: the end of the buffer! Currently we don't optimize any redundant or foldable branches at the very end of the machine code. This usually doesn't matter when the function ends in an epilogue with `ret` as the last instruction. However, when cold blocks exist, it can actually matter. Thanks to @mchesser for pointing out this issue in #4636.
This commit is contained in:
@@ -329,7 +329,7 @@ mod test {
|
||||
// 00000018 81E834120000 sub eax,0x1234
|
||||
// 0000001E 4401C0 add eax,r8d
|
||||
// 00000021 85FF test edi,edi
|
||||
// 00000023 0F851D000000 jnz near 0x46
|
||||
// 00000023 0F8505000000 jnz near 0x2e
|
||||
// 00000029 4889EC mov rsp,rbp
|
||||
// 0000002C 5D pop rbp
|
||||
// 0000002D C3 ret
|
||||
@@ -338,13 +338,12 @@ mod test {
|
||||
// 00000038 4585C0 test r8d,r8d
|
||||
// 0000003B 0F85EDFFFFFF jnz near 0x2e
|
||||
// 00000041 E9CFFFFFFF jmp 0x15
|
||||
// 00000046 E9E3FFFFFF jmp 0x2e
|
||||
|
||||
let golden = vec![
|
||||
85, 72, 137, 229, 129, 199, 52, 18, 0, 0, 133, 255, 15, 132, 28, 0, 0, 0, 73, 137, 248,
|
||||
72, 137, 248, 129, 232, 52, 18, 0, 0, 68, 1, 192, 133, 255, 15, 133, 29, 0, 0, 0, 72,
|
||||
72, 137, 248, 129, 232, 52, 18, 0, 0, 68, 1, 192, 133, 255, 15, 133, 5, 0, 0, 0, 72,
|
||||
137, 236, 93, 195, 73, 137, 248, 65, 129, 192, 52, 18, 0, 0, 69, 133, 192, 15, 133,
|
||||
237, 255, 255, 255, 233, 207, 255, 255, 255, 233, 227, 255, 255, 255,
|
||||
237, 255, 255, 255, 233, 207, 255, 255, 255,
|
||||
];
|
||||
|
||||
assert_eq!(code, &golden[..]);
|
||||
|
||||
@@ -1214,6 +1214,10 @@ impl<I: VCodeInst> MachBuffer<I> {
|
||||
pub fn finish(mut self) -> MachBufferFinalized {
|
||||
let _tt = timing::vcode_emit_finish();
|
||||
|
||||
// Do any optimizations on branches at tail of buffer, as if we
|
||||
// had bound one last label.
|
||||
self.optimize_branches();
|
||||
|
||||
self.finish_emission_maybe_forcing_veneers(false);
|
||||
|
||||
let mut srclocs = self.srclocs;
|
||||
|
||||
Reference in New Issue
Block a user