Fix MachBuffer branch optimization.

This patch fixes a subtle bug in the MachBuffer branch optimization:
while tracking labels at the current buffer tail using a
sorted-by-offset array, the code did not update this array properly when
redirecting labels. As a result, the dead-branch removal was unsafe,
because not every label pointing to a branch was guaranteed to be
redirected properly first.

Discovered while doing performance testing: bz2 silently took a wrong
branch and exited compression early. (Eek!)

To address this problem, this patch adopts a slightly simpler data
structure: we only track the labels *at the current buffer tail*, and
*at the start of each branch*, and we're careful to update these
appropriately to maintain the invariants. I'm pretty confident that this
is correct now, but we should (still) fuzz it a bunch, because wrong
control flow scares me a nonzero amount. I should probably also actually
write out a formal proof that these data-structure updates are correct.
The optimizations are important for performance (removing useless empty
blocks, and taking advantage of any fallthrough opportunities at all),
so I don't think we would want to drop them entirely.
This commit is contained in:
Chris Fallin
2020-05-19 18:01:53 -07:00
parent 5c39b74eb8
commit e11094b28b
3 changed files with 103 additions and 71 deletions

View File

@@ -1163,17 +1163,13 @@ impl MachInstEmit for Inst {
} }
&Inst::Jump { ref dest } => { &Inst::Jump { ref dest } => {
let off = sink.cur_offset(); let off = sink.cur_offset();
// Emit the jump itself. // Indicate that the jump uses a label, if so, so that a fixup can occur later.
sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero()));
// After the jump has been emitted, indicate that it uses a
// label, if so, so that a fixup can occur later. This happens
// after we emit the bytes because the fixup might occur right
// away (so the bytes must actually exist now).
if let Some(l) = dest.as_label() { if let Some(l) = dest.as_label() {
sink.use_label_at_offset(off, l, LabelUse::Branch26); sink.use_label_at_offset(off, l, LabelUse::Branch26);
let cur_off = sink.cur_offset(); sink.add_uncond_branch(off, off + 4, l);
sink.add_uncond_branch(off, cur_off, l);
} }
// Emit the jump itself.
sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero()));
} }
&Inst::Ret => { &Inst::Ret => {
sink.put4(0xd65f03c0); sink.put4(0xd65f03c0);
@@ -1208,28 +1204,27 @@ impl MachInstEmit for Inst {
} => { } => {
// Conditional part first. // Conditional part first.
let cond_off = sink.cur_offset(); let cond_off = sink.cur_offset();
sink.put4(enc_conditional_br(taken, kind));
if let Some(l) = taken.as_label() { if let Some(l) = taken.as_label() {
sink.use_label_at_offset(cond_off, l, LabelUse::Branch19); sink.use_label_at_offset(cond_off, l, LabelUse::Branch19);
let cur_off = sink.cur_offset();
let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes(); let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes();
sink.add_cond_branch(cond_off, cur_off, l, &inverted[..]); sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
} }
// Unconditional part. sink.put4(enc_conditional_br(taken, kind));
// Unconditional part next.
let uncond_off = sink.cur_offset(); let uncond_off = sink.cur_offset();
sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
if let Some(l) = not_taken.as_label() { if let Some(l) = not_taken.as_label() {
sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26); sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
let cur_off = sink.cur_offset(); sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
sink.add_uncond_branch(uncond_off, cur_off, l);
} }
sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
} }
&Inst::OneWayCondBr { target, kind } => { &Inst::OneWayCondBr { target, kind } => {
let off = sink.cur_offset(); let off = sink.cur_offset();
sink.put4(enc_conditional_br(target, kind));
if let Some(l) = target.as_label() { if let Some(l) = target.as_label() {
sink.use_label_at_offset(off, l, LabelUse::Branch19); sink.use_label_at_offset(off, l, LabelUse::Branch19);
} }
sink.put4(enc_conditional_br(target, kind));
} }
&Inst::IndirectBr { rn, .. } => { &Inst::IndirectBr { rn, .. } => {
sink.put4(enc_br(rn)); sink.put4(enc_br(rn));
@@ -1307,12 +1302,12 @@ impl MachInstEmit for Inst {
for &target in targets.iter() { for &target in targets.iter() {
let word_off = sink.cur_offset(); let word_off = sink.cur_offset();
let off_into_table = word_off - jt_off; let off_into_table = word_off - jt_off;
sink.put4(off_into_table);
sink.use_label_at_offset( sink.use_label_at_offset(
word_off, word_off,
target.as_label().unwrap(), target.as_label().unwrap(),
LabelUse::PCRel32, LabelUse::PCRel32,
); );
sink.put4(off_into_table);
} }
// Lowering produces an EmitIsland before using a JTSequence, so we can safely // Lowering produces an EmitIsland before using a JTSequence, so we can safely

View File

@@ -812,14 +812,14 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
let disp = dest.as_offset32_or_zero() - 5; let disp = dest.as_offset32_or_zero() - 5;
let disp = disp as u32; let disp = disp as u32;
let br_start = sink.cur_offset(); let br_start = sink.cur_offset();
sink.put1(0xE9); let br_disp_off = br_start + 1;
let br_disp_off = sink.cur_offset(); let br_end = br_start + 5;
sink.put4(disp);
let br_end = sink.cur_offset();
if let Some(l) = dest.as_label() { if let Some(l) = dest.as_label() {
sink.use_label_at_offset(br_disp_off, l, LabelUse::Rel32); sink.use_label_at_offset(br_disp_off, l, LabelUse::Rel32);
sink.add_uncond_branch(br_start, br_end, l); sink.add_uncond_branch(br_start, br_end, l);
} }
sink.put1(0xE9);
sink.put4(disp);
} }
Inst::JmpCondSymm { Inst::JmpCondSymm {
cc, cc,
@@ -835,31 +835,31 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
let taken_disp = taken.as_offset32_or_zero() - 6; let taken_disp = taken.as_offset32_or_zero() - 6;
let taken_disp = taken_disp as u32; let taken_disp = taken_disp as u32;
let cond_start = sink.cur_offset(); let cond_start = sink.cur_offset();
sink.put1(0x0F); let cond_disp_off = cond_start + 2;
sink.put1(0x80 + cc.get_enc()); let cond_end = cond_start + 6;
let cond_disp_off = sink.cur_offset();
sink.put4(taken_disp);
let cond_end = sink.cur_offset();
if let Some(l) = taken.as_label() { if let Some(l) = taken.as_label() {
sink.use_label_at_offset(cond_disp_off, l, LabelUse::Rel32); sink.use_label_at_offset(cond_disp_off, l, LabelUse::Rel32);
let inverted: [u8; 6] = let inverted: [u8; 6] =
[0x0F, 0x80 + (cc.invert().get_enc()), 0xFA, 0xFF, 0xFF, 0xFF]; [0x0F, 0x80 + (cc.invert().get_enc()), 0xFA, 0xFF, 0xFF, 0xFF];
sink.add_cond_branch(cond_start, cond_end, l, &inverted[..]); sink.add_cond_branch(cond_start, cond_end, l, &inverted[..]);
} }
sink.put1(0x0F);
sink.put1(0x80 + cc.get_enc());
sink.put4(taken_disp);
// Unconditional part. // Unconditional part.
let nt_disp = not_taken.as_offset32_or_zero() - 5; let nt_disp = not_taken.as_offset32_or_zero() - 5;
let nt_disp = nt_disp as u32; let nt_disp = nt_disp as u32;
let uncond_start = sink.cur_offset(); let uncond_start = sink.cur_offset();
sink.put1(0xE9); let uncond_disp_off = uncond_start + 1;
let uncond_disp_off = sink.cur_offset(); let uncond_end = uncond_start + 5;
sink.put4(nt_disp);
let uncond_end = sink.cur_offset();
if let Some(l) = not_taken.as_label() { if let Some(l) = not_taken.as_label() {
sink.use_label_at_offset(uncond_disp_off, l, LabelUse::Rel32); sink.use_label_at_offset(uncond_disp_off, l, LabelUse::Rel32);
sink.add_uncond_branch(uncond_start, uncond_end, l); sink.add_uncond_branch(uncond_start, uncond_end, l);
} }
sink.put1(0xE9);
sink.put4(nt_disp);
} }
Inst::JmpUnknown { target } => { Inst::JmpUnknown { target } => {
match target { match target {

View File

@@ -162,8 +162,10 @@ pub struct MachBuffer<I: VCodeInst> {
/// Latest branches, to facilitate in-place editing for better fallthrough /// Latest branches, to facilitate in-place editing for better fallthrough
/// behavior and empty-block removal. /// behavior and empty-block removal.
latest_branches: SmallVec<[MachBranch; 4]>, latest_branches: SmallVec<[MachBranch; 4]>,
/// All labels, in offset order. /// All labels at the current offset (emission tail). For correctness, this
labels_by_offset: SmallVec<[(MachLabel, CodeOffset); 16]>, /// *must* be complete, because we rely on it to update labels when we
/// truncate branches.
labels_at_tail: SmallVec<[MachLabel; 4]>,
} }
/// A `MachBuffer` once emission is completed: holds generated code and records, /// A `MachBuffer` once emission is completed: holds generated code and records,
@@ -223,7 +225,7 @@ impl<I: VCodeInst> MachBuffer<I> {
island_deadline: UNKNOWN_LABEL_OFFSET, island_deadline: UNKNOWN_LABEL_OFFSET,
island_worst_case_size: 0, island_worst_case_size: 0,
latest_branches: SmallVec::new(), latest_branches: SmallVec::new(),
labels_by_offset: SmallVec::new(), labels_at_tail: SmallVec::new(),
} }
} }
@@ -236,6 +238,7 @@ impl<I: VCodeInst> MachBuffer<I> {
pub fn put1(&mut self, value: u8) { pub fn put1(&mut self, value: u8) {
trace!("MachBuffer: put byte @ {}: {:x}", self.cur_offset(), value); trace!("MachBuffer: put byte @ {}: {:x}", self.cur_offset(), value);
self.data.push(value); self.data.push(value);
self.labels_at_tail.clear();
} }
/// Add 2 bytes. /// Add 2 bytes.
@@ -247,6 +250,7 @@ impl<I: VCodeInst> MachBuffer<I> {
); );
let bytes = value.to_le_bytes(); let bytes = value.to_le_bytes();
self.data.extend_from_slice(&bytes[..]); self.data.extend_from_slice(&bytes[..]);
self.labels_at_tail.clear();
} }
/// Add 4 bytes. /// Add 4 bytes.
@@ -258,6 +262,7 @@ impl<I: VCodeInst> MachBuffer<I> {
); );
let bytes = value.to_le_bytes(); let bytes = value.to_le_bytes();
self.data.extend_from_slice(&bytes[..]); self.data.extend_from_slice(&bytes[..]);
self.labels_at_tail.clear();
} }
/// Add 8 bytes. /// Add 8 bytes.
@@ -269,6 +274,7 @@ impl<I: VCodeInst> MachBuffer<I> {
); );
let bytes = value.to_le_bytes(); let bytes = value.to_le_bytes();
self.data.extend_from_slice(&bytes[..]); self.data.extend_from_slice(&bytes[..]);
self.labels_at_tail.clear();
} }
/// Add a slice of bytes. /// Add a slice of bytes.
@@ -279,6 +285,7 @@ impl<I: VCodeInst> MachBuffer<I> {
data.len() data.len()
); );
self.data.extend_from_slice(data); self.data.extend_from_slice(data);
self.labels_at_tail.clear();
} }
/// Reserve appended space and return a mutable slice referring to it. /// Reserve appended space and return a mutable slice referring to it.
@@ -287,6 +294,7 @@ impl<I: VCodeInst> MachBuffer<I> {
let off = self.data.len(); let off = self.data.len();
let new_len = self.data.len() + len; let new_len = self.data.len() + len;
self.data.resize(new_len, 0); self.data.resize(new_len, 0);
self.labels_at_tail.clear();
&mut self.data[off..] &mut self.data[off..]
} }
@@ -327,7 +335,7 @@ impl<I: VCodeInst> MachBuffer<I> {
); );
let offset = self.cur_offset(); let offset = self.cur_offset();
self.label_offsets[label.0 as usize] = offset; self.label_offsets[label.0 as usize] = offset;
self.labels_by_offset.push((label, offset)); self.labels_at_tail.push(label);
self.optimize_branches(); self.optimize_branches();
} }
@@ -345,9 +353,8 @@ impl<I: VCodeInst> MachBuffer<I> {
/// branch-instruction format) at the current offset. This is like a /// branch-instruction format) at the current offset. This is like a
/// relocation, but handled internally. /// relocation, but handled internally.
/// ///
/// Because the offset of the label may already be known and the patch may /// This can be called before the branch is actually emitted; fixups will
/// happen immediately, the buffer must already contain bytes at `offset` up /// not happen until an island is emitted or the buffer is finished.
/// to `offset + kind.patch_size()`.
pub fn use_label_at_offset(&mut self, offset: CodeOffset, label: MachLabel, kind: I::LabelUse) { pub fn use_label_at_offset(&mut self, offset: CodeOffset, label: MachLabel, kind: I::LabelUse) {
trace!( trace!(
"MachBuffer: use_label_at_offset: offset {} label {:?} kind {:?}", "MachBuffer: use_label_at_offset: offset {} label {:?} kind {:?}",
@@ -355,7 +362,6 @@ impl<I: VCodeInst> MachBuffer<I> {
label, label,
kind kind
); );
debug_assert!(offset + kind.patch_size() <= self.cur_offset());
// Add the fixup, and update the worst-case island size based on a // Add the fixup, and update the worst-case island size based on a
// veneer for this label use. // veneer for this label use.
@@ -377,7 +383,17 @@ impl<I: VCodeInst> MachBuffer<I> {
/// Inform the buffer of an unconditional branch at the given offset, /// Inform the buffer of an unconditional branch at the given offset,
/// targetting the given label. May be used to optimize branches. /// targetting the given label. May be used to optimize branches.
/// The last added label-use must correspond to this branch. /// The last added label-use must correspond to this branch.
/// This must be called when the current offset is equal to `start`; i.e.,
/// before actually emitting the branch. This implies that for a branch that
/// uses a label and is eligible for optimizations by the MachBuffer, the
/// proper sequence is:
///
/// - Call `use_label_at_offset()` to emit the fixup record.
/// - Call `add_uncond_branch()` to make note of the branch.
/// - Emit the bytes for the branch's machine code.
pub fn add_uncond_branch(&mut self, start: CodeOffset, end: CodeOffset, target: MachLabel) { pub fn add_uncond_branch(&mut self, start: CodeOffset, end: CodeOffset, target: MachLabel) {
assert!(self.cur_offset() == start);
debug_assert!(end > start);
assert!(!self.fixup_records.is_empty()); assert!(!self.fixup_records.is_empty());
let fixup = self.fixup_records.len() - 1; let fixup = self.fixup_records.len() - 1;
self.latest_branches.push(MachBranch { self.latest_branches.push(MachBranch {
@@ -386,6 +402,7 @@ impl<I: VCodeInst> MachBuffer<I> {
target, target,
fixup, fixup,
inverted: None, inverted: None,
labels_at_this_branch: self.labels_at_tail.clone(),
}); });
} }
@@ -399,6 +416,8 @@ impl<I: VCodeInst> MachBuffer<I> {
target: MachLabel, target: MachLabel,
inverted: &[u8], inverted: &[u8],
) { ) {
assert!(self.cur_offset() == start);
debug_assert!(end > start);
assert!(!self.fixup_records.is_empty()); assert!(!self.fixup_records.is_empty());
let fixup = self.fixup_records.len() - 1; let fixup = self.fixup_records.len() - 1;
let inverted = Some(SmallVec::from(inverted)); let inverted = Some(SmallVec::from(inverted));
@@ -408,6 +427,7 @@ impl<I: VCodeInst> MachBuffer<I> {
target, target,
fixup, fixup,
inverted, inverted,
labels_at_this_branch: self.labels_at_tail.clone(),
}); });
} }
@@ -422,22 +442,18 @@ impl<I: VCodeInst> MachBuffer<I> {
b, b,
cur_off cur_off
); );
for &mut (l, ref mut off) in self.labels_by_offset.iter_mut().rev() { for &l in &self.labels_at_tail {
if *off > cur_off {
*off = cur_off;
trace!(" -> label {:?} reassigned to {}", l, cur_off);
self.label_offsets[l.0 as usize] = cur_off; self.label_offsets[l.0 as usize] = cur_off;
} else {
break;
}
} }
self.labels_at_tail
.extend(b.labels_at_this_branch.into_iter());
} }
fn optimize_branches(&mut self) { fn optimize_branches(&mut self) {
trace!( trace!(
"enter optimize_branches:\n b = {:?}\n l = {:?}\n f = {:?}", "enter optimize_branches:\n b = {:?}\n l = {:?}\n f = {:?}",
self.latest_branches, self.latest_branches,
self.labels_by_offset, self.labels_at_tail,
self.fixup_records self.fixup_records
); );
while let Some(b) = self.latest_branches.last() { while let Some(b) = self.latest_branches.last() {
@@ -451,6 +467,17 @@ impl<I: VCodeInst> MachBuffer<I> {
break; break;
} }
// For any branch, conditional or unconditional:
// - If the target is a label at the current offset, then remove
// the conditional branch, and reset all labels that targetted
// the current offset (end of branch) to the truncated
// end-of-code.
if self.resolve_label_offset(b.target) == cur_off {
trace!("branch with target == cur off; truncating");
self.truncate_last_branch();
continue;
}
// If latest is an unconditional branch: // If latest is an unconditional branch:
// - For each label at this point, make the label an alias of // - For each label at this point, make the label an alias of
// the branch target. We can now assume below that the // the branch target. We can now assume below that the
@@ -458,8 +485,8 @@ impl<I: VCodeInst> MachBuffer<I> {
// are free to remove it in an optimization. // are free to remove it in an optimization.
// - If there is a prior unconditional branch that ends just before // - If there is a prior unconditional branch that ends just before
// this one begins, then we can truncate this branch, because it is // this one begins, then we can truncate this branch, because it is
// entirely unreachable (due to above). Trim the end of the // entirely unreachable (due to above). Do so and continue around
// `labels_by_offset` array and continue around the loop. // the loop.
// - If there is a prior conditional branch whose target label // - If there is a prior conditional branch whose target label
// resolves to the current offset (branches around the // resolves to the current offset (branches around the
// unconditional branch), then remove the unconditional branch, // unconditional branch), then remove the unconditional branch,
@@ -467,20 +494,35 @@ impl<I: VCodeInst> MachBuffer<I> {
// conditional instead. // conditional instead.
if b.is_uncond() { if b.is_uncond() {
// Set any label equal to current branch's start as an alias of // Set any label equal to current branch's start as an alias of
// the branch's target. // the branch's target, if the target is not the branch itself
for &(l, off) in self.labels_by_offset.iter().rev() { // (i.e., an infinite loop).
trace!(" -> uncond: latest label {:?} at off {}", l, off); if self.resolve_label_offset(b.target) != b.start {
if off > b.start { let redirected = b.labels_at_this_branch.len();
continue; for &l in &b.labels_at_this_branch {
} else if off == b.start { trace!(
trace!(" -> setting alias to {:?}", b.target); " -> label at start of branch {:?} redirected to target {:?}",
l,
b.target
);
self.label_aliases[l.0 as usize] = b.target; self.label_aliases[l.0 as usize] = b.target;
} else { // NOTE: we continue to ensure the invariant that labels
break; // pointing to tail of buffer are in `labels_at_tail`
} // because we already ensured above that the last branch
// cannot have a target of `cur_off`; so we never have
// to put the label into `labels_at_tail` when moving it
// here.
} }
// Maintain invariant: all branches have been redirected
// and are no longer pointing at the start of this branch.
let mut_b = self.latest_branches.last_mut().unwrap();
mut_b.labels_at_this_branch.clear();
// If the branch target is the next offset, if redirected > 0 {
trace!(" -> after label redirects, restarting loop");
continue;
}
}
let b = self.latest_branches.last().unwrap();
// Examine any immediately preceding branch. // Examine any immediately preceding branch.
if self.latest_branches.len() > 1 { if self.latest_branches.len() > 1 {
@@ -524,16 +566,6 @@ impl<I: VCodeInst> MachBuffer<I> {
} }
} }
// For any branch, conditional or unconditional:
// - If the target is a label at the current offset, then remove
// the conditional branch, and reset all labels that targetted
// the current offset (end of branch) to the truncated
// end-of-code.
if self.resolve_label_offset(b.target) == cur_off {
trace!("branch with target == cur off; truncating");
self.truncate_last_branch();
}
// If we couldn't do anything with the last branch, then break. // If we couldn't do anything with the last branch, then break.
break; break;
} }
@@ -543,7 +575,7 @@ impl<I: VCodeInst> MachBuffer<I> {
trace!( trace!(
"leave optimize_branches:\n b = {:?}\n l = {:?}\n f = {:?}", "leave optimize_branches:\n b = {:?}\n l = {:?}\n f = {:?}",
self.latest_branches, self.latest_branches,
self.labels_by_offset, self.labels_at_tail,
self.fixup_records self.fixup_records
); );
} }
@@ -929,6 +961,11 @@ struct MachBranch {
target: MachLabel, target: MachLabel,
fixup: usize, fixup: usize,
inverted: Option<SmallVec<[u8; 8]>>, inverted: Option<SmallVec<[u8; 8]>>,
/// All labels pointing to the start of this branch. For correctness, this
/// *must* be complete: we rely on being able to redirect all labels that
/// could jump to this branch before removing it, if it is otherwise
/// unreachable.
labels_at_this_branch: SmallVec<[MachLabel; 4]>,
} }
impl MachBranch { impl MachBranch {