Merge pull request #3709 from cfallin/cold-blocks-dead-code-bug
Cranelift: Fix cold-blocks-related lowering bug.
This commit is contained in:
@@ -179,3 +179,106 @@ fn isa_constructor(
|
|||||||
let backend = X64Backend::new_with_flags(triple, shared_flags, isa_flags);
|
let backend = X64Backend::new_with_flags(triple, shared_flags, isa_flags);
|
||||||
Box::new(backend)
|
Box::new(backend)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test {
|
||||||
|
use super::*;
|
||||||
|
use crate::cursor::{Cursor, FuncCursor};
|
||||||
|
use crate::ir::types::*;
|
||||||
|
use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
|
||||||
|
use crate::isa::CallConv;
|
||||||
|
use crate::settings;
|
||||||
|
use crate::settings::Configurable;
|
||||||
|
use core::str::FromStr;
|
||||||
|
use target_lexicon::Triple;
|
||||||
|
|
||||||
|
/// We have to test cold blocks by observing final machine code,
|
||||||
|
/// rather than VCode, because the VCode orders blocks in lowering
|
||||||
|
/// order, not emission order. (The exact difference between the
|
||||||
|
/// two is that cold blocks are sunk in the latter.) We might as
|
||||||
|
/// well do the test here, where we have a backend to use.
|
||||||
|
#[test]
|
||||||
|
fn test_cold_blocks() {
|
||||||
|
let name = ExternalName::testcase("test0");
|
||||||
|
let mut sig = Signature::new(CallConv::SystemV);
|
||||||
|
sig.params.push(AbiParam::new(I32));
|
||||||
|
sig.returns.push(AbiParam::new(I32));
|
||||||
|
let mut func = Function::with_name_signature(name, sig);
|
||||||
|
|
||||||
|
let bb0 = func.dfg.make_block();
|
||||||
|
let arg0 = func.dfg.append_block_param(bb0, I32);
|
||||||
|
let bb1 = func.dfg.make_block();
|
||||||
|
let bb2 = func.dfg.make_block();
|
||||||
|
let bb3 = func.dfg.make_block();
|
||||||
|
let bb1_param = func.dfg.append_block_param(bb1, I32);
|
||||||
|
let bb3_param = func.dfg.append_block_param(bb3, I32);
|
||||||
|
|
||||||
|
let mut pos = FuncCursor::new(&mut func);
|
||||||
|
|
||||||
|
pos.insert_block(bb0);
|
||||||
|
let v0 = pos.ins().iconst(I32, 0x1234);
|
||||||
|
let v1 = pos.ins().iadd(arg0, v0);
|
||||||
|
pos.ins().brnz(v1, bb1, &[v1]);
|
||||||
|
pos.ins().jump(bb2, &[]);
|
||||||
|
|
||||||
|
pos.insert_block(bb1);
|
||||||
|
let v2 = pos.ins().isub(v1, v0);
|
||||||
|
let v3 = pos.ins().iadd(v2, bb1_param);
|
||||||
|
pos.ins().brnz(v1, bb2, &[]);
|
||||||
|
pos.ins().jump(bb3, &[v3]);
|
||||||
|
|
||||||
|
pos.func.layout.set_cold(bb2);
|
||||||
|
pos.insert_block(bb2);
|
||||||
|
let v4 = pos.ins().iadd(v1, v0);
|
||||||
|
pos.ins().brnz(v4, bb2, &[]);
|
||||||
|
pos.ins().jump(bb1, &[v4]);
|
||||||
|
|
||||||
|
pos.insert_block(bb3);
|
||||||
|
pos.ins().return_(&[bb3_param]);
|
||||||
|
|
||||||
|
let mut shared_flags_builder = settings::builder();
|
||||||
|
shared_flags_builder.set("opt_level", "none").unwrap();
|
||||||
|
shared_flags_builder.set("enable_verifier", "true").unwrap();
|
||||||
|
let shared_flags = settings::Flags::new(shared_flags_builder);
|
||||||
|
let isa_flags = x64_settings::Flags::new(&shared_flags, x64_settings::builder());
|
||||||
|
let backend = X64Backend::new_with_flags(
|
||||||
|
Triple::from_str("x86_64").unwrap(),
|
||||||
|
shared_flags,
|
||||||
|
isa_flags,
|
||||||
|
);
|
||||||
|
let result = backend
|
||||||
|
.compile_function(&mut func, /* want_disasm = */ false)
|
||||||
|
.unwrap();
|
||||||
|
let code = result.buffer.data();
|
||||||
|
|
||||||
|
// 00000000 55 push rbp
|
||||||
|
// 00000001 4889E5 mov rbp,rsp
|
||||||
|
// 00000004 4889FE mov rsi,rdi
|
||||||
|
// 00000007 81C634120000 add esi,0x1234
|
||||||
|
// 0000000D 85F6 test esi,esi
|
||||||
|
// 0000000F 0F841B000000 jz near 0x30
|
||||||
|
// 00000015 4889F7 mov rdi,rsi
|
||||||
|
// 00000018 4889F0 mov rax,rsi
|
||||||
|
// 0000001B 81E834120000 sub eax,0x1234
|
||||||
|
// 00000021 01F8 add eax,edi
|
||||||
|
// 00000023 85F6 test esi,esi
|
||||||
|
// 00000025 0F8505000000 jnz near 0x30
|
||||||
|
// 0000002B 4889EC mov rsp,rbp
|
||||||
|
// 0000002E 5D pop rbp
|
||||||
|
// 0000002F C3 ret
|
||||||
|
// 00000030 4889F7 mov rdi,rsi <--- cold block
|
||||||
|
// 00000033 81C734120000 add edi,0x1234
|
||||||
|
// 00000039 85FF test edi,edi
|
||||||
|
// 0000003B 0F85EFFFFFFF jnz near 0x30
|
||||||
|
// 00000041 E9D2FFFFFF jmp 0x18
|
||||||
|
|
||||||
|
let golden = vec![
|
||||||
|
85, 72, 137, 229, 72, 137, 254, 129, 198, 52, 18, 0, 0, 133, 246, 15, 132, 27, 0, 0, 0,
|
||||||
|
72, 137, 247, 72, 137, 240, 129, 232, 52, 18, 0, 0, 1, 248, 133, 246, 15, 133, 5, 0, 0,
|
||||||
|
0, 72, 137, 236, 93, 195, 72, 137, 247, 129, 199, 52, 18, 0, 0, 133, 255, 15, 133, 239,
|
||||||
|
255, 255, 255, 233, 210, 255, 255, 255,
|
||||||
|
];
|
||||||
|
|
||||||
|
assert_eq!(code, &golden[..]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -99,6 +99,13 @@ pub struct BlockLoweringOrder {
|
|||||||
/// blocks.
|
/// blocks.
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
orig_map: SecondaryMap<Block, Option<BlockIndex>>,
|
orig_map: SecondaryMap<Block, Option<BlockIndex>>,
|
||||||
|
/// Cold blocks. These blocks are not reordered in the
|
||||||
|
/// `lowered_order` above; the lowered order must respect RPO
|
||||||
|
/// (uses after defs) in order for lowering to be
|
||||||
|
/// correct. Instead, this set is used to provide `is_cold()`,
|
||||||
|
/// which is used by VCode emission to sink the blocks at the last
|
||||||
|
/// moment (when we actually emit bytes into the MachBuffer).
|
||||||
|
cold_blocks: FxHashSet<BlockIndex>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The origin of a block in the lowered block-order: either an original CLIF
|
/// The origin of a block in the lowered block-order: either an original CLIF
|
||||||
@@ -378,31 +385,28 @@ impl BlockLoweringOrder {
|
|||||||
|
|
||||||
postorder.reverse();
|
postorder.reverse();
|
||||||
let mut rpo = postorder;
|
let mut rpo = postorder;
|
||||||
|
|
||||||
// Step 3: sink any cold blocks to the end of the
|
|
||||||
// function. Put the "deferred last" block truly at the end;
|
|
||||||
// this is a correctness requirement (for fallthrough
|
|
||||||
// returns).
|
|
||||||
rpo.sort_by_key(|block| {
|
|
||||||
block
|
|
||||||
.0
|
|
||||||
.orig_block()
|
|
||||||
.map(|block| f.layout.is_cold(block))
|
|
||||||
.unwrap_or(false)
|
|
||||||
});
|
|
||||||
|
|
||||||
if let Some(d) = deferred_last {
|
if let Some(d) = deferred_last {
|
||||||
rpo.push(d);
|
rpo.push(d);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step 4: now that we have RPO, build the BlockIndex/BB fwd/rev maps.
|
// Step 3: now that we have RPO, build the BlockIndex/BB fwd/rev maps.
|
||||||
let mut lowered_order = vec![];
|
let mut lowered_order = vec![];
|
||||||
|
let mut cold_blocks = FxHashSet::default();
|
||||||
let mut lowered_succ_ranges = vec![];
|
let mut lowered_succ_ranges = vec![];
|
||||||
let mut lb_to_bindex = FxHashMap::default();
|
let mut lb_to_bindex = FxHashMap::default();
|
||||||
for (block, succ_range) in rpo.into_iter() {
|
for (block, succ_range) in rpo.into_iter() {
|
||||||
lb_to_bindex.insert(block, lowered_order.len() as BlockIndex);
|
let index = lowered_order.len() as BlockIndex;
|
||||||
|
lb_to_bindex.insert(block, index);
|
||||||
lowered_order.push(block);
|
lowered_order.push(block);
|
||||||
lowered_succ_ranges.push(succ_range);
|
lowered_succ_ranges.push(succ_range);
|
||||||
|
|
||||||
|
if block
|
||||||
|
.orig_block()
|
||||||
|
.map(|b| f.layout.is_cold(b))
|
||||||
|
.unwrap_or(false)
|
||||||
|
{
|
||||||
|
cold_blocks.insert(index);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let lowered_succ_indices = lowered_succs
|
let lowered_succ_indices = lowered_succs
|
||||||
@@ -424,6 +428,7 @@ impl BlockLoweringOrder {
|
|||||||
lowered_succ_indices,
|
lowered_succ_indices,
|
||||||
lowered_succ_ranges,
|
lowered_succ_ranges,
|
||||||
orig_map,
|
orig_map,
|
||||||
|
cold_blocks,
|
||||||
};
|
};
|
||||||
log::trace!("BlockLoweringOrder: {:?}", result);
|
log::trace!("BlockLoweringOrder: {:?}", result);
|
||||||
result
|
result
|
||||||
@@ -439,6 +444,11 @@ impl BlockLoweringOrder {
|
|||||||
let range = self.lowered_succ_ranges[block as usize];
|
let range = self.lowered_succ_ranges[block as usize];
|
||||||
&self.lowered_succ_indices[range.0..range.1]
|
&self.lowered_succ_indices[range.0..range.1]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Determine whether the given lowered-block index is cold.
|
||||||
|
pub fn is_cold(&self, block: BlockIndex) -> bool {
|
||||||
|
self.cold_blocks.contains(&block)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|||||||
@@ -470,11 +470,24 @@ impl<I: VCodeInst> VCode<I> {
|
|||||||
let mut inst_ends = vec![0; self.insts.len()];
|
let mut inst_ends = vec![0; self.insts.len()];
|
||||||
let mut label_insn_iix = vec![0; self.num_blocks()];
|
let mut label_insn_iix = vec![0; self.num_blocks()];
|
||||||
|
|
||||||
|
// Construct the final order we emit code in: cold blocks at the end.
|
||||||
|
let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![];
|
||||||
|
let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![];
|
||||||
|
for block in 0..self.num_blocks() {
|
||||||
|
let block = block as BlockIndex;
|
||||||
|
if self.block_order.is_cold(block) {
|
||||||
|
cold_blocks.push(block);
|
||||||
|
} else {
|
||||||
|
final_order.push(block);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
final_order.extend(cold_blocks);
|
||||||
|
|
||||||
|
// Emit blocks.
|
||||||
let mut safepoint_idx = 0;
|
let mut safepoint_idx = 0;
|
||||||
let mut cur_srcloc = None;
|
let mut cur_srcloc = None;
|
||||||
let mut last_offset = None;
|
let mut last_offset = None;
|
||||||
for block in 0..self.num_blocks() {
|
for block in final_order {
|
||||||
let block = block as BlockIndex;
|
|
||||||
let new_offset = I::align_basic_block(buffer.cur_offset());
|
let new_offset = I::align_basic_block(buffer.cur_offset());
|
||||||
while new_offset > buffer.cur_offset() {
|
while new_offset > buffer.cur_offset() {
|
||||||
// Pad with NOPs up to the aligned block offset.
|
// Pad with NOPs up to the aligned block offset.
|
||||||
|
|||||||
Reference in New Issue
Block a user