Reuse the DominatorTree postorder traversal in BlockLoweringOrder (#5843)
* Rework the blockorder module to reuse the dom tree's cfg postorder
* Update domtree tests
* Treat br_table with an empty jump table as multiple block exits
* Bless tests
* Change branch_idx to succ_idx and fix the comment
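The gist of the change, as a hedged sketch (names taken from the hunks below; the two wrapper functions here are hypothetical and exist only for illustration): the DominatorTree that Context already computes is now threaded through TargetIsa::compile_function and machinst::compile::compile into BlockLoweringOrder::new, so block lowering reuses the dom tree's cached CFG postorder instead of running its own DFS.

// Sketch only: how the dominator tree now flows into block lowering.
fn compile_stencil_sketch(ctx: &mut Context, isa: &dyn TargetIsa) -> CodegenResult<CompiledCodeStencil> {
    // `optimize` is assumed (as in the hunk below) to have populated ctx.domtree.
    ctx.optimize(isa)?;
    // New: the domtree is passed down instead of being recomputed later.
    isa.compile_function(&ctx.func, &ctx.domtree, ctx.want_disasm)
}

// Inside each backend, compile::compile::<B>(func, domtree, ...) ends up doing:
fn lowering_order_sketch(func: &Function, domtree: &DominatorTree) -> BlockLoweringOrder {
    // Reuses domtree.cfg_postorder() rather than a private DFS over the CFG.
    BlockLoweringOrder::new(func, domtree)
}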
@@ -140,7 +140,7 @@ impl Context {
self.optimize(isa)?;

isa.compile_function(&self.func, self.want_disasm)
isa.compile_function(&self.func, &self.domtree, self.want_disasm)
}

/// Optimize the function, performing all compilation steps up to

@@ -311,9 +311,17 @@ impl DominatorTree {
self.nodes[block].rpo_number = SEEN;
self.stack.push((Visit::Last, block));
if let Some(inst) = func.stencil.layout.last_inst(block) {
// Heuristic: chase the children in reverse. This puts the first
// successor block first in the postorder, all other things being
// equal, which tends to prioritize loop backedges over out-edges,
// putting the edge-block closer to the loop body and minimizing
// live-ranges in linear instruction space. This heuristic doesn't have
// any effect on the computation of dominators, and is purely for other
// consumers of the postorder we cache here.
for block in func.stencil.dfg.insts[inst]
.branch_destination(&func.stencil.dfg.jump_tables)
.iter()
.rev()
{
let succ = block.block(&func.stencil.dfg.value_lists);

@@ -641,7 +649,7 @@ mod tests {
// return
// } block2
// } block0
assert_eq!(dt.cfg_postorder(), &[trap_block, block2, block0]);
assert_eq!(dt.cfg_postorder(), &[block2, trap_block, block0]);

let v2_def = cur.func.dfg.value_def(v2).unwrap_inst();
assert!(!dt.dominates(v2_def, block0, &cur.func.layout));

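The heuristic added above can be illustrated with a small, self-contained sketch (not Cranelift code; the graph and names are made up): an iterative DFS that examines successors back-to-front, so that a block's first-listed successor comes out earliest once the cached postorder is walked in reverse.

// Standalone illustration of the reverse-successor postorder heuristic.
fn postorder(succs: &[Vec<usize>], entry: usize) -> Vec<usize> {
    let mut seen = vec![false; succs.len()];
    let mut order = Vec::new();
    // Each stack entry is (node, number of successors already examined).
    let mut stack = vec![(entry, 0usize)];
    seen[entry] = true;
    while let Some(&(node, idx)) = stack.last() {
        let list = &succs[node];
        if idx == list.len() {
            // All successors visited: emit the node in postorder.
            order.push(node);
            stack.pop();
        } else {
            // Examine successors back-to-front, mirroring `.iter().rev()`.
            stack.last_mut().unwrap().1 += 1;
            let succ = list[list.len() - 1 - idx];
            if !seen[succ] {
                seen[succ] = true;
                stack.push((succ, 0));
            }
        }
    }
    order
}

fn main() {
    // Diamond: 0 -> {1, 2}, 1 -> {3}, 2 -> {3}.
    let g = vec![vec![1, 2], vec![3], vec![3], vec![]];
    // Prints [3, 2, 1, 0]; walked in reverse that is 0, 1, 2, 3, i.e. block
    // 0's first successor (1) is laid out before its second successor (2).
    println!("{:?}", postorder(&g, 0));
}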
@@ -1,5 +1,6 @@
|
||||
//! ARM 64-bit Instruction Set Architecture.
|
||||
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::ir::condcodes::IntCC;
|
||||
use crate::ir::{Function, Type};
|
||||
use crate::isa::aarch64::settings as aarch64_settings;
|
||||
@@ -56,11 +57,12 @@ impl AArch64Backend {
|
||||
fn compile_vcode(
|
||||
&self,
|
||||
func: &Function,
|
||||
domtree: &DominatorTree,
|
||||
) -> CodegenResult<(VCode<inst::Inst>, regalloc2::Output)> {
|
||||
let emit_info = EmitInfo::new(self.flags.clone());
|
||||
let sigs = SigSet::new::<abi::AArch64MachineDeps>(func, &self.flags)?;
|
||||
let abi = abi::AArch64Callee::new(func, self, &self.isa_flags, &sigs)?;
|
||||
compile::compile::<AArch64Backend>(func, self, abi, emit_info, sigs)
|
||||
compile::compile::<AArch64Backend>(func, domtree, self, abi, emit_info, sigs)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -68,9 +70,10 @@ impl TargetIsa for AArch64Backend {
|
||||
fn compile_function(
|
||||
&self,
|
||||
func: &Function,
|
||||
domtree: &DominatorTree,
|
||||
want_disasm: bool,
|
||||
) -> CodegenResult<CompiledCodeStencil> {
|
||||
let (vcode, regalloc_result) = self.compile_vcode(func)?;
|
||||
let (vcode, regalloc_result) = self.compile_vcode(func, domtree)?;
|
||||
|
||||
let emit_result = vcode.emit(
|
||||
®alloc_result,
|
||||
@@ -241,6 +244,8 @@ pub fn isa_builder(triple: Triple) -> IsaBuilder {
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::cursor::{Cursor, FuncCursor};
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::flowgraph::ControlFlowGraph;
|
||||
use crate::ir::types::*;
|
||||
use crate::ir::{AbiParam, Function, InstBuilder, JumpTableData, Signature, UserFuncName};
|
||||
use crate::isa::CallConv;
|
||||
@@ -275,7 +280,12 @@ mod test {
|
||||
shared_flags,
|
||||
isa_flags,
|
||||
);
|
||||
let buffer = backend.compile_function(&mut func, false).unwrap().buffer;
|
||||
let cfg = ControlFlowGraph::with_function(&func);
|
||||
let domtree = DominatorTree::with_function(&func, &cfg);
|
||||
let buffer = backend
|
||||
.compile_function(&mut func, &domtree, false)
|
||||
.unwrap()
|
||||
.buffer;
|
||||
let code = buffer.data();
|
||||
|
||||
// To update this comment, write the golden bytes to a file, and run the following command
|
||||
@@ -328,8 +338,10 @@ mod test {
|
||||
shared_flags,
|
||||
isa_flags,
|
||||
);
|
||||
let cfg = ControlFlowGraph::with_function(&func);
|
||||
let domtree = DominatorTree::with_function(&func, &cfg);
|
||||
let result = backend
|
||||
.compile_function(&mut func, /* want_disasm = */ false)
|
||||
.compile_function(&mut func, &domtree, /* want_disasm = */ false)
|
||||
.unwrap();
|
||||
let code = result.buffer.data();
|
||||
|
||||
@@ -340,21 +352,22 @@ mod test {
|
||||
// 0: 52824689 mov w9, #0x1234 // #4660
|
||||
// 4: 0b09000b add w11, w0, w9
|
||||
// 8: 2a0b03ea mov w10, w11
|
||||
// c: b50000aa cbnz x10, 0x20
|
||||
// 10: 5282468c mov w12, #0x1234 // #4660
|
||||
// 14: 0b0c016e add w14, w11, w12
|
||||
// 18: 2a0e03ed mov w13, w14
|
||||
// 1c: b5ffffad cbnz x13, 0x10
|
||||
// 20: 2a0b03e0 mov w0, w11
|
||||
// 24: b5ffff60 cbnz x0, 0x10
|
||||
// 28: 52824681 mov w1, #0x1234 // #4660
|
||||
// 2c: 4b010160 sub w0, w11, w1
|
||||
// 30: d65f03c0 ret
|
||||
// c: b40000ca cbz x10, 0x24
|
||||
// 10: 2a0b03ed mov w13, w11
|
||||
// 14: b500008d cbnz x13, 0x24
|
||||
// 18: 5282468e mov w14, #0x1234 // #4660
|
||||
// 1c: 4b0e0160 sub w0, w11, w14
|
||||
// 20: d65f03c0 ret
|
||||
// 24: 5282468f mov w15, #0x1234 // #4660
|
||||
// 28: 0b0f0161 add w1, w11, w15
|
||||
// 2c: 2a0103e0 mov w0, w1
|
||||
// 30: b5ffffa0 cbnz x0, 0x24
|
||||
// 34: 17fffff7 b 0x10
|
||||
|
||||
let golden = vec![
|
||||
137, 70, 130, 82, 11, 0, 9, 11, 234, 3, 11, 42, 170, 0, 0, 181, 140, 70, 130, 82, 110,
|
||||
1, 12, 11, 237, 3, 14, 42, 173, 255, 255, 181, 224, 3, 11, 42, 96, 255, 255, 181, 129,
|
||||
70, 130, 82, 96, 1, 1, 75, 192, 3, 95, 214,
|
||||
137, 70, 130, 82, 11, 0, 9, 11, 234, 3, 11, 42, 202, 0, 0, 180, 237, 3, 11, 42, 141, 0,
|
||||
0, 181, 142, 70, 130, 82, 96, 1, 14, 75, 192, 3, 95, 214, 143, 70, 130, 82, 97, 1, 15,
|
||||
11, 224, 3, 1, 42, 160, 255, 255, 181, 247, 255, 255, 23,
|
||||
];
|
||||
|
||||
assert_eq!(code, &golden[..]);
|
||||
@@ -409,8 +422,10 @@ mod test {
|
||||
shared_flags,
|
||||
isa_flags,
|
||||
);
|
||||
let cfg = ControlFlowGraph::with_function(&func);
|
||||
let domtree = DominatorTree::with_function(&func, &cfg);
|
||||
let result = backend
|
||||
.compile_function(&mut func, /* want_disasm = */ false)
|
||||
.compile_function(&mut func, &domtree, /* want_disasm = */ false)
|
||||
.unwrap();
|
||||
let code = result.buffer.data();
|
||||
|
||||
@@ -419,7 +434,7 @@ mod test {
|
||||
// > aarch64-linux-gnu-objdump -b binary -D <file> -m aarch64
|
||||
//
|
||||
// 0: 7100081f cmp w0, #0x2
|
||||
// 4: 54000122 b.cs 0x28 // b.hs, b.nlast
|
||||
// 4: 540001a2 b.cs 0x38 // b.hs, b.nlast
|
||||
// 8: 9a8023e8 csel x8, xzr, x0, cs // cs = hs, nlast
|
||||
// c: d503229f csdb
|
||||
// 10: 10000087 adr x7, 0x20
|
||||
@@ -427,18 +442,18 @@ mod test {
|
||||
// 18: 8b0800e7 add x7, x7, x8
|
||||
// 1c: d61f00e0 br x7
|
||||
// 20: 00000010 udf #16
|
||||
// 24: 00000018 udf #24
|
||||
// 28: 52800060 mov w0, #0x3 // #3
|
||||
// 24: 00000008 udf #8
|
||||
// 28: 52800040 mov w0, #0x2 // #2
|
||||
// 2c: d65f03c0 ret
|
||||
// 30: 52800020 mov w0, #0x1 // #1
|
||||
// 34: d65f03c0 ret
|
||||
// 38: 52800040 mov w0, #0x2 // #2
|
||||
// 38: 52800060 mov w0, #0x3 // #3
|
||||
// 3c: d65f03c0 ret
|
||||
|
||||
let golden = vec![
|
||||
31, 8, 0, 113, 34, 1, 0, 84, 232, 35, 128, 154, 159, 34, 3, 213, 135, 0, 0, 16, 232,
|
||||
88, 168, 184, 231, 0, 8, 139, 224, 0, 31, 214, 16, 0, 0, 0, 24, 0, 0, 0, 96, 0, 128,
|
||||
82, 192, 3, 95, 214, 32, 0, 128, 82, 192, 3, 95, 214, 64, 0, 128, 82, 192, 3, 95, 214,
|
||||
31, 8, 0, 113, 162, 1, 0, 84, 232, 35, 128, 154, 159, 34, 3, 213, 135, 0, 0, 16, 232,
|
||||
88, 168, 184, 231, 0, 8, 139, 224, 0, 31, 214, 16, 0, 0, 0, 8, 0, 0, 0, 64, 0, 128, 82,
|
||||
192, 3, 95, 214, 32, 0, 128, 82, 192, 3, 95, 214, 96, 0, 128, 82, 192, 3, 95, 214,
|
||||
];
|
||||
|
||||
assert_eq!(code, &golden[..]);
|
||||
|
||||
@@ -43,6 +43,7 @@
|
||||
//! The configured target ISA trait object is a `Box<TargetIsa>` which can be used for multiple
|
||||
//! concurrent function compilations.
|
||||
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
pub use crate::isa::call_conv::CallConv;
|
||||
|
||||
use crate::flowgraph;
|
||||
@@ -252,6 +253,7 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
|
||||
fn compile_function(
|
||||
&self,
|
||||
func: &Function,
|
||||
domtree: &DominatorTree,
|
||||
want_disasm: bool,
|
||||
) -> CodegenResult<CompiledCodeStencil>;
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
//! risc-v 64-bit Instruction Set Architecture.
|
||||
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::ir;
|
||||
use crate::ir::condcodes::IntCC;
|
||||
use crate::ir::Function;
|
||||
@@ -56,11 +57,12 @@ impl Riscv64Backend {
|
||||
fn compile_vcode(
|
||||
&self,
|
||||
func: &Function,
|
||||
domtree: &DominatorTree,
|
||||
) -> CodegenResult<(VCode<inst::Inst>, regalloc2::Output)> {
|
||||
let emit_info = EmitInfo::new(self.flags.clone(), self.isa_flags.clone());
|
||||
let sigs = SigSet::new::<abi::Riscv64MachineDeps>(func, &self.flags)?;
|
||||
let abi = abi::Riscv64Callee::new(func, self, &self.isa_flags, &sigs)?;
|
||||
compile::compile::<Riscv64Backend>(func, self, abi, emit_info, sigs)
|
||||
compile::compile::<Riscv64Backend>(func, domtree, self, abi, emit_info, sigs)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -68,9 +70,10 @@ impl TargetIsa for Riscv64Backend {
|
||||
fn compile_function(
|
||||
&self,
|
||||
func: &Function,
|
||||
domtree: &DominatorTree,
|
||||
want_disasm: bool,
|
||||
) -> CodegenResult<CompiledCodeStencil> {
|
||||
let (vcode, regalloc_result) = self.compile_vcode(func)?;
|
||||
let (vcode, regalloc_result) = self.compile_vcode(func, domtree)?;
|
||||
|
||||
let want_disasm = want_disasm || log::log_enabled!(log::Level::Debug);
|
||||
let emit_result = vcode.emit(
|
||||
@@ -216,6 +219,8 @@ pub fn isa_builder(triple: Triple) -> IsaBuilder {
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::cursor::{Cursor, FuncCursor};
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::flowgraph::ControlFlowGraph;
|
||||
use crate::ir::types::*;
|
||||
use crate::ir::{AbiParam, Function, InstBuilder, Signature, UserFuncName};
|
||||
use crate::isa::CallConv;
|
||||
@@ -250,7 +255,9 @@ mod test {
|
||||
shared_flags,
|
||||
isa_flags,
|
||||
);
|
||||
let buffer = backend.compile_function(&mut func, true).unwrap();
|
||||
let cfg = ControlFlowGraph::with_function(&func);
|
||||
let domtree = DominatorTree::with_function(&func, &cfg);
|
||||
let buffer = backend.compile_function(&mut func, &domtree, true).unwrap();
|
||||
let code = buffer.buffer.data();
|
||||
|
||||
// To update this comment, write the golden bytes to a file, and run the following command
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
//! IBM Z 64-bit Instruction Set Architecture.
|
||||
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::ir::condcodes::IntCC;
|
||||
use crate::ir::{Function, Type};
|
||||
use crate::isa::s390x::settings as s390x_settings;
|
||||
@@ -56,11 +57,12 @@ impl S390xBackend {
|
||||
fn compile_vcode(
|
||||
&self,
|
||||
func: &Function,
|
||||
domtree: &DominatorTree,
|
||||
) -> CodegenResult<(VCode<inst::Inst>, regalloc2::Output)> {
|
||||
let emit_info = EmitInfo::new(self.isa_flags.clone());
|
||||
let sigs = SigSet::new::<abi::S390xMachineDeps>(func, &self.flags)?;
|
||||
let abi = abi::S390xCallee::new(func, self, &self.isa_flags, &sigs)?;
|
||||
compile::compile::<S390xBackend>(func, self, abi, emit_info, sigs)
|
||||
compile::compile::<S390xBackend>(func, domtree, self, abi, emit_info, sigs)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -68,10 +70,11 @@ impl TargetIsa for S390xBackend {
|
||||
fn compile_function(
|
||||
&self,
|
||||
func: &Function,
|
||||
domtree: &DominatorTree,
|
||||
want_disasm: bool,
|
||||
) -> CodegenResult<CompiledCodeStencil> {
|
||||
let flags = self.flags();
|
||||
let (vcode, regalloc_result) = self.compile_vcode(func)?;
|
||||
let (vcode, regalloc_result) = self.compile_vcode(func, domtree)?;
|
||||
|
||||
let emit_result = vcode.emit(®alloc_result, want_disasm, flags.machine_code_cfg_info());
|
||||
let frame_size = emit_result.frame_size;
|
||||
@@ -213,6 +216,8 @@ pub fn isa_builder(triple: Triple) -> IsaBuilder {
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::cursor::{Cursor, FuncCursor};
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::flowgraph::ControlFlowGraph;
|
||||
use crate::ir::types::*;
|
||||
use crate::ir::UserFuncName;
|
||||
use crate::ir::{AbiParam, Function, InstBuilder, Signature};
|
||||
@@ -248,8 +253,10 @@ mod test {
|
||||
shared_flags,
|
||||
isa_flags,
|
||||
);
|
||||
let cfg = ControlFlowGraph::with_function(&func);
|
||||
let domtree = DominatorTree::with_function(&func, &cfg);
|
||||
let result = backend
|
||||
.compile_function(&mut func, /* want_disasm = */ false)
|
||||
.compile_function(&mut func, &domtree, /* want_disasm = */ false)
|
||||
.unwrap();
|
||||
let code = result.buffer.data();
|
||||
|
||||
@@ -297,8 +304,10 @@ mod test {
|
||||
shared_flags,
|
||||
isa_flags,
|
||||
);
|
||||
let cfg = ControlFlowGraph::with_function(&func);
|
||||
let domtree = DominatorTree::with_function(&func, &cfg);
|
||||
let result = backend
|
||||
.compile_function(&mut func, /* want_disasm = */ false)
|
||||
.compile_function(&mut func, &domtree, /* want_disasm = */ false)
|
||||
.unwrap();
|
||||
let code = result.buffer.data();
|
||||
|
||||
@@ -310,19 +319,20 @@ mod test {
|
||||
//
|
||||
// 0: a7 2a 12 34 ahi %r2,4660
|
||||
// 4: a7 2e 00 00 chi %r2,0
|
||||
// 8: c0 64 00 00 00 0b jglh 0x1e
|
||||
// e: ec 32 12 34 00 d8 ahik %r3,%r2,4660
|
||||
// 14: a7 3e 00 00 chi %r3,0
|
||||
// 18: c0 64 ff ff ff fb jglh 0xe
|
||||
// 1e: a7 2e 00 00 chi %r2,0
|
||||
// 22: c0 64 ff ff ff f6 jglh 0xe
|
||||
// 28: a7 2a ed cc ahi %r2,-4660
|
||||
// 2c: 07 fe br %r14
|
||||
// 8: c0 94 00 00 00 0b jgnlh 0x1e
|
||||
// e: a7 2e 00 00 chi %r2,0
|
||||
// 12: c0 64 00 00 00 06 jglh 0x1e
|
||||
// 18: a7 2a ed cc ahi %r2,-4660
|
||||
// 1c: 07 fe br %r14
|
||||
// 1e: ec 32 12 34 00 d8 ahik %r3,%r2,4660
|
||||
// 24: a7 3e 00 00 chi %r3,0
|
||||
// 28: c0 64 ff ff ff fb jglh 0x1e
|
||||
// 2e: c0 f4 ff ff ff f0 jg 0xe
|
||||
|
||||
let golden = vec![
|
||||
167, 42, 18, 52, 167, 46, 0, 0, 192, 100, 0, 0, 0, 11, 236, 50, 18, 52, 0, 216, 167,
|
||||
62, 0, 0, 192, 100, 255, 255, 255, 251, 167, 46, 0, 0, 192, 100, 255, 255, 255, 246,
|
||||
167, 42, 237, 204, 7, 254,
|
||||
167, 42, 18, 52, 167, 46, 0, 0, 192, 148, 0, 0, 0, 11, 167, 46, 0, 0, 192, 100, 0, 0,
|
||||
0, 6, 167, 42, 237, 204, 7, 254, 236, 50, 18, 52, 0, 216, 167, 62, 0, 0, 192, 100, 255,
|
||||
255, 255, 251, 192, 244, 255, 255, 255, 240,
|
||||
];
|
||||
|
||||
assert_eq!(code, &golden[..]);
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
pub use self::inst::{args, EmitInfo, EmitState, Inst};
|
||||
|
||||
use super::{OwnedTargetIsa, TargetIsa};
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::ir::{condcodes::IntCC, Function, Type};
|
||||
#[cfg(feature = "unwind")]
|
||||
use crate::isa::unwind::systemv;
|
||||
@@ -48,13 +49,14 @@ impl X64Backend {
|
||||
fn compile_vcode(
|
||||
&self,
|
||||
func: &Function,
|
||||
domtree: &DominatorTree,
|
||||
) -> CodegenResult<(VCode<inst::Inst>, regalloc2::Output)> {
|
||||
// This performs lowering to VCode, register-allocates the code, computes
|
||||
// block layout and finalizes branches. The result is ready for binary emission.
|
||||
let emit_info = EmitInfo::new(self.flags.clone(), self.x64_flags.clone());
|
||||
let sigs = SigSet::new::<abi::X64ABIMachineSpec>(func, &self.flags)?;
|
||||
let abi = abi::X64Callee::new(&func, self, &self.x64_flags, &sigs)?;
|
||||
compile::compile::<Self>(&func, self, abi, emit_info, sigs)
|
||||
let abi = abi::X64Callee::new(func, self, &self.x64_flags, &sigs)?;
|
||||
compile::compile::<Self>(func, domtree, self, abi, emit_info, sigs)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -62,9 +64,10 @@ impl TargetIsa for X64Backend {
|
||||
fn compile_function(
|
||||
&self,
|
||||
func: &Function,
|
||||
domtree: &DominatorTree,
|
||||
want_disasm: bool,
|
||||
) -> CodegenResult<CompiledCodeStencil> {
|
||||
let (vcode, regalloc_result) = self.compile_vcode(func)?;
|
||||
let (vcode, regalloc_result) = self.compile_vcode(func, domtree)?;
|
||||
|
||||
let emit_result = vcode.emit(
|
||||
®alloc_result,
|
||||
@@ -231,6 +234,8 @@ fn isa_constructor(
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::cursor::{Cursor, FuncCursor};
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::flowgraph::ControlFlowGraph;
|
||||
use crate::ir::{types::*, RelSourceLoc, SourceLoc, UserFuncName, ValueLabel, ValueLabelStart};
|
||||
use crate::ir::{AbiParam, Function, InstBuilder, JumpTableData, Signature};
|
||||
use crate::isa::CallConv;
|
||||
@@ -341,8 +346,10 @@ mod test {
|
||||
shared_flags,
|
||||
isa_flags,
|
||||
);
|
||||
let cfg = ControlFlowGraph::with_function(&func);
|
||||
let domtree = DominatorTree::with_function(&func, &cfg);
|
||||
let result = backend
|
||||
.compile_function(&mut func, /* want_disasm = */ false)
|
||||
.compile_function(&mut func, &domtree, /* want_disasm = */ false)
|
||||
.unwrap();
|
||||
let code = result.buffer.data();
|
||||
|
||||
@@ -355,27 +362,28 @@ mod test {
|
||||
// 4: 48 89 fe mov rsi,rdi
|
||||
// 7: 81 c6 34 12 00 00 add esi,0x1234
|
||||
// d: 85 f6 test esi,esi
|
||||
// f: 0f 84 1c 00 00 00 je 0x31
|
||||
// f: 0f 84 21 00 00 00 je 0x36
|
||||
// 15: 49 89 f0 mov r8,rsi
|
||||
// 18: 48 89 f0 mov rax,rsi
|
||||
// 1b: 81 e8 34 12 00 00 sub eax,0x1234
|
||||
// 21: 44 01 c0 add eax,r8d
|
||||
// 24: 85 f6 test esi,esi
|
||||
// 26: 0f 85 05 00 00 00 jne 0x31
|
||||
// 26: 0f 85 0a 00 00 00 jne 0x36
|
||||
// 2c: 48 89 ec mov rsp,rbp
|
||||
// 2f: 5d pop rbp
|
||||
// 30: c3 ret
|
||||
// 31: 49 89 f0 mov r8,rsi
|
||||
// 34: 41 81 c0 34 12 00 00 add r8d,0x1234
|
||||
// 3b: 45 85 c0 test r8d,r8d
|
||||
// 3e: 0f 85 ed ff ff ff jne 0x31
|
||||
// 44: e9 cf ff ff ff jmp 0x18
|
||||
// 31: e9 e2 ff ff ff jmp 0x18
|
||||
// 36: 49 89 f0 mov r8,rsi
|
||||
// 39: 41 81 c0 34 12 00 00 add r8d,0x1234
|
||||
// 40: 45 85 c0 test r8d,r8d
|
||||
// 43: 0f 84 cf ff ff ff je 0x18
|
||||
// 49: e9 e8 ff ff ff jmp 0x36
|
||||
|
||||
let golden = vec![
|
||||
85, 72, 137, 229, 72, 137, 254, 129, 198, 52, 18, 0, 0, 133, 246, 15, 132, 28, 0, 0, 0,
|
||||
73, 137, 240, 72, 137, 240, 129, 232, 52, 18, 0, 0, 68, 1, 192, 133, 246, 15, 133, 5,
|
||||
0, 0, 0, 72, 137, 236, 93, 195, 73, 137, 240, 65, 129, 192, 52, 18, 0, 0, 69, 133, 192,
|
||||
15, 133, 237, 255, 255, 255, 233, 207, 255, 255, 255,
|
||||
85, 72, 137, 229, 72, 137, 254, 129, 198, 52, 18, 0, 0, 133, 246, 15, 132, 33, 0, 0, 0,
|
||||
73, 137, 240, 72, 137, 240, 129, 232, 52, 18, 0, 0, 68, 1, 192, 133, 246, 15, 133, 10,
|
||||
0, 0, 0, 72, 137, 236, 93, 195, 233, 226, 255, 255, 255, 73, 137, 240, 65, 129, 192,
|
||||
52, 18, 0, 0, 69, 133, 192, 15, 132, 207, 255, 255, 255, 233, 232, 255, 255, 255,
|
||||
];
|
||||
|
||||
assert_eq!(code, &golden[..]);
|
||||
@@ -450,8 +458,10 @@ mod test {
|
||||
shared_flags,
|
||||
isa_flags,
|
||||
);
|
||||
let cfg = ControlFlowGraph::with_function(&func);
|
||||
let domtree = DominatorTree::with_function(&func, &cfg);
|
||||
let result = backend
|
||||
.compile_function(&mut func, /* want_disasm = */ false)
|
||||
.compile_function(&mut func, &domtree, /* want_disasm = */ false)
|
||||
.unwrap();
|
||||
let code = result.buffer.data();
|
||||
|
||||
@@ -462,7 +472,7 @@ mod test {
|
||||
// 0: 55 push rbp
|
||||
// 1: 48 89 e5 mov rbp,rsp
|
||||
// 4: 83 ff 02 cmp edi,0x2
|
||||
// 7: 0f 83 27 00 00 00 jae 0x34
|
||||
// 7: 0f 83 3b 00 00 00 jae 0x48
|
||||
// d: 44 8b d7 mov r10d,edi
|
||||
// 10: 41 b9 00 00 00 00 mov r9d,0x0
|
||||
// 16: 4d 0f 43 d1 cmovae r10,r9
|
||||
@@ -472,9 +482,9 @@ mod test {
|
||||
// 29: 41 ff e1 jmp r9
|
||||
// 2c: 12 00 adc al,BYTE PTR [rax]
|
||||
// 2e: 00 00 add BYTE PTR [rax],al
|
||||
// 30: 1c 00 sbb al,0x0
|
||||
// 30: 08 00 or BYTE PTR [rax],al
|
||||
// 32: 00 00 add BYTE PTR [rax],al
|
||||
// 34: b8 03 00 00 00 mov eax,0x3
|
||||
// 34: b8 02 00 00 00 mov eax,0x2
|
||||
// 39: 48 89 ec mov rsp,rbp
|
||||
// 3c: 5d pop rbp
|
||||
// 3d: c3 ret
|
||||
@@ -482,16 +492,16 @@ mod test {
|
||||
// 43: 48 89 ec mov rsp,rbp
|
||||
// 46: 5d pop rbp
|
||||
// 47: c3 ret
|
||||
// 48: b8 02 00 00 00 mov eax,0x2
|
||||
// 48: b8 03 00 00 00 mov eax,0x3
|
||||
// 4d: 48 89 ec mov rsp,rbp
|
||||
// 50: 5d pop rbp
|
||||
// 51: c3 ret
|
||||
|
||||
let golden = vec![
|
||||
85, 72, 137, 229, 131, 255, 2, 15, 131, 39, 0, 0, 0, 68, 139, 215, 65, 185, 0, 0, 0, 0,
|
||||
85, 72, 137, 229, 131, 255, 2, 15, 131, 59, 0, 0, 0, 68, 139, 215, 65, 185, 0, 0, 0, 0,
|
||||
77, 15, 67, 209, 76, 141, 13, 11, 0, 0, 0, 79, 99, 84, 145, 0, 77, 1, 209, 65, 255,
|
||||
225, 18, 0, 0, 0, 28, 0, 0, 0, 184, 3, 0, 0, 0, 72, 137, 236, 93, 195, 184, 1, 0, 0, 0,
|
||||
72, 137, 236, 93, 195, 184, 2, 0, 0, 0, 72, 137, 236, 93, 195,
|
||||
225, 18, 0, 0, 0, 8, 0, 0, 0, 184, 2, 0, 0, 0, 72, 137, 236, 93, 195, 184, 1, 0, 0, 0,
|
||||
72, 137, 236, 93, 195, 184, 3, 0, 0, 0, 72, 137, 236, 93, 195,
|
||||
];
|
||||
|
||||
assert_eq!(code, &golden[..]);
|
||||
|
||||
@@ -417,25 +417,23 @@ mod tests {
|
||||
}
|
||||
|
||||
let mut loop_analysis = LoopAnalysis::new();
|
||||
let mut cfg = ControlFlowGraph::new();
|
||||
let mut domtree = DominatorTree::new();
|
||||
cfg.compute(&func);
|
||||
domtree.compute(&func, &cfg);
|
||||
let cfg = ControlFlowGraph::with_function(&func);
|
||||
let domtree = DominatorTree::with_function(&func, &cfg);
|
||||
loop_analysis.compute(&func, &cfg, &domtree);
|
||||
|
||||
let loops = loop_analysis.loops().collect::<Vec<Loop>>();
|
||||
assert_eq!(loops.len(), 3);
|
||||
assert_eq!(loop_analysis.loop_header(loops[0]), block0);
|
||||
assert_eq!(loop_analysis.loop_header(loops[1]), block1);
|
||||
assert_eq!(loop_analysis.loop_header(loops[2]), block3);
|
||||
assert_eq!(loop_analysis.loop_header(loops[1]), block3);
|
||||
assert_eq!(loop_analysis.loop_header(loops[2]), block1);
|
||||
assert_eq!(loop_analysis.loop_parent(loops[1]), Some(loops[0]));
|
||||
assert_eq!(loop_analysis.loop_parent(loops[2]), Some(loops[0]));
|
||||
assert_eq!(loop_analysis.loop_parent(loops[0]), None);
|
||||
assert_eq!(loop_analysis.is_in_loop(block0, loops[0]), true);
|
||||
assert_eq!(loop_analysis.is_in_loop(block1, loops[1]), true);
|
||||
assert_eq!(loop_analysis.is_in_loop(block2, loops[1]), true);
|
||||
assert_eq!(loop_analysis.is_in_loop(block3, loops[2]), true);
|
||||
assert_eq!(loop_analysis.is_in_loop(block4, loops[2]), true);
|
||||
assert_eq!(loop_analysis.is_in_loop(block1, loops[2]), true);
|
||||
assert_eq!(loop_analysis.is_in_loop(block2, loops[2]), true);
|
||||
assert_eq!(loop_analysis.is_in_loop(block3, loops[1]), true);
|
||||
assert_eq!(loop_analysis.is_in_loop(block4, loops[1]), true);
|
||||
assert_eq!(loop_analysis.is_in_loop(block5, loops[0]), true);
|
||||
assert_eq!(loop_analysis.loop_level(block0).level(), 1);
|
||||
assert_eq!(loop_analysis.loop_level(block1).level(), 2);
|
||||
|
||||
@@ -34,27 +34,18 @@
|
||||
//! +--------------+
|
||||
//! / \
|
||||
//! +--------------+ +--------------+
|
||||
//! | (edge 0->1) | |(edge 0->2) |
|
||||
//! | (edge 0->1) | | (edge 0->2) |
|
||||
//! | CLIF block 1 | | CLIF block 2 |
|
||||
//! | (edge 1->3) | | (edge 2->3) |
|
||||
//! +--------------+ +--------------+
|
||||
//! \ /
|
||||
//! +-----------+ +-----------+
|
||||
//! |(edge 1->3)| |(edge 2->3)|
|
||||
//! +-----------+ +-----------+
|
||||
//! \ /
|
||||
//! \ /
|
||||
//! \ /
|
||||
//! +------------+
|
||||
//! |CLIF block 3|
|
||||
//! +------------+
|
||||
//! ```
|
||||
//!
|
||||
//! (note that the edges into CLIF blocks 1 and 2 could be merged with those
|
||||
//! blocks' original bodies, but the out-edges could not because for simplicity
|
||||
//! in the successor-function definition, we only ever merge an edge onto one
|
||||
//! side of an original CLIF block.)
|
||||
//!
|
||||
//! Each `LoweredBlock` names just an original CLIF block, an original CLIF
|
||||
//! block prepended or appended with an edge block (never both, though), or just
|
||||
//! an edge block.
|
||||
//! Each `LoweredBlock` names just an original CLIF block, or just an edge block.
|
||||
//!
|
||||
//! To compute this lowering, we do a DFS over the CLIF-plus-edge-block graph
|
||||
//! (never actually materialized, just defined by a "successors" function), and
|
||||
@@ -69,6 +60,7 @@
|
||||
//! branch editing that in practice elides empty blocks and simplifies some of
|
||||
//! the other redundancies that this scheme produces.
|
||||
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::entity::SecondaryMap;
|
||||
use crate::fx::{FxHashMap, FxHashSet};
|
||||
use crate::inst_predicates::visit_block_succs;
|
||||
@@ -84,21 +76,11 @@ pub struct BlockLoweringOrder {
|
||||
/// (i) a CLIF block, and (ii) inserted crit-edge blocks before or after;
|
||||
/// see [LoweredBlock] for details.
|
||||
lowered_order: Vec<LoweredBlock>,
|
||||
/// Successors for all lowered blocks, in one serialized vector. Indexed by
|
||||
/// the ranges in `lowered_succ_ranges`.
|
||||
#[allow(dead_code)]
|
||||
lowered_succs: Vec<(Inst, LoweredBlock)>,
|
||||
/// BlockIndex values for successors for all lowered blocks, in the same
|
||||
/// order as `lowered_succs`.
|
||||
lowered_succ_indices: Vec<(Inst, BlockIndex)>,
|
||||
/// Ranges in `lowered_succs` giving the successor lists for each lowered
|
||||
/// BlockIndex values for successors for all lowered blocks, indexing `lowered_order`.
|
||||
lowered_succ_indices: Vec<BlockIndex>,
|
||||
/// Ranges in `lowered_succ_indices` giving the successor lists for each lowered
|
||||
/// block. Indexed by lowering-order index (`BlockIndex`).
|
||||
lowered_succ_ranges: Vec<(usize, usize)>,
|
||||
/// Mapping from CLIF BB to BlockIndex (index in lowered order). Note that
|
||||
/// some CLIF BBs may not be lowered; in particular, we skip unreachable
|
||||
/// blocks.
|
||||
#[allow(dead_code)]
|
||||
orig_map: SecondaryMap<Block, Option<BlockIndex>>,
|
||||
lowered_succ_ranges: Vec<(Option<Inst>, std::ops::Range<usize>)>,
|
||||
/// Cold blocks. These blocks are not reordered in the
|
||||
/// `lowered_order` above; the lowered order must respect RPO
|
||||
/// (uses after defs) in order for lowering to be
|
||||
@@ -110,390 +92,198 @@ pub struct BlockLoweringOrder {
|
||||
indirect_branch_targets: FxHashSet<BlockIndex>,
|
||||
}
|
||||
|
||||
/// The origin of a block in the lowered block-order: either an original CLIF
|
||||
/// block, or an inserted edge-block, or a combination of the two if an edge is
|
||||
/// non-critical.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum LoweredBlock {
|
||||
/// Block in original CLIF, with no merged edge-blocks.
|
||||
/// Block in original CLIF.
|
||||
Orig {
|
||||
/// Original CLIF block.
|
||||
block: Block,
|
||||
},
|
||||
/// Block in the original CLIF, plus edge-block to one succ (which is the
|
||||
/// one successor of the original block).
|
||||
OrigAndEdge {
|
||||
/// The original CLIF block contained in this lowered block.
|
||||
block: Block,
|
||||
/// The edge (jump) instruction transitioning from this block
|
||||
/// to the next, i.e., corresponding to the included edge-block. This
|
||||
/// will be an instruction in `block`.
|
||||
edge_inst: Inst,
|
||||
/// The successor index in this edge, to distinguish multiple
|
||||
/// edges between the same block pair.
|
||||
succ_idx: usize,
|
||||
/// The successor CLIF block.
|
||||
succ: Block,
|
||||
},
|
||||
/// Block in the original CLIF, preceded by edge-block from one pred (which
|
||||
/// is the one pred of the original block).
|
||||
EdgeAndOrig {
|
||||
/// The previous CLIF block, i.e., the edge block's predecessor.
|
||||
|
||||
/// Critical edge between two CLIF blocks.
|
||||
CriticalEdge {
|
||||
/// The predecessor block.
|
||||
pred: Block,
|
||||
/// The edge (jump) instruction corresponding to the included
|
||||
/// edge-block. This will be an instruction in `pred`.
|
||||
edge_inst: Inst,
|
||||
/// The successor index in this edge, to distinguish multiple
|
||||
/// edges between the same block pair.
|
||||
succ_idx: usize,
|
||||
/// The original CLIF block included in this lowered block.
|
||||
block: Block,
|
||||
},
|
||||
/// Split critical edge between two CLIF blocks. This lowered block does not
|
||||
/// correspond to any original CLIF blocks; it only serves as an insertion
|
||||
/// point for work to happen on the transition from `pred` to `succ`.
|
||||
Edge {
|
||||
/// The predecessor CLIF block.
|
||||
pred: Block,
|
||||
/// The edge (jump) instruction corresponding to this edge's transition.
|
||||
/// This will be an instruction in `pred`.
|
||||
edge_inst: Inst,
|
||||
/// The successor index in this edge, to distinguish multiple
|
||||
/// edges between the same block pair.
|
||||
succ_idx: usize,
|
||||
/// The successor CLIF block.
|
||||
|
||||
/// The successor block.
|
||||
succ: Block,
|
||||
|
||||
/// The index of this branch in the successor edges from `pred`, following the same
|
||||
/// indexing order as `inst_predicates::visit_block_succs`. This is used to distinguish
|
||||
/// multiple edges between the same CLIF blocks.
|
||||
succ_idx: u32,
|
||||
},
|
||||
}
|
||||
|
||||
impl LoweredBlock {
|
||||
/// The associated original (CLIF) block included in this lowered block, if
|
||||
/// any.
|
||||
pub fn orig_block(self) -> Option<Block> {
|
||||
/// Unwrap an `Orig` block.
|
||||
pub fn orig_block(&self) -> Option<Block> {
|
||||
match self {
|
||||
LoweredBlock::Orig { block, .. }
|
||||
| LoweredBlock::OrigAndEdge { block, .. }
|
||||
| LoweredBlock::EdgeAndOrig { block, .. } => Some(block),
|
||||
LoweredBlock::Edge { .. } => None,
|
||||
&LoweredBlock::Orig { block } => Some(block),
|
||||
&LoweredBlock::CriticalEdge { .. } => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// The associated in-edge, if any.
|
||||
/// The associated in-edge predecessor, if this is a critical edge.
|
||||
#[cfg(test)]
|
||||
pub fn in_edge(self) -> Option<(Block, Inst, Block)> {
|
||||
pub fn in_edge(&self) -> Option<Block> {
|
||||
match self {
|
||||
LoweredBlock::EdgeAndOrig {
|
||||
pred,
|
||||
edge_inst,
|
||||
block,
|
||||
..
|
||||
} => Some((pred, edge_inst, block)),
|
||||
_ => None,
|
||||
&LoweredBlock::CriticalEdge { pred, .. } => Some(pred),
|
||||
&LoweredBlock::Orig { .. } => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// the associated out-edge, if any. Also includes edge-only blocks.
|
||||
/// The associated out-edge successor, if this is a critical edge.
|
||||
#[cfg(test)]
|
||||
pub fn out_edge(self) -> Option<(Block, Inst, Block)> {
|
||||
pub fn out_edge(&self) -> Option<Block> {
|
||||
match self {
|
||||
LoweredBlock::OrigAndEdge {
|
||||
block,
|
||||
edge_inst,
|
||||
succ,
|
||||
..
|
||||
} => Some((block, edge_inst, succ)),
|
||||
LoweredBlock::Edge {
|
||||
pred,
|
||||
edge_inst,
|
||||
succ,
|
||||
..
|
||||
} => Some((pred, edge_inst, succ)),
|
||||
_ => None,
|
||||
&LoweredBlock::CriticalEdge { succ, .. } => Some(succ),
|
||||
&LoweredBlock::Orig { .. } => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl BlockLoweringOrder {
|
||||
/// Compute and return a lowered block order for `f`.
|
||||
pub fn new(f: &Function) -> BlockLoweringOrder {
|
||||
pub fn new(f: &Function, domtree: &DominatorTree) -> BlockLoweringOrder {
|
||||
trace!("BlockLoweringOrder: function body {:?}", f);
|
||||
|
||||
// Make sure that we have an entry block, and the entry block is
|
||||
// not marked as cold. (The verifier ensures this as well, but
|
||||
// the user may not have run the verifier, and this property is
|
||||
// critical to avoid a miscompile, so we assert it here too.)
|
||||
let entry = f.layout.entry_block().expect("Must have entry block");
|
||||
assert!(!f.layout.is_cold(entry));
|
||||
|
||||
// Step 1: compute the in-edge and out-edge count of every block.
|
||||
let mut block_in_count = SecondaryMap::with_default(0);
|
||||
let mut block_out_count = SecondaryMap::with_default(0);
|
||||
|
||||
// Cache the block successors to avoid re-examining branches below.
|
||||
let mut block_succs: SmallVec<[(Inst, usize, Block); 128]> = SmallVec::new();
|
||||
let mut block_succ_range = SecondaryMap::with_default((0, 0));
|
||||
// Block successors are stored as `LoweredBlocks` to simplify the construction of
|
||||
// `lowered_succs` in the final result. Initially, all entries are `Orig` values, and are
|
||||
// updated to be `CriticalEdge` when those cases are identified in step 2 below.
|
||||
let mut block_succs: SmallVec<[LoweredBlock; 128]> = SmallVec::new();
|
||||
let mut block_succ_range = SecondaryMap::with_default(0..0);
|
||||
|
||||
let mut indirect_branch_target_clif_blocks = FxHashSet::default();
|
||||
|
||||
for block in f.layout.blocks() {
|
||||
let block_succ_start = block_succs.len();
|
||||
let mut succ_idx = 0;
|
||||
visit_block_succs(f, block, |inst, succ, from_table| {
|
||||
let start = block_succs.len();
|
||||
visit_block_succs(f, block, |_, succ, from_table| {
|
||||
block_out_count[block] += 1;
|
||||
block_in_count[succ] += 1;
|
||||
block_succs.push((inst, succ_idx, succ));
|
||||
succ_idx += 1;
|
||||
block_succs.push(LoweredBlock::Orig { block: succ });
|
||||
|
||||
if from_table {
|
||||
indirect_branch_target_clif_blocks.insert(succ);
|
||||
}
|
||||
});
|
||||
let block_succ_end = block_succs.len();
|
||||
block_succ_range[block] = (block_succ_start, block_succ_end);
|
||||
|
||||
// Ensure that blocks terminated by br_table instructions with an empty jump table are
|
||||
// still treated like conditional blocks from the point of view of critical edge
|
||||
// splitting.
|
||||
if let Some(inst) = f.layout.last_inst(block) {
|
||||
if f.dfg.insts[inst].opcode() == Opcode::Return {
|
||||
// Implicit output edge for any return.
|
||||
block_out_count[block] += 1;
|
||||
if Opcode::BrTable == f.dfg.insts[inst].opcode() {
|
||||
block_out_count[block] = block_out_count[block].max(2);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Implicit input edge for entry block.
|
||||
block_in_count[entry] += 1;
|
||||
|
||||
// All blocks ending in conditional branches or br_tables must
|
||||
// have edge-moves inserted at the top of successor blocks,
|
||||
// not at the end of themselves. This is because the moves
|
||||
// would have to be inserted prior to the branch's register
|
||||
// use; but RA2's model is that the moves happen *on* the
|
||||
// edge, after every def/use in the block. RA2 will check for
|
||||
// "branch register use safety" and panic if such a problem
|
||||
// occurs. To avoid this, we force the below algorithm to
|
||||
// never merge the edge block onto the end of a block that
|
||||
// ends in a conditional branch. We do this by "faking" more
|
||||
// than one successor, even if there is only one.
|
||||
//
|
||||
// (One might ask, isn't that always the case already? It
|
||||
// could not be, in cases of br_table with no table and just a
|
||||
// default label, for example.)
|
||||
for block in f.layout.blocks() {
|
||||
if let Some(inst) = f.layout.last_inst(block) {
|
||||
// If the block has a branch with any "fixed args"
|
||||
// (not blockparam args) ...
|
||||
if f.dfg.insts[inst].opcode().is_branch() && f.dfg.inst_fixed_args(inst).len() > 0 {
|
||||
// ... then force a minimum successor count of
|
||||
// two, so the below algorithm cannot put
|
||||
// edge-moves on the end of the block.
|
||||
block_out_count[block] = std::cmp::max(2, block_out_count[block]);
|
||||
}
|
||||
}
|
||||
let end = block_succs.len();
|
||||
block_succ_range[block] = start..end;
|
||||
}
|
||||
|
||||
// Here we define the implicit CLIF-plus-edges graph. There are
|
||||
// conceptually two such graphs: the original, with every edge explicit,
|
||||
// and the merged one, with blocks (represented by `LoweredBlock`
|
||||
// values) that contain original CLIF blocks, edges, or both. This
|
||||
// function returns a lowered block's successors as per the latter, with
|
||||
// consideration to edge-block merging.
|
||||
//
|
||||
// Note that there is a property of the block-merging rules below
|
||||
// that is very important to ensure we don't miss any lowered blocks:
|
||||
// any block in the implicit CLIF-plus-edges graph will *only* be
|
||||
// included in one block in the merged graph.
|
||||
//
|
||||
// This, combined with the property that every edge block is reachable
|
||||
// only from one predecessor (and hence cannot be reached by a DFS
|
||||
// backedge), means that it is sufficient in our DFS below to track
|
||||
// visited-bits per original CLIF block only, not per edge. This greatly
|
||||
// simplifies the data structures (no need to keep a sparse hash-set of
|
||||
// (block, block) tuples).
|
||||
let compute_lowered_succs = |ret: &mut Vec<(Inst, LoweredBlock)>, block: LoweredBlock| {
|
||||
let start_idx = ret.len();
|
||||
match block {
|
||||
LoweredBlock::Orig { block } | LoweredBlock::EdgeAndOrig { block, .. } => {
|
||||
// At an orig block; successors are always edge blocks,
|
||||
// possibly with orig blocks following.
|
||||
let range = block_succ_range[block];
|
||||
for &(edge_inst, succ_idx, succ) in &block_succs[range.0..range.1] {
|
||||
if block_in_count[succ] == 1 {
|
||||
ret.push((
|
||||
edge_inst,
|
||||
LoweredBlock::EdgeAndOrig {
|
||||
pred: block,
|
||||
edge_inst,
|
||||
succ_idx,
|
||||
block: succ,
|
||||
},
|
||||
));
|
||||
} else {
|
||||
ret.push((
|
||||
edge_inst,
|
||||
LoweredBlock::Edge {
|
||||
pred: block,
|
||||
edge_inst,
|
||||
succ_idx,
|
||||
succ,
|
||||
},
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
LoweredBlock::Edge {
|
||||
succ, edge_inst, ..
|
||||
}
|
||||
| LoweredBlock::OrigAndEdge {
|
||||
succ, edge_inst, ..
|
||||
} => {
|
||||
// At an edge block; successors are always orig blocks,
|
||||
// possibly with edge blocks following.
|
||||
if block_out_count[succ] == 1 {
|
||||
let range = block_succ_range[succ];
|
||||
// check if the one succ is a real CFG edge (vs.
|
||||
// implicit return succ).
|
||||
if range.1 - range.0 > 0 {
|
||||
debug_assert!(range.1 - range.0 == 1);
|
||||
let (succ_edge_inst, succ_succ_idx, succ_succ) = block_succs[range.0];
|
||||
ret.push((
|
||||
edge_inst,
|
||||
LoweredBlock::OrigAndEdge {
|
||||
block: succ,
|
||||
edge_inst: succ_edge_inst,
|
||||
succ_idx: succ_succ_idx,
|
||||
succ: succ_succ,
|
||||
},
|
||||
));
|
||||
} else {
|
||||
ret.push((edge_inst, LoweredBlock::Orig { block: succ }));
|
||||
}
|
||||
} else {
|
||||
ret.push((edge_inst, LoweredBlock::Orig { block: succ }));
|
||||
}
|
||||
}
|
||||
}
|
||||
let end_idx = ret.len();
|
||||
(start_idx, end_idx)
|
||||
};
|
||||
// Step 2: walk the postorder from the domtree in reverse to produce our desired node
|
||||
// lowering order, identifying critical edges to split along the way.
|
||||
|
||||
// Build the explicit LoweredBlock-to-LoweredBlock successors list.
|
||||
let mut lowered_succs = vec![];
|
||||
let mut lowered_succ_indices = vec![];
|
||||
|
||||
// Step 2: Compute RPO traversal of the implicit CLIF-plus-edge-block graph. Use an
|
||||
// explicit stack so we don't overflow the real stack with a deep DFS.
|
||||
#[derive(Debug)]
|
||||
struct StackEntry {
|
||||
this: LoweredBlock,
|
||||
succs: (usize, usize), // range in lowered_succs
|
||||
cur_succ: usize, // index in lowered_succs
|
||||
}
|
||||
|
||||
let mut stack: SmallVec<[StackEntry; 16]> = SmallVec::new();
|
||||
let mut visited = FxHashSet::default();
|
||||
let mut postorder = vec![];
|
||||
|
||||
// Add the entry block.
|
||||
//
|
||||
// FIXME(cfallin): we might be able to use OrigAndEdge. Find a
|
||||
// way to not special-case the entry block here.
|
||||
let block = LoweredBlock::Orig { block: entry };
|
||||
visited.insert(block);
|
||||
let range = compute_lowered_succs(&mut lowered_succs, block);
|
||||
lowered_succ_indices.resize(lowered_succs.len(), 0);
|
||||
stack.push(StackEntry {
|
||||
this: block,
|
||||
succs: range,
|
||||
cur_succ: range.1,
|
||||
});
|
||||
|
||||
while !stack.is_empty() {
|
||||
let stack_entry = stack.last_mut().unwrap();
|
||||
let range = stack_entry.succs;
|
||||
if stack_entry.cur_succ == range.0 {
|
||||
postorder.push((stack_entry.this, range));
|
||||
stack.pop();
|
||||
} else {
|
||||
// Heuristic: chase the children in reverse. This puts the first
|
||||
// successor block first in RPO, all other things being equal,
|
||||
// which tends to prioritize loop backedges over out-edges,
|
||||
// putting the edge-block closer to the loop body and minimizing
|
||||
// live-ranges in linear instruction space.
|
||||
let next = lowered_succs[stack_entry.cur_succ - 1].1;
|
||||
stack_entry.cur_succ -= 1;
|
||||
if visited.contains(&next) {
|
||||
continue;
|
||||
}
|
||||
visited.insert(next);
|
||||
let range = compute_lowered_succs(&mut lowered_succs, next);
|
||||
lowered_succ_indices.resize(lowered_succs.len(), 0);
|
||||
stack.push(StackEntry {
|
||||
this: next,
|
||||
succs: range,
|
||||
cur_succ: range.1,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
postorder.reverse();
|
||||
let rpo = postorder;
|
||||
|
||||
// Step 3: now that we have RPO, build the BlockIndex/BB fwd/rev maps.
|
||||
let mut lowered_order = vec![];
|
||||
let mut cold_blocks = FxHashSet::default();
|
||||
let mut lowered_succ_ranges = vec![];
|
||||
let mut lb_to_bindex = FxHashMap::default();
|
||||
let mut lowered_order = Vec::new();
|
||||
|
||||
for &block in domtree.cfg_postorder().iter().rev() {
|
||||
let lb = LoweredBlock::Orig { block };
|
||||
let bindex = BlockIndex::new(lowered_order.len());
|
||||
lb_to_bindex.insert(lb.clone(), bindex);
|
||||
lowered_order.push(lb);
|
||||
|
||||
if block_out_count[block] > 1 {
|
||||
let range = block_succ_range[block].clone();
|
||||
for (succ_ix, lb) in block_succs[range].iter_mut().enumerate() {
|
||||
let succ = lb.orig_block().unwrap();
|
||||
if block_in_count[succ] > 1 {
|
||||
// Mutate the successor to be a critical edge, as `block` has multiple
|
||||
// edges leaving it, and `succ` has multiple edges entering it.
|
||||
*lb = LoweredBlock::CriticalEdge {
|
||||
pred: block,
|
||||
succ,
|
||||
succ_idx: succ_ix as u32,
|
||||
};
|
||||
let bindex = BlockIndex::new(lowered_order.len());
|
||||
lb_to_bindex.insert(*lb, bindex);
|
||||
lowered_order.push(*lb);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Step 3: build the successor tables given the lowering order. We can't perform this step
|
||||
// during the creation of `lowering_order`, as we need `lb_to_bindex` to be fully populated
|
||||
// first.
|
||||
let mut lowered_succ_indices = Vec::new();
|
||||
let mut cold_blocks = FxHashSet::default();
|
||||
let mut indirect_branch_targets = FxHashSet::default();
|
||||
for (block, succ_range) in rpo.into_iter() {
|
||||
let index = BlockIndex::new(lowered_order.len());
|
||||
lb_to_bindex.insert(block, index);
|
||||
lowered_order.push(block);
|
||||
lowered_succ_ranges.push(succ_range);
|
||||
let lowered_succ_ranges =
|
||||
Vec::from_iter(lowered_order.iter().enumerate().map(|(ix, lb)| {
|
||||
let bindex = BlockIndex::new(ix);
|
||||
let start = lowered_succ_indices.len();
|
||||
let opt_inst = match lb {
|
||||
// Block successors are pulled directly over, as they'll have been mutated when
|
||||
// determining the block order already.
|
||||
&LoweredBlock::Orig { block } => {
|
||||
let range = block_succ_range[block].clone();
|
||||
lowered_succ_indices
|
||||
.extend(block_succs[range].iter().map(|lb| lb_to_bindex[lb]));
|
||||
|
||||
match block {
|
||||
LoweredBlock::Orig { block }
|
||||
| LoweredBlock::OrigAndEdge { block, .. }
|
||||
| LoweredBlock::EdgeAndOrig { block, .. } => {
|
||||
if f.layout.is_cold(block) {
|
||||
cold_blocks.insert(index);
|
||||
if f.layout.is_cold(block) {
|
||||
cold_blocks.insert(bindex);
|
||||
}
|
||||
|
||||
if indirect_branch_target_clif_blocks.contains(&block) {
|
||||
indirect_branch_targets.insert(bindex);
|
||||
}
|
||||
|
||||
let last = f.layout.last_inst(block).unwrap();
|
||||
let opcode = f.dfg.insts[last].opcode();
|
||||
|
||||
assert!(opcode.is_terminator());
|
||||
|
||||
opcode.is_branch().then_some(last)
|
||||
}
|
||||
|
||||
if indirect_branch_target_clif_blocks.contains(&block) {
|
||||
indirect_branch_targets.insert(index);
|
||||
}
|
||||
}
|
||||
LoweredBlock::Edge { pred, succ, .. } => {
|
||||
if f.layout.is_cold(pred) || f.layout.is_cold(succ) {
|
||||
cold_blocks.insert(index);
|
||||
}
|
||||
// Critical edges won't have successor information in block_succ_range, but
|
||||
// they only have a single known successor to record anyway.
|
||||
&LoweredBlock::CriticalEdge { succ, .. } => {
|
||||
let succ_index = lb_to_bindex[&LoweredBlock::Orig { block: succ }];
|
||||
lowered_succ_indices.push(succ_index);
|
||||
|
||||
if indirect_branch_target_clif_blocks.contains(&succ) {
|
||||
indirect_branch_targets.insert(index);
|
||||
// Edges inherit indirect branch and cold block metadata from their
|
||||
// successor.
|
||||
|
||||
if f.layout.is_cold(succ) {
|
||||
cold_blocks.insert(bindex);
|
||||
}
|
||||
|
||||
if indirect_branch_target_clif_blocks.contains(&succ) {
|
||||
indirect_branch_targets.insert(bindex);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let lowered_succ_indices = lowered_succs
|
||||
.iter()
|
||||
.map(|&(inst, succ)| (inst, lb_to_bindex.get(&succ).cloned().unwrap()))
|
||||
.collect();
|
||||
|
||||
let mut orig_map = SecondaryMap::with_default(None);
|
||||
for (i, lb) in lowered_order.iter().enumerate() {
|
||||
let i = BlockIndex::new(i);
|
||||
if let Some(b) = lb.orig_block() {
|
||||
orig_map[b] = Some(i);
|
||||
}
|
||||
}
|
||||
};
|
||||
let end = lowered_succ_indices.len();
|
||||
(opt_inst, start..end)
|
||||
}));
|
||||
|
||||
let result = BlockLoweringOrder {
|
||||
lowered_order,
|
||||
lowered_succs,
|
||||
lowered_succ_indices,
|
||||
lowered_succ_ranges,
|
||||
orig_map,
|
||||
cold_blocks,
|
||||
indirect_branch_targets,
|
||||
};
|
||||
trace!("BlockLoweringOrder: {:?}", result);
|
||||
|
||||
trace!("BlockLoweringOrder: {:#?}", result);
|
||||
result
|
||||
}
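For reference, the criterion driving the CriticalEdge entries created in the new BlockLoweringOrder::new above, restated as a tiny standalone helper (not Cranelift code):

// An edge pred -> succ must become its own lowered block exactly when it is
// a critical edge: the predecessor has more than one way out and the
// successor has more than one way in, so edge code (e.g. regalloc moves)
// cannot be folded into either side without disturbing other paths.
fn needs_split(pred_out_count: usize, succ_in_count: usize) -> bool {
    pred_out_count > 1 && succ_in_count > 1
}

This is also why, per one of the commit bullets, a block ending in a br_table with an empty jump table has its out-edge count forced up to at least 2: it must still be treated as a multi-exit block for splitting purposes.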
@@ -503,9 +293,9 @@ impl BlockLoweringOrder {
}

/// Get the successor indices for a lowered block.
pub fn succ_indices(&self, block: BlockIndex) -> &[(Inst, BlockIndex)] {
let range = self.lowered_succ_ranges[block.index()];
&self.lowered_succ_indices[range.0..range.1]
pub fn succ_indices(&self, block: BlockIndex) -> (Option<Inst>, &[BlockIndex]) {
let (opt_inst, range) = &self.lowered_succ_ranges[block.index()];
(opt_inst.clone(), &self.lowered_succ_indices[range.clone()])
}

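A hedged usage sketch of the new return shape (mirroring the machinst/lower.rs hunks near the end of this diff; collect_branch_targets is a hypothetical helper, not part of the change):

fn collect_branch_targets(
    order: &BlockLoweringOrder,
    block: BlockIndex,
) -> (Option<Inst>, Vec<MachLabel>) {
    // `opt_inst` is the branch instruction that produced these successors;
    // it is `None` for split critical edges and for blocks whose terminator
    // is not a branch (e.g. a return).
    let (opt_inst, succs) = order.succ_indices(block);
    (opt_inst, succs.iter().map(|&s| MachLabel::from_block(s)).collect())
}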
/// Determine whether the given lowered-block index is cold.
|
||||
@@ -524,12 +314,13 @@ impl BlockLoweringOrder {
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::cursor::{Cursor, FuncCursor};
|
||||
use crate::flowgraph::ControlFlowGraph;
|
||||
use crate::ir::types::*;
|
||||
use crate::ir::UserFuncName;
|
||||
use crate::ir::{AbiParam, Function, InstBuilder, Signature};
|
||||
use crate::isa::CallConv;
|
||||
|
||||
fn build_test_func(n_blocks: usize, edges: &[(usize, usize)]) -> Function {
|
||||
fn build_test_func(n_blocks: usize, edges: &[(usize, usize)]) -> BlockLoweringOrder {
|
||||
assert!(n_blocks > 0);
|
||||
|
||||
let name = UserFuncName::testcase("test0");
|
||||
@@ -568,42 +359,20 @@ mod test {
|
||||
}
|
||||
}
|
||||
|
||||
func
|
||||
let mut cfg = ControlFlowGraph::new();
|
||||
cfg.compute(&func);
|
||||
let dom_tree = DominatorTree::with_function(&func, &cfg);
|
||||
|
||||
BlockLoweringOrder::new(&func, &dom_tree)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_blockorder_diamond() {
|
||||
let func = build_test_func(4, &[(0, 1), (0, 2), (1, 3), (2, 3)]);
|
||||
let order = BlockLoweringOrder::new(&func);
|
||||
let order = build_test_func(4, &[(0, 1), (0, 2), (1, 3), (2, 3)]);
|
||||
|
||||
assert_eq!(order.lowered_order.len(), 6);
|
||||
|
||||
assert!(order.lowered_order[0].orig_block().unwrap().as_u32() == 0);
|
||||
assert!(order.lowered_order[0].in_edge().is_none());
|
||||
assert!(order.lowered_order[0].out_edge().is_none());
|
||||
|
||||
assert!(order.lowered_order[1].orig_block().unwrap().as_u32() == 1);
|
||||
assert!(order.lowered_order[1].in_edge().unwrap().0.as_u32() == 0);
|
||||
assert!(order.lowered_order[1].in_edge().unwrap().2.as_u32() == 1);
|
||||
|
||||
assert!(order.lowered_order[2].orig_block().is_none());
|
||||
assert!(order.lowered_order[2].in_edge().is_none());
|
||||
assert!(order.lowered_order[2].out_edge().unwrap().0.as_u32() == 1);
|
||||
assert!(order.lowered_order[2].out_edge().unwrap().2.as_u32() == 3);
|
||||
|
||||
assert!(order.lowered_order[3].orig_block().unwrap().as_u32() == 2);
|
||||
assert!(order.lowered_order[3].in_edge().unwrap().0.as_u32() == 0);
|
||||
assert!(order.lowered_order[3].in_edge().unwrap().2.as_u32() == 2);
|
||||
assert!(order.lowered_order[3].out_edge().is_none());
|
||||
|
||||
assert!(order.lowered_order[4].orig_block().is_none());
|
||||
assert!(order.lowered_order[4].in_edge().is_none());
|
||||
assert!(order.lowered_order[4].out_edge().unwrap().0.as_u32() == 2);
|
||||
assert!(order.lowered_order[4].out_edge().unwrap().2.as_u32() == 3);
|
||||
|
||||
assert!(order.lowered_order[5].orig_block().unwrap().as_u32() == 3);
|
||||
assert!(order.lowered_order[5].in_edge().is_none());
|
||||
assert!(order.lowered_order[5].out_edge().is_none());
|
||||
// This test case doesn't need to introduce any critical edges, as all regalloc allocations
|
||||
// can sit on either the entry or exit of blocks 1 and 2.
|
||||
assert_eq!(order.lowered_order.len(), 4);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -618,9 +387,9 @@ mod test {
|
||||
// | /\ |
|
||||
// 5 6
|
||||
//
|
||||
// (3 -> 5, 3 -> 6, 4 -> 6 are critical edges and must be split)
|
||||
// (3 -> 5, and 3 -> 6 are critical edges and must be split)
|
||||
//
|
||||
let func = build_test_func(
|
||||
let order = build_test_func(
|
||||
7,
|
||||
&[
|
||||
(0, 1),
|
||||
@@ -633,72 +402,53 @@ mod test {
|
||||
(4, 6),
|
||||
],
|
||||
);
|
||||
let order = BlockLoweringOrder::new(&func);
|
||||
|
||||
assert_eq!(order.lowered_order.len(), 11);
|
||||
assert_eq!(order.lowered_order.len(), 9);
|
||||
println!("ordered = {:?}", order.lowered_order);
|
||||
|
||||
// block 0
|
||||
assert!(order.lowered_order[0].orig_block().unwrap().as_u32() == 0);
|
||||
assert_eq!(order.lowered_order[0].orig_block().unwrap().as_u32(), 0);
|
||||
assert!(order.lowered_order[0].in_edge().is_none());
|
||||
assert!(order.lowered_order[0].out_edge().is_none());
|
||||
|
||||
// edge 0->1 + block 1
|
||||
assert!(order.lowered_order[1].orig_block().unwrap().as_u32() == 1);
|
||||
assert!(order.lowered_order[1].in_edge().unwrap().0.as_u32() == 0);
|
||||
assert!(order.lowered_order[1].in_edge().unwrap().2.as_u32() == 1);
|
||||
// block 2
|
||||
assert_eq!(order.lowered_order[1].orig_block().unwrap().as_u32(), 2);
|
||||
assert!(order.lowered_order[1].in_edge().is_none());
|
||||
assert!(order.lowered_order[1].out_edge().is_none());
|
||||
|
||||
// edge 1->3 + block 3
|
||||
assert!(order.lowered_order[2].orig_block().unwrap().as_u32() == 3);
|
||||
assert!(order.lowered_order[2].in_edge().unwrap().0.as_u32() == 1);
|
||||
assert!(order.lowered_order[2].in_edge().unwrap().2.as_u32() == 3);
|
||||
// block 1
|
||||
assert_eq!(order.lowered_order[2].orig_block().unwrap().as_u32(), 1);
|
||||
assert!(order.lowered_order[2].in_edge().is_none());
|
||||
assert!(order.lowered_order[2].out_edge().is_none());
|
||||
|
||||
// edge 3->5
|
||||
assert!(order.lowered_order[3].orig_block().is_none());
|
||||
// block 4
|
||||
assert_eq!(order.lowered_order[3].orig_block().unwrap().as_u32(), 4);
|
||||
assert!(order.lowered_order[3].in_edge().is_none());
|
||||
assert!(order.lowered_order[3].out_edge().unwrap().0.as_u32() == 3);
|
||||
assert!(order.lowered_order[3].out_edge().unwrap().2.as_u32() == 5);
|
||||
assert!(order.lowered_order[3].out_edge().is_none());
|
||||
|
||||
// edge 3->6
|
||||
assert!(order.lowered_order[4].orig_block().is_none());
|
||||
// block 3
|
||||
assert_eq!(order.lowered_order[4].orig_block().unwrap().as_u32(), 3);
|
||||
assert!(order.lowered_order[4].in_edge().is_none());
|
||||
assert!(order.lowered_order[4].out_edge().unwrap().0.as_u32() == 3);
|
||||
assert!(order.lowered_order[4].out_edge().unwrap().2.as_u32() == 6);
|
||||
assert!(order.lowered_order[4].out_edge().is_none());
|
||||
|
||||
// edge 1->4 + block 4
|
||||
assert!(order.lowered_order[5].orig_block().unwrap().as_u32() == 4);
|
||||
assert!(order.lowered_order[5].in_edge().unwrap().0.as_u32() == 1);
|
||||
assert!(order.lowered_order[5].in_edge().unwrap().2.as_u32() == 4);
|
||||
assert!(order.lowered_order[5].out_edge().is_none());
|
||||
// critical edge 3 -> 5
|
||||
assert!(order.lowered_order[5].orig_block().is_none());
|
||||
assert_eq!(order.lowered_order[5].in_edge().unwrap().as_u32(), 3);
|
||||
assert_eq!(order.lowered_order[5].out_edge().unwrap().as_u32(), 5);
|
||||
|
||||
// edge 4->6
|
||||
// critical edge 3 -> 6
|
||||
assert!(order.lowered_order[6].orig_block().is_none());
|
||||
assert!(order.lowered_order[6].in_edge().is_none());
|
||||
assert!(order.lowered_order[6].out_edge().unwrap().0.as_u32() == 4);
|
||||
assert!(order.lowered_order[6].out_edge().unwrap().2.as_u32() == 6);
|
||||
assert_eq!(order.lowered_order[6].in_edge().unwrap().as_u32(), 3);
|
||||
assert_eq!(order.lowered_order[6].out_edge().unwrap().as_u32(), 6);
|
||||
|
||||
// block 6
|
||||
assert!(order.lowered_order[7].orig_block().unwrap().as_u32() == 6);
|
||||
assert_eq!(order.lowered_order[7].orig_block().unwrap().as_u32(), 6);
|
||||
assert!(order.lowered_order[7].in_edge().is_none());
|
||||
assert!(order.lowered_order[7].out_edge().is_none());
|
||||
|
||||
// edge 0->2 + block 2
|
||||
assert!(order.lowered_order[8].orig_block().unwrap().as_u32() == 2);
|
||||
assert!(order.lowered_order[8].in_edge().unwrap().0.as_u32() == 0);
|
||||
assert!(order.lowered_order[8].in_edge().unwrap().2.as_u32() == 2);
|
||||
assert!(order.lowered_order[8].out_edge().is_none());
|
||||
|
||||
// edge 2->5
|
||||
assert!(order.lowered_order[9].orig_block().is_none());
|
||||
assert!(order.lowered_order[9].in_edge().is_none());
|
||||
assert!(order.lowered_order[9].out_edge().unwrap().0.as_u32() == 2);
|
||||
assert!(order.lowered_order[9].out_edge().unwrap().2.as_u32() == 5);
|
||||
|
||||
// block 5
|
||||
assert!(order.lowered_order[10].orig_block().unwrap().as_u32() == 5);
|
||||
assert!(order.lowered_order[10].in_edge().is_none());
|
||||
assert!(order.lowered_order[10].out_edge().is_none());
|
||||
assert_eq!(order.lowered_order[8].orig_block().unwrap().as_u32(), 5);
|
||||
assert!(order.lowered_order[8].in_edge().is_none());
|
||||
assert!(order.lowered_order[8].out_edge().is_none());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
//! Compilation backend pipeline: optimized IR to VCode / binemit.

use crate::dominator_tree::DominatorTree;
use crate::ir::Function;
use crate::isa::TargetIsa;
use crate::machinst::*;
@@ -12,6 +13,7 @@ use regalloc2::RegallocOptions;
/// for binary emission.
pub fn compile<B: LowerBackend + TargetIsa>(
f: &Function,
domtree: &DominatorTree,
b: &B,
abi: Callee<<<B as LowerBackend>::MInst as MachInst>::ABIMachineSpec>,
emit_info: <B::MInst as MachInstEmit>::Info,
@@ -20,7 +22,7 @@ pub fn compile<B: LowerBackend + TargetIsa>(
let machine_env = b.machine_env();

// Compute lowered block order.
let block_order = BlockLoweringOrder::new(f);
let block_order = BlockLoweringOrder::new(f, domtree);

// Build the lowering context.
let lower = crate::machinst::Lower::new(f, machine_env, abi, emit_info, block_order, sigs)?;

@@ -928,9 +928,12 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
|
||||
}
|
||||
|
||||
fn lower_branch_blockparam_args(&mut self, block: BlockIndex) {
|
||||
for succ_idx in 0..self.vcode.block_order().succ_indices(block).len() {
|
||||
// TODO: why not make `block_order` public?
|
||||
for succ_idx in 0..self.vcode.block_order().succ_indices(block).1.len() {
|
||||
// Avoid immutable borrow by explicitly indexing.
|
||||
let (inst, succ) = self.vcode.block_order().succ_indices(block)[succ_idx];
|
||||
let (opt_inst, succs) = self.vcode.block_order().succ_indices(block);
|
||||
let inst = opt_inst.expect("lower_branch_blockparam_args called on a critical edge!");
|
||||
let succ = succs[succ_idx];
|
||||
|
||||
// The use of `succ_idx` to index `branch_destination` is valid on the assumption that
|
||||
// the traversal order defined in `visit_block_succs` mirrors the order returned by
|
||||
@@ -960,17 +963,9 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
|
||||
targets: &mut SmallVec<[MachLabel; 2]>,
|
||||
) -> Option<Inst> {
|
||||
targets.clear();
|
||||
let mut last_inst = None;
|
||||
for &(inst, succ) in self.vcode.block_order().succ_indices(bindex) {
|
||||
// Basic blocks may end in a single branch instruction, but those instructions may have
|
||||
// multiple destinations. As such, all `inst` values in `succ_indices` must be the
|
||||
// same, or this basic block would have multiple branch instructions present.
|
||||
debug_assert!(last_inst.map_or(true, |prev| prev == inst));
|
||||
last_inst = Some(inst);
|
||||
targets.push(MachLabel::from_block(succ));
|
||||
}
|
||||
|
||||
last_inst
|
||||
let (opt_inst, succs) = self.vcode.block_order().succ_indices(bindex);
|
||||
targets.extend(succs.iter().map(|succ| MachLabel::from_block(*succ)));
|
||||
opt_inst
|
||||
}
|
||||
|
||||
/// Lower the function.
|
||||
@@ -1025,7 +1020,8 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
|
||||
// according to the one successor, and pass them
|
||||
// through; note that the successor must have an
|
||||
// original block.
|
||||
let (_, succ) = self.vcode.block_order().succ_indices(bindex)[0];
|
||||
let (_, succs) = self.vcode.block_order().succ_indices(bindex);
|
||||
let succ = succs[0];
|
||||
|
||||
let orig_succ = lowered_order[succ.index()];
|
||||
let orig_succ = orig_succ
|
||||
|
||||