From 11a2ef01e7a91bd58f3071455ace57b3ee8352de Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 28 Apr 2021 17:52:42 -0700 Subject: [PATCH 1/2] Provide BB layout info externally in terms of code offsets. This is sometimes useful when performing analyses on the generated machine code: for example, some kinds of code verifiers will want to do a control-flow analysis, and it is much easier to do this if one does not have to recover the CFG from the machine code (doing so requires heavyweight analysis when indirect branches are involved). If one trusts the control-flow lowering and only needs to verify other properties of the code, this can be very useful. --- cranelift/codegen/src/context.rs | 18 ++++++++++ cranelift/codegen/src/isa/aarch64/mod.rs | 4 ++- cranelift/codegen/src/isa/arm32/mod.rs | 4 ++- cranelift/codegen/src/isa/s390x/mod.rs | 4 ++- cranelift/codegen/src/isa/x64/mod.rs | 4 ++- cranelift/codegen/src/machinst/buffer.rs | 2 +- cranelift/codegen/src/machinst/mod.rs | 6 ++++ cranelift/codegen/src/machinst/vcode.rs | 46 ++++++++++++++++++++++-- 8 files changed, 81 insertions(+), 7 deletions(-) diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs index 33c205d226..4a28c696bb 100644 --- a/cranelift/codegen/src/context.rs +++ b/cranelift/codegen/src/context.rs @@ -258,6 +258,24 @@ impl Context { } } + /// If available, return information about the code layout in the + /// final machine code: the offsets (in bytes) of each basic-block + /// start, and all basic-block edges. + pub fn get_code_bb_layout(&self) -> Option<(Vec<usize>, Vec<(usize, usize)>)> { + if let Some(result) = self.mach_compile_result.as_ref() { + Some(( + result.bb_starts.iter().map(|&off| off as usize).collect(), + result + .bb_edges + .iter() + .map(|&(from, to)| (from as usize, to as usize)) + .collect(), + )) + } else { + None + } + } + /// Creates unwind information for the function. /// /// Returns `None` if the function has no unwind information. 
diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs index a6892b301d..a2161645bc 100644 --- a/cranelift/codegen/src/isa/aarch64/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/mod.rs @@ -69,7 +69,7 @@ impl MachBackend for AArch64Backend { let flags = self.flags(); let vcode = self.compile_vcode(func, flags.clone())?; - let buffer = vcode.emit(); + let (buffer, bb_starts, bb_edges) = vcode.emit(); let frame_size = vcode.frame_size(); let stackslot_offsets = vcode.stackslot_offsets().clone(); @@ -87,6 +87,8 @@ impl MachBackend for AArch64Backend { disasm, value_labels_ranges: Default::default(), stackslot_offsets, + bb_starts, + bb_edges, }) } diff --git a/cranelift/codegen/src/isa/arm32/mod.rs b/cranelift/codegen/src/isa/arm32/mod.rs index 832fc46f47..477dc6ec46 100644 --- a/cranelift/codegen/src/isa/arm32/mod.rs +++ b/cranelift/codegen/src/isa/arm32/mod.rs @@ -59,7 +59,7 @@ impl MachBackend for Arm32Backend { ) -> CodegenResult { let flags = self.flags(); let vcode = self.compile_vcode(func, flags.clone())?; - let buffer = vcode.emit(); + let (buffer, bb_starts, bb_edges) = vcode.emit(); let frame_size = vcode.frame_size(); let stackslot_offsets = vcode.stackslot_offsets().clone(); @@ -77,6 +77,8 @@ impl MachBackend for Arm32Backend { disasm, value_labels_ranges: Default::default(), stackslot_offsets, + bb_starts, + bb_edges, }) } diff --git a/cranelift/codegen/src/isa/s390x/mod.rs b/cranelift/codegen/src/isa/s390x/mod.rs index 3a78b54c95..d6629d7fa9 100644 --- a/cranelift/codegen/src/isa/s390x/mod.rs +++ b/cranelift/codegen/src/isa/s390x/mod.rs @@ -70,7 +70,7 @@ impl MachBackend for S390xBackend { ) -> CodegenResult { let flags = self.flags(); let vcode = self.compile_vcode(func, flags.clone())?; - let buffer = vcode.emit(); + let (buffer, bb_starts, bb_edges) = vcode.emit(); let frame_size = vcode.frame_size(); let value_labels_ranges = vcode.value_labels_ranges(); let stackslot_offsets = 
vcode.stackslot_offsets().clone(); @@ -89,6 +89,8 @@ impl MachBackend for S390xBackend { disasm, value_labels_ranges, stackslot_offsets, + bb_starts, + bb_edges, }) } diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs index 6ca4a7cec4..e4933c0586 100644 --- a/cranelift/codegen/src/isa/x64/mod.rs +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -61,7 +61,7 @@ impl MachBackend for X64Backend { let flags = self.flags(); let vcode = self.compile_vcode(func, flags.clone())?; - let buffer = vcode.emit(); + let (buffer, bb_starts, bb_edges) = vcode.emit(); let buffer = buffer.finish(); let frame_size = vcode.frame_size(); let value_labels_ranges = vcode.value_labels_ranges(); @@ -79,6 +79,8 @@ impl MachBackend for X64Backend { disasm, value_labels_ranges, stackslot_offsets, + bb_starts, + bb_edges, }) } diff --git a/cranelift/codegen/src/machinst/buffer.rs b/cranelift/codegen/src/machinst/buffer.rs index 5747f363c1..6a5c06c04e 100644 --- a/cranelift/codegen/src/machinst/buffer.rs +++ b/cranelift/codegen/src/machinst/buffer.rs @@ -546,7 +546,7 @@ impl MachBuffer { } /// Resolve a label to an offset, if known. May return `UNKNOWN_LABEL_OFFSET`. - fn resolve_label_offset(&self, mut label: MachLabel) -> CodeOffset { + pub(crate) fn resolve_label_offset(&self, mut label: MachLabel) -> CodeOffset { let mut iters = 0; while self.label_aliases[label.0 as usize] != UNKNOWN_LABEL { label = self.label_aliases[label.0 as usize]; diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index 401863cbd8..0c2376ce66 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -340,6 +340,12 @@ pub struct MachCompileResult { pub value_labels_ranges: ValueLabelsRanges, /// Debug info: stackslots to stack pointer offsets. pub stackslot_offsets: PrimaryMap, + /// Basic-block layout info: block start offsets. + pub bb_starts: Vec<CodeOffset>, + /// Basic-block layout info: block edges. 
Each edge is `(from, + /// to)`, where `from` and `to` are basic-block start offsets of + /// the respective blocks. + pub bb_edges: Vec<(CodeOffset, CodeOffset)>, } impl MachCompileResult { diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 4cafcf425e..bf1b2e0698 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -473,13 +473,20 @@ impl VCode { /// Emit the instructions to a `MachBuffer`, containing fixed-up code and external /// reloc/trap/etc. records ready for use. - pub fn emit(&self) -> MachBuffer + pub fn emit( + &self, + ) -> ( + MachBuffer, + Vec<CodeOffset>, + Vec<(CodeOffset, CodeOffset)>, + ) where I: MachInstEmit, { let _tt = timing::vcode_emit(); let mut buffer = MachBuffer::new(); let mut state = I::State::new(&*self.abi); + let mut bb_starts: Vec<Option<CodeOffset>> = vec![]; // The first M MachLabels are reserved for block indices, the next N MachLabels for // constants. @@ -491,6 +498,7 @@ impl VCode { let mut safepoint_idx = 0; let mut cur_srcloc = None; + let mut last_offset = None; for block in 0..self.num_blocks() { let block = block as BlockIndex; let new_offset = I::align_basic_block(buffer.cur_offset()); @@ -504,6 +512,21 @@ impl VCode { let (start, end) = self.block_ranges[block as usize]; buffer.bind_label(MachLabel::from_block(block)); label_insn_iix[block as usize] = start; + + // Track BB starts. If we have backed up due to MachBuffer + // branch opts, note that the removed blocks were removed. 
+ let cur_offset = buffer.cur_offset(); + if last_offset.is_some() && cur_offset <= last_offset.unwrap() { + for i in (0..bb_starts.len()).rev() { + if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() { + break; + } + bb_starts[i] = None; + } + } + bb_starts.push(Some(cur_offset)); + last_offset = Some(cur_offset); + for iix in start..end { let srcloc = self.srclocs[iix as usize]; if cur_srcloc != Some(srcloc) { @@ -580,7 +603,26 @@ impl VCode { *self.insts_layout.borrow_mut() = (inst_ends, label_insn_iix, buffer.cur_offset()); } - buffer + // Create `bb_edges` and final (filtered) `bb_starts`. + let mut final_bb_starts = vec![]; + let mut bb_edges = vec![]; + for block in 0..self.num_blocks() { + if bb_starts[block].is_none() { + // Block was deleted by MachBuffer; skip. + continue; + } + let from = bb_starts[block].unwrap(); + + final_bb_starts.push(from); + // Resolve each `succ` label and add edges. + let succs = self.block_succs(BlockIx::new(block as u32)); + for succ in succs.iter() { + let to = buffer.resolve_label_offset(MachLabel::from_block(succ.get())); + bb_edges.push((from, to)); + } + } + + (buffer, final_bb_starts, bb_edges) } /// Generates value-label ranges. From 800cf25bb586bef6e9f0dd8b7dd82ab8f57a7625 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 24 May 2021 11:16:57 -0700 Subject: [PATCH 2/2] Make the CFG metadata computation conditional on a flag. 
--- cranelift/codegen/meta/src/shared/settings.rs | 15 ++++++ cranelift/codegen/src/machinst/mod.rs | 6 +++ cranelift/codegen/src/machinst/vcode.rs | 49 ++++++++++--------- cranelift/codegen/src/settings.rs | 1 + 4 files changed, 49 insertions(+), 22 deletions(-) diff --git a/cranelift/codegen/meta/src/shared/settings.rs b/cranelift/codegen/meta/src/shared/settings.rs index 2233e85dbc..b9fa89587a 100644 --- a/cranelift/codegen/meta/src/shared/settings.rs +++ b/cranelift/codegen/meta/src/shared/settings.rs @@ -245,6 +245,21 @@ pub(crate) fn define() -> SettingGroup { true, ); + settings.add_bool( + "machine_code_cfg_info", + "Generate CFG metadata for machine code.", + r#" + This increases metadata size and compile time, but allows for the + embedder to more easily post-process or analyze the generated + machine code. It provides code offsets for the start of each + basic block in the generated machine code, and a list of CFG + edges (with blocks identified by start offsets) between them. + This is useful for, e.g., machine-code analyses that verify certain + properties of the generated code. + "#, + false, + ); + // BaldrMonkey requires that not-yet-relocated function addresses be encoded // as all-ones bitpatterns. settings.add_bool( diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index 0c2376ce66..41d6e05bed 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -341,10 +341,16 @@ pub struct MachCompileResult { /// Debug info: stackslots to stack pointer offsets. pub stackslot_offsets: PrimaryMap, /// Basic-block layout info: block start offsets. + /// + /// This info is generated only if the `machine_code_cfg_info` + /// flag is set. pub bb_starts: Vec<CodeOffset>, /// Basic-block layout info: block edges. Each edge is `(from, /// to)`, where `from` and `to` are basic-block start offsets of /// the respective blocks. 
+ /// + /// This info is generated only if the `machine_code_cfg_info` + /// flag is set. pub bb_edges: Vec<(CodeOffset, CodeOffset)>, } diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index bf1b2e0698..c27f1e00ee 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -486,6 +486,7 @@ impl VCode { let _tt = timing::vcode_emit(); let mut buffer = MachBuffer::new(); let mut state = I::State::new(&*self.abi); + let cfg_metadata = self.flags().machine_code_cfg_info(); let mut bb_starts: Vec<Option<CodeOffset>> = vec![]; // The first M MachLabels are reserved for block indices, the next N MachLabels for @@ -513,19 +514,21 @@ impl VCode { buffer.bind_label(MachLabel::from_block(block)); label_insn_iix[block as usize] = start; - // Track BB starts. If we have backed up due to MachBuffer - // branch opts, note that the removed blocks were removed. - let cur_offset = buffer.cur_offset(); - if last_offset.is_some() && cur_offset <= last_offset.unwrap() { - for i in (0..bb_starts.len()).rev() { - if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() { - break; + if cfg_metadata { + // Track BB starts. If we have backed up due to MachBuffer + // branch opts, note that the removed blocks were removed. + let cur_offset = buffer.cur_offset(); + if last_offset.is_some() && cur_offset <= last_offset.unwrap() { + for i in (0..bb_starts.len()).rev() { + if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() { + break; + } + bb_starts[i] = None; } - bb_starts[i] = None; } + bb_starts.push(Some(cur_offset)); + last_offset = Some(cur_offset); } - bb_starts.push(Some(cur_offset)); - last_offset = Some(cur_offset); for iix in start..end { let srcloc = self.srclocs[iix as usize]; @@ -606,19 +609,21 @@ impl VCode { // Create `bb_edges` and final (filtered) `bb_starts`. 
let mut final_bb_starts = vec![]; let mut bb_edges = vec![]; - for block in 0..self.num_blocks() { - if bb_starts[block].is_none() { - // Block was deleted by MachBuffer; skip. - continue; - } - let from = bb_starts[block].unwrap(); + if cfg_metadata { + for block in 0..self.num_blocks() { + if bb_starts[block].is_none() { + // Block was deleted by MachBuffer; skip. + continue; + } + let from = bb_starts[block].unwrap(); - final_bb_starts.push(from); - // Resolve each `succ` label and add edges. - let succs = self.block_succs(BlockIx::new(block as u32)); - for succ in succs.iter() { - let to = buffer.resolve_label_offset(MachLabel::from_block(succ.get())); - bb_edges.push((from, to)); + final_bb_starts.push(from); + // Resolve each `succ` label and add edges. + let succs = self.block_succs(BlockIx::new(block as u32)); + for succ in succs.iter() { + let to = buffer.resolve_label_offset(MachLabel::from_block(succ.get())); + bb_edges.push((from, to)); + } } } diff --git a/cranelift/codegen/src/settings.rs b/cranelift/codegen/src/settings.rs index 0f36db82a9..09db7f55e5 100644 --- a/cranelift/codegen/src/settings.rs +++ b/cranelift/codegen/src/settings.rs @@ -511,6 +511,7 @@ enable_atomics = true enable_safepoints = false enable_llvm_abi_extensions = false unwind_info = true +machine_code_cfg_info = false emit_all_ones_funcaddrs = false enable_probestack = true probestack_func_adjusts_sp = false