Add per-instruction source locations to the Cretonne IR.

Source locations are opaque 32-bit entities that can be used to
represent WebAssembly byte-code positions or some other source
identifier.
This commit is contained in:
Jakob Stoklund Olesen
2017-09-20 16:42:30 -07:00
parent 16eb689dd1
commit b2a314a229
8 changed files with 156 additions and 24 deletions

View File

@@ -10,8 +10,8 @@ ebb1(v0: i32 [%x8], v1: i32):
[-] trap heap_oob [-] trap heap_oob
[R#1234, %x5, %x11] v6, v7 = iadd_cout v2, v0 [R#1234, %x5, %x11] v6, v7 = iadd_cout v2, v0
[Rshamt#beef, %x25] v8 = ishl_imm v6, 2 [Rshamt#beef, %x25] v8 = ishl_imm v6, 2
v9 = iadd v8, v7 @55 v9 = iadd v8, v7
[Iret#5] return v0, v8 @a5 [Iret#5] return v0, v8
} }
; sameln: function %foo(i32, i32) native { ; sameln: function %foo(i32, i32) native {
; nextln: $ebb1($v0: i32 [%x8], $v1: i32): ; nextln: $ebb1($v0: i32 [%x8], $v1: i32):
@@ -19,6 +19,6 @@ ebb1(v0: i32 [%x8], v1: i32):
; nextln: [-]$WS trap heap_oob ; nextln: [-]$WS trap heap_oob
; nextln: [R#1234,%x5,%x11]$WS $v6, $v7 = iadd_cout $v2, $v0 ; nextln: [R#1234,%x5,%x11]$WS $v6, $v7 = iadd_cout $v2, $v0
; nextln: [Rshamt#beef,%x25]$WS $v8 = ishl_imm $v6, 2 ; nextln: [Rshamt#beef,%x25]$WS $v8 = ishl_imm $v6, 2
; nextln: [-,-]$WS $v9 = iadd $v8, $v7 ; nextln: @0055 [-,-]$WS $v9 = iadd $v8, $v7
; nextln: [Iret#05]$WS return $v0, $v8 ; nextln: @00a5 [Iret#05]$WS return $v0, $v8
; nextln: } ; nextln: }

View File

@@ -6,7 +6,7 @@
use entity::{PrimaryMap, EntityMap}; use entity::{PrimaryMap, EntityMap};
use ir; use ir;
use ir::{FunctionName, CallConv, Signature, DataFlowGraph, Layout}; use ir::{FunctionName, CallConv, Signature, DataFlowGraph, Layout};
use ir::{InstEncodings, ValueLocations, JumpTables, StackSlots, EbbOffsets}; use ir::{InstEncodings, ValueLocations, JumpTables, StackSlots, EbbOffsets, SourceLocs};
use ir::{Ebb, JumpTableData, JumpTable, StackSlotData, StackSlot, SigRef, ExtFuncData, FuncRef, use ir::{Ebb, JumpTableData, JumpTable, StackSlotData, StackSlot, SigRef, ExtFuncData, FuncRef,
GlobalVarData, GlobalVar, HeapData, Heap}; GlobalVarData, GlobalVar, HeapData, Heap};
use isa::TargetIsa; use isa::TargetIsa;
@@ -56,6 +56,12 @@ pub struct Function {
/// computes it, and it can easily be recomputed by calling that function. It is not included /// computes it, and it can easily be recomputed by calling that function. It is not included
/// in the textual IL format. /// in the textual IL format.
pub offsets: EbbOffsets, pub offsets: EbbOffsets,
/// Source locations.
///
/// Track the original source location for each instruction. The source locations are not
/// interpreted by Cretonne, only preserved.
pub srclocs: SourceLocs,
} }
impl Function { impl Function {
@@ -73,6 +79,7 @@ impl Function {
encodings: EntityMap::new(), encodings: EntityMap::new(),
locations: EntityMap::new(), locations: EntityMap::new(),
offsets: EntityMap::new(), offsets: EntityMap::new(),
srclocs: EntityMap::new(),
} }
} }
@@ -88,6 +95,7 @@ impl Function {
self.encodings.clear(); self.encodings.clear();
self.locations.clear(); self.locations.clear();
self.offsets.clear(); self.offsets.clear();
self.srclocs.clear();
} }
/// Create a new empty, anonymous function with a native calling convention. /// Create a new empty, anonymous function with a native calling convention.

View File

@@ -17,6 +17,7 @@ mod globalvar;
mod heap; mod heap;
mod memflags; mod memflags;
mod progpoint; mod progpoint;
mod sourceloc;
mod trapcode; mod trapcode;
mod valueloc; mod valueloc;
@@ -34,6 +35,7 @@ pub use ir::jumptable::JumpTableData;
pub use ir::layout::{Layout, CursorBase, Cursor}; pub use ir::layout::{Layout, CursorBase, Cursor};
pub use ir::memflags::MemFlags; pub use ir::memflags::MemFlags;
pub use ir::progpoint::{ProgramPoint, ProgramOrder, ExpandedProgramPoint}; pub use ir::progpoint::{ProgramPoint, ProgramOrder, ExpandedProgramPoint};
pub use ir::sourceloc::SourceLoc;
pub use ir::stackslot::{StackSlots, StackSlotKind, StackSlotData}; pub use ir::stackslot::{StackSlots, StackSlotKind, StackSlotData};
pub use ir::trapcode::TrapCode; pub use ir::trapcode::TrapCode;
pub use ir::types::Type; pub use ir::types::Type;
@@ -54,3 +56,6 @@ pub type InstEncodings = EntityMap<Inst, isa::Encoding>;
/// Code offsets for EBBs. /// Code offsets for EBBs.
pub type EbbOffsets = EntityMap<Ebb, binemit::CodeOffset>; pub type EbbOffsets = EntityMap<Ebb, binemit::CodeOffset>;
/// Source locations for instructions.
pub type SourceLocs = EntityMap<Inst, SourceLoc>;

View File

@@ -0,0 +1,62 @@
//! Source locations.
//!
//! Cretonne tracks the original source location of each instruction, and preserves the source
//! location when instructions are transformed.
use std::fmt;
/// An opaque source location attached to a Cretonne IL instruction.
///
/// The location is a plain 32-bit number. Cretonne never interprets it; the value is only carried
/// along from the input to the output.
///
/// Instructions that can't be given a real source location use the default location, which is the
/// all-ones bit pattern `!0`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct SourceLoc(u32);

impl SourceLoc {
    /// Wrap the raw `bits` in a source location.
    pub fn new(bits: u32) -> Self {
        SourceLoc(bits)
    }

    /// Return `true` if this location is equal to the default source location.
    pub fn is_default(self) -> bool {
        SourceLoc::default() == self
    }

    /// Return the raw bits stored in this source location.
    pub fn bits(self) -> u32 {
        let SourceLoc(raw) = self;
        raw
    }
}

impl Default for SourceLoc {
    /// The default source location has all 32 bits set.
    fn default() -> Self {
        SourceLoc::new(!0)
    }
}

impl fmt::Display for SourceLoc {
    /// Format the location as `@` followed by lower-case hexadecimal digits, zero-padded to at
    /// least four digits. The default location is written as `@-`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if self.is_default() {
            return f.write_str("@-");
        }
        write!(f, "@{:04x}", self.bits())
    }
}
#[cfg(test)]
mod tests {
    use ir::SourceLoc;

    /// Check the textual form of the default location and of a few explicit locations, including
    /// one that needs more than the minimum four hex digits.
    #[test]
    fn display() {
        let cases = [
            (SourceLoc::default(), "@-"),
            (SourceLoc::new(0), "@0000"),
            (SourceLoc::new(16), "@0010"),
            (SourceLoc::new(0xabcdef), "@abcdef"),
        ];
        for &(loc, expected) in cases.iter() {
            assert_eq!(loc.to_string(), expected);
        }
    }
}

View File

@@ -106,6 +106,7 @@ pub fn write_ebb_header(
func: &Function, func: &Function,
isa: Option<&TargetIsa>, isa: Option<&TargetIsa>,
ebb: Ebb, ebb: Ebb,
indent: usize,
) -> Result { ) -> Result {
// Write out the basic block header, outdented: // Write out the basic block header, outdented:
// //
@@ -114,19 +115,17 @@ pub fn write_ebb_header(
// ebb10(v4: f64, v5: b1): // ebb10(v4: f64, v5: b1):
// //
// If we're writing encoding annotations, shift by 20. // The `indent` is the instruction indentation. EBB headers are 4 spaces out from that.
if !func.encodings.is_empty() { write!(w, "{1:0$}{2}", indent - 4, "", ebb)?;
write!(w, " ")?;
}
let regs = isa.map(TargetIsa::register_info); let regs = isa.map(TargetIsa::register_info);
let regs = regs.as_ref(); let regs = regs.as_ref();
let mut args = func.dfg.ebb_args(ebb).iter().cloned(); let mut args = func.dfg.ebb_args(ebb).iter().cloned();
match args.next() { match args.next() {
None => return writeln!(w, "{}:", ebb), None => return writeln!(w, ":"),
Some(arg) => { Some(arg) => {
write!(w, "{}(", ebb)?; write!(w, "(")?;
write_arg(w, func, regs, arg)?; write_arg(w, func, regs, arg)?;
} }
} }
@@ -139,9 +138,16 @@ pub fn write_ebb_header(
} }
pub fn write_ebb(w: &mut Write, func: &Function, isa: Option<&TargetIsa>, ebb: Ebb) -> Result { pub fn write_ebb(w: &mut Write, func: &Function, isa: Option<&TargetIsa>, ebb: Ebb) -> Result {
write_ebb_header(w, func, isa, ebb)?; // Indent all instructions if any encodings are present.
let indent = if func.encodings.is_empty() && func.srclocs.is_empty() {
4
} else {
36
};
write_ebb_header(w, func, isa, ebb, indent)?;
for inst in func.layout.ebb_insts(ebb) { for inst in func.layout.ebb_insts(ebb) {
write_instruction(w, func, isa, inst)?; write_instruction(w, func, isa, inst, indent)?;
} }
Ok(()) Ok(())
} }
@@ -203,16 +209,22 @@ fn write_instruction(
func: &Function, func: &Function,
isa: Option<&TargetIsa>, isa: Option<&TargetIsa>,
inst: Inst, inst: Inst,
indent: usize,
) -> Result { ) -> Result {
// Indent all instructions to col 24 if any encodings are present.
let indent = if func.encodings.is_empty() { 4 } else { 24 };
// Value aliases come out on lines before the instruction using them. // Value aliases come out on lines before the instruction using them.
write_value_aliases(w, func, inst, indent)?; write_value_aliases(w, func, inst, indent)?;
// Prefix containing source location, encoding, and value locations.
let mut s = String::with_capacity(16);
// Source location goes first.
let srcloc = func.srclocs[inst];
if !srcloc.is_default() {
write!(s, "{} ", srcloc)?;
}
// Write out encoding info. // Write out encoding info.
if let Some(enc) = func.encodings.get(inst).cloned() { if let Some(enc) = func.encodings.get(inst).cloned() {
let mut s = String::with_capacity(16);
if let Some(isa) = isa { if let Some(isa) = isa {
write!(s, "[{}", isa.encoding_info().display(enc))?; write!(s, "[{}", isa.encoding_info().display(enc))?;
// Write value locations, if we have them. // Write value locations, if we have them.
@@ -222,17 +234,15 @@ fn write_instruction(
write!(s, ",{}", func.locations[r].display(&regs))? write!(s, ",{}", func.locations[r].display(&regs))?
} }
} }
write!(s, "]")?; write!(s, "] ")?;
} else { } else {
write!(s, "[{}]", enc)?; write!(s, "[{}] ", enc)?;
} }
// Align instruction following ISA annotation to col 24.
write!(w, "{:23} ", s)?;
} else {
// No annotations, simply indent.
write!(w, "{1:0$}", indent, "")?;
} }
// Write out prefix and indent the instruction.
write!(w, "{1:0$}", indent, s)?;
// Write out the result values, if any. // Write out the result values, if any.
let mut has_results = false; let mut has_results = false;
for r in func.dfg.inst_results(inst) { for r in func.dfg.inst_results(inst) {

View File

@@ -45,6 +45,7 @@ pub enum Token<'a> {
Name(&'a str), // %9arbitrary_alphanum, %x3, %0, %function ... Name(&'a str), // %9arbitrary_alphanum, %x3, %0, %function ...
HexSequence(&'a str), // #89AF HexSequence(&'a str), // #89AF
Identifier(&'a str), // Unrecognized identifier (opcode, enumerator, ...) Identifier(&'a str), // Unrecognized identifier (opcode, enumerator, ...)
SourceLoc(&'a str), // @00c7
} }
/// A `Token` with an associated location. /// A `Token` with an associated location.
@@ -388,6 +389,22 @@ impl<'a> Lexer<'a> {
token(Token::HexSequence(&self.source[begin..end]), loc) token(Token::HexSequence(&self.source[begin..end]), loc)
} }
/// Scan a source location token: an `@` followed by a run of hexadecimal digits.
///
/// The lookahead character must be `@` on entry. The returned `Token::SourceLoc` carries only
/// the text after the `@`; that text is empty when no hexadecimal digit follows.
fn scan_srcloc(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
    let loc = self.loc();
    // The token text starts one byte past the current position, leaving out the `@` sigil.
    let begin = self.pos + 1;

    assert_eq!(self.lookahead, Some('@'));

    // Advance until the next character is not a hex digit or the input is exhausted.
    loop {
        match self.next_ch() {
            Some(c) if c.is_digit(16) => {}
            _ => break,
        }
    }

    let end = self.pos;
    token(Token::SourceLoc(&self.source[begin..end]), loc)
}
/// Get the next token or a lexical error. /// Get the next token or a lexical error.
/// ///
/// Return None when the end of the source is encountered. /// Return None when the end of the source is encountered.
@@ -419,6 +436,7 @@ impl<'a> Lexer<'a> {
Some(ch) if ch.is_alphabetic() => Some(self.scan_word()), Some(ch) if ch.is_alphabetic() => Some(self.scan_word()),
Some('%') => Some(self.scan_name()), Some('%') => Some(self.scan_name()),
Some('#') => Some(self.scan_hex_sequence()), Some('#') => Some(self.scan_hex_sequence()),
Some('@') => Some(self.scan_srcloc()),
Some(ch) if ch.is_whitespace() => { Some(ch) if ch.is_whitespace() => {
self.next_ch(); self.next_ch();
continue; continue;

View File

@@ -13,6 +13,7 @@ use cretonne::ir::{Function, Ebb, Opcode, Value, Type, FunctionName, CallConv, S
JumpTable, JumpTableData, Signature, ArgumentType, ArgumentExtension, JumpTable, JumpTableData, Signature, ArgumentType, ArgumentExtension,
ExtFuncData, SigRef, FuncRef, StackSlot, ValueLoc, ArgumentLoc, MemFlags, ExtFuncData, SigRef, FuncRef, StackSlot, ValueLoc, ArgumentLoc, MemFlags,
GlobalVar, GlobalVarData, Heap, HeapData, HeapStyle, HeapBase}; GlobalVar, GlobalVarData, Heap, HeapData, HeapStyle, HeapBase};
use cretonne::ir;
use cretonne::ir::types::VOID; use cretonne::ir::types::VOID;
use cretonne::ir::immediates::{Imm64, Uimm32, Offset32, Ieee32, Ieee64}; use cretonne::ir::immediates::{Imm64, Uimm32, Offset32, Ieee32, Ieee64};
use cretonne::ir::entities::AnyEntity; use cretonne::ir::entities::AnyEntity;
@@ -696,6 +697,23 @@ impl<'a> Parser<'a> {
} }
} }
/// Parse an optional source location.
///
/// If the current token is a source location such as `@00c7`, consume it and return the location
/// it encodes. If the current token is anything else, consume nothing and return the default
/// source location.
///
/// Returns an error, without consuming the token, when the token text does not parse as a
/// hexadecimal `u32`.
fn optional_srcloc(&mut self) -> Result<ir::SourceLoc> {
    let text = match self.token() {
        Some(Token::SourceLoc(text)) => text,
        // No source location here: the instruction gets the default location.
        _ => return Ok(Default::default()),
    };

    match u32::from_str_radix(text, 16) {
        Ok(num) => {
            self.consume();
            Ok(ir::SourceLoc::new(num))
        }
        Err(_) => err!(self.loc, "invalid source location: {}", text),
    }
}
/// Parse a list of test commands. /// Parse a list of test commands.
pub fn parse_test_commands(&mut self) -> Vec<TestCommand<'a>> { pub fn parse_test_commands(&mut self) -> Vec<TestCommand<'a>> {
let mut list = Vec::new(); let mut list = Vec::new();
@@ -1360,9 +1378,11 @@ impl<'a> Parser<'a> {
Some(Token::Value(_)) => true, Some(Token::Value(_)) => true,
Some(Token::Identifier(_)) => true, Some(Token::Identifier(_)) => true,
Some(Token::LBracket) => true, Some(Token::LBracket) => true,
Some(Token::SourceLoc(_)) => true,
_ => false, _ => false,
} }
{ {
let srcloc = self.optional_srcloc()?;
let (encoding, result_locations) = self.parse_instruction_encoding(ctx)?; let (encoding, result_locations) = self.parse_instruction_encoding(ctx)?;
// We need to parse instruction results here because they are shared // We need to parse instruction results here because they are shared
@@ -1380,6 +1400,7 @@ impl<'a> Parser<'a> {
self.consume(); self.consume();
self.parse_instruction( self.parse_instruction(
results, results,
srcloc,
encoding, encoding,
result_locations, result_locations,
ctx, ctx,
@@ -1390,6 +1411,7 @@ impl<'a> Parser<'a> {
_ => { _ => {
self.parse_instruction( self.parse_instruction(
results, results,
srcloc,
encoding, encoding,
result_locations, result_locations,
ctx, ctx,
@@ -1587,6 +1609,7 @@ impl<'a> Parser<'a> {
fn parse_instruction( fn parse_instruction(
&mut self, &mut self,
results: Vec<Value>, results: Vec<Value>,
srcloc: ir::SourceLoc,
encoding: Option<Encoding>, encoding: Option<Encoding>,
result_locations: Option<Vec<ValueLoc>>, result_locations: Option<Vec<ValueLoc>>,
ctx: &mut Context, ctx: &mut Context,
@@ -1636,6 +1659,10 @@ impl<'a> Parser<'a> {
"duplicate inst references created", "duplicate inst references created",
); );
if !srcloc.is_default() {
ctx.function.srclocs[inst] = srcloc;
}
if let Some(encoding) = encoding { if let Some(encoding) = encoding {
ctx.function.encodings[inst] = encoding; ctx.function.encodings[inst] = encoding;
} }

View File

@@ -25,6 +25,7 @@ syn match ctonName /%\w\+\>/
syn match ctonNumber /-\?\<[0-9_]\+\>/ syn match ctonNumber /-\?\<[0-9_]\+\>/
syn match ctonNumber /-\?\<0x[0-9a-fA-F_]\+\(\.[0-9a-fA-F_]*\)\?\(p[+-]\?\d\+\)\?\>/ syn match ctonNumber /-\?\<0x[0-9a-fA-F_]\+\(\.[0-9a-fA-F_]*\)\?\(p[+-]\?\d\+\)\?\>/
syn match ctonHexSeq /#\x\+\>/ syn match ctonHexSeq /#\x\+\>/
syn match ctonSourceLoc /@[0-9a-f]\+\>/
syn region ctonCommentLine start=";" end="$" contains=ctonFilecheck syn region ctonCommentLine start=";" end="$" contains=ctonFilecheck
@@ -38,5 +39,6 @@ hi def link ctonNumber Number
hi def link ctonHexSeq Number hi def link ctonHexSeq Number
hi def link ctonCommentLine Comment hi def link ctonCommentLine Comment
hi def link ctonFilecheck SpecialComment hi def link ctonFilecheck SpecialComment
hi def link ctonSourceLoc LineNr
let b:current_syntax = "cton" let b:current_syntax = "cton"