Relocation of DW_OP_{bra, skip} instructions (#2143)

The fundamental problem is that the target distance of jump-like operations may change during the DWARF expression translation process. For example, an intervening DW_OP_deref expands to about 10 bytes.

So the jumps must be relocated. We approach this task by inserting artificial LandingPad markers (a new `CompiledExpressionPart` variant) into the parsed vector at the actual Jump targets.

LandingPads are identified by JumpTargetMarker tokens which are generated on the fly.

Additionally we now parse the Jump instructions. These also get their corresponding JumpTargetMarker token.

We bail in two situations:

    - `frame_base` is too complicated (i.e. it itself contains a Jump);
    - some jump distance in the original expression is out of bounds, so no landing pad can be found for it.
This commit is contained in:
Gabor Greif
2020-08-27 00:19:03 +02:00
committed by GitHub
parent 0c4e15a52e
commit 2632848491

View File

@@ -2,7 +2,10 @@ use super::address_transform::AddressTransform;
use anyhow::{Context, Error, Result}; use anyhow::{Context, Error, Result};
use gimli::{self, write, Expression, Operation, Reader, ReaderOffset, X86_64}; use gimli::{self, write, Expression, Operation, Reader, ReaderOffset, X86_64};
use more_asserts::{assert_le, assert_lt}; use more_asserts::{assert_le, assert_lt};
use std::cmp::PartialEq;
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::hash::{Hash, Hasher};
use std::rc::Rc;
use wasmtime_environ::entity::EntityRef; use wasmtime_environ::entity::EntityRef;
use wasmtime_environ::ir::{StackSlots, ValueLabel, ValueLabelsRanges, ValueLoc}; use wasmtime_environ::ir::{StackSlots, ValueLabel, ValueLabelsRanges, ValueLoc};
use wasmtime_environ::isa::TargetIsa; use wasmtime_environ::isa::TargetIsa;
@@ -88,9 +91,19 @@ enum CompiledExpressionPart {
// The wasm-local DWARF operator. The label points to `ValueLabel`. // The wasm-local DWARF operator. The label points to `ValueLabel`.
// The trailing field denotes that the operator was last in sequence, // The trailing field denotes that the operator was last in sequence,
// and it is the DWARF location (not a pointer). // and it is the DWARF location (not a pointer).
Local { label: ValueLabel, trailing: bool }, Local {
label: ValueLabel,
trailing: bool,
},
// Dereference is needed. // Dereference is needed.
Deref, Deref,
// Jumping in the expression.
Jump {
conditionally: bool,
target: JumpTargetMarker,
},
// Floating landing pad.
LandingPad(JumpTargetMarker),
} }
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
@@ -288,7 +301,9 @@ impl CompiledExpression {
let mut ranges_builder = ValueLabelRangesBuilder::new(scope, addr_tr, frame_info); let mut ranges_builder = ValueLabelRangesBuilder::new(scope, addr_tr, frame_info);
for p in self.parts.iter() { for p in self.parts.iter() {
match p { match p {
CompiledExpressionPart::Code(_) => (), CompiledExpressionPart::Code(_)
| CompiledExpressionPart::Jump { .. }
| CompiledExpressionPart::LandingPad { .. } => (),
CompiledExpressionPart::Local { label, .. } => ranges_builder.process_label(*label), CompiledExpressionPart::Local { label, .. } => ranges_builder.process_label(*label),
CompiledExpressionPart::Deref => ranges_builder.process_label(vmctx_label), CompiledExpressionPart::Deref => ranges_builder.process_label(vmctx_label),
} }
@@ -310,6 +325,9 @@ impl CompiledExpression {
}| { }| {
// build expression // build expression
let mut code_buf = Vec::new(); let mut code_buf = Vec::new();
let mut jump_positions = Vec::new();
let mut landing_positions = HashMap::new();
macro_rules! deref { macro_rules! deref {
() => { () => {
if let (Some(vmctx_loc), Some(frame_info)) = if let (Some(vmctx_loc), Some(frame_info)) =
@@ -333,6 +351,24 @@ impl CompiledExpression {
CompiledExpressionPart::Code(c) => { CompiledExpressionPart::Code(c) => {
code_buf.extend_from_slice(c.as_slice()) code_buf.extend_from_slice(c.as_slice())
} }
CompiledExpressionPart::LandingPad(marker) => {
landing_positions.insert(marker.clone(), code_buf.len());
}
CompiledExpressionPart::Jump {
conditionally,
target,
} => {
code_buf.push(
match conditionally {
true => gimli::constants::DW_OP_bra,
false => gimli::constants::DW_OP_skip,
}
.0 as u8,
);
code_buf.push(!0);
code_buf.push(!0); // these will be relocated below
jump_positions.push((target.clone(), code_buf.len()));
}
CompiledExpressionPart::Local { label, trailing } => { CompiledExpressionPart::Local { label, trailing } => {
let loc = let loc =
*label_location.get(&label).context("label_location")?; *label_location.get(&label).context("label_location")?;
@@ -350,6 +386,15 @@ impl CompiledExpression {
if self.need_deref { if self.need_deref {
deref!(); deref!();
} }
// Relocate jump targets.
//
// Each entry of `jump_positions` pairs a marker with the length of
// `code_buf` right after the two placeholder operand bytes of a
// DW_OP_bra/DW_OP_skip were pushed, i.e. the position the jump is
// relative to. `landing_positions` maps the same marker to the position
// in `code_buf` where its landing pad was encountered.
for (marker, new_from) in jump_positions {
    // Indexing panics if a jump refers to a marker without a landing pad.
    let new_to = landing_positions[&marker];
    let new_diff = new_to as isize - new_from as isize;
    // Overwrite the placeholder operand with the relocated distance.
    // FIXME: use encoding? LittleEndian for now...
    code_buf[new_from - 2..new_from].copy_from_slice(&(new_diff as i16).to_le_bytes());
}
Ok(Some((func_index, start, end, code_buf))) Ok(Some((func_index, start, end, code_buf)))
}, },
) )
@@ -376,9 +421,35 @@ pub fn compile_expression<R>(
where where
R: Reader, R: Reader,
{ {
// Bail when `frame_base` is complicated.
if let Some(expr) = frame_base {
if expr.parts.iter().any(|p| match p {
CompiledExpressionPart::Jump { .. } => true,
_ => false,
}) {
return Ok(None);
}
}
// jump_targets key is offset in buf starting from the end
// (see also `unread_bytes` below)
let mut jump_targets: HashMap<u64, JumpTargetMarker> = HashMap::new();
let mut pc = expr.0.clone(); let mut pc = expr.0.clone();
let buf = expr.0.to_slice()?; let buf = expr.0.to_slice()?;
let mut parts = Vec::new(); let mut parts = Vec::new();
macro_rules! push {
($part:expr) => {{
let part = $part;
if let (CompiledExpressionPart::Code(cc2), Some(CompiledExpressionPart::Code(cc1))) =
(&part, parts.last_mut())
{
cc1.extend_from_slice(cc2);
} else {
parts.push(part)
}
}};
}
let mut need_deref = false; let mut need_deref = false;
if is_old_expression_format(&buf) && frame_base.is_some() { if is_old_expression_format(&buf) && frame_base.is_some() {
// Still supporting old DWARF variable expressions without fbreg. // Still supporting old DWARF variable expressions without fbreg.
@@ -388,17 +459,41 @@ where
} }
need_deref = frame_base.unwrap().need_deref; need_deref = frame_base.unwrap().need_deref;
} }
let base_len = parts.len();
let mut code_chunk = Vec::new(); let mut code_chunk = Vec::new();
macro_rules! flush_code_chunk { macro_rules! flush_code_chunk {
() => { () => {
if !code_chunk.is_empty() { if !code_chunk.is_empty() {
parts.push(CompiledExpressionPart::Code(code_chunk)); push!(CompiledExpressionPart::Code(code_chunk));
code_chunk = Vec::new(); code_chunk = Vec::new();
let _ = code_chunk; // suppresses warning for final flush
} }
}; };
}; };
// Find all landing pads by scanning bytes, do not care about
// false location at this moment.
// Looks hacky but it is fast; does not need to be really exact.
//
// Every 3-byte window is treated as a potential `op, lo, hi` jump
// instruction. `windows(3)` yields nothing for buffers shorter than three
// bytes, so very short expressions no longer underflow `buf.len() - 2`.
for (i, window) in buf.windows(3).enumerate() {
    let op = window[0];
    if op != gimli::constants::DW_OP_bra.0 && op != gimli::constants::DW_OP_skip.0 {
        continue;
    }
    // TODO fix for big-endian
    let offset = i16::from_le_bytes([window[1], window[2]]) as isize;
    // The jump distance is relative to the byte following the operand.
    let origin = (i + 3) as isize;
    // Widening to `isize` first keeps `i16::MIN` from overflowing on negation.
    let landing = origin + offset;
    // Discarding out-of-bounds jumps (also some of falsely detected ops)
    if 0 <= landing && landing <= buf.len() as isize {
        // Key is the number of bytes remaining after the landing position,
        // i.e. the offset counted from the end of `buf`.
        let target = buf.len() as isize - landing;
        jump_targets.insert(target as u64, JumpTargetMarker::new());
    }
}
while !pc.is_empty() { while !pc.is_empty() {
let unread_bytes = pc.len().into_u64();
if let Some(marker) = jump_targets.get(&unread_bytes) {
flush_code_chunk!();
parts.push(CompiledExpressionPart::LandingPad(marker.clone()));
}
let next = buf[pc.offset_from(&expr.0).into_u64() as usize]; let next = buf[pc.offset_from(&expr.0).into_u64() as usize];
need_deref = true; need_deref = true;
if next == 0xED { if next == 0xED {
@@ -413,7 +508,7 @@ where
let index = pc.read_sleb128()?; let index = pc.read_sleb128()?;
flush_code_chunk!(); flush_code_chunk!();
let label = ValueLabel::from_u32(index as u32); let label = ValueLabel::from_u32(index as u32);
parts.push(CompiledExpressionPart::Local { push!(CompiledExpressionPart::Local {
label, label,
trailing: false, trailing: false,
}); });
@@ -439,10 +534,58 @@ where
code_chunk.extend(writer.into_vec()); code_chunk.extend(writer.into_vec());
continue; continue;
} }
Operation::UnsignedConstant { .. } Operation::Drop { .. }
| Operation::Pick { .. }
| Operation::Swap { .. }
| Operation::Rot { .. }
| Operation::Nop { .. }
| Operation::UnsignedConstant { .. }
| Operation::SignedConstant { .. } | Operation::SignedConstant { .. }
| Operation::ConstantIndex { .. }
| Operation::PlusConstant { .. } | Operation::PlusConstant { .. }
| Operation::Abs { .. }
| Operation::And { .. }
| Operation::Or { .. }
| Operation::Xor { .. }
| Operation::Shr { .. }
| Operation::Shra { .. }
| Operation::Shl { .. }
| Operation::Plus { .. }
| Operation::Minus { .. }
| Operation::Div { .. }
| Operation::Mod { .. }
| Operation::Mul { .. }
| Operation::Neg { .. }
| Operation::Not { .. }
| Operation::Lt { .. }
| Operation::Gt { .. }
| Operation::Le { .. }
| Operation::Ge { .. }
| Operation::Eq { .. }
| Operation::Ne { .. }
| Operation::TypedLiteral { .. }
| Operation::Convert { .. }
| Operation::Reinterpret { .. }
| Operation::Piece { .. } => (), | Operation::Piece { .. } => (),
Operation::Bra { target } | Operation::Skip { target } => {
flush_code_chunk!();
let arc_to = (pc.len().into_u64() as isize - target as isize) as u64;
let marker = match jump_targets.get(&arc_to) {
Some(m) => m.clone(),
None => {
// Marker not found: probably out of bounds.
return Ok(None);
}
};
push!(CompiledExpressionPart::Jump {
conditionally: match op {
Operation::Bra { .. } => true,
_ => false,
},
target: marker,
});
continue;
}
Operation::StackValue => { Operation::StackValue => {
need_deref = false; need_deref = false;
@@ -457,11 +600,22 @@ where
} }
Operation::Deref { .. } => { Operation::Deref { .. } => {
flush_code_chunk!(); flush_code_chunk!();
parts.push(CompiledExpressionPart::Deref); push!(CompiledExpressionPart::Deref);
// Don't re-enter the loop here (i.e. continue), because the // Don't re-enter the loop here (i.e. continue), because the
// DW_OP_deref still needs to be kept. // DW_OP_deref still needs to be kept.
} }
_ => { Operation::Address { .. }
| Operation::AddressIndex { .. }
| Operation::Call { .. }
| Operation::Register { .. }
| Operation::RegisterOffset { .. }
| Operation::CallFrameCFA
| Operation::PushObjectAddress
| Operation::TLS
| Operation::ImplicitValue { .. }
| Operation::ImplicitPointer { .. }
| Operation::EntryValue { .. }
| Operation::ParameterRef { .. } => {
return Ok(None); return Ok(None);
} }
} }
@@ -470,20 +624,9 @@ where
} }
} }
if !code_chunk.is_empty() { flush_code_chunk!();
parts.push(CompiledExpressionPart::Code(code_chunk)); if let Some(marker) = jump_targets.get(&0) {
} parts.push(CompiledExpressionPart::LandingPad(marker.clone()));
if base_len > 0 && base_len + 1 < parts.len() {
// see if we can glue two code chunks
if let [CompiledExpressionPart::Code(cc1), CompiledExpressionPart::Code(cc2)] =
&parts[base_len..=base_len]
{
let mut combined = cc1.clone();
combined.extend_from_slice(cc2);
parts[base_len] = CompiledExpressionPart::Code(combined);
parts.remove(base_len + 1);
}
} }
Ok(Some(CompiledExpression { parts, need_deref })) Ok(Some(CompiledExpression { parts, need_deref }))
@@ -602,10 +745,49 @@ impl<'a, 'b> ValueLabelRangesBuilder<'a, 'b> {
} }
} }
/// Identity token that ties jump instructions to their landing pad.
///
/// Every call to [`JumpTargetMarker::new`] yields a marker that compares
/// unequal to all markers created before; clones of a marker compare equal
/// to the marker they were cloned from.
#[derive(Clone, Eq)]
struct JumpTargetMarker(Rc<u32>);

impl JumpTargetMarker {
    /// Creates a fresh marker.
    ///
    /// The shared `u32` is filled with the low bits of its own heap address,
    /// which serves as "somewhat unique" input for hashing.
    fn new() -> JumpTargetMarker {
        let mut shared: Rc<u32> = Rc::new(0);
        let address = &*shared as *const u32 as usize;
        // The Rc was created just above and has not been cloned yet,
        // so exclusive access is available here.
        let slot = Rc::get_mut(&mut shared).unwrap();
        *slot = address as u32;
        JumpTargetMarker(shared)
    }
}

impl PartialEq for JumpTargetMarker {
    /// Two markers are equal only when they share the same allocation.
    fn eq(&self, other: &Self) -> bool {
        let (JumpTargetMarker(lhs), JumpTargetMarker(rhs)) = (self, other);
        std::ptr::eq::<u32>(&**lhs, &**rhs)
    }
}

impl Hash for JumpTargetMarker {
    /// Feeds the stored address-derived value into the hasher; clones share
    /// the value, so equal markers always hash identically.
    fn hash<H: Hasher>(&self, state: &mut H) {
        let JumpTargetMarker(data) = self;
        state.write_u32(**data);
    }
}

impl std::fmt::Debug for JumpTargetMarker {
    /// Prints the address of the shared allocation in hexadecimal.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let address = &*self.0 as *const u32 as usize;
        f.write_fmt(format_args!("JumpMarker<{:08x}>", address))
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::compile_expression; use super::{
use super::{AddressTransform, FunctionFrameInfo, ValueLabel, ValueLabelsRanges}; compile_expression, AddressTransform, CompiledExpression, CompiledExpressionPart,
FunctionFrameInfo, JumpTargetMarker, ValueLabel, ValueLabelsRanges,
};
use gimli::{self, constants, Encoding, EndianSlice, Expression, RunTimeEndian}; use gimli::{self, constants, Encoding, EndianSlice, Expression, RunTimeEndian};
use wasmtime_environ::CompiledFunction; use wasmtime_environ::CompiledFunction;
@@ -619,6 +801,9 @@ mod tests {
($d:ident) => { ($d:ident) => {
constants::$d.0 as u8 constants::$d.0 as u8
}; };
($e:expr) => {
$e as u8
};
} }
macro_rules! expression { macro_rules! expression {
@@ -630,15 +815,38 @@ mod tests {
} }
} }
/// Collects the markers of all `LandingPad` parts of `ce`, in the order in
/// which they appear in the expression.
fn find_jump_targets<'a>(ce: &'a CompiledExpression) -> Vec<&'a JumpTargetMarker> {
    let mut pads = Vec::new();
    for part in &ce.parts {
        match part {
            CompiledExpressionPart::LandingPad(marker) => pads.push(marker),
            _ => {}
        }
    }
    pads
}
static DWARF_ENCODING: Encoding = Encoding { static DWARF_ENCODING: Encoding = Encoding {
address_size: 4, address_size: 4,
format: gimli::Format::Dwarf32, format: gimli::Format::Dwarf32,
version: 4, version: 4,
}; };
/// Checks marker identity: fresh markers differ, clones are equal.
#[test]
fn test_debug_expression_jump_target() {
    let first = JumpTargetMarker::new();
    let second = JumpTargetMarker::new();
    assert_ne!(first, second);
    assert_eq!(first, first.clone());

    // Internal hash_data test (theoretically can fail intermittently).
    assert_ne!(first.0, second.0);
}
#[test] #[test]
fn test_debug_parse_expressions() { fn test_debug_parse_expressions() {
use super::{CompiledExpression, CompiledExpressionPart};
use wasmtime_environ::entity::EntityRef; use wasmtime_environ::entity::EntityRef;
let (val1, val3, val20) = (ValueLabel::new(1), ValueLabel::new(3), ValueLabel::new(20)); let (val1, val3, val20) = (ValueLabel::new(1), ValueLabel::new(3), ValueLabel::new(20));
@@ -654,7 +862,7 @@ mod tests {
label: val20, label: val20,
trailing: true trailing: true
}], }],
need_deref: false need_deref: false,
} }
); );
@@ -679,7 +887,7 @@ mod tests {
}, },
CompiledExpressionPart::Code(vec![35, 16, 159]) CompiledExpressionPart::Code(vec![35, 16, 159])
], ],
need_deref: false need_deref: false,
} }
); );
@@ -699,7 +907,7 @@ mod tests {
}, },
CompiledExpressionPart::Code(vec![35, 18]) CompiledExpressionPart::Code(vec![35, 18])
], ],
need_deref: true need_deref: true,
} }
); );
@@ -727,7 +935,158 @@ mod tests {
CompiledExpressionPart::Deref, CompiledExpressionPart::Deref,
CompiledExpressionPart::Code(vec![6, 159]) CompiledExpressionPart::Code(vec![6, 159])
], ],
need_deref: false need_deref: false,
}
);
let e = expression!(
DW_OP_lit1,
DW_OP_dup,
DW_OP_WASM_location,
0x0,
1,
DW_OP_and,
DW_OP_bra,
5,
0, // --> pointer
DW_OP_swap,
DW_OP_shr,
DW_OP_skip,
2,
0, // --> done
// pointer:
DW_OP_plus,
DW_OP_deref,
// done:
DW_OP_stack_value
);
let ce = compile_expression(&e, DWARF_ENCODING, None)
.expect("non-error")
.expect("expression");
let targets = find_jump_targets(&ce);
assert_eq!(targets.len(), 2);
assert_eq!(
ce,
CompiledExpression {
parts: vec![
CompiledExpressionPart::Code(vec![49, 18]),
CompiledExpressionPart::Local {
label: val1,
trailing: false
},
CompiledExpressionPart::Code(vec![26]),
CompiledExpressionPart::Jump {
conditionally: true,
target: targets[0].clone(),
},
CompiledExpressionPart::Code(vec![22, 37]),
CompiledExpressionPart::Jump {
conditionally: false,
target: targets[1].clone(),
},
CompiledExpressionPart::LandingPad(targets[0].clone()), // capture from
CompiledExpressionPart::Code(vec![34]),
CompiledExpressionPart::Deref,
CompiledExpressionPart::Code(vec![6]),
CompiledExpressionPart::LandingPad(targets[1].clone()), // capture to
CompiledExpressionPart::Code(vec![159])
],
need_deref: false,
}
);
let e = expression!(
DW_OP_lit1,
DW_OP_dup,
DW_OP_bra,
2,
0, // --> target
DW_OP_deref,
DW_OP_lit0,
// target:
DW_OP_stack_value
);
let ce = compile_expression(&e, DWARF_ENCODING, None)
.expect("non-error")
.expect("expression");
let targets = find_jump_targets(&ce);
assert_eq!(targets.len(), 1);
assert_eq!(
ce,
CompiledExpression {
parts: vec![
CompiledExpressionPart::Code(vec![49, 18]),
CompiledExpressionPart::Jump {
conditionally: true,
target: targets[0].clone(),
},
CompiledExpressionPart::Deref,
CompiledExpressionPart::Code(vec![6, 48]),
CompiledExpressionPart::LandingPad(targets[0].clone()), // capture to
CompiledExpressionPart::Code(vec![159])
],
need_deref: false,
}
);
let e = expression!(
DW_OP_lit1,
/* loop */ DW_OP_dup,
DW_OP_lit25,
DW_OP_ge,
DW_OP_bra,
5,
0, // --> done
DW_OP_plus_uconst,
1,
DW_OP_skip,
(-11 as i8),
(!0), // --> loop
/* done */ DW_OP_stack_value
);
let ce = compile_expression(&e, DWARF_ENCODING, None)
.expect("non-error")
.expect("expression");
let targets = find_jump_targets(&ce);
assert_eq!(targets.len(), 2);
assert_eq!(
ce,
CompiledExpression {
parts: vec![
CompiledExpressionPart::Code(vec![49]),
CompiledExpressionPart::LandingPad(targets[0].clone()),
CompiledExpressionPart::Code(vec![18, 73, 42]),
CompiledExpressionPart::Jump {
conditionally: true,
target: targets[1].clone(),
},
CompiledExpressionPart::Code(vec![35, 1]),
CompiledExpressionPart::Jump {
conditionally: false,
target: targets[0].clone(),
},
CompiledExpressionPart::LandingPad(targets[1].clone()),
CompiledExpressionPart::Code(vec![159])
],
need_deref: false,
}
);
let e = expression!(DW_OP_WASM_location, 0x0, 1, DW_OP_plus_uconst, 5);
let ce = compile_expression(&e, DWARF_ENCODING, None)
.expect("non-error")
.expect("expression");
assert_eq!(
ce,
CompiledExpression {
parts: vec![
CompiledExpressionPart::Local {
label: val1,
trailing: false
},
CompiledExpressionPart::Code(vec![35, 5])
],
need_deref: true,
} }
); );
} }