cranelift: Add heap support to the interpreter (#3302)

* cranelift: Add heaps to interpreter

* cranelift: Add RunTest Environment mechanism to  test interpret

* cranelift: Remove unused `MemoryError`

* cranelift: Add docs for `State::resolve_global_value`

* cranelift: Rename heap tests

* cranelift: Refactor heap address resolution

* Fix typos and clarify docs (thanks @cfallin)
This commit is contained in:
Afonso Bordado
2022-07-05 17:05:26 +01:00
committed by GitHub
parent 76a2545a7f
commit e91f493ff5
7 changed files with 531 additions and 140 deletions

View File

@@ -8,12 +8,16 @@ use crate::frame::Frame;
use crate::instruction::DfgInstructionContext;
use crate::state::{MemoryError, State};
use crate::step::{step, ControlFlow, StepError};
use crate::value::ValueError;
use crate::value::{Value, ValueError};
use cranelift_codegen::data_value::DataValue;
use cranelift_codegen::ir::condcodes::{FloatCC, IntCC};
use cranelift_codegen::ir::{Block, FuncRef, Function, StackSlot, Type, Value as ValueRef};
use cranelift_codegen::ir::{
ArgumentPurpose, Block, FuncRef, Function, GlobalValue, GlobalValueData, Heap, StackSlot, Type,
Value as ValueRef,
};
use log::trace;
use std::collections::HashSet;
use std::convert::{TryFrom, TryInto};
use std::fmt::Debug;
use std::iter;
use thiserror::Error;
@@ -172,6 +176,21 @@ pub enum InterpreterError {
FuelExhausted,
}
pub type HeapBacking = Vec<u8>;
/// Represents a registered heap with an interpreter.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct HeapId(u32);
/// Options for initializing a heap memory region
#[derive(Debug)]
pub enum HeapInit {
/// A zero initialized heap with `size` bytes
Zeroed(usize),
/// Initializes the heap with the backing memory unchanged.
FromBacking(HeapBacking),
}
/// Maintains the [Interpreter]'s state, implementing the [State] trait.
pub struct InterpreterState<'a> {
pub functions: FunctionStore<'a>,
@@ -179,7 +198,7 @@ pub struct InterpreterState<'a> {
/// Number of bytes from the bottom of the stack where the current frame's stack space is
pub frame_offset: usize,
pub stack: Vec<u8>,
pub heap: Vec<u8>,
pub heaps: Vec<HeapBacking>,
pub iflags: HashSet<IntCC>,
pub fflags: HashSet<FloatCC>,
}
@@ -191,7 +210,7 @@ impl Default for InterpreterState<'_> {
frame_stack: vec![],
frame_offset: 0,
stack: Vec::with_capacity(1024),
heap: vec![0; 1024],
heaps: Vec::new(),
iflags: HashSet::new(),
fflags: HashSet::new(),
}
@@ -203,6 +222,57 @@ impl<'a> InterpreterState<'a> {
Self { functions, ..self }
}
/// Registers a static heap and returns a reference to it
///
/// This heap reference can be used to generate a heap pointer, which
/// can be used inside the interpreter to load / store values into the heap.
///
/// ```rust
/// # use cranelift_codegen::ir::types::I64;
/// # use cranelift_interpreter::interpreter::{InterpreterState, HeapInit};
/// let mut state = InterpreterState::default();
/// let heap0 = state.register_heap(HeapInit::Zeroed(1024));
///
/// let backing = Vec::from([10u8; 24]);
/// let heap1 = state.register_heap(HeapInit::FromBacking(backing));
/// ```
pub fn register_heap(&mut self, init: HeapInit) -> HeapId {
let heap_id = HeapId(self.heaps.len() as u32);
self.heaps.push(match init {
HeapInit::Zeroed(size) => iter::repeat(0).take(size).collect(),
HeapInit::FromBacking(backing) => backing,
});
heap_id
}
/// Returns a heap address that can be used inside the interpreter
///
/// ```rust
/// # use cranelift_codegen::ir::types::I64;
/// # use cranelift_interpreter::interpreter::{InterpreterState, HeapInit};
/// let mut state = InterpreterState::default();
/// let heap_id = state.register_heap(HeapInit::Zeroed(1024));
/// let heap_base = state.get_heap_address(I64, heap_id, 0);
/// let heap_bound = state.get_heap_address(I64, heap_id, 1024);
/// ```
pub fn get_heap_address(
&self,
ty: Type,
heap_id: HeapId,
offset: u64,
) -> Result<DataValue, MemoryError> {
let size = AddressSize::try_from(ty)?;
let heap_id = heap_id.0 as u64;
let addr = Address::from_parts(size, AddressRegion::Heap, heap_id, offset)?;
self.validate_address(&addr)?;
let dv = addr.try_into()?;
Ok(dv)
}
fn current_frame_mut(&mut self) -> &mut Frame<'a> {
let num_frames = self.frame_stack.len();
match num_frames {
@@ -310,23 +380,54 @@ impl<'a> State<'a, DataValue> for InterpreterState<'a> {
Address::from_parts(size, AddressRegion::Stack, 0, final_offset)
}
fn heap_address(&self, _size: AddressSize, _offset: u64) -> Result<Address, MemoryError> {
unimplemented!()
/// Builds an [Address] for the [Heap] referenced in the currently executing function.
///
/// A CLIF Heap is essentially a GlobalValue and some metadata about that memory
/// region, such as bounds. Since heaps are based on Global Values it means that
/// once that GV is resolved we can essentially end up anywhere in memory.
///
/// To build an [Address] we perform GV resolution, and try to ensure that we end up
/// in a valid region of memory.
fn heap_address(
&self,
size: AddressSize,
heap: Heap,
offset: u64,
) -> Result<Address, MemoryError> {
let heap_data = &self.get_current_function().heaps[heap];
let heap_base = self.resolve_global_value(heap_data.base)?;
let mut addr = Address::try_from(heap_base)?;
addr.size = size;
addr.offset += offset;
// After resolving the address can point anywhere, we need to check if it's
// still valid.
self.validate_address(&addr)?;
Ok(addr)
}
fn checked_load(&self, addr: Address, ty: Type) -> Result<DataValue, MemoryError> {
let load_size = ty.bytes() as usize;
let addr_start = addr.offset as usize;
let addr_end = addr_start + load_size;
let src = match addr.region {
AddressRegion::Stack => {
let addr_start = addr.offset as usize;
let addr_end = addr_start + load_size;
if addr_end > self.stack.len() {
return Err(MemoryError::OutOfBoundsLoad { addr, load_size });
}
&self.stack[addr_start..addr_end]
}
AddressRegion::Heap => {
let heap_mem = match self.heaps.get(addr.entry as usize) {
Some(mem) if addr_end <= mem.len() => mem,
_ => return Err(MemoryError::OutOfBoundsLoad { addr, load_size }),
};
&heap_mem[addr_start..addr_end]
}
_ => unimplemented!(),
};
@@ -335,28 +436,172 @@ impl<'a> State<'a, DataValue> for InterpreterState<'a> {
fn checked_store(&mut self, addr: Address, v: DataValue) -> Result<(), MemoryError> {
let store_size = v.ty().bytes() as usize;
let addr_start = addr.offset as usize;
let addr_end = addr_start + store_size;
let dst = match addr.region {
AddressRegion::Stack => {
let addr_start = addr.offset as usize;
let addr_end = addr_start + store_size;
if addr_end > self.stack.len() {
return Err(MemoryError::OutOfBoundsStore { addr, store_size });
}
&mut self.stack[addr_start..addr_end]
}
AddressRegion::Heap => {
let heap_mem = match self.heaps.get_mut(addr.entry as usize) {
Some(mem) if addr_end <= mem.len() => mem,
_ => return Err(MemoryError::OutOfBoundsStore { addr, store_size }),
};
&mut heap_mem[addr_start..addr_end]
}
_ => unimplemented!(),
};
Ok(v.write_to_slice(dst))
}
/// Non-Recursively resolves a global value until its address is found
fn resolve_global_value(&self, gv: GlobalValue) -> Result<DataValue, MemoryError> {
// Resolving a Global Value is a "pointer" chasing operation that lends itself to
// using a recursive solution. However, resolving this in a recursive manner
// is a bad idea because its very easy to add a bunch of global values and
// blow up the call stack.
//
// Adding to the challenges of this, is that the operations possible with GlobalValues
// mean that we cannot use a simple loop to resolve each global value, we must keep
// a pending list of operations.
// These are the possible actions that we can perform
#[derive(Debug)]
enum ResolveAction {
Resolve(GlobalValue),
/// Perform an add on the current address
Add(DataValue),
/// Load From the current address and replace it with the loaded value
Load {
/// Offset added to the base pointer before doing the load.
offset: i32,
/// Type of the loaded value.
global_type: Type,
},
}
let func = self.get_current_function();
// We start with a sentinel value that will fail if we try to load / add to it
// without resolving the base GV First.
let mut current_val = DataValue::B(false);
let mut action_stack = vec![ResolveAction::Resolve(gv)];
loop {
match action_stack.pop() {
Some(ResolveAction::Resolve(gv)) => match func.global_values[gv] {
GlobalValueData::VMContext => {
// Fetch the VMContext value from the values of the first block in the function
let index = func
.signature
.params
.iter()
.enumerate()
.find(|(_, p)| p.purpose == ArgumentPurpose::VMContext)
.map(|(i, _)| i)
// This should be validated by the verifier
.expect("No VMCtx argument was found, but one is referenced");
let first_block =
func.layout.blocks().next().expect("to have a first block");
let vmctx_value = func.dfg.block_params(first_block)[index];
current_val = self.current_frame().get(vmctx_value).clone();
}
GlobalValueData::Load {
base,
offset,
global_type,
..
} => {
action_stack.push(ResolveAction::Load {
offset: offset.into(),
global_type,
});
action_stack.push(ResolveAction::Resolve(base));
}
GlobalValueData::IAddImm {
base,
offset,
global_type,
} => {
let offset: i64 = offset.into();
let dv = DataValue::int(offset as i128, global_type)
.map_err(|_| MemoryError::InvalidAddressType(global_type))?;
action_stack.push(ResolveAction::Add(dv));
action_stack.push(ResolveAction::Resolve(base));
}
GlobalValueData::Symbol { .. } => unimplemented!(),
},
Some(ResolveAction::Add(dv)) => {
current_val = current_val
.add(dv.clone())
.map_err(|_| MemoryError::InvalidAddress(dv))?;
}
Some(ResolveAction::Load {
offset,
global_type,
}) => {
let mut addr = Address::try_from(current_val)?;
// We can forego bounds checking here since its performed in `checked_load`
addr.offset += offset as u64;
current_val = self.checked_load(addr, global_type)?;
}
// We are done resolving this, return the current value
None => return Ok(current_val),
}
}
}
fn validate_address(&self, addr: &Address) -> Result<(), MemoryError> {
match addr.region {
AddressRegion::Stack => {
let stack_len = self.stack.len() as u64;
if addr.offset > stack_len {
return Err(MemoryError::InvalidEntry {
entry: addr.entry,
max: self.heaps.len() as u64,
});
}
}
AddressRegion::Heap => {
let heap_len = self
.heaps
.get(addr.entry as usize)
.ok_or_else(|| MemoryError::InvalidEntry {
entry: addr.entry,
max: self.heaps.len() as u64,
})
.map(|heap| heap.len() as u64)?;
if addr.offset > heap_len {
return Err(MemoryError::InvalidOffset {
offset: addr.offset,
max: heap_len,
});
}
}
_ => unimplemented!(),
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::step::CraneliftTrap;
use cranelift_codegen::ir::types::I64;
use cranelift_codegen::ir::TrapCode;
use cranelift_reader::parse_functions;
@@ -720,4 +965,51 @@ mod tests {
assert_eq!(trap, CraneliftTrap::User(TrapCode::HeapOutOfBounds));
}
/// Most heap tests are in .clif files using the filetest machinery. However, this is a sanity
/// check that the heap mechanism works without the rest of the filetest infrastructure
#[test]
fn heap_sanity_test() {
let code = "
function %heap_load_store(i64 vmctx) -> b1 {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0+0
; gv2/3 do nothing, but makes sure we understand the iadd_imm mechanism
gv2 = iadd_imm.i64 gv1, 1
gv3 = iadd_imm.i64 gv2, -1
heap0 = static gv3, min 0x1000, bound 0x1_0000_0000, offset_guard 0, index_type i64
block0(v0: i64):
v1 = iconst.i64 0
v2 = iconst.i64 123
v3 = heap_addr.i64 heap0, v1, 8
store.i64 v2, v3
v4 = load.i64 v3
v5 = icmp eq v2, v4
return v5
}";
let func = parse_functions(code).unwrap().into_iter().next().unwrap();
let mut env = FunctionStore::default();
env.add(func.name.to_string(), &func);
let mut state = InterpreterState::default().with_function_store(env);
let heap0 = state.register_heap(HeapInit::Zeroed(0x1000));
let base_addr = state.get_heap_address(I64, heap0, 0).unwrap();
// Build a vmctx struct by writing the base pointer at index 0
let mut vmctx_struct = vec![0u8; 8];
base_addr.write_to_slice(&mut vmctx_struct[..]);
// This is our vmctx "heap"
let vmctx = state.register_heap(HeapInit::FromBacking(vmctx_struct));
let vmctx_addr = state.get_heap_address(I64, vmctx, 0).unwrap();
let result = Interpreter::new(state)
.call_by_name("%heap_load_store", &[vmctx_addr])
.unwrap()
.unwrap_return();
assert_eq!(result, vec![DataValue::B(true)])
}
}