Add a CLIF interpreter

This is an incomplete version of a Cranelift IR interpreter: only a small subset of instructions is implemented, and the (known) missing parts are marked with TODO or FIXME.
This commit is contained in:
Andrew Brown
2020-04-21 11:57:08 -07:00
parent b4238229c2
commit 8b18fc5937
7 changed files with 831 additions and 0 deletions

View File

@@ -0,0 +1,82 @@
//! Implements the function environment (e.g. a name-to-function mapping) for interpretation.
use cranelift_codegen::ir::{FuncRef, Function};
use std::collections::HashMap;
/// The set of functions known to the interpreter, addressable either by name or by [FuncRef].
///
/// The derived `Default` yields an environment with no functions in it.
#[derive(Default)]
pub struct Environment {
/// The stored functions, keyed by the reference they were registered under.
functions: HashMap<FuncRef, Function>,
/// Maps a registered function name to the reference of the function stored under it.
function_name_to_func_ref: HashMap<String, FuncRef>,
}
impl From<Function> for Environment {
fn from(f: Function) -> Self {
let func_ref = FuncRef::from_u32(0);
let mut function_name_to_func_ref = HashMap::new();
function_name_to_func_ref.insert(f.name.to_string(), func_ref);
let mut functions = HashMap::new();
functions.insert(func_ref, f);
Self {
functions,
function_name_to_func_ref,
}
}
}
impl Environment {
    /// Add a function by name.
    ///
    /// The function is stored under a freshly numbered [FuncRef] and `name` is pointed at that
    /// reference. If `name` was already registered, the name now resolves to the new function;
    /// the previously stored function stays in the environment and remains reachable through
    /// its old [FuncRef].
    ///
    /// # Panics
    ///
    /// Panics if the next function number is not accepted by [FuncRef::with_number].
    pub fn add(&mut self, name: String, function: Function) {
        // Number by the count of stored functions rather than the count of names: re-adding an
        // existing name does not grow the name map, so numbering by names would later hand out
        // a reference that is already in use and silently overwrite a different function.
        let func_ref = FuncRef::with_number(self.functions.len() as u32)
            .expect("a valid function reference");
        self.function_name_to_func_ref.insert(name, func_ref);
        self.functions.insert(func_ref, function);
    }

    /// Retrieve a reference to a function in the environment by its name.
    ///
    /// Returns `None` if no function has been registered under `name`.
    pub fn index_of(&self, name: &str) -> Option<FuncRef> {
        self.function_name_to_func_ref.get(name).copied()
    }

    /// Retrieve a function by its function reference.
    ///
    /// Returns `None` if `func_ref` does not identify a function in this environment.
    pub fn get_by_func_ref(&self, func_ref: FuncRef) -> Option<&Function> {
        self.functions.get(&func_ref)
    }

    /// Retrieve a function by its name.
    ///
    /// This resolves the name with [Environment::index_of] and then looks the function up with
    /// [Environment::get_by_func_ref]; `None` is returned if either step fails.
    pub fn get_by_name(&self, name: &str) -> Option<&Function> {
        let func_ref = self.index_of(name)?;
        self.get_by_func_ref(func_ref)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use cranelift_codegen::ir::{ExternalName, Signature};
    use cranelift_codegen::isa::CallConv;

    /// A function added under a name can be found again by that name.
    #[test]
    fn addition() {
        let name = "a";
        let mut env = Environment::default();
        env.add(name.to_string(), Function::new());
        assert!(env.get_by_name(name).is_some());
    }

    /// Looking up a name in an empty environment finds nothing.
    #[test]
    fn nonexistence() {
        let empty = Environment::default();
        assert!(empty.get_by_name("a").is_none());
    }

    /// Converting a single function into an environment registers it as function 0 under the
    /// textual form of its name.
    #[test]
    fn from() {
        let func = Function::with_name_signature(
            ExternalName::testcase("test"),
            Signature::new(CallConv::Fast),
        );
        let env = Environment::from(func);
        assert_eq!(env.index_of("%test"), FuncRef::with_number(0));
    }
}

View File

@@ -0,0 +1,128 @@
//! Implements a call frame (activation record) for the Cranelift interpreter.
use cranelift_codegen::ir::{Function, Value as ValueRef};
use cranelift_reader::DataValue;
use std::collections::HashMap;
/// Holds the mutable elements of an interpretation: the values currently bound to the SSA
/// references of the function being executed.
///
/// Design note: field-level mutability through `Cell`/`RefCell` was considered so that a single
/// object could be passed around. Instead, the mutable state (this frame) is passed separately
/// from the immutable state (the function, which is only borrowed here). This may need to be
/// revisited if it becomes untenable.
#[derive(Debug)]
pub struct Frame<'a> {
/// The currently executing function.
pub function: &'a Function,
/// The current mapping of SSA value-references to their actual values.
registers: HashMap<ValueRef, DataValue>,
}
impl<'a> Frame<'a> {
    /// Construct a new [Frame] for a function. This reserves capacity in the hash map for each
    /// SSA `Value` (renamed to `ValueRef` here) which should mean that no additional allocations
    /// are needed while interpreting the frame.
    pub fn new(function: &'a Function) -> Self {
        Self {
            function,
            registers: HashMap::with_capacity(function.dfg.num_values()),
        }
    }

    /// Construct a new [Frame] with the given `values` assigned to their corresponding slot
    /// (from the SSA references in `parameters`) in the [Frame].
    ///
    /// # Panics
    ///
    /// Panics if `parameters` and `values` do not have the same length.
    pub fn with_parameters(mut self, parameters: &[ValueRef], values: &[DataValue]) -> Self {
        assert_eq!(parameters.len(), values.len());
        self.registers
            .extend(parameters.iter().copied().zip(values.iter().cloned()));
        self
    }

    /// Retrieve the actual value associated with an SSA reference.
    ///
    /// # Panics
    ///
    /// Panics with an `unknown value` message if nothing has been assigned to `name`.
    #[inline]
    pub fn get(&self, name: &ValueRef) -> &DataValue {
        self.registers
            .get(name)
            .unwrap_or_else(|| panic!("unknown value: {}", name))
    }

    /// Retrieve multiple SSA references as owned copies; see `get`.
    pub fn get_all(&self, names: &[ValueRef]) -> Vec<DataValue> {
        names.iter().map(|name| self.get(name).clone()).collect()
    }

    /// Assign `value` to the SSA reference `name`, returning the value previously assigned to
    /// that reference, if any.
    #[inline]
    pub fn set(&mut self, name: ValueRef, value: DataValue) -> Option<DataValue> {
        self.registers.insert(name, value)
    }

    /// Assign to multiple SSA references; see `set`.
    ///
    /// # Panics
    ///
    /// Panics if `names` and `values` do not have the same length.
    pub fn set_all(&mut self, names: &[ValueRef], values: Vec<DataValue>) {
        assert_eq!(names.len(), values.len());
        for (name, value) in names.iter().zip(values) {
            self.set(*name, value);
        }
    }

    /// Rename all of the SSA references in `old_names` to those in `new_names`. This will remove
    /// any old references that are not in `old_names`. TODO This performs an extra allocation that
    /// could be removed if we copied the values in the right order (i.e. when modifying in place,
    /// we need to avoid changing a value before it is referenced).
    ///
    /// # Panics
    ///
    /// Panics if the two slices differ in length, or (with an `unknown value` message, as in
    /// `get`) if one of `old_names` has no value assigned.
    pub fn rename(&mut self, old_names: &[ValueRef], new_names: &[ValueRef]) {
        assert_eq!(old_names.len(), new_names.len());
        let mut registers = HashMap::with_capacity(self.registers.len());
        for (old_name, new_name) in old_names.iter().zip(new_names) {
            // Values are cloned rather than moved out of the old map so that the same old name
            // may appear more than once in `old_names`. Going through `get` makes a missing
            // value panic with its name instead of an anonymous `unwrap` failure.
            registers.insert(*new_name, self.get(old_name).clone());
        }
        self.registers = registers;
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use cranelift_codegen::ir::InstBuilder;
    use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext};
    use cranelift_reader::DataValue;

    /// Build a function whose only block contains a single, value-less return.
    fn empty_function() -> Function {
        let mut func = Function::new();
        {
            let mut context = FunctionBuilderContext::new();
            let mut builder = FunctionBuilder::new(&mut func, &mut context);
            let entry = builder.create_block();
            builder.switch_to_block(entry);
            builder.ins().return_(&[]);
        }
        func
    }

    /// Creating a frame for a function must not panic.
    #[test]
    fn construction() {
        let func = empty_function();
        let _frame = Frame::new(&func);
    }

    /// A value written with `set` is read back unchanged with `get`.
    #[test]
    fn assignment() {
        let func = empty_function();
        let slot = ValueRef::with_number(1).unwrap();
        let expected = DataValue::I32(42);

        let mut frame = Frame::new(&func);
        frame.set(slot, expected.clone());

        assert_eq!(frame.get(&slot), &expected);
    }

    /// Reading a reference that was never assigned panics.
    #[test]
    #[should_panic]
    fn no_existing_value() {
        let func = empty_function();
        let unassigned = ValueRef::with_number(1).unwrap();
        Frame::new(&func).get(&unassigned);
    }
}

View File

@@ -0,0 +1,365 @@
//! Cranelift IR interpreter.
//!
//! This module contains the logic for interpreting Cranelift instructions.
use crate::environment::Environment;
use crate::frame::Frame;
use crate::interpreter::Trap::InvalidType;
use cranelift_codegen::ir::condcodes::IntCC;
use cranelift_codegen::ir::{
Block, FuncRef, Function, Inst, InstructionData, InstructionData::*, Opcode, Opcode::*, Type,
Value as ValueRef, ValueList,
};
use cranelift_reader::{DataValue, DataValueCastFailure};
use log::debug;
use std::ops::{Add, Sub};
use thiserror::Error;
/// The valid control flow states.
///
/// Interpreting an instruction yields one of these states; the block-level interpreter uses it
/// to decide what to execute next.
pub enum ControlFlow {
/// Proceed to the next instruction of the current block.
Continue,
/// Transfer control to the given block. The SSA references listed here are renamed to the
/// parameters of the destination block before it is interpreted.
ContinueAt(Block, Vec<ValueRef>),
/// Stop interpreting the current function and hand back these values.
Return(Vec<DataValue>),
}
impl ControlFlow {
    /// Extract the returned values from a [ControlFlow::Return].
    ///
    /// # Panics
    ///
    /// Panics if the control flow is in any state other than [ControlFlow::Return].
    pub fn unwrap_return(self) -> Vec<DataValue> {
        match self {
            ControlFlow::Return(values) => values,
            _ => panic!("expected the control flow to be in the return state"),
        }
    }
}
/// The ways interpretation can fail.
///
/// The `Display` text of each variant is generated by `thiserror` from its `#[error(...)]`
/// attribute.
#[derive(Error, Debug)]
pub enum Trap {
/// A trap without a more specific description.
#[error("unknown trap")]
Unknown,
/// A value had an unexpected type. The first field describes the expected type and the second
/// is the SSA reference of the offending value (note the swapped order in the message).
#[error("invalid type for {1}: expected {0}")]
InvalidType(String, ValueRef),
/// A [DataValue] conversion failed; `#[from]` lets `?` convert the underlying
/// [DataValueCastFailure] into this variant.
#[error("invalid cast")]
InvalidCast(#[from] DataValueCastFailure),
/// The instruction is recognized but cannot be interpreted for the operands it was given.
#[error("the instruction is not implemented (perhaps for the given types): {0}")]
Unsupported(Inst),
/// Execution ran past the last instruction of a block without transferring control.
#[error("reached an unreachable statement")]
Unreachable,
/// Control flow ended up in a state that is not valid at that point (e.g. a call that did not
/// return).
#[error("invalid control flow: {0}")]
InvalidControlFlow(String),
/// The function reference does not identify a function in the environment.
#[error("invalid function reference: {0}")]
InvalidFunctionReference(FuncRef),
/// No function with this name is registered in the environment.
#[error("invalid function name: {0}")]
InvalidFunctionName(String),
}
/// The Cranelift interpreter; it contains immutable elements such as the function environment and
/// implements the Cranelift IR semantics.
///
/// The derived `Default` produces an interpreter over a default (empty) [Environment].
#[derive(Default)]
pub struct Interpreter {
/// The functions that can be called by name or by reference during interpretation.
pub env: Environment,
}
/// Helper for more concise matching of binary operations.
///
/// Invoked as `binary_op!(op[lhs, rhs]; [Variant, ...]; inst)`. When both operands are the same
/// listed [DataValue] variant, `op` is applied to their payloads and the result is wrapped in
/// that same variant; any other combination of operands yields `Err(Trap::Unsupported(inst))`.
macro_rules! binary_op {
    ( $op:path[$lhs:ident, $rhs:ident]; [ $( $variant:ident ),* ]; $inst:ident ) => {
        match ($lhs, $rhs) {
            $(
                (DataValue::$variant(a), DataValue::$variant(b)) => {
                    Ok(DataValue::$variant($op(a, b)))
                }
            )*
            _ => Err(Trap::Unsupported($inst)),
        }
    };
}
impl Interpreter {
    /// Construct a new [Interpreter] using the given [Environment].
    pub fn new(env: Environment) -> Self {
        Self { env }
    }

    /// Call a function by name; this is a helpful proxy for [Interpreter::call_by_index].
    ///
    /// # Errors
    ///
    /// Returns [Trap::InvalidFunctionName] if no function is registered under `func_name`;
    /// otherwise returns whatever [Interpreter::call_by_index] returns.
    pub fn call_by_name(
        &self,
        func_name: &str,
        arguments: &[DataValue],
    ) -> Result<ControlFlow, Trap> {
        let func_ref = self
            .env
            .index_of(func_name)
            .ok_or_else(|| Trap::InvalidFunctionName(func_name.to_string()))?;
        self.call_by_index(func_ref, arguments)
    }

    /// Call a function by its index in the [Environment]; this is a proxy for [Interpreter::call].
    ///
    /// # Errors
    ///
    /// Returns [Trap::InvalidFunctionReference] if `func_ref` is not in the environment;
    /// otherwise returns the result of interpreting the function.
    pub fn call_by_index(
        &self,
        func_ref: FuncRef,
        arguments: &[DataValue],
    ) -> Result<ControlFlow, Trap> {
        match self.env.get_by_func_ref(func_ref) {
            None => Err(Trap::InvalidFunctionReference(func_ref)),
            Some(func) => self.call(func, arguments),
        }
    }

    /// Interpret a call to a [Function] given its [DataValue] arguments.
    ///
    /// The arguments are bound to the parameters of the function's first block and
    /// interpretation starts at that block.
    ///
    /// # Panics
    ///
    /// Panics if the function has no blocks, or if the number of `arguments` differs from the
    /// number of parameters of the first block.
    fn call(&self, function: &Function, arguments: &[DataValue]) -> Result<ControlFlow, Trap> {
        debug!("Call: {}({:?})", function.name, arguments);
        let first_block = function
            .layout
            .blocks()
            .next()
            .expect("to have a first block");
        let parameters = function.dfg.block_params(first_block);
        let mut frame = Frame::new(function).with_parameters(parameters, arguments);
        self.block(&mut frame, first_block)
    }

    /// Interpret a [Block] in a [Function], following jumps and branches into further blocks
    /// until the function returns.
    ///
    /// Block-to-block transfers are handled iteratively rather than by recursion, so that IR
    /// loops do not grow the native call stack with every iteration.
    ///
    /// # Errors
    ///
    /// Returns [Trap::Unreachable] if a block's instructions are exhausted without a return or a
    /// transfer to another block, and propagates any trap raised by an instruction.
    fn block(&self, frame: &mut Frame, block: Block) -> Result<ControlFlow, Trap> {
        let mut current = block;
        'blocks: loop {
            debug!("Block: {}", current);
            for inst in frame.function.layout.block_insts(current) {
                match self.inst(frame, inst)? {
                    ControlFlow::Continue => continue,
                    ControlFlow::ContinueAt(next, old_names) => {
                        // Pass the branch arguments by renaming them to the destination block's
                        // parameters, then start over at the top of that block.
                        let new_names = frame.function.dfg.block_params(next);
                        frame.rename(&old_names, new_names);
                        current = next;
                        continue 'blocks;
                    }
                    ControlFlow::Return(rs) => return Ok(ControlFlow::Return(rs)),
                }
            }
            return Err(Trap::Unreachable);
        }
    }

    /// Interpret a single [instruction](Inst). This contains a `match`-based dispatch to the
    /// implementations.
    ///
    /// # Errors
    ///
    /// Returns a [Trap] when an instruction is given operands it cannot handle (for example
    /// [Trap::Unsupported] or [Trap::InvalidType]) or when a nested call fails.
    ///
    /// # Panics
    ///
    /// Panics (via `unimplemented!`) for instruction formats, opcodes and condition codes that
    /// the interpreter does not support yet, and when an operand has no value in `frame`.
    fn inst(&self, frame: &mut Frame, inst: Inst) -> Result<ControlFlow, Trap> {
        use ControlFlow::{Continue, ContinueAt};
        debug!("Inst: {}", &frame.function.dfg.display_inst(inst, None));
        let data = &frame.function.dfg[inst];
        match data {
            Binary { opcode, args } => {
                let arg1 = frame.get(&args[0]);
                let arg2 = frame.get(&args[1]);
                let result = match opcode {
                    Iadd => binary_op!(Add::add[arg1, arg2]; [I8, I16, I32, I64, F32, F64]; inst),
                    Isub => binary_op!(Sub::sub[arg1, arg2]; [I8, I16, I32, I64, F32, F64]; inst),
                    _ => unimplemented!("interpreter does not support opcode yet: {}", opcode),
                }?;
                frame.set(first_result(frame.function, inst), result);
                Ok(Continue)
            }
            BinaryImm { opcode, arg, imm } => {
                // The immediate is materialized with the same type as the value operand.
                let imm = DataValue::from_integer(*imm, type_of(*arg, frame.function))?;
                let arg = frame.get(arg);
                let result = match opcode {
                    IaddImm => binary_op!(Add::add[arg, imm]; [I8, I16, I32, I64, F32, F64]; inst),
                    // NOTE(review): this computes `arg - imm`. The Cranelift IR reference
                    // appears to define `irsub_imm` as the reverse (`imm - arg`); the `sanity`
                    // test in this file relies on the current order, so confirm before changing.
                    IrsubImm => binary_op!(Sub::sub[arg, imm]; [I8, I16, I32, I64, F32, F64]; inst),
                    _ => unimplemented!("interpreter does not support opcode yet: {}", opcode),
                }?;
                frame.set(first_result(frame.function, inst), result);
                Ok(Continue)
            }
            Branch {
                opcode,
                args,
                destination,
            } => match opcode {
                Brnz => {
                    // The first argument is the condition; the rest are passed to the
                    // destination block.
                    let mut args = value_refs(frame.function, args);
                    let first = args.remove(0);
                    match frame.get(&first) {
                        DataValue::B(false)
                        | DataValue::I8(0)
                        | DataValue::I16(0)
                        | DataValue::I32(0)
                        | DataValue::I64(0) => Ok(Continue),
                        DataValue::B(true)
                        | DataValue::I8(_)
                        | DataValue::I16(_)
                        | DataValue::I32(_)
                        | DataValue::I64(_) => Ok(ContinueAt(*destination, args)),
                        // Report the condition itself: it was removed from `args` above, so
                        // `args[0]` would name the wrong value (or not exist at all).
                        _ => Err(Trap::InvalidType("boolean or integer".to_string(), first)),
                    }
                }
                _ => unimplemented!("interpreter does not support opcode yet: {}", opcode),
            },
            InstructionData::Call { args, func_ref, .. } => {
                // Find the function to call: `func_ref` is local to the calling function, so it
                // is translated to a name that is then looked up in the environment.
                let func_name = function_name_of_func_ref(*func_ref, frame.function);
                // Call function.
                let args = frame.get_all(args.as_slice(&frame.function.dfg.value_lists));
                let result = self.call_by_name(&func_name, &args)?;
                // Save results.
                if let ControlFlow::Return(returned_values) = result {
                    let ssa_values = frame.function.dfg.inst_results(inst);
                    assert_eq!(
                        ssa_values.len(),
                        returned_values.len(),
                        "expected result length ({}) to match SSA values length ({}): {}",
                        returned_values.len(),
                        ssa_values.len(),
                        frame.function.dfg.display_inst(inst, None)
                    );
                    frame.set_all(ssa_values, returned_values);
                    Ok(Continue)
                } else {
                    Err(Trap::InvalidControlFlow(format!(
                        "did not return from: {}",
                        frame.function.dfg.display_inst(inst, None)
                    )))
                }
            }
            InstructionData::Jump {
                opcode,
                destination,
                args,
            } => match opcode {
                // Both opcodes unconditionally continue at the destination block.
                Opcode::Fallthrough | Opcode::Jump => {
                    Ok(ContinueAt(*destination, value_refs(frame.function, args)))
                }
                _ => unimplemented!("interpreter does not support opcode yet: {}", opcode),
            },
            IntCompareImm {
                opcode,
                arg,
                cond,
                imm,
            } => match opcode {
                IcmpImm => {
                    // Widen every integer width to `i64` so a single comparison handles all.
                    let arg_value = match *frame.get(arg) {
                        DataValue::I8(i) => Ok(i as i64),
                        DataValue::I16(i) => Ok(i as i64),
                        DataValue::I32(i) => Ok(i as i64),
                        DataValue::I64(i) => Ok(i),
                        _ => Err(InvalidType("integer".to_string(), *arg)),
                    }?;
                    let imm_value = (*imm).into();
                    let result = match cond {
                        // NOTE(review): the operands are compared as signed `i64` here even
                        // though the condition code is unsigned; confirm this is intended.
                        IntCC::UnsignedLessThanOrEqual => arg_value <= imm_value,
                        IntCC::Equal => arg_value == imm_value,
                        _ => unimplemented!(
                            "interpreter does not support condition code yet: {}",
                            cond
                        ),
                    };
                    let res = first_result(frame.function, inst);
                    frame.set(res, DataValue::B(result));
                    Ok(Continue)
                }
                _ => unimplemented!("interpreter does not support opcode yet: {}", opcode),
            },
            MultiAry { opcode, args } => match opcode {
                Return => {
                    let rs: Vec<DataValue> = args
                        .as_slice(&frame.function.dfg.value_lists)
                        .iter()
                        .map(|r| frame.get(r).clone())
                        .collect();
                    Ok(ControlFlow::Return(rs))
                }
                _ => unimplemented!("interpreter does not support opcode yet: {}", opcode),
            },
            NullAry { opcode } => match opcode {
                Nop => Ok(Continue),
                _ => unimplemented!("interpreter does not support opcode yet: {}", opcode),
            },
            UnaryImm { opcode, imm } => match opcode {
                Iconst => {
                    // The constant takes the type of the instruction's result value.
                    let res = first_result(frame.function, inst);
                    let imm_value = DataValue::from_integer(*imm, type_of(res, frame.function))?;
                    frame.set(res, imm_value);
                    Ok(Continue)
                }
                _ => unimplemented!("interpreter does not support opcode yet: {}", opcode),
            },
            UnaryBool { opcode, imm } => match opcode {
                Bconst => {
                    let res = first_result(frame.function, inst);
                    frame.set(res, DataValue::B(*imm));
                    Ok(Continue)
                }
                _ => unimplemented!("interpreter does not support opcode yet: {}", opcode),
            },
            _ => unimplemented!("interpreter does not support instruction yet: {:?}", data),
        }
    }
}
/// Look up the first result value of the instruction `inst` in `function`.
///
/// Keeping this behind a helper insulates the interpreter from changes to the [Function] API.
#[inline]
fn first_result(function: &Function, inst: Inst) -> ValueRef {
    let dfg = &function.dfg;
    dfg.first_result(inst)
}
/// Copy the IR values of the value list `args` into an owned vector.
///
/// The list is resolved against the value-list pool of `function`. Keeping this behind a helper
/// insulates the interpreter from changes to the [Function] API.
#[inline]
fn value_refs(function: &Function, args: &ValueList) -> Vec<ValueRef> {
    let pool = &function.dfg.value_lists;
    Vec::from(args.as_slice(pool))
}
/// Resolve `func_ref`, a reference local to `function`, to the textual (external) name of the
/// function it refers to. Note that this name may be truncated.
///
/// Keeping this behind a helper insulates the interpreter from changes to the [Function] API.
///
/// # Panics
///
/// Panics if `function` declares no external function for `func_ref`.
#[inline]
fn function_name_of_func_ref(func_ref: FuncRef, function: &Function) -> String {
    let declared = &function.dfg.ext_funcs;
    let callee = declared.get(func_ref).expect("function to exist");
    callee.name.to_string()
}
/// Look up the type of the IR value `value` in `function`. TODO move to Frame?
#[inline]
fn type_of(value: ValueRef, function: &Function) -> Type {
    let dfg = &function.dfg;
    dfg.value_type(value)
}
#[cfg(test)]
mod tests {
    use super::*;
    use cranelift_reader::parse_functions;

    // The more ergonomic `test interpret` filetest is the preferred home for interpreter tests;
    // this unit test only checks that the interpreter still works end to end without any of
    // the filetest infrastructure.
    #[test]
    fn sanity() {
        let code = "function %test() -> b1 {
block0:
v0 = iconst.i32 40
v1 = iadd_imm v0, 3
v2 = irsub_imm v1, 1
v3 = icmp_imm eq v2, 42
return v3
}";
        // Parse the snippet and register its only function under the function's own name.
        let mut parsed = parse_functions(code).unwrap().into_iter();
        let func = parsed.next().unwrap();
        let name = func.name.to_string();
        let mut env = Environment::default();
        env.add(name, func);

        let outcome = Interpreter::new(env).call_by_name("%test", &[]);
        let returned = outcome.unwrap().unwrap_return();
        assert_eq!(returned, vec![DataValue::B(true)])
    }
}

View File

@@ -0,0 +1,7 @@
//! Cranelift IR interpreter.
//!
//! This module is a project for interpreting Cranelift IR.
pub mod environment;
pub mod frame;
pub mod interpreter;