Rewrite interpreter generically (#2323)

* Rewrite interpreter generically This change re-implements the Cranelift interpreter to use generic values; this makes it possible to do abstract interpretation of Cranelift instructions. In doing so, the interpretation state is extracted from the `Interpreter` structure and is accessed via a `State` trait; this makes it possible to not only more clearly observe the interpreter's state but also to interpret using a dummy state (e.g. `ImmutableRegisterState`). This addition made it possible to implement more of the Cranelift instructions (~70%, ignoring the x86-specific instructions). * Replace macros with closures
2020-11-02 12:28:07 -08:00
parent 59a2ce4d34
commit 6d50099816
16 changed files with 1590 additions and 342 deletions
--- a/cranelift/interpreter/src/interpreter.rs
+++ b/cranelift/interpreter/src/interpreter.rs
@@ -1,110 +1,65 @@
 //! Cranelift IR interpreter.
 //!
-//! This module contains the logic for interpreting Cranelift instructions.
+//! This module partially contains the logic for interpreting Cranelift IR.

-use crate::environment::Environment;
+use crate::environment::FunctionStore;
 use crate::frame::Frame;
-use crate::interpreter::Trap::InvalidType;
-use cranelift_codegen::data_value::{DataValue, DataValueCastFailure};
-use cranelift_codegen::ir::condcodes::IntCC;
-use cranelift_codegen::ir::{
-    Block, FuncRef, Function, Inst, InstructionData, InstructionData::*, Opcode, Opcode::*, Type,
-    Value as ValueRef, ValueList,
-};
+use crate::instruction::DfgInstructionContext;
+use crate::state::{MemoryError, State};
+use crate::step::{step, ControlFlow, StepError};
+use crate::value::ValueError;
+use cranelift_codegen::data_value::DataValue;
+use cranelift_codegen::ir::condcodes::{FloatCC, IntCC};
+use cranelift_codegen::ir::{Block, FuncRef, Function, Type, Value as ValueRef};
 use log::trace;
-use std::ops::{Add, Mul, Sub};
+use std::collections::HashSet;
+use std::fmt::Debug;
 use thiserror::Error;

-/// The valid control flow states.
-pub enum ControlFlow {
-    Continue,
-    ContinueAt(Block, Vec<ValueRef>),
-    Return(Vec<DataValue>),
+/// The Cranelift interpreter; this contains some high-level functions to control the interpreter's
+/// flow. The interpreter state is defined separately (see [InterpreterState]) as the execution
+/// semantics for each Cranelift instruction (see [step]).
+pub struct Interpreter<'a> {
+    state: InterpreterState<'a>,
 }

-impl ControlFlow {
-    /// For convenience, we can unwrap the [ControlFlow] state assuming that it is a
-    /// [ControlFlow::Return], panicking otherwise.
-    pub fn unwrap_return(self) -> Vec<DataValue> {
-        if let ControlFlow::Return(values) = self {
-            values
-        } else {
-            panic!("expected the control flow to be in the return state")
-        }
-    }
-}
-
-/// The ways interpretation can fail.
-#[derive(Error, Debug)]
-pub enum Trap {
-    #[error("unknown trap")]
-    Unknown,
-    #[error("invalid type for {1}: expected {0}")]
-    InvalidType(String, ValueRef),
-    #[error("invalid cast")]
-    InvalidCast(#[from] DataValueCastFailure),
-    #[error("the instruction is not implemented (perhaps for the given types): {0}")]
-    Unsupported(Inst),
-    #[error("reached an unreachable statement")]
-    Unreachable,
-    #[error("invalid control flow: {0}")]
-    InvalidControlFlow(String),
-    #[error("invalid function reference: {0}")]
-    InvalidFunctionReference(FuncRef),
-    #[error("invalid function name: {0}")]
-    InvalidFunctionName(String),
-}
-
-/// The Cranelift interpreter; it contains immutable elements such as the function environment and
-/// implements the Cranelift IR semantics.
-#[derive(Default)]
-pub struct Interpreter {
-    pub env: Environment,
-}
-
-/// Helper for more concise matching.
-macro_rules! binary_op {
-    ( $op:path[$arg1:ident, $arg2:ident]; [ $( $data_value_ty:ident ),* ]; $inst:ident ) => {
-        match ($arg1, $arg2) {
-            $( (DataValue::$data_value_ty(a), DataValue::$data_value_ty(b)) => { Ok(DataValue::$data_value_ty($op(a, b))) } )*
-            _ => Err(Trap::Unsupported($inst)),
-        }
-    };
-}
-
-impl Interpreter {
-    /// Construct a new [Interpreter] using the given [Environment].
-    pub fn new(env: Environment) -> Self {
-        Self { env }
+impl<'a> Interpreter<'a> {
+    pub fn new(state: InterpreterState<'a>) -> Self {
+        Self { state }
    }

    /// Call a function by name; this is a helpful proxy for [Interpreter::call_by_index].
    pub fn call_by_name(
-        &self,
+        &mut self,
        func_name: &str,
        arguments: &[DataValue],
-    ) -> Result<ControlFlow, Trap> {
+    ) -> Result<ControlFlow<'a, DataValue>, InterpreterError> {
        let func_ref = self
-            .env
+            .state
+            .functions
            .index_of(func_name)
-            .ok_or_else(|| Trap::InvalidFunctionName(func_name.to_string()))?;
+            .ok_or_else(|| InterpreterError::UnknownFunctionName(func_name.to_string()))?;
        self.call_by_index(func_ref, arguments)
    }

-    /// Call a function by its index in the [Environment]; this is a proxy for [Interpreter::call].
+    /// Call a function by its index in the [FunctionStore]; this is a proxy for [Interpreter::call].
    pub fn call_by_index(
-        &self,
+        &mut self,
        func_ref: FuncRef,
        arguments: &[DataValue],
-    ) -> Result<ControlFlow, Trap> {
-        match self.env.get_by_func_ref(func_ref) {
-            None => Err(Trap::InvalidFunctionReference(func_ref)),
+    ) -> Result<ControlFlow<'a, DataValue>, InterpreterError> {
+        match self.state.get_function(func_ref) {
+            None => Err(InterpreterError::UnknownFunctionReference(func_ref)),
            Some(func) => self.call(func, arguments),
        }
    }

    /// Interpret a call to a [Function] given its [DataValue] arguments.
-    fn call(&self, function: &Function, arguments: &[DataValue]) -> Result<ControlFlow, Trap> {
+    fn call(
+        &mut self,
+        function: &'a Function,
+        arguments: &[DataValue],
+    ) -> Result<ControlFlow<'a, DataValue>, InterpreterError> {
        trace!("Call: {}({:?})", function.name, arguments);
        let first_block = function
            .layout
@@ -112,241 +67,184 @@ impl Interpreter {
            .next()
            .expect("to have a first block");
        let parameters = function.dfg.block_params(first_block);
-        let mut frame = Frame::new(function);
-        frame.set_all(parameters, arguments.to_vec());
-        self.block(&mut frame, first_block)
+        self.state.push_frame(function);
+        self.state
+            .current_frame_mut()
+            .set_all(parameters, arguments.to_vec());
+        self.block(first_block)
    }

    /// Interpret a [Block] in a [Function]. This drives the interpretation over sequences of
    /// instructions, which may continue in other blocks, until the function returns.
-    fn block(&self, frame: &mut Frame, block: Block) -> Result<ControlFlow, Trap> {
+    fn block(&mut self, block: Block) -> Result<ControlFlow<'a, DataValue>, InterpreterError> {
        trace!("Block: {}", block);
-        let layout = &frame.function.layout;
+        let function = self.state.current_frame_mut().function;
+        let layout = &function.layout;
        let mut maybe_inst = layout.first_inst(block);
        while let Some(inst) = maybe_inst {
-            match self.inst(frame, inst)? {
+            let inst_context = DfgInstructionContext::new(inst, &function.dfg);
+            match step(&mut self.state, inst_context)? {
+                ControlFlow::Assign(values) => {
+                    self.state
+                        .current_frame_mut()
+                        .set_all(function.dfg.inst_results(inst), values.to_vec());
+                    maybe_inst = layout.next_inst(inst)
+                }
                ControlFlow::Continue => maybe_inst = layout.next_inst(inst),
-                ControlFlow::ContinueAt(block, old_names) => {
+                ControlFlow::ContinueAt(block, block_arguments) => {
                    trace!("Block: {}", block);
-                    let new_names = frame.function.dfg.block_params(block);
-                    frame.rename(&old_names, new_names);
+                    self.state
+                        .current_frame_mut()
+                        .set_all(function.dfg.block_params(block), block_arguments.to_vec());
                    maybe_inst = layout.first_inst(block)
                }
-                ControlFlow::Return(rs) => return Ok(ControlFlow::Return(rs)),
+                ControlFlow::Call(function, arguments) => {
+                    let returned_arguments = self.call(function, &arguments)?.unwrap_return();
+                    self.state
+                        .current_frame_mut()
+                        .set_all(function.dfg.inst_results(inst), returned_arguments);
+                    maybe_inst = layout.next_inst(inst)
+                }
+                ControlFlow::Return(returned_values) => {
+                    self.state.pop_frame();
+                    return Ok(ControlFlow::Return(returned_values));
+                }
+                ControlFlow::Trap(trap) => return Ok(ControlFlow::Trap(trap)),
            }
        }
-        Err(Trap::Unreachable)
+        Err(InterpreterError::Unreachable)
    }
+}

-    /// Interpret a single [instruction](Inst). This contains a `match`-based dispatch to the
-    /// implementations.
-    fn inst(&self, frame: &mut Frame, inst: Inst) -> Result<ControlFlow, Trap> {
-        use ControlFlow::{Continue, ContinueAt};
-        trace!("Inst: {}", &frame.function.dfg.display_inst(inst, None));
+/// The ways interpretation can fail.
+#[derive(Error, Debug)]
+pub enum InterpreterError {
+    #[error("failed to interpret instruction")]
+    StepError(#[from] StepError),
+    #[error("reached an unreachable statement")]
+    Unreachable,
+    #[error("unknown function reference (has it been added to the function store?): {0}")]
+    UnknownFunctionReference(FuncRef),
+    #[error("unknown function with name (has it been added to the function store?): {0}")]
+    UnknownFunctionName(String),
+    #[error("value error")]
+    ValueError(#[from] ValueError),
+}

-        let data = &frame.function.dfg[inst];
-        match data {
-            Binary { opcode, args } => {
-                let arg1 = frame.get(&args[0]);
-                let arg2 = frame.get(&args[1]);
-                let result = match opcode {
-                    Iadd => binary_op!(Add::add[arg1, arg2]; [I8, I16, I32, I64]; inst),
-                    Isub => binary_op!(Sub::sub[arg1, arg2]; [I8, I16, I32, I64]; inst),
-                    Imul => binary_op!(Mul::mul[arg1, arg2]; [I8, I16, I32, I64]; inst),
-                    // TODO re-enable by importing something like rustc_apfloat for correctness.
-                    // Fadd => binary_op!(Add::add[arg1, arg2]; [F32, F64]; inst),
-                    // Fsub => binary_op!(Sub::sub[arg1, arg2]; [F32, F64]; inst),
-                    // Fmul => binary_op!(Mul::mul[arg1, arg2]; [F32, F64]; inst),
-                    // Fdiv => binary_op!(Div::div[arg1, arg2]; [F32, F64]; inst),
-                    _ => unimplemented!("interpreter does not support opcode yet: {}", opcode),
-                }?;
-                frame.set(first_result(frame.function, inst), result);
-                Ok(Continue)
-            }
+/// Maintains the [Interpreter]'s state, implementing the [State] trait.
+pub struct InterpreterState<'a> {
+    pub functions: FunctionStore<'a>,
+    pub frame_stack: Vec<Frame<'a>>,
+    pub heap: Vec<u8>,
+    pub iflags: HashSet<IntCC>,
+    pub fflags: HashSet<FloatCC>,
+}

-            BinaryImm64 { opcode, arg, imm } => {
-                let imm = DataValue::from_integer(*imm, type_of(*arg, frame.function))?;
-                let arg = frame.get(&arg);
-                let result = match opcode {
-                    IaddImm => binary_op!(Add::add[arg, imm]; [I8, I16, I32, I64]; inst),
-                    IrsubImm => binary_op!(Sub::sub[imm, arg]; [I8, I16, I32, I64]; inst),
-                    ImulImm => binary_op!(Mul::mul[arg, imm]; [I8, I16, I32, I64]; inst),
-                    _ => unimplemented!("interpreter does not support opcode yet: {}", opcode),
-                }?;
-                frame.set(first_result(frame.function, inst), result);
-                Ok(Continue)
-            }
-
-            Branch {
-                opcode,
-                args,
-                destination,
-            } => match opcode {
-                Brnz => {
-                    let mut args = value_refs(frame.function, args);
-                    let first = args.remove(0);
-                    match frame.get(&first) {
-                        DataValue::B(false)
-                        | DataValue::I8(0)
-                        | DataValue::I16(0)
-                        | DataValue::I32(0)
-                        | DataValue::I64(0) => Ok(Continue),
-                        DataValue::B(true)
-                        | DataValue::I8(_)
-                        | DataValue::I16(_)
-                        | DataValue::I32(_)
-                        | DataValue::I64(_) => Ok(ContinueAt(*destination, args)),
-                        _ => Err(Trap::InvalidType("boolean or integer".to_string(), args[0])),
-                    }
-                }
-                _ => unimplemented!("interpreter does not support opcode yet: {}", opcode),
-            },
-            InstructionData::Call { args, func_ref, .. } => {
-                // Find the function to call.
-                let func_name = function_name_of_func_ref(*func_ref, frame.function);
-
-                // Call function.
-                let args = frame.get_all(args.as_slice(&frame.function.dfg.value_lists));
-                let result = self.call_by_name(&func_name, &args)?;
-
-                // Save results.
-                if let ControlFlow::Return(returned_values) = result {
-                    let ssa_values = frame.function.dfg.inst_results(inst);
-                    assert_eq!(
-                        ssa_values.len(),
-                        returned_values.len(),
-                        "expected result length ({}) to match SSA values length ({}): {}",
-                        returned_values.len(),
-                        ssa_values.len(),
-                        frame.function.dfg.display_inst(inst, None)
-                    );
-                    frame.set_all(ssa_values, returned_values);
-                    Ok(Continue)
-                } else {
-                    Err(Trap::InvalidControlFlow(format!(
-                        "did not return from: {}",
-                        frame.function.dfg.display_inst(inst, None)
-                    )))
-                }
-            }
-            InstructionData::Jump {
-                opcode,
-                destination,
-                args,
-            } => match opcode {
-                Opcode::Fallthrough => {
-                    Ok(ContinueAt(*destination, value_refs(frame.function, args)))
-                }
-                Opcode::Jump => Ok(ContinueAt(*destination, value_refs(frame.function, args))),
-                _ => unimplemented!("interpreter does not support opcode yet: {}", opcode),
-            },
-            IntCompareImm {
-                opcode,
-                arg,
-                cond,
-                imm,
-            } => match opcode {
-                IcmpImm => {
-                    let arg_value = match *frame.get(arg) {
-                        DataValue::I8(i) => Ok(i as i64),
-                        DataValue::I16(i) => Ok(i as i64),
-                        DataValue::I32(i) => Ok(i as i64),
-                        DataValue::I64(i) => Ok(i),
-                        _ => Err(InvalidType("integer".to_string(), *arg)),
-                    }?;
-                    let imm_value = (*imm).into();
-                    let result = match cond {
-                        IntCC::UnsignedLessThanOrEqual => arg_value <= imm_value,
-                        IntCC::Equal => arg_value == imm_value,
-                        _ => unimplemented!(
-                            "interpreter does not support condition code yet: {}",
-                            cond
-                        ),
-                    };
-                    let res = first_result(frame.function, inst);
-                    frame.set(res, DataValue::B(result));
-                    Ok(Continue)
-                }
-                _ => unimplemented!("interpreter does not support opcode yet: {}", opcode),
-            },
-            MultiAry { opcode, args } => match opcode {
-                Return => {
-                    let rs: Vec<DataValue> = args
-                        .as_slice(&frame.function.dfg.value_lists)
-                        .iter()
-                        .map(|r| frame.get(r).clone())
-                        .collect();
-                    Ok(ControlFlow::Return(rs))
-                }
-                _ => unimplemented!("interpreter does not support opcode yet: {}", opcode),
-            },
-            NullAry { opcode } => match opcode {
-                Nop => Ok(Continue),
-                _ => unimplemented!("interpreter does not support opcode yet: {}", opcode),
-            },
-            UnaryImm { opcode, imm } => match opcode {
-                Iconst => {
-                    let res = first_result(frame.function, inst);
-                    let imm_value = DataValue::from_integer(*imm, type_of(res, frame.function))?;
-                    frame.set(res, imm_value);
-                    Ok(Continue)
-                }
-                _ => unimplemented!("interpreter does not support opcode yet: {}", opcode),
-            },
-            UnaryBool { opcode, imm } => match opcode {
-                Bconst => {
-                    let res = first_result(frame.function, inst);
-                    frame.set(res, DataValue::B(*imm));
-                    Ok(Continue)
-                }
-                _ => unimplemented!("interpreter does not support opcode yet: {}", opcode),
-            },
-
-            _ => unimplemented!("interpreter does not support instruction yet: {:?}", data),
+impl Default for InterpreterState<'_> {
+    fn default() -> Self {
+        Self {
+            functions: FunctionStore::default(),
+            frame_stack: vec![],
+            heap: vec![0; 1024],
+            iflags: HashSet::new(),
+            fflags: HashSet::new(),
        }
    }
 }

-/// Return the first result of an instruction.
-///
-/// This helper cushions the interpreter from changes to the [Function] API.
-#[inline]
-fn first_result(function: &Function, inst: Inst) -> ValueRef {
-    function.dfg.first_result(inst)
+impl<'a> InterpreterState<'a> {
+    pub fn with_function_store(self, functions: FunctionStore<'a>) -> Self {
+        Self { functions, ..self }
+    }
+
+    fn current_frame_mut(&mut self) -> &mut Frame<'a> {
+        let num_frames = self.frame_stack.len();
+        match num_frames {
+            0 => panic!("unable to retrieve the current frame because no frames were pushed"),
+            _ => &mut self.frame_stack[num_frames - 1],
+        }
+    }
+
+    fn current_frame(&self) -> &Frame<'a> {
+        let num_frames = self.frame_stack.len();
+        match num_frames {
+            0 => panic!("unable to retrieve the current frame because no frames were pushed"),
+            _ => &self.frame_stack[num_frames - 1],
+        }
+    }
 }

-/// Return a list of IR values as a vector.
-///
-/// This helper cushions the interpreter from changes to the [Function] API.
-#[inline]
-fn value_refs(function: &Function, args: &ValueList) -> Vec<ValueRef> {
-    args.as_slice(&function.dfg.value_lists).to_vec()
-}
+impl<'a> State<'a, DataValue> for InterpreterState<'a> {
+    fn get_function(&self, func_ref: FuncRef) -> Option<&'a Function> {
+        self.functions.get_by_func_ref(func_ref)
+    }
+    fn push_frame(&mut self, function: &'a Function) {
+        self.frame_stack.push(Frame::new(function));
+    }
+    fn pop_frame(&mut self) {
+        self.frame_stack.pop();
+    }

-/// Return the (external) function name of `func_ref` in a local `function`. Note that this may
-/// be truncated.
-///
-/// This helper cushions the interpreter from changes to the [Function] API.
-#[inline]
-fn function_name_of_func_ref(func_ref: FuncRef, function: &Function) -> String {
-    function
-        .dfg
-        .ext_funcs
-        .get(func_ref)
-        .expect("function to exist")
-        .name
-        .to_string()
-}
+    fn get_value(&self, name: ValueRef) -> Option<DataValue> {
+        Some(self.current_frame().get(name).clone()) // TODO avoid clone?
+    }

-/// Helper for calculating the type of an IR value. TODO move to Frame?
-#[inline]
-fn type_of(value: ValueRef, function: &Function) -> Type {
-    function.dfg.value_type(value)
+    fn set_value(&mut self, name: ValueRef, value: DataValue) -> Option<DataValue> {
+        self.current_frame_mut().set(name, value)
+    }
+
+    fn has_iflag(&self, flag: IntCC) -> bool {
+        self.iflags.contains(&flag)
+    }
+
+    fn has_fflag(&self, flag: FloatCC) -> bool {
+        self.fflags.contains(&flag)
+    }
+
+    fn set_iflag(&mut self, flag: IntCC) {
+        self.iflags.insert(flag);
+    }
+
+    fn set_fflag(&mut self, flag: FloatCC) {
+        self.fflags.insert(flag);
+    }
+
+    fn clear_flags(&mut self) {
+        self.iflags.clear();
+        self.fflags.clear()
+    }
+
+    fn load_heap(&self, offset: usize, ty: Type) -> Result<DataValue, MemoryError> {
+        if offset + 16 < self.heap.len() {
+            let pointer = self.heap[offset..offset + 16].as_ptr() as *const _ as *const u128;
+            Ok(unsafe { DataValue::read_value_from(pointer, ty) })
+        } else {
+            Err(MemoryError::InsufficientMemory(offset, self.heap.len()))
+        }
+    }
+
+    fn store_heap(&mut self, offset: usize, v: DataValue) -> Result<(), MemoryError> {
+        if offset + 16 < self.heap.len() {
+            let pointer = self.heap[offset..offset + 16].as_mut_ptr() as *mut _ as *mut u128;
+            Ok(unsafe { v.write_value_to(pointer) })
+        } else {
+            Err(MemoryError::InsufficientMemory(offset, self.heap.len()))
+        }
+    }
+
+    fn load_stack(&self, _offset: usize, _ty: Type) -> Result<DataValue, MemoryError> {
+        unimplemented!()
+    }
+
+    fn store_stack(&mut self, _offset: usize, _v: DataValue) -> Result<(), MemoryError> {
+        unimplemented!()
+    }
 }

 #[cfg(test)]
 mod tests {
    use super::*;
+    use cranelift_codegen::ir::immediates::Ieee32;
    use cranelift_reader::parse_functions;

    // Most interpreter tests should use the more ergonomic `test interpret` filetest but this
@@ -364,14 +262,39 @@ mod tests {
        }";

        let func = parse_functions(code).unwrap().into_iter().next().unwrap();
-        let mut env = Environment::default();
-        env.add(func.name.to_string(), func);
-        let interpreter = Interpreter::new(env);
-        let result = interpreter
+        let mut env = FunctionStore::default();
+        env.add(func.name.to_string(), &func);
+        let state = InterpreterState::default().with_function_store(env);
+        let result = Interpreter::new(state)
            .call_by_name("%test", &[])
            .unwrap()
            .unwrap_return();

        assert_eq!(result, vec![DataValue::B(true)])
    }
+
+    #[test]
+    fn state_heap_roundtrip() -> Result<(), MemoryError> {
+        let mut state = InterpreterState::default();
+        let mut roundtrip = |dv: DataValue| {
+            state.store_heap(0, dv.clone())?;
+            assert_eq!(dv, state.load_heap(0, dv.ty())?);
+            Ok(())
+        };
+
+        roundtrip(DataValue::B(true))?;
+        roundtrip(DataValue::I64(42))?;
+        roundtrip(DataValue::F32(Ieee32::from(0.42)))
+    }
+
+    #[test]
+    fn state_flags() {
+        let mut state = InterpreterState::default();
+        let flag = IntCC::Overflow;
+        assert!(!state.has_iflag(flag));
+        state.set_iflag(flag);
+        assert!(state.has_iflag(flag));
+        state.clear_flags();
+        assert!(!state.has_iflag(flag));
+    }
 }