cranelift: Add stack support to the interpreter with virtual addresses (#3187)

* cranelift: Add stack support to the interpreter

We also change the approach for heap loads and stores.

Previously we would use the offset as the address into the heap. However,
this approach does not allow the load/store instructions to be used to
read/write from both the heap and the stack.

This commit changes the addressing mechanism of the interpreter. We now
return the real addresses from the addressing instructions
(stack_addr/heap_addr), and instead check if the address passed into
the load/store instructions points to an area in the heap or the stack.

* cranelift: Add virtual addresses to cranelift interpreter

Adds a virtual addressing scheme, which was discussed as a better
alternative to returning the real addresses.

The virtual addresses are split into 4 regions (stack, heap, tables and
global values), and the address itself is composed of an `entry` field
and an `offset` field. In general the `entry` field corresponds to the
instance of the resource (e.g. table5 is entry 5) and the `offset` field
is a byte offset inside that entry.

There is one exception to this which is the stack, where due to only
having one stack, the whole address is an offset field.

The number of bits in entry vs offset fields is variable with respect to
the `region` and the address size (32 bits vs 64 bits). This is done
because with 32-bit addresses we would have to compromise on heap size,
or allow only a small number of global values / tables. With 64-bit
addresses we do not have to make this compromise, but we still need to
support 32-bit addresses as well.

* cranelift: Remove interpreter trap codes

* cranelift: Calculate frame_offset when entering or exiting a frame

* cranelift: Add safe read/write interface to DataValue

* cranelift: DataValue write full 128bit slot for booleans

* cranelift: Use DataValue accessors for trampoline.
This commit is contained in:
Afonso Bordado
2021-08-24 17:29:11 +01:00
committed by GitHub
parent f4ff7c350a
commit 2776074dfc
13 changed files with 1094 additions and 157 deletions

View File

@@ -0,0 +1,85 @@
test interpret
; Check that an address produced by a 32-bit stack_addr plus iadd_imm
; aliases the same slot bytes as stack_store/stack_load at ss0+8.
function %stack_addr_iadd(i64) -> b1 {
ss0 = explicit_slot 16
block0(v0: i64):
v1 = stack_addr.i32 ss0
v2 = iadd_imm.i32 v1, 8
stack_store.i64 v0, ss0+8
v3 = load.i64 v2
v5 = iadd_imm.i64 v0, 20
store.i64 v5, v2
v6 = stack_load.i64 ss0+8
; v3 must observe the stack_store; v6 must observe the store through v2.
v7 = icmp eq v0, v3
v8 = icmp eq v5, v6
v9 = band v7, v8
return v9
}
; run: %stack_addr_iadd(0) == true
; run: %stack_addr_iadd(1) == true
; run: %stack_addr_iadd(-1) == true
; Check that 32-bit stack addresses (with and without a constant offset)
; alias the corresponding stack_store/stack_load accesses.
function %stack_addr_32(i64) -> b1 {
ss0 = explicit_slot 24
block0(v0: i64):
v1 = stack_addr.i32 ss0
stack_store.i64 v0, ss0
v2 = load.i64 v1
v3 = icmp eq v0, v2
v4 = stack_addr.i32 ss0+8
store.i64 v0, v4
v5 = stack_load.i64 ss0+8
v6 = icmp eq v0, v5
; Third 8-byte lane: store and load both through the computed address.
v7 = stack_addr.i32 ss0+16
store.i64 v0, v7
v8 = load.i64 v7
v9 = icmp eq v0, v8
v10 = band v3, v6
v11 = band v10, v9
return v11
}
; run: %stack_addr_32(0) == true
; run: %stack_addr_32(1) == true
; run: %stack_addr_32(-1) == true
; Check that a 32-bit and a 64-bit stack_addr of the same slot+offset
; refer to the same memory: a store through one is visible via the other.
function %addr32_64(i64) -> b1 {
ss0 = explicit_slot 16
block0(v0: i64):
v1 = stack_addr.i32 ss0+8
v2 = stack_addr.i64 ss0+8
store.i64 v0, v1
v3 = load.i64 v2
v4 = icmp eq v3, v0
return v4
}
; run: %addr32_64(0) == true
; run: %addr32_64(1) == true
; run: %addr32_64(-1) == true
; Check that two distinct stack slots yield distinct addresses.
function %multi_slot_different_addrs() -> b1 {
ss0 = explicit_slot 8
ss1 = explicit_slot 8
block0:
v0 = stack_addr.i32 ss0
v1 = stack_addr.i32 ss1
v2 = icmp ne v0, v1
return v2
}
; run: %multi_slot_different_addrs() == true

View File

@@ -0,0 +1,56 @@
test interpret
test run
target x86_64 machinst
target s390x
target aarch64
; 64-bit variant: a stack_addr plus iadd_imm must alias the same slot
; bytes as stack_store/stack_load at ss0+8.
function %stack_addr_iadd(i64) -> b1 {
ss0 = explicit_slot 16
block0(v0: i64):
v1 = stack_addr.i64 ss0
v2 = iadd_imm.i64 v1, 8
stack_store.i64 v0, ss0+8
v3 = load.i64 v2
v5 = iadd_imm.i64 v0, 20
store.i64 v5, v2
v6 = stack_load.i64 ss0+8
; v3 must observe the stack_store; v6 must observe the store through v2.
v7 = icmp eq v0, v3
v8 = icmp eq v5, v6
v9 = band v7, v8
return v9
}
; run: %stack_addr_iadd(0) == true
; run: %stack_addr_iadd(1) == true
; run: %stack_addr_iadd(-1) == true
; Check that 64-bit stack addresses (with and without a constant offset)
; alias the corresponding stack_store/stack_load accesses.
function %stack_addr_64(i64) -> b1 {
ss0 = explicit_slot 24
block0(v0: i64):
v1 = stack_addr.i64 ss0
stack_store.i64 v0, ss0
v2 = load.i64 v1
v3 = icmp eq v0, v2
v4 = stack_addr.i64 ss0+8
store.i64 v0, v4
v5 = stack_load.i64 ss0+8
v6 = icmp eq v0, v5
; Third 8-byte lane: store and load both through the computed address.
v7 = stack_addr.i64 ss0+16
store.i64 v0, v7
v8 = load.i64 v7
v9 = icmp eq v0, v8
v10 = band v3, v6
v11 = band v10, v9
return v11
}
; run: %stack_addr_64(0) == true
; run: %stack_addr_64(1) == true
; run: %stack_addr_64(-1) == true

View File

@@ -0,0 +1,130 @@
test interpret
test run
target x86_64 machinst
target s390x
target aarch64
; Simplest round trip: store a value to a slot and load it back.
function %stack_simple(i64) -> i64 {
ss0 = explicit_slot 8
block0(v0: i64):
stack_store.i64 v0, ss0
v1 = stack_load.i64 ss0
return v1
}
; run: %stack_simple(0) == 0
; run: %stack_simple(1) == 1
; run: %stack_simple(-1) == -1
; Round trip through a slot declared with an explicit slot offset.
function %slot_offset(i64) -> i64 {
ss0 = explicit_slot 8, offset 8
block0(v0: i64):
stack_store.i64 v0, ss0
v1 = stack_load.i64 ss0
return v1
}
; run: %slot_offset(0) == 0
; run: %slot_offset(1) == 1
; run: %slot_offset(-1) == -1
; Round trip at a non-zero byte offset within a slot.
function %stack_offset(i64) -> i64 {
ss0 = explicit_slot 16
block0(v0: i64):
stack_store.i64 v0, ss0+8
v1 = stack_load.i64 ss0+8
return v1
}
; run: %stack_offset(0) == 0
; run: %stack_offset(1) == 1
; run: %stack_offset(-1) == -1
; Round trip at an unaligned offset (8-byte access at +3 in an 11-byte slot).
function %offset_unaligned(i64) -> i64 {
ss0 = explicit_slot 11
block0(v0: i64):
stack_store.i64 v0, ss0+3
v1 = stack_load.i64 ss0+3
return v1
}
; run: %offset_unaligned(0) == 0
; run: %offset_unaligned(1) == 1
; run: %offset_unaligned(-1) == -1
; Two slots used independently; their loaded values are summed.
function %multi_slot_stack(i64, i64) -> i64 {
ss0 = explicit_slot 8
ss1 = explicit_slot 8
block0(v0: i64, v1: i64):
stack_store.i64 v0, ss0
stack_store.i64 v1, ss1
v2 = stack_load.i64 ss0
v3 = stack_load.i64 ss1
v4 = iadd.i64 v2, v3
return v4
}
; run: %multi_slot_stack(0, 1) == 1
; run: %multi_slot_stack(1, 2) == 3
; Check that a write to a 1-byte slot does not clobber the adjacent
; 8-byte slot, and vice versa.
function %multi_slot_out_of_bounds_writes(i8, i64) -> i8, i64 {
ss0 = explicit_slot 1
ss1 = explicit_slot 8
block0(v0: i8, v1: i64):
stack_store.i8 v0, ss0
stack_store.i64 v1, ss1
v2 = stack_load.i8 ss0
v3 = stack_load.i64 ss1
return v2, v3
}
; run: %multi_slot_out_of_bounds_writes(10, 1) == [10, 1]
; run: %multi_slot_out_of_bounds_writes(0, 2) == [0, 2]
; Check that writes to an offset slot and a neighbouring slot do not
; interfere with each other.
function %multi_slot_offset_writes(i8, i64) -> i8, i64 {
ss0 = explicit_slot 8, offset 8
ss1 = explicit_slot 8
block0(v0: i8, v1: i64):
stack_store.i8 v0, ss0
stack_store.i64 v1, ss1
v2 = stack_load.i8 ss0
v3 = stack_load.i64 ss1
return v2, v3
}
; run: %multi_slot_offset_writes(0, 1) == [0, 1]
; run: %multi_slot_offset_writes(1, 2) == [1, 2]
; Check that a slot with a negative declared offset still round-trips
; independently of a neighbouring slot.
function %slot_offset_negative(i64, i64) -> i64, i64 {
ss0 = explicit_slot 8
ss1 = explicit_slot 8, offset -8
block0(v0: i64, v1: i64):
stack_store.i64 v0, ss0
stack_store.i64 v1, ss1
v2 = stack_load.i64 ss0
v3 = stack_load.i64 ss1
return v2, v3
}
; run: %slot_offset_negative(0, 1) == [0, 1]
; run: %slot_offset_negative(2, 3) == [2, 3]
; Round trip near the end of a very large (1 MiB) slot.
function %huge_slots(i64) -> i64 {
ss0 = explicit_slot 1048576 ; 1MB Slot
block0(v0: i64):
stack_store.i64 v0, ss0+1048568 ; Store at 1MB - 8bytes
v1 = stack_load.i64 ss0+1048568
return v1
}
; run: %huge_slots(0) == 0
; run: %huge_slots(1) == 1
; run: %huge_slots(-1) == -1

View File

@@ -1,8 +1,7 @@
//! Provides functionality for compiling and running CLIF IR for `run` tests.
use core::{mem, ptr};
use core::mem;
use cranelift_codegen::binemit::{NullRelocSink, NullStackMapSink, NullTrapSink};
use cranelift_codegen::data_value::DataValue;
use cranelift_codegen::ir::immediates::{Ieee32, Ieee64};
use cranelift_codegen::ir::{condcodes::IntCC, Function, InstBuilder, Signature, Type};
use cranelift_codegen::isa::{BackendVariant, TargetIsa};
use cranelift_codegen::{ir, settings, CodegenError, Context};
@@ -204,7 +203,7 @@ impl UnboxedValues {
param.value_type
);
unsafe {
Self::write_value_to(arg, slot);
arg.write_value_to(slot);
}
}
@@ -224,50 +223,12 @@ impl UnboxedValues {
// Extract the returned values from this vector.
for (slot, param) in self.0.iter().zip(&signature.returns) {
let value = unsafe { Self::read_value_from(slot, param.value_type) };
let value = unsafe { DataValue::read_value_from(slot, param.value_type) };
returns.push(value);
}
returns
}
/// Write a [DataValue] to a memory location.
///
/// # Safety
/// `p` must be non-null, suitably aligned, and valid for writes of the
/// size of the matched variant (up to 16 bytes for `V128`).
unsafe fn write_value_to(v: &DataValue, p: *mut u128) {
match v {
DataValue::B(b) => ptr::write(p as *mut bool, *b),
DataValue::I8(i) => ptr::write(p as *mut i8, *i),
DataValue::I16(i) => ptr::write(p as *mut i16, *i),
DataValue::I32(i) => ptr::write(p as *mut i32, *i),
DataValue::I64(i) => ptr::write(p as *mut i64, *i),
DataValue::F32(f) => ptr::write(p as *mut Ieee32, *f),
DataValue::F64(f) => ptr::write(p as *mut Ieee64, *f),
DataValue::V128(b) => ptr::write(p as *mut [u8; 16], *b),
// Other DataValue variants are not supported by this writer.
_ => unimplemented!(),
}
}
/// Read a [DataValue] from a memory location using a given [Type].
///
/// # Safety
/// `p` must be non-null, suitably aligned, and valid for reads of the
/// size implied by `ty` (up to 16 bytes for 128-bit vectors).
unsafe fn read_value_from(p: *const u128, ty: Type) -> DataValue {
match ty {
ir::types::I8 => DataValue::I8(ptr::read(p as *const i8)),
ir::types::I16 => DataValue::I16(ptr::read(p as *const i16)),
ir::types::I32 => DataValue::I32(ptr::read(p as *const i32)),
ir::types::I64 => DataValue::I64(ptr::read(p as *const i64)),
ir::types::F32 => DataValue::F32(ptr::read(p as *const Ieee32)),
ir::types::F64 => DataValue::F64(ptr::read(p as *const Ieee64)),
// Booleans: read the type's full byte width and treat non-zero as true.
_ if ty.is_bool() => match ty.bytes() {
1 => DataValue::B(ptr::read(p as *const i8) != 0),
2 => DataValue::B(ptr::read(p as *const i16) != 0),
4 => DataValue::B(ptr::read(p as *const i32) != 0),
8 => DataValue::B(ptr::read(p as *const i64) != 0),
_ => unimplemented!(),
},
// Only 128-bit vectors are supported; other vector widths are not.
_ if ty.is_vector() && ty.bytes() == 16 => {
DataValue::V128(ptr::read(p as *const [u8; 16]))
}
_ => unimplemented!(),
}
}
}
/// Compile a [Function] to its executable bytes in memory.