cranelift: Add stack support to the interpreter with virtual addresses (#3187)

* cranelift: Add stack support to the interpreter

We also change the approach for heap loads and stores.

Previously we would use the offset as the address into the heap. However,
this approach does not allow the load/store instructions to be used to
read/write from both the heap and the stack.

This commit changes the addressing mechanism of the interpreter. We now
return the real addresses from the addressing instructions
(stack_addr/heap_addr), and instead check if the address passed into
the load/store instructions points to an area in the heap or the stack.

* cranelift: Add virtual addresses to cranelift interpreter

Adds a virtual addressing scheme, which was discussed as a better
alternative to returning the real addresses.

The virtual addresses are split into 4 regions (stack, heap, tables and
global values), and the address itself is composed of an `entry` field
and an `offset` field. In general the `entry` field corresponds to the
instance of the resource (e.g. table5 is entry 5) and the `offset` field
is a byte offset inside that entry.

There is one exception to this which is the stack, where due to only
having one stack, the whole address is an offset field.

The number of bits in entry vs offset fields is variable with respect to
the `region` and the address size (32 bits vs 64 bits). This is done
because with 32-bit addresses we would have to compromise on heap size,
or allow only a small number of global values / tables. With 64-bit
addresses we do not have to make this compromise, but we still need to
support 32-bit addresses as well.

* cranelift: Remove interpreter trap codes

* cranelift: Calculate frame_offset when entering or exiting a frame

* cranelift: Add safe read/write interface to DataValue

* cranelift: DataValue write full 128bit slot for booleans

* cranelift: Use DataValue accessors for trampoline.
This commit is contained in:
Afonso Bordado
2021-08-24 17:29:11 +01:00
committed by GitHub
parent f4ff7c350a
commit 2776074dfc
13 changed files with 1094 additions and 157 deletions

View File

@@ -0,0 +1,85 @@
test interpret
; Check that an address produced by a 32-bit stack_addr plus iadd_imm
; aliases the same slot bytes as stack_store/stack_load at ss0+8.
function %stack_addr_iadd(i64) -> b1 {
ss0 = explicit_slot 16
block0(v0: i64):
v1 = stack_addr.i32 ss0
v2 = iadd_imm.i32 v1, 8
stack_store.i64 v0, ss0+8
v3 = load.i64 v2
v5 = iadd_imm.i64 v0, 20
store.i64 v5, v2
v6 = stack_load.i64 ss0+8
; v3 must observe the stack_store; v6 must observe the store through v2.
v7 = icmp eq v0, v3
v8 = icmp eq v5, v6
v9 = band v7, v8
return v9
}
; run: %stack_addr_iadd(0) == true
; run: %stack_addr_iadd(1) == true
; run: %stack_addr_iadd(-1) == true
; Check that 32-bit stack addresses (with and without a constant offset)
; alias the corresponding stack_store/stack_load accesses.
function %stack_addr_32(i64) -> b1 {
ss0 = explicit_slot 24
block0(v0: i64):
v1 = stack_addr.i32 ss0
stack_store.i64 v0, ss0
v2 = load.i64 v1
v3 = icmp eq v0, v2
v4 = stack_addr.i32 ss0+8
store.i64 v0, v4
v5 = stack_load.i64 ss0+8
v6 = icmp eq v0, v5
; Third 8-byte lane: store and load both through the computed address.
v7 = stack_addr.i32 ss0+16
store.i64 v0, v7
v8 = load.i64 v7
v9 = icmp eq v0, v8
v10 = band v3, v6
v11 = band v10, v9
return v11
}
; run: %stack_addr_32(0) == true
; run: %stack_addr_32(1) == true
; run: %stack_addr_32(-1) == true
; Check that a 32-bit and a 64-bit stack_addr of the same slot+offset
; refer to the same memory: a store through one is visible via the other.
function %addr32_64(i64) -> b1 {
ss0 = explicit_slot 16
block0(v0: i64):
v1 = stack_addr.i32 ss0+8
v2 = stack_addr.i64 ss0+8
store.i64 v0, v1
v3 = load.i64 v2
v4 = icmp eq v3, v0
return v4
}
; run: %addr32_64(0) == true
; run: %addr32_64(1) == true
; run: %addr32_64(-1) == true
; Check that two distinct stack slots yield distinct addresses.
function %multi_slot_different_addrs() -> b1 {
ss0 = explicit_slot 8
ss1 = explicit_slot 8
block0:
v0 = stack_addr.i32 ss0
v1 = stack_addr.i32 ss1
v2 = icmp ne v0, v1
return v2
}
; run: %multi_slot_different_addrs() == true

View File

@@ -0,0 +1,56 @@
test interpret
test run
target x86_64 machinst
target s390x
target aarch64
; 64-bit variant: a stack_addr plus iadd_imm must alias the same slot
; bytes as stack_store/stack_load at ss0+8.
function %stack_addr_iadd(i64) -> b1 {
ss0 = explicit_slot 16
block0(v0: i64):
v1 = stack_addr.i64 ss0
v2 = iadd_imm.i64 v1, 8
stack_store.i64 v0, ss0+8
v3 = load.i64 v2
v5 = iadd_imm.i64 v0, 20
store.i64 v5, v2
v6 = stack_load.i64 ss0+8
; v3 must observe the stack_store; v6 must observe the store through v2.
v7 = icmp eq v0, v3
v8 = icmp eq v5, v6
v9 = band v7, v8
return v9
}
; run: %stack_addr_iadd(0) == true
; run: %stack_addr_iadd(1) == true
; run: %stack_addr_iadd(-1) == true
; Check that 64-bit stack addresses (with and without a constant offset)
; alias the corresponding stack_store/stack_load accesses.
function %stack_addr_64(i64) -> b1 {
ss0 = explicit_slot 24
block0(v0: i64):
v1 = stack_addr.i64 ss0
stack_store.i64 v0, ss0
v2 = load.i64 v1
v3 = icmp eq v0, v2
v4 = stack_addr.i64 ss0+8
store.i64 v0, v4
v5 = stack_load.i64 ss0+8
v6 = icmp eq v0, v5
; Third 8-byte lane: store and load both through the computed address.
v7 = stack_addr.i64 ss0+16
store.i64 v0, v7
v8 = load.i64 v7
v9 = icmp eq v0, v8
v10 = band v3, v6
v11 = band v10, v9
return v11
}
; run: %stack_addr_64(0) == true
; run: %stack_addr_64(1) == true
; run: %stack_addr_64(-1) == true

View File

@@ -0,0 +1,130 @@
test interpret
test run
target x86_64 machinst
target s390x
target aarch64
; Simplest round trip: store a value to a slot and load it back.
function %stack_simple(i64) -> i64 {
ss0 = explicit_slot 8
block0(v0: i64):
stack_store.i64 v0, ss0
v1 = stack_load.i64 ss0
return v1
}
; run: %stack_simple(0) == 0
; run: %stack_simple(1) == 1
; run: %stack_simple(-1) == -1
; Round trip through a slot declared with an explicit slot offset.
function %slot_offset(i64) -> i64 {
ss0 = explicit_slot 8, offset 8
block0(v0: i64):
stack_store.i64 v0, ss0
v1 = stack_load.i64 ss0
return v1
}
; run: %slot_offset(0) == 0
; run: %slot_offset(1) == 1
; run: %slot_offset(-1) == -1
; Round trip at a non-zero byte offset within a slot.
function %stack_offset(i64) -> i64 {
ss0 = explicit_slot 16
block0(v0: i64):
stack_store.i64 v0, ss0+8
v1 = stack_load.i64 ss0+8
return v1
}
; run: %stack_offset(0) == 0
; run: %stack_offset(1) == 1
; run: %stack_offset(-1) == -1
; Round trip at an unaligned offset (8-byte access at +3 in an 11-byte slot).
function %offset_unaligned(i64) -> i64 {
ss0 = explicit_slot 11
block0(v0: i64):
stack_store.i64 v0, ss0+3
v1 = stack_load.i64 ss0+3
return v1
}
; run: %offset_unaligned(0) == 0
; run: %offset_unaligned(1) == 1
; run: %offset_unaligned(-1) == -1
; Two slots used independently; their loaded values are summed.
function %multi_slot_stack(i64, i64) -> i64 {
ss0 = explicit_slot 8
ss1 = explicit_slot 8
block0(v0: i64, v1: i64):
stack_store.i64 v0, ss0
stack_store.i64 v1, ss1
v2 = stack_load.i64 ss0
v3 = stack_load.i64 ss1
v4 = iadd.i64 v2, v3
return v4
}
; run: %multi_slot_stack(0, 1) == 1
; run: %multi_slot_stack(1, 2) == 3
; Check that a write to a 1-byte slot does not clobber the adjacent
; 8-byte slot, and vice versa.
function %multi_slot_out_of_bounds_writes(i8, i64) -> i8, i64 {
ss0 = explicit_slot 1
ss1 = explicit_slot 8
block0(v0: i8, v1: i64):
stack_store.i8 v0, ss0
stack_store.i64 v1, ss1
v2 = stack_load.i8 ss0
v3 = stack_load.i64 ss1
return v2, v3
}
; run: %multi_slot_out_of_bounds_writes(10, 1) == [10, 1]
; run: %multi_slot_out_of_bounds_writes(0, 2) == [0, 2]
; Check that writes to an offset slot and a neighbouring slot do not
; interfere with each other.
function %multi_slot_offset_writes(i8, i64) -> i8, i64 {
ss0 = explicit_slot 8, offset 8
ss1 = explicit_slot 8
block0(v0: i8, v1: i64):
stack_store.i8 v0, ss0
stack_store.i64 v1, ss1
v2 = stack_load.i8 ss0
v3 = stack_load.i64 ss1
return v2, v3
}
; run: %multi_slot_offset_writes(0, 1) == [0, 1]
; run: %multi_slot_offset_writes(1, 2) == [1, 2]
; Check that a slot with a negative declared offset still round-trips
; independently of a neighbouring slot.
function %slot_offset_negative(i64, i64) -> i64, i64 {
ss0 = explicit_slot 8
ss1 = explicit_slot 8, offset -8
block0(v0: i64, v1: i64):
stack_store.i64 v0, ss0
stack_store.i64 v1, ss1
v2 = stack_load.i64 ss0
v3 = stack_load.i64 ss1
return v2, v3
}
; run: %slot_offset_negative(0, 1) == [0, 1]
; run: %slot_offset_negative(2, 3) == [2, 3]
; Round trip near the end of a very large (1 MiB) slot.
function %huge_slots(i64) -> i64 {
ss0 = explicit_slot 1048576 ; 1MB Slot
block0(v0: i64):
stack_store.i64 v0, ss0+1048568 ; Store at 1MB - 8bytes
v1 = stack_load.i64 ss0+1048568
return v1
}
; run: %huge_slots(0) == 0
; run: %huge_slots(1) == 1
; run: %huge_slots(-1) == -1

View File

@@ -1,8 +1,7 @@
//! Provides functionality for compiling and running CLIF IR for `run` tests.
use core::{mem, ptr};
use core::mem;
use cranelift_codegen::binemit::{NullRelocSink, NullStackMapSink, NullTrapSink};
use cranelift_codegen::data_value::DataValue;
use cranelift_codegen::ir::immediates::{Ieee32, Ieee64};
use cranelift_codegen::ir::{condcodes::IntCC, Function, InstBuilder, Signature, Type};
use cranelift_codegen::isa::{BackendVariant, TargetIsa};
use cranelift_codegen::{ir, settings, CodegenError, Context};
@@ -204,7 +203,7 @@ impl UnboxedValues {
param.value_type
);
unsafe {
Self::write_value_to(arg, slot);
arg.write_value_to(slot);
}
}
@@ -224,50 +223,12 @@ impl UnboxedValues {
// Extract the returned values from this vector.
for (slot, param) in self.0.iter().zip(&signature.returns) {
let value = unsafe { Self::read_value_from(slot, param.value_type) };
let value = unsafe { DataValue::read_value_from(slot, param.value_type) };
returns.push(value);
}
returns
}
/// Write a [DataValue] to a memory location.
///
/// # Safety
/// `p` must be non-null, suitably aligned, and valid for writes of the
/// size of the matched variant (up to 16 bytes for `V128`).
unsafe fn write_value_to(v: &DataValue, p: *mut u128) {
match v {
DataValue::B(b) => ptr::write(p as *mut bool, *b),
DataValue::I8(i) => ptr::write(p as *mut i8, *i),
DataValue::I16(i) => ptr::write(p as *mut i16, *i),
DataValue::I32(i) => ptr::write(p as *mut i32, *i),
DataValue::I64(i) => ptr::write(p as *mut i64, *i),
DataValue::F32(f) => ptr::write(p as *mut Ieee32, *f),
DataValue::F64(f) => ptr::write(p as *mut Ieee64, *f),
DataValue::V128(b) => ptr::write(p as *mut [u8; 16], *b),
// Other DataValue variants are not supported by this writer.
_ => unimplemented!(),
}
}
/// Read a [DataValue] from a memory location using a given [Type].
///
/// # Safety
/// `p` must be non-null, suitably aligned, and valid for reads of the
/// size implied by `ty` (up to 16 bytes for 128-bit vectors).
unsafe fn read_value_from(p: *const u128, ty: Type) -> DataValue {
match ty {
ir::types::I8 => DataValue::I8(ptr::read(p as *const i8)),
ir::types::I16 => DataValue::I16(ptr::read(p as *const i16)),
ir::types::I32 => DataValue::I32(ptr::read(p as *const i32)),
ir::types::I64 => DataValue::I64(ptr::read(p as *const i64)),
ir::types::F32 => DataValue::F32(ptr::read(p as *const Ieee32)),
ir::types::F64 => DataValue::F64(ptr::read(p as *const Ieee64)),
// Booleans: read the type's full byte width and treat non-zero as true.
_ if ty.is_bool() => match ty.bytes() {
1 => DataValue::B(ptr::read(p as *const i8) != 0),
2 => DataValue::B(ptr::read(p as *const i16) != 0),
4 => DataValue::B(ptr::read(p as *const i32) != 0),
8 => DataValue::B(ptr::read(p as *const i64) != 0),
_ => unimplemented!(),
},
// Only 128-bit vectors are supported; other vector widths are not.
_ if ty.is_vector() && ty.bytes() == 16 => {
DataValue::V128(ptr::read(p as *const [u8; 16]))
}
_ => unimplemented!(),
}
}
}
/// Compile a [Function] to its executable bytes in memory.