Do call_indirect properly by accessing the table section

Jef
2019-01-18 13:01:42 +01:00
parent d06be92a4e
commit e57cec3b3f
5 changed files with 198 additions and 90 deletions
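In outline: instantiation now builds a table of `RuntimeFunc { sig_hash, func_start }` entries from the element section, and the code emitted for `call_indirect` compares the signature hash the call site expects against the entry's stored hash before calling through it. A minimal sketch in plain Rust of the check the generated code performs (the free function and the panic are illustrative stand-ins, not the commit's API):

#[repr(C)]
pub struct RuntimeFunc {
    sig_hash: u32,          // quickhash(func_type) truncated to 32 bits
    func_start: *const u8,  // entry point of the compiled function
}

// `expected` stands for the `quickhash(callee_ty) as u32` constant baked into
// the call site; the panic stands in for the generated trap.
fn indirect_dispatch(table: &[RuntimeFunc], callee: usize, expected: u32) -> *const u8 {
    let entry = &table[callee]; // slice indexing bounds-checks; the diff shows no such check
    if entry.sig_hash != expected {
        panic!("trap: indirect call signature mismatch");
    }
    entry.func_start
}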

View File

@@ -9,7 +9,7 @@ use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, Executab
use error::Error;
use std::{iter, mem};
use module::VmCtx;
use module::{RuntimeFunc, VmCtx};
/// Size of a pointer on the target in bytes.
const WORD_SIZE: u32 = 8;
@@ -45,6 +45,62 @@ const R14: u8 = 14;
const R15: u8 = 15;
const NUM_GPRS: u8 = 16;
extern "sysv64" fn println(len: u64, args: *const u8) {
println!("{}", unsafe {
std::str::from_utf8_unchecked(std::slice::from_raw_parts(args, len as usize))
});
}
macro_rules! asm_println {
($asm:expr, $($args:tt)*) => {{
use std::mem;
let mut args = format!($($args)*).into_bytes();
let len = args.len();
let ptr = args.as_mut_ptr();
mem::forget(args);
dynasm!($asm
; push rdi
; push rsi
; push rdx
; push rcx
; push r8
; push r9
; push r10
; push r11
; mov rax, QWORD println as *const u8 as i64
; mov rdi, QWORD len as i64
; mov rsi, QWORD ptr as i64
; mov r11, rsp
; and r11, 0b1111
; test r11, r11
; jnz >with_adjusted_stack_ptr
; call rax
; jmp >pop_rest
; with_adjusted_stack_ptr:
; push 1
; call rax
; pop r11
; pop_rest:
; pop r11
; pop r10
; pop r9
; pop r8
; pop rcx
; pop rdx
; pop rsi
; pop rdi
);
}}
}
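`asm_println!` is debugging scaffolding: it saves the caller-saved registers, ensures `rsp` is 16-byte aligned as the SysV ABI requires at a call site, calls the `println` helper above, and restores everything. The `push 1` is a dummy 8-byte slot, popped again after the call, and the `format!` buffer is deliberately leaked with `mem::forget` so the pointer embedded in the generated code stays valid. The branch condition, restated as a plain-Rust sketch:

// rsp must be a multiple of 16 at the call; after the register pushes it can
// be off by 8 depending on the surrounding frame, and one extra 8-byte push
// fixes that case.
fn dummy_slots_needed(rsp: u64) -> u64 {
    if rsp % 16 != 0 { 1 } else { 0 }
}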
impl GPRs {
fn take(&mut self) -> GPR {
let lz = self.bits.trailing_zeros();
@@ -1381,7 +1437,7 @@ impl Context<'_> {
Value::Local(loc) => StackValue::Local(loc),
Value::Immediate(i) => StackValue::Immediate(i),
Value::Temp(gpr) => {
if self.block_state.regs.free_scratch() >= 2 {
if self.block_state.regs.free_scratch() >= 3 {
StackValue::Temp(gpr)
} else {
self.block_state.depth.reserve(1);
@@ -1489,7 +1545,6 @@ impl Context<'_> {
} else {
(self.block_state.regs.take_scratch_gpr(), true)
};
let offset = self.adjusted_offset(offset);
dynasm!(self.asm
; mov Rq(reg), [rsp + offset]
);
@@ -2088,54 +2143,63 @@ impl Context<'_> {
self.push(Value::Temp(RAX));
}
pub fn call_indirect(
&mut self,
valid_indexes: impl IntoIterator<Item = u32>,
arg_arity: u32,
return_arity: u32,
) {
pub fn call_indirect(&mut self, signature_hash: u32, arg_arity: u32, return_arity: u32) {
debug_assert!(
return_arity == 0 || return_arity == 1,
"We don't support multiple return yet"
);
let callee = self.pop();
let (callee, callee_needs_release) = self.into_reg(callee);
let callee = self.into_temp_reg(callee);
let vmctx = StackValue::Local(self.block_state.locals.vmctx_index());
let count = self.block_state.stack.len();
let vmctx_idx = self.block_state.locals.vmctx_index();
let (vmctx_reg, should_release_vmctx_reg) = self.into_reg(Value::Local(vmctx_idx));
let label = self.create_label();
let index_reg = self.block_state.regs.take_scratch_gpr();
let signature_matches = self.create_label();
let temp0 = self.block_state.regs.take_scratch_gpr();
let temp1 = self.block_state.regs.take_scratch_gpr();
// TODO: Generate faster check using bitsets like GCC does?
for index in valid_indexes {
dynasm!(self.asm
; lea Rq(index_reg), [=>self.func_starts[index as usize].1]
; cmp Rd(callee), index as i32
; je =>label.0
; imul Rq(callee), Rq(callee), mem::size_of::<RuntimeFunc>() as i32
; mov Rq(temp0), [Rq(vmctx_reg) + VmCtx::offset_of_funcs_ptr() as i32]
; mov Rd(temp1), [
Rq(temp0) +
Rq(callee) +
RuntimeFunc::offset_of_sig_hash() as i32
]
; cmp Rd(temp1), signature_hash as i32
; je =>signature_matches.0
);
}
self.trap();
self.define_label(label);
if callee_needs_release {
self.block_state.regs.release_scratch_gpr(callee);
}
self.define_label(signature_matches);
self.block_state.regs.release_scratch_gpr(temp1);
// TODO: I believe that this can't cause quadratic runtime but I'm not
// certain.
let vmctx = StackValue::Local(vmctx_idx);
let count = self.block_state.stack.len();
self.block_state
.stack
.insert(count - arg_arity as usize, vmctx);
let cleanup = self.pass_outgoing_args(arg_arity + 1, return_arity, true);
dynasm!(self.asm
; call Rq(index_reg)
; call QWORD [
Rq(temp0) +
Rq(callee) +
RuntimeFunc::offset_of_func_start() as i32
]
);
self.block_state.regs.release_scratch_gpr(index_reg);
self.block_state.regs.release_scratch_gpr(temp0);
self.block_state.regs.release_scratch_gpr(callee);
if should_release_vmctx_reg {
self.block_state.regs.release_scratch_gpr(vmctx_reg);
}
self.post_call_cleanup(cleanup);
self.push_function_return(return_arity);
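The emitted sequence is: scale the dynamic table index by `size_of::<RuntimeFunc>()` (the `imul`), load the table base out of the vmctx, compare the entry's `sig_hash` field against the expected hash, trap on mismatch, and otherwise call through the entry's `func_start` field with a memory operand. A hedged sketch of the same address arithmetic in Rust, reusing the diff's `RuntimeFunc` accessors:

use std::mem;

// Returns pointers to the two fields the generated code reads. No bounds
// check on `callee` appears in this hunk, and none is sketched here.
unsafe fn entry_fields(table_base: *const u8, callee: usize) -> (*const u32, *const *const u8) {
    let entry = table_base.add(callee * mem::size_of::<RuntimeFunc>());
    (
        entry.add(RuntimeFunc::offset_of_sig_hash()) as *const u32,
        entry.add(RuntimeFunc::offset_of_func_start()) as *const *const u8,
    )
}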
@@ -2176,7 +2240,12 @@ impl Context<'_> {
let arguments = arguments + 1;
let (reg_args, locals_in_gprs) =
ARGS_IN_GPRS.split_at((arguments as usize).min(ARGS_IN_GPRS.len()));
let reg_locals = &locals_in_gprs[..(locals as usize).min(locals_in_gprs.len())];
let (reg_locals, temps) =
locals_in_gprs.split_at((locals as usize).min(locals_in_gprs.len()));
for temp in temps {
self.block_state.regs.release_scratch_gpr(*temp);
}
// We need space to store the register arguments if we need to call a function
// and overwrite these registers so we add `reg_args.len()`
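This hunk changes the register accounting: argument-passing GPRs needed for neither arguments nor register-resident locals are now released back to the scratch allocator, which is presumably also why the threshold above moves from 2 to 3 free scratch registers (`call_indirect` now takes three: the callee temp plus `temp0` and `temp1`). A sketch of the partitioning with a hypothetical SysV-order register list:

// Hypothetical register order; the diff doesn't show ARGS_IN_GPRS' contents.
const ARGS_IN_GPRS: [&str; 6] = ["rdi", "rsi", "rdx", "rcx", "r8", "r9"];

fn partition(arguments: usize, locals: usize) {
    let (reg_args, locals_in_gprs) =
        ARGS_IN_GPRS.split_at(arguments.min(ARGS_IN_GPRS.len()));
    let (reg_locals, temps) =
        locals_in_gprs.split_at(locals.min(locals_in_gprs.len()));
    // New behaviour: `temps` goes back to the scratch pool instead of idling.
    println!("args={:?} locals={:?} freed={:?}", reg_args, reg_locals, temps);
}

For example, `partition(3, 1)` yields args `[rdi, rsi, rdx]`, one register-resident local `rcx`, and frees `r8` and `r9` as scratch.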

View File

@@ -1,6 +1,6 @@
use backend::*;
use error::Error;
use module::FuncTyStore;
use module::{FuncTyStore, quickhash};
use wasmparser::{FunctionBody, Operator, Type};
// TODO: Use own declared `Type` enum.
@@ -447,8 +447,7 @@ pub fn translate(
// TODO: this implementation assumes that this function is locally defined.
ctx.call_indirect(
(0..translation_ctx.func_count() as u32)
.filter(|i| translation_ctx.func_type_index(*i) == index),
quickhash(callee_ty) as u32,
callee_ty.params.len() as u32,
callee_ty.returns.len() as u32,
);
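The shape of the change at the translation site, in plain Rust: previously the compiler enumerated every function whose type index matched and emitted a `cmp`/`je` chain over all of them; now it passes a single hash and the check is one comparison regardless of how many functions share the signature.

// Old: linear in the number of functions with a matching type.
fn old_style_check(callee: u32, valid_indexes: impl IntoIterator<Item = u32>) -> bool {
    valid_indexes.into_iter().any(|i| i == callee)
}

// New: constant-time, against the hash stored in the table entry.
fn new_style_check(entry_sig_hash: u32, expected_sig_hash: u32) -> bool {
    entry_sig_hash == expected_sig_hash
}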

View File

@@ -99,11 +99,11 @@ pub struct TranslatedModule {
types: FuncTyStore,
// TODO: Should we wrap this in a `Mutex` so that calling functions from multiple
// threads doesn't cause data races?
table: Option<(TableType, Vec<RuntimeFunc>)>,
table: Option<(TableType, Vec<u32>)>,
memory: Option<MemoryType>,
}
fn quickhash<H: Hash>(h: H) -> u64 {
pub fn quickhash<H: Hash>(h: H) -> u64 {
let mut hasher = std::collections::hash_map::DefaultHasher::new();
h.hash(&mut hasher);
hasher.finish()
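`quickhash` becomes `pub` because both translation (hashing the expected callee type) and instantiation (hashing each table entry's type) must agree on the hash. Since the emitted check truncates to 32 bits, two distinct signatures can collide with probability about 2^-32; equal types always match. A usage sketch with a stand-in type:

fn main() {
    // Hypothetical stand-in for a function type: (param types, return types).
    let ty = (vec![0u8, 1], vec![0u8]);
    // DefaultHasher is deterministic within a process, so both sides agree.
    assert_eq!(quickhash(&ty) as u32, quickhash(&ty) as u32);
}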
@@ -113,22 +113,41 @@ impl TranslatedModule {
pub fn instantiate(mut self) -> ExecutableModule {
use std::alloc::{self, Layout};
let slice = self
.table
let slice = {
let code_section = self
.translated_code_section
.as_ref()
.expect("We don't currently support a table section without a code section");
let types = &self.types;
self.table
.as_mut()
.map(|&mut (_, ref mut initial)| {
.map(|&mut (_, ref mut idxs)| {
let mut initial = idxs
.iter()
.map(|i| {
let start = code_section.func_start(*i as _);
let ty = types.func_type(*i);
RuntimeFunc {
func_start: start,
sig_hash: quickhash(ty) as u32,
}
})
.collect::<Vec<_>>();
initial.shrink_to_fit();
let out = BoxSlice {
ptr: initial.as_mut_ptr(),
len: initial.len(),
};
mem::forget(mem::replace(initial, Default::default()));
mem::forget(initial);
out
})
.unwrap_or(BoxSlice {
ptr: std::ptr::NonNull::dangling().as_ptr(),
len: 0,
});
})
};
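The `BoxSlice` construction takes ownership of the `Vec`'s buffer and leaks it so `VmCtx` can hold a raw pointer for the program's lifetime, with `shrink_to_fit` first so length and capacity agree. The old code only had a `&mut` to the vector, so it had to `mem::replace` it with an empty one before forgetting; the new code owns `initial` and forgets it directly. The pattern in isolation, as a sketch:

// Assumes the same BoxSlice shape defined later in this file.
struct BoxSlice<T> { len: usize, ptr: *mut T }

fn leak_to_box_slice<T>(mut v: Vec<T>) -> BoxSlice<T> {
    v.shrink_to_fit(); // make capacity match length (in practice)
    let out = BoxSlice { ptr: v.as_mut_ptr(), len: v.len() };
    std::mem::forget(v); // never freed; the raw pointer now owns the buffer
    out
}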
let mem_size = self.memory.map(|m| m.limits.initial).unwrap_or(0) as usize;
let (layout, _mem_offset) = Layout::new::<VmCtx>()
@@ -138,6 +157,10 @@ impl TranslatedModule {
let ctx = if mem_size > 0 || slice.len > 0 {
let ptr = unsafe { alloc::alloc_zeroed(layout) } as *mut VmCtx;
if ptr.is_null() {
alloc::handle_alloc_error(layout);
}
unsafe {
*ptr = VmCtx {
table: slice,
@@ -235,15 +258,23 @@ impl ExecutableModule {
}
}
type FuncRef = unsafe extern "sysv64" fn();
type FuncRef = *const u8;
#[repr(C)]
pub struct RuntimeFunc {
sig_hash: u32,
func_start: FuncRef,
}
#[repr(C)]
impl RuntimeFunc {
pub fn offset_of_sig_hash() -> usize {
offset_of!(Self, sig_hash)
}
pub fn offset_of_func_start() -> usize {
offset_of!(Self, func_start)
}
}
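`offset_of!` isn't shown being imported; it is presumably the memoffset crate's macro. On x86-64 with this `#[repr(C)]` layout the constants the emitted code relies on are fixed. A sanity sketch, not part of the commit:

fn layout_sanity() {
    assert_eq!(RuntimeFunc::offset_of_sig_hash(), 0);   // first field
    assert_eq!(RuntimeFunc::offset_of_func_start(), 8); // u32 + 4 bytes padding
    assert_eq!(std::mem::size_of::<RuntimeFunc>(), 16); // what the imul scales by
}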
struct BoxSlice<T> {
len: usize,
ptr: *mut T,
@@ -283,10 +314,6 @@ pub struct FuncTyStore {
const WASM_PAGE_SIZE: usize = 65_536;
impl FuncTyStore {
pub fn func_count(&self) -> usize {
self.func_ty_indicies.len()
}
pub fn func_type_index(&self, func_idx: u32) -> u32 {
self.func_ty_indicies[func_idx as usize]
}
@@ -311,6 +338,7 @@ pub fn translate(data: &[u8]) -> Result<ExecutableModule, Error> {
pub fn translate_only(data: &[u8]) -> Result<TranslatedModule, Error> {
let mut reader = ModuleReader::new(data)?;
let mut output = TranslatedModule::default();
let mut table = None;
reader.skip_custom_sections()?;
if reader.eof() {
@@ -353,10 +381,12 @@ pub fn translate_only(data: &[u8]) -> Result<TranslatedModule, Error> {
if let SectionCode::Table = section.code {
let tables = section.get_table_section_reader()?;
let tables = translate_sections::table(tables)?;
let mut tables = translate_sections::table(tables)?;
assert!(tables.len() <= 1);
table = tables.drain(..).next();
reader.skip_custom_sections()?;
if reader.eof() {
return Ok(output);
@@ -421,7 +451,12 @@ pub fn translate_only(data: &[u8]) -> Result<TranslatedModule, Error> {
if let SectionCode::Element = section.code {
let elements = section.get_element_section_reader()?;
translate_sections::element(elements)?;
let elements = translate_sections::element(elements)?;
output.table = Some((
table.expect("Element section with no table section"),
elements,
));
reader.skip_custom_sections()?;
if reader.eof() {

View File

@@ -268,24 +268,6 @@ mod op64 {
binop_test!(ge_s, |a, b| if a >= b { 1 } else { 0 }, i32);
}
quickcheck! {
fn relop_eq(a: u32, b: u32) -> bool {
static CODE: &str = r#"
(module
(func (param i32) (param i32) (result i32) (i32.eq (get_local 0) (get_local 1)))
)
"#;
lazy_static! {
static ref TRANSLATED: ExecutableModule = translate_wat(CODE);
}
let out = TRANSLATED.execute_func::<(u32, u32), u32>(0, (a, b)).unwrap();
(a == b) == (out == 1)
}
}
quickcheck! {
fn if_then_else(a: u32, b: u32) -> bool {
const CODE: &str = r#"
@@ -1003,7 +985,7 @@ fn call_indirect() {
(table anyfunc
(elem
$dispatch $fac $fib
$fac $fib
)
)
@@ -1019,7 +1001,7 @@ fn call_indirect() {
(get_local 0)
(call_indirect (type $over-i64)
(i64.sub (get_local 0) (i64.const 1))
(i32.const 1)
(i32.const 0)
)
)
)
@@ -1033,11 +1015,11 @@ fn call_indirect() {
(i64.add
(call_indirect (type $over-i64)
(i64.sub (get_local 0) (i64.const 2))
(i32.const 2)
(i32.const 1)
)
(call_indirect (type $over-i64)
(i64.sub (get_local 0) (i64.const 1))
(i32.const 2)
(i32.const 1)
)
)
)
@@ -1051,11 +1033,11 @@ fn call_indirect() {
module.disassemble();
assert_eq!(
module.execute_func::<(i32, i64), i64>(0, (1, 10)).unwrap(),
module.execute_func::<(i32, i64), i64>(0, (0, 10)).unwrap(),
3628800
);
assert_eq!(
module.execute_func::<(i32, i64), i64>(0, (2, 10)).unwrap(),
module.execute_func::<(i32, i64), i64>(0, (1, 10)).unwrap(),
89
);
}

View File

@@ -37,10 +37,7 @@ pub fn function(functions: FunctionSectionReader) -> Result<Vec<u32>, Error> {
/// Parses the Table section of the wasm module.
pub fn table(tables: TableSectionReader) -> Result<Vec<TableType>, Error> {
tables
.into_iter()
.map(|r| r.map_err(Into::into))
.collect()
tables.into_iter().map(|r| r.map_err(Into::into)).collect()
}
/// Parses the Memory section of the wasm module.
@@ -74,11 +71,37 @@ pub fn start(_index: u32) -> Result<(), Error> {
}
/// Parses the Element section of the wasm module.
pub fn element(elements: ElementSectionReader) -> Result<(), Error> {
pub fn element(elements: ElementSectionReader) -> Result<Vec<u32>, Error> {
let mut out = Vec::new();
for entry in elements {
entry?; // TODO
let entry = entry?;
assert_eq!(entry.table_index, 0);
let offset = {
let mut reader = entry.init_expr.get_operators_reader();
let out = match reader.read() {
Ok(Operator::I32Const { value }) => value,
_ => panic!("We only support i32.const table init expressions right now"),
};
//reader.ensure_end()?;
out
};
assert_eq!(offset, out.len() as i32);
let elements = entry
.items
.get_items_reader()?
.into_iter()
.collect::<Result<Vec<_>, _>>()?;
out.extend(elements);
}
Ok(())
Ok(out)
}
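The rewritten parser only accepts element segments that tile the table contiguously from offset 0; the `assert_eq!` on `offset` enforces that each segment starts exactly where the previous one ended. A worked example of input that passes, with hypothetical function indices:

fn main() {
    let mut out: Vec<u32> = Vec::new();
    // Each pair is (i32.const offset, function indices in the segment).
    for (offset, items) in vec![(0i32, vec![5u32, 7]), (2, vec![9])] {
        assert_eq!(offset, out.len() as i32); // must start where the last ended
        out.extend(items);
    }
    assert_eq!(out, [5, 7, 9]);
}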
/// Parses the Code section of the wasm module.