Do call_indirect properly, by accessing the table section

Author: Jef
Date:   2019-01-18 13:01:42 +01:00
parent d06be92a4e
commit e57cec3b3f

5 changed files with 198 additions and 90 deletions

File 1 of 5

@@ -9,7 +9,7 @@ use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, Executab
 use error::Error;
 use std::{iter, mem};
-use module::VmCtx;
+use module::{RuntimeFunc, VmCtx};

 /// Size of a pointer on the target in bytes.
 const WORD_SIZE: u32 = 8;
@@ -45,6 +45,62 @@ const R14: u8 = 14;
 const R15: u8 = 15;

 const NUM_GPRS: u8 = 16;

+extern "sysv64" fn println(len: u64, args: *const u8) {
+    println!("{}", unsafe {
+        std::str::from_utf8_unchecked(std::slice::from_raw_parts(args, len as usize))
+    });
+}
+
+macro_rules! asm_println {
+    ($asm:expr, $($args:tt)*) => {{
+        use std::mem;
+
+        let mut args = format!($($args)*).into_bytes();
+        let len = args.len();
+        let ptr = args.as_mut_ptr();
+        mem::forget(args);
+
+        dynasm!($asm
+            ; push rdi
+            ; push rsi
+            ; push rdx
+            ; push rcx
+            ; push r8
+            ; push r9
+            ; push r10
+            ; push r11
+
+            ; mov rax, QWORD println as *const u8 as i64
+            ; mov rdi, QWORD len as i64
+            ; mov rsi, QWORD ptr as i64
+            ; mov r11, rsp
+            ; and r11, 0b1111
+            ; test r11, r11
+            ; jnz >with_adjusted_stack_ptr
+
+            ; call rax
+            ; jmp >pop_rest
+
+            ; with_adjusted_stack_ptr:
+            ; push 1
+            ; call rax
+            ; pop r11
+
+            ; pop_rest:
+            ; pop r11
+            ; pop r10
+            ; pop r9
+            ; pop r8
+            ; pop rcx
+            ; pop rdx
+            ; pop rsi
+            ; pop rdi
+        );
+    }}
+}
+
 impl GPRs {
     fn take(&mut self) -> GPR {
         let lz = self.bits.trailing_zeros();
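The `rsp` juggling in `asm_println` is about the System V AMD64 calling convention, which requires the stack pointer to be 16-byte aligned at every `call`. All the pushes here are 8 bytes, so `rsp & 0b1111` can only be 0 or 8; the lone `push 1` repairs the misaligned case and is popped again before the saved registers are restored. A plain-Rust model of just that adjustment (illustrative only, not the emitted code):

    // Model of asm_println's alignment fix-up: pushes are 8 bytes, so rsp
    // stays 8-aligned and `rsp & 0b1111` is either 0 or 8. One dummy push
    // restores 16-byte alignment before the call and is popped right after.
    fn rsp_at_call(rsp: u64) -> u64 {
        debug_assert_eq!(rsp % 8, 0);
        let rsp = if rsp & 0b1111 != 0 { rsp - 8 } else { rsp }; // the `push 1`
        assert_eq!(rsp % 16, 0, "sysv64 requires a 16-byte aligned rsp at `call`");
        rsp
    }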
@@ -1381,7 +1437,7 @@ impl Context<'_> {
             Value::Local(loc) => StackValue::Local(loc),
             Value::Immediate(i) => StackValue::Immediate(i),
             Value::Temp(gpr) => {
-                if self.block_state.regs.free_scratch() >= 2 {
+                if self.block_state.regs.free_scratch() >= 3 {
                     StackValue::Temp(gpr)
                 } else {
                     self.block_state.depth.reserve(1);
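The threshold here moves from two free scratch registers to three, presumably to leave headroom for the new `call_indirect` lowering below, which holds three scratch GPRs live at once (the callee index plus `temp0` and `temp1`).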
@@ -1489,7 +1545,6 @@ impl Context<'_> {
         } else {
             (self.block_state.regs.take_scratch_gpr(), true)
         };
-        let offset = self.adjusted_offset(offset);
         dynasm!(self.asm
             ; mov Rq(reg), [rsp + offset]
         );
@@ -2088,54 +2143,63 @@ impl Context<'_> {
         self.push(Value::Temp(RAX));
     }

-    pub fn call_indirect(
-        &mut self,
-        valid_indexes: impl IntoIterator<Item = u32>,
-        arg_arity: u32,
-        return_arity: u32,
-    ) {
+    pub fn call_indirect(&mut self, signature_hash: u32, arg_arity: u32, return_arity: u32) {
         debug_assert!(
             return_arity == 0 || return_arity == 1,
             "We don't support multiple return yet"
         );

         let callee = self.pop();
-        let (callee, callee_needs_release) = self.into_reg(callee);
-        let vmctx = StackValue::Local(self.block_state.locals.vmctx_index());
-        let count = self.block_state.stack.len();
-        let label = self.create_label();
-        let index_reg = self.block_state.regs.take_scratch_gpr();
+        let callee = self.into_temp_reg(callee);
+        let vmctx_idx = self.block_state.locals.vmctx_index();
+        let (vmctx_reg, should_release_vmctx_reg) = self.into_reg(Value::Local(vmctx_idx));
+        let signature_matches = self.create_label();
+        let temp0 = self.block_state.regs.take_scratch_gpr();
+        let temp1 = self.block_state.regs.take_scratch_gpr();

-        // TODO: Generate faster check using bitsets like GCC does?
-        for index in valid_indexes {
-            dynasm!(self.asm
-                ; lea Rq(index_reg), [=>self.func_starts[index as usize].1]
-                ; cmp Rd(callee), index as i32
-                ; je =>label.0
-            );
-        }
+        dynasm!(self.asm
+            ; imul Rq(callee), Rq(callee), mem::size_of::<RuntimeFunc>() as i32
+            ; mov Rq(temp0), [Rq(vmctx_reg) + VmCtx::offset_of_funcs_ptr() as i32]
+            ; mov Rd(temp1), [
+                Rq(temp0) +
+                Rq(callee) +
+                RuntimeFunc::offset_of_sig_hash() as i32
+            ]
+            ; cmp Rd(temp1), signature_hash as i32
+            ; je =>signature_matches.0
+        );

         self.trap();
-        self.define_label(label);

-        if callee_needs_release {
-            self.block_state.regs.release_scratch_gpr(callee);
-        }
+        self.define_label(signature_matches);
+        self.block_state.regs.release_scratch_gpr(temp1);

         // TODO: I believe that this can't cause quadratic runtime but I'm not
         // certain.
+        let vmctx = StackValue::Local(vmctx_idx);
+        let count = self.block_state.stack.len();
         self.block_state
             .stack
             .insert(count - arg_arity as usize, vmctx);

         let cleanup = self.pass_outgoing_args(arg_arity + 1, return_arity, true);

         dynasm!(self.asm
-            ; call Rq(index_reg)
+            ; call QWORD [
+                Rq(temp0) +
+                Rq(callee) +
+                RuntimeFunc::offset_of_func_start() as i32
+            ]
         );

-        self.block_state.regs.release_scratch_gpr(index_reg);
+        self.block_state.regs.release_scratch_gpr(temp0);
+        self.block_state.regs.release_scratch_gpr(callee);
+        if should_release_vmctx_reg {
+            self.block_state.regs.release_scratch_gpr(vmctx_reg);
+        }

         self.post_call_cleanup(cleanup);
         self.push_function_return(return_arity);
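Taken together, the emitted instructions amount to the following check against the `RuntimeFunc` table hanging off the `VmCtx`. This is a minimal Rust model of that logic, not the real codegen; the panic stands in for `self.trap()` and the names are made up:

    #[repr(C)]
    struct RuntimeFunc {
        sig_hash: u32,
        func_start: *const u8,
    }

    unsafe fn call_indirect_model(
        funcs: *const RuntimeFunc, // vmctx's function table (the `mov Rq(temp0), ...`)
        callee: usize,             // dynamic table index popped from the wasm stack
        signature_hash: u32,       // hash of the expected type, baked in at compile time
    ) {
        // The `imul` scales the index by size_of::<RuntimeFunc>(); the loads
        // use the field offsets exposed by RuntimeFunc::offset_of_*.
        let entry = &*funcs.add(callee);
        if entry.sig_hash != signature_hash {
            panic!("trap: signature mismatch on indirect call"); // self.trap()
        }
        let f: unsafe extern "sysv64" fn() = std::mem::transmute(entry.func_start);
        f(); // `call QWORD [temp0 + callee + offset_of_func_start]`
    }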
@@ -2176,7 +2240,12 @@ impl Context<'_> {
         let arguments = arguments + 1;
         let (reg_args, locals_in_gprs) =
             ARGS_IN_GPRS.split_at((arguments as usize).min(ARGS_IN_GPRS.len()));
-        let reg_locals = &locals_in_gprs[..(locals as usize).min(locals_in_gprs.len())];
+        let (reg_locals, temps) =
+            locals_in_gprs.split_at((locals as usize).min(locals_in_gprs.len()));
+
+        for temp in temps {
+            self.block_state.regs.release_scratch_gpr(*temp);
+        }

         // We need space to store the register arguments if we need to call a function
         // and overwrite these registers so we add `reg_args.len()`

File 2 of 5

@@ -1,6 +1,6 @@
 use backend::*;
 use error::Error;
-use module::FuncTyStore;
+use module::{FuncTyStore, quickhash};
 use wasmparser::{FunctionBody, Operator, Type};

 // TODO: Use own declared `Type` enum.
@@ -447,8 +447,7 @@ pub fn translate(
             // TODO: this implementation assumes that this function is locally defined.
             ctx.call_indirect(
-                (0..translation_ctx.func_count() as u32)
-                    .filter(|i| translation_ctx.func_type_index(*i) == index),
+                quickhash(callee_ty) as u32,
                 callee_ty.params.len() as u32,
                 callee_ty.returns.len() as u32,
             );

File 3 of 5

@@ -99,11 +99,11 @@ pub struct TranslatedModule {
     types: FuncTyStore,
     // TODO: Should we wrap this in a `Mutex` so that calling functions from multiple
     // threads doesn't cause data races?
-    table: Option<(TableType, Vec<RuntimeFunc>)>,
+    table: Option<(TableType, Vec<u32>)>,
     memory: Option<MemoryType>,
 }

-fn quickhash<H: Hash>(h: H) -> u64 {
+pub fn quickhash<H: Hash>(h: H) -> u64 {
     let mut hasher = std::collections::hash_map::DefaultHasher::new();
     h.hash(&mut hasher);
     hasher.finish()
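Since `quickhash` wraps std's `DefaultHasher`, hashing the same function type at translation time (to bake into the call site) and at instantiation time (to store in each entry's `sig_hash`) yields the same value; only the truncation to `u32` makes the check probabilistic. A self-contained check of that property, with a tuple standing in for the real `FuncType`:

    use std::hash::{Hash, Hasher};

    pub fn quickhash<H: Hash>(h: H) -> u64 {
        let mut hasher = std::collections::hash_map::DefaultHasher::new();
        h.hash(&mut hasher);
        hasher.finish()
    }

    fn main() {
        // Hypothetical (params, returns) encoding standing in for a FuncType.
        let ty = (vec![1u8, 2], vec![3u8]);
        // Equal inputs hash equal, so the truncated u32s compared at runtime match;
        // distinct types could still collide in 32 bits, letting a bad call through.
        assert_eq!(quickhash(&ty) as u32, quickhash(&ty) as u32);
    }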
@@ -113,22 +113,41 @@ impl TranslatedModule {
     pub fn instantiate(mut self) -> ExecutableModule {
         use std::alloc::{self, Layout};

-        let slice = self
-            .table
-            .as_mut()
-            .map(|&mut (_, ref mut initial)| {
-                initial.shrink_to_fit();
-                let out = BoxSlice {
-                    ptr: initial.as_mut_ptr(),
-                    len: initial.len(),
-                };
-                mem::forget(mem::replace(initial, Default::default()));
-                out
-            })
-            .unwrap_or(BoxSlice {
-                ptr: std::ptr::NonNull::dangling().as_ptr(),
-                len: 0,
-            });
+        let slice = {
+            let code_section = self
+                .translated_code_section
+                .as_ref()
+                .expect("We don't currently support a table section without a code section");
+            let types = &self.types;
+
+            self.table
+                .as_mut()
+                .map(|&mut (_, ref mut idxs)| {
+                    let mut initial = idxs
+                        .iter()
+                        .map(|i| {
+                            let start = code_section.func_start(*i as _);
+                            let ty = types.func_type(*i);
+
+                            RuntimeFunc {
+                                func_start: start,
+                                sig_hash: quickhash(ty) as u32,
+                            }
+                        })
+                        .collect::<Vec<_>>();
+
+                    initial.shrink_to_fit();
+                    let out = BoxSlice {
+                        ptr: initial.as_mut_ptr(),
+                        len: initial.len(),
+                    };
+                    mem::forget(initial);
+                    out
+                })
+                .unwrap_or(BoxSlice {
+                    ptr: std::ptr::NonNull::dangling().as_ptr(),
+                    len: 0,
+                })
+        };

         let mem_size = self.memory.map(|m| m.limits.initial).unwrap_or(0) as usize;
         let (layout, _mem_offset) = Layout::new::<VmCtx>()
@@ -138,6 +157,10 @@ impl TranslatedModule {
         let ctx = if mem_size > 0 || slice.len > 0 {
             let ptr = unsafe { alloc::alloc_zeroed(layout) } as *mut VmCtx;

+            if ptr.is_null() {
+                alloc::handle_alloc_error(layout);
+            }
+
             unsafe {
                 *ptr = VmCtx {
                     table: slice,
@@ -235,15 +258,23 @@ impl ExecutableModule {
     }
 }

-type FuncRef = unsafe extern "sysv64" fn();
+type FuncRef = *const u8;

+#[repr(C)]
 pub struct RuntimeFunc {
     sig_hash: u32,
     func_start: FuncRef,
 }

-#[repr(C)]
+impl RuntimeFunc {
+    pub fn offset_of_sig_hash() -> usize {
+        offset_of!(Self, sig_hash)
+    }
+
+    pub fn offset_of_func_start() -> usize {
+        offset_of!(Self, func_start)
+    }
+}
+
 struct BoxSlice<T> {
     len: usize,
     ptr: *mut T,
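`offset_of!` is presumably the `memoffset` crate's macro (the import isn't part of this diff). Together with the newly added `#[repr(C)]`, it gives the backend stable numeric field offsets to fold into the emitted addressing modes. A standalone sketch under that assumption:

    // Assumes the memoffset crate. With #[repr(C)] the layout is fixed:
    // sig_hash at offset 0, the pointer-aligned func_start at offset 8 on
    // x86-64, and a total size of 16 -- the stride used by the `imul`.
    use memoffset::offset_of;

    #[repr(C)]
    pub struct RuntimeFunc {
        sig_hash: u32,
        func_start: *const u8,
    }

    fn main() {
        assert_eq!(offset_of!(RuntimeFunc, sig_hash), 0);
        assert_eq!(offset_of!(RuntimeFunc, func_start), 8);
        assert_eq!(std::mem::size_of::<RuntimeFunc>(), 16);
    }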
@@ -283,10 +314,6 @@ pub struct FuncTyStore {
 const WASM_PAGE_SIZE: usize = 65_536;

 impl FuncTyStore {
-    pub fn func_count(&self) -> usize {
-        self.func_ty_indicies.len()
-    }
-
     pub fn func_type_index(&self, func_idx: u32) -> u32 {
         self.func_ty_indicies[func_idx as usize]
     }
@@ -311,6 +338,7 @@ pub fn translate(data: &[u8]) -> Result<ExecutableModule, Error> {
 pub fn translate_only(data: &[u8]) -> Result<TranslatedModule, Error> {
     let mut reader = ModuleReader::new(data)?;
     let mut output = TranslatedModule::default();
+    let mut table = None;

     reader.skip_custom_sections()?;
     if reader.eof() {
@@ -353,10 +381,12 @@ pub fn translate_only(data: &[u8]) -> Result<TranslatedModule, Error> {
     if let SectionCode::Table = section.code {
         let tables = section.get_table_section_reader()?;
-        let tables = translate_sections::table(tables)?;
+        let mut tables = translate_sections::table(tables)?;

         assert!(tables.len() <= 1);
+        table = tables.drain(..).next();

         reader.skip_custom_sections()?;
         if reader.eof() {
             return Ok(output);
@@ -421,7 +451,12 @@ pub fn translate_only(data: &[u8]) -> Result<TranslatedModule, Error> {
     if let SectionCode::Element = section.code {
         let elements = section.get_element_section_reader()?;
-        translate_sections::element(elements)?;
+        let elements = translate_sections::element(elements)?;
+
+        output.table = Some((
+            table.expect("Element section with no table section"),
+            elements,
+        ));

         reader.skip_custom_sections()?;
         if reader.eof() {

File 4 of 5

@@ -268,24 +268,6 @@ mod op64 {
     binop_test!(ge_s, |a, b| if a >= b { 1 } else { 0 }, i32);
 }

-quickcheck! {
-    fn relop_eq(a: u32, b: u32) -> bool {
-        static CODE: &str = r#"
-(module
-  (func (param i32) (param i32) (result i32) (i32.eq (get_local 0) (get_local 1)))
-)
-        "#;
-
-        lazy_static! {
-            static ref TRANSLATED: ExecutableModule = translate_wat(CODE);
-        }
-
-        let out = TRANSLATED.execute_func::<(u32, u32), u32>(0, (a, b)).unwrap();
-
-        (a == b) == (out == 1)
-    }
-}
-
 quickcheck! {
     fn if_then_else(a: u32, b: u32) -> bool {
         const CODE: &str = r#"
@@ -1003,7 +985,7 @@ fn call_indirect() {
     (table anyfunc
         (elem
-            $dispatch $fac $fib
+            $fac $fib
         )
     )
@@ -1019,7 +1001,7 @@ fn call_indirect() {
             (get_local 0)
             (call_indirect (type $over-i64)
                 (i64.sub (get_local 0) (i64.const 1))
-                (i32.const 1)
+                (i32.const 0)
             )
         )
     )
@@ -1033,11 +1015,11 @@ fn call_indirect() {
         (i64.add
             (call_indirect (type $over-i64)
                 (i64.sub (get_local 0) (i64.const 2))
-                (i32.const 2)
+                (i32.const 1)
             )
             (call_indirect (type $over-i64)
                 (i64.sub (get_local 0) (i64.const 1))
-                (i32.const 2)
+                (i32.const 1)
             )
         )
     )
@@ -1051,11 +1033,11 @@ fn call_indirect() {
     module.disassemble();

     assert_eq!(
-        module.execute_func::<(i32, i64), i64>(0, (1, 10)).unwrap(),
+        module.execute_func::<(i32, i64), i64>(0, (0, 10)).unwrap(),
         3628800
     );
     assert_eq!(
-        module.execute_func::<(i32, i64), i64>(0, (2, 10)).unwrap(),
+        module.execute_func::<(i32, i64), i64>(0, (1, 10)).unwrap(),
         89
     );
 }
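With `$dispatch` gone from the element segment, `$fac` and `$fib` shift down to table slots 0 and 1, which is why every hard-coded table index in the test and the first argument to `execute_func` drop by one.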

File 5 of 5

@@ -37,10 +37,7 @@ pub fn function(functions: FunctionSectionReader) -> Result<Vec<u32>, Error> {
 /// Parses the Table section of the wasm module.
 pub fn table(tables: TableSectionReader) -> Result<Vec<TableType>, Error> {
-    tables
-        .into_iter()
-        .map(|r| r.map_err(Into::into))
-        .collect()
+    tables.into_iter().map(|r| r.map_err(Into::into)).collect()
 }

 /// Parses the Memory section of the wasm module.
@@ -74,11 +71,37 @@ pub fn start(_index: u32) -> Result<(), Error> {
 }

 /// Parses the Element section of the wasm module.
-pub fn element(elements: ElementSectionReader) -> Result<(), Error> {
+pub fn element(elements: ElementSectionReader) -> Result<Vec<u32>, Error> {
+    let mut out = Vec::new();
+
     for entry in elements {
-        entry?; // TODO
+        let entry = entry?;
+
+        assert_eq!(entry.table_index, 0);
+
+        let offset = {
+            let mut reader = entry.init_expr.get_operators_reader();
+            let out = match reader.read() {
+                Ok(Operator::I32Const { value }) => value,
+                _ => panic!("We only support i32.const table init expressions right now"),
+            };
+            //reader.ensure_end()?;
+            out
+        };
+
+        assert_eq!(offset, out.len() as i32);
+
+        let elements = entry
+            .items
+            .get_items_reader()?
+            .into_iter()
+            .collect::<Result<Vec<_>, _>>()?;
+
+        out.extend(elements);
     }
-    Ok(())
+
+    Ok(out)
 }

 /// Parses the Code section of the wasm module.
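Note the `assert_eq!(offset, out.len() as i32)` above: segments are only accepted when they tile the table from slot 0 with no gaps or overlaps. A minimal model of that invariant, with hypothetical function indices:

    // Each segment's i32.const offset must equal the number of entries
    // collected so far, so the flattened Vec<u32> indexes straight into
    // the table that instantiate() later turns into RuntimeFuncs.
    fn main() {
        let segments: &[(i32, &[u32])] = &[(0, &[1, 2]), (2, &[0])];
        let mut out: Vec<u32> = Vec::new();
        for &(offset, items) in segments {
            assert_eq!(offset, out.len() as i32);
            out.extend_from_slice(items);
        }
        assert_eq!(out, vec![1, 2, 0]);
    }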