diff --git a/Cargo.toml b/Cargo.toml
index 2881b13085..08c685fcd4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,7 +12,7 @@ publish = false
 arrayvec = "0.4"
 dynasm = "0.2.3"
 dynasmrt = "0.2.3"
-wasmparser = "0.21.6"
+wasmparser = { path = "./wasmparser.rs" }
 capstone = "0.5.0"
 failure = "0.1.3"
 failure_derive = "0.1.3"
diff --git a/src/backend.rs b/src/backend.rs
index cf872573e0..0d380064d8 100644
--- a/src/backend.rs
+++ b/src/backend.rs
@@ -9,6 +9,8 @@ use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, Executab
 use error::Error;
 use std::{iter, mem};
 
+use module::VmCtx;
+
 /// Size of a pointer on the target in bytes.
 const WORD_SIZE: u32 = 8;
 
@@ -770,15 +772,16 @@ macro_rules! load {
         vmctx: GPR,
         (offset, runtime_offset): (i32, Result<i32, GPR>)
     ) {
+        let vmctx_mem_offset = VmCtx::offset_of_memory() as i32;
         match runtime_offset {
             Ok(imm) => {
                 dynasm!(ctx.asm
-                    ; mov $reg_ty(dst), [Rq(vmctx) + offset + imm]
+                    ; mov $reg_ty(dst), [Rq(vmctx) + offset + imm + vmctx_mem_offset]
                 );
             }
             Err(offset_reg) => {
                 dynasm!(ctx.asm
-                    ; mov $reg_ty(dst), [Rq(vmctx) + Rq(offset_reg) + offset]
+                    ; mov $reg_ty(dst), [Rq(vmctx) + Rq(offset_reg) + offset + vmctx_mem_offset]
                 );
             }
         }
@@ -854,15 +857,16 @@ macro_rules! store {
         vmctx: GPR,
         (offset, runtime_offset): (i32, Result<i32, GPR>)
     ) {
+        let vmctx_mem_offset = VmCtx::offset_of_memory() as i32;
         match runtime_offset {
             Ok(imm) => {
                 dynasm!(ctx.asm
-                    ; mov [Rq(vmctx) + offset + imm], $reg_ty(src)
+                    ; mov [Rq(vmctx) + offset + imm + vmctx_mem_offset], $reg_ty(src)
                 );
             }
             Err(offset_reg) => {
                 dynasm!(ctx.asm
-                    ; mov [Rq(vmctx) + Rq(offset_reg) + offset], $reg_ty(src)
+                    ; mov [Rq(vmctx) + Rq(offset_reg) + offset + vmctx_mem_offset], $reg_ty(src)
                );
            }
        }
@@ -1003,6 +1007,16 @@ impl Context<'_> {
     cmp_i64!(i64_gt_s, setg, setnge, |a, b| a > b);
     cmp_i64!(i64_ge_s, setge, setng, |a, b| a >= b);
 
+    pub fn i32_eqz(&mut self) {
+        self.push(Value::Immediate(0));
+        self.i32_eq();
+    }
+
+    pub fn i64_eqz(&mut self) {
+        self.push(Value::Immediate(0));
+        self.i64_eq();
+    }
+
     /// Pops i32 predicate and branches to the specified label
     /// if the predicate is equal to zero.
     pub fn jump_if_false(&mut self, label: Label) {
@@ -1297,7 +1311,7 @@ impl Context<'_> {
             Value::Local(loc) => StackValue::Local(loc),
             Value::Immediate(i) => StackValue::Immediate(i),
             Value::Temp(gpr) => {
-                if self.block_state.regs.free_scratch() >= 1 {
+                if self.block_state.regs.free_scratch() >= 2 {
                     StackValue::Temp(gpr)
                 } else {
                     self.block_state.depth.reserve(1);
@@ -1890,12 +1904,14 @@ impl Context<'_> {
     fn save_volatile(&mut self) -> ArrayVec<[GPR; SCRATCH_REGS.len()]> {
         let mut out = ArrayVec::new();
 
-        // TODO: If there are no `StackValue::Pop`s that need to be popped
-        // before we reach our `Temp` value, we can set the `StackValue`
-        // for the register to be restored to `StackValue::Pop` (and
-        // release the register!) instead of restoring it.
         for &reg in SCRATCH_REGS.iter() {
-            if !self.block_state.regs.is_free(reg) {
+            if self
+                .block_state
+                .stack
+                .iter()
+                .filter_map(|v| v.location(&self.block_state.locals))
+                .any(|p| p == ValueLocation::Reg(reg))
+            {
                 dynasm!(self.asm
                     ; push Rq(reg)
                 );
@@ -1995,6 +2011,59 @@ impl Context<'_> {
         self.push(Value::Temp(RAX));
     }
 
+    pub fn call_indirect(
+        &mut self,
+        valid_indexes: impl IntoIterator<Item = u32>,
+        arg_arity: u32,
+        return_arity: u32,
+    ) {
+        debug_assert!(
+            return_arity == 0 || return_arity == 1,
+            "We don't support multiple return yet"
+        );
+
+        let callee = self.pop();
+        let (callee, callee_needs_release) = self.into_reg(callee);
+
+        let vmctx = StackValue::Local(self.block_state.locals.vmctx_index());
+        let count = self.block_state.stack.len();
+
+        let label = self.create_label();
+        let index_reg = self.block_state.regs.take_scratch_gpr();
+
+        // TODO: Generate faster check using bitsets like GCC does?
+        for index in valid_indexes {
+            dynasm!(self.asm
+                ; lea Rq(index_reg), [=>self.func_starts[index as usize].1]
+                ; cmp Rd(callee), index as i32
+                ; je =>label.0
+            );
+        }
+
+        self.trap();
+        self.define_label(label);
+
+        if callee_needs_release {
+            self.block_state.regs.release_scratch_gpr(callee);
+        }
+
+        // TODO: I believe that this can't cause quadratic runtime but I'm not
+        // certain.
+        self.block_state
+            .stack
+            .insert(count - arg_arity as usize, vmctx);
+        let cleanup = self.pass_outgoing_args(arg_arity + 1, return_arity, true);
+
+        dynasm!(self.asm
+            ; call Rq(index_reg)
+        );
+
+        self.block_state.regs.release_scratch_gpr(index_reg);
+
+        self.post_call_cleanup(cleanup);
+        self.push_function_return(return_arity);
+    }
+
     /// Call a function with the given index
     pub fn call_direct(&mut self, index: u32, arg_arity: u32, return_arity: u32) {
         debug_assert!(
diff --git a/src/function_body.rs b/src/function_body.rs
index 0948dabd1a..246bc37ccf 100644
--- a/src/function_body.rs
+++ b/src/function_body.rs
@@ -330,6 +330,7 @@ pub fn translate(
             }
         }
         Operator::I32Eq => ctx.i32_eq(),
+        Operator::I32Eqz => ctx.i32_eqz(),
         Operator::I32Ne => ctx.i32_neq(),
         Operator::I32LtS => ctx.i32_lt_s(),
         Operator::I32LeS => ctx.i32_le_s(),
@@ -346,6 +347,7 @@ pub fn translate(
         Operator::I32Xor => ctx.i32_xor(),
         Operator::I32Mul => ctx.i32_mul(),
         Operator::I64Eq => ctx.i64_eq(),
+        Operator::I64Eqz => ctx.i64_eqz(),
         Operator::I64Ne => ctx.i64_neq(),
         Operator::I64LtS => ctx.i64_lt_s(),
         Operator::I64LeS => ctx.i64_le_s(),
@@ -382,6 +384,20 @@ pub fn translate(
                 callee_ty.returns.len() as u32,
             );
         }
+        Operator::CallIndirect { index, table_index } => {
+            assert_eq!(table_index, 0);
+
+            let callee_ty = translation_ctx.signature(index);
+
+            // TODO: this implementation assumes that this function is locally defined.
+
+            ctx.call_indirect(
+                (0..translation_ctx.func_count() as u32)
+                    .filter(|i| translation_ctx.func_type_index(*i) == index),
+                callee_ty.params.len() as u32,
+                callee_ty.returns.len() as u32,
+            );
+        }
         Operator::Nop => {}
         op => {
             unimplemented!("{:?}", op);
diff --git a/src/lib.rs b/src/lib.rs
index c96301861a..6c744a62e5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,4 @@
-#![feature(plugin, test, const_slice_len, never_type)]
+#![feature(plugin, test, const_slice_len, never_type, alloc_layout_extra)]
 #![plugin(dynasm)]
 
 extern crate test;
@@ -28,5 +28,4 @@ mod translate_sections;
 #[cfg(test)]
 mod tests;
 
-pub use module::translate;
-pub use module::TranslatedModule;
+pub use module::{translate, TranslatedModule, ExecutableModule};
diff --git a/src/module.rs b/src/module.rs
index 5fb77bf1b1..ccfda9df4b 100644
--- a/src/module.rs
+++ b/src/module.rs
@@ -1,21 +1,11 @@
 use backend::TranslatedCodeSection;
 use error::Error;
-use std::borrow::Cow;
-use std::mem;
+use std::{
+    hash::{Hash, Hasher},
+    mem,
+};
 use translate_sections;
-use wasmparser::{FuncType, ModuleReader, SectionCode, Type};
-
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct Signature {
-    params: Cow<'static, [Type]>,
-    returns: Cow<'static, [Type]>,
-}
-
-impl PartialEq<FuncType> for Signature {
-    fn eq(&self, other: &FuncType) -> bool {
-        &self.params[..] == &other.params[..] && &self.returns[..] == &other.returns[..]
-    }
-}
+use wasmparser::{FuncType, MemoryType, ModuleReader, SectionCode, TableType, Type};
 
 pub trait AsValueType {
     const TYPE: Type;
@@ -51,20 +41,28 @@ impl AsValueType for f64 {
     const TYPE: Type = Type::F64;
 }
 
-pub trait FunctionArgs<T> {
-    unsafe fn call(self, start: *const u8, vm_ctx: *const u8) -> T;
+pub trait FunctionArgs<O> {
+    type FuncType;
+
+    unsafe fn call(self, func: Self::FuncType, vm_ctx: *const u8) -> O;
+    fn into_func(start: *const u8) -> Self::FuncType;
 }
 
-type VmCtx = u64;
+type VmCtxPtr = u64;
 
 macro_rules! impl_function_args {
     ($first:ident $(, $rest:ident)*) => {
-        impl<T, $first, $($rest),*> FunctionArgs<T> for ($first, $($rest),*) {
+        impl<Output, $first, $($rest),*> FunctionArgs<Output> for ($first, $($rest),*) {
+            type FuncType = unsafe extern "sysv64" fn(VmCtxPtr, $first $(, $rest)*) -> Output;
+
             #[allow(non_snake_case)]
-            unsafe fn call(self, start: *const u8, vm_ctx: *const u8) -> T {
-                let func = mem::transmute::<_, extern "sysv64" fn(VmCtx, $first $(, $rest)*) -> T>(start);
+            unsafe fn call(self, func: Self::FuncType, vm_ctx: *const u8) -> Output {
                 let ($first, $($rest),*) = self;
-                func(vm_ctx as VmCtx, $first $(, $rest)*)
+                func(vm_ctx as VmCtxPtr, $first $(, $rest)*)
+            }
+
+            fn into_func(start: *const u8) -> Self::FuncType {
+                unsafe { mem::transmute(start) }
             }
         }
 
@@ -75,10 +73,15 @@ macro_rules! impl_function_args {
         impl_function_args!($($rest),*);
     };
     () => {
-        impl<T> FunctionArgs<T> for () {
-            unsafe fn call(self, start: *const u8, vm_ctx: *const u8) -> T {
-                let func = mem::transmute::<_, extern "sysv64" fn(VmCtx) -> T>(start);
-                func(vm_ctx as VmCtx)
+        impl<Output> FunctionArgs<Output> for () {
+            type FuncType = unsafe extern "sysv64" fn(VmCtxPtr) -> Output;
+
+            unsafe fn call(self, func: Self::FuncType, vm_ctx: *const u8) -> Output {
+                func(vm_ctx as VmCtxPtr)
+            }
+
+            fn into_func(start: *const u8) -> Self::FuncType {
+                unsafe { mem::transmute(start) }
             }
         }
 
@@ -90,58 +93,62 @@ macro_rules! impl_function_args {
 impl_function_args!(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S);
 
-#[derive(Default, Debug)]
+#[derive(Default)]
 pub struct TranslatedModule {
     translated_code_section: Option<TranslatedCodeSection>,
     types: FuncTyStore,
-    // Note: This vector should never be deallocated or reallocated or the pointer
-    // to its contents otherwise invalidated while the JIT'd code is still
-    // callable.
     // TODO: Should we wrap this in a `Mutex` so that calling functions from multiple
     // threads doesn't cause data races?
-    memory: Option<Vec<u8>>,
+    table: Option<(TableType, Vec<RuntimeFunc>)>,
+    memory: Option<MemoryType>,
 }
 
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
-pub enum ExecutionError {
-    FuncIndexOutOfBounds,
-    TypeMismatch,
+fn quickhash<H: Hash>(h: H) -> u64 {
+    let mut hasher = std::collections::hash_map::DefaultHasher::new();
+    h.hash(&mut hasher);
+    hasher.finish()
 }
 
 impl TranslatedModule {
-    // For testing only.
-    // TODO: Handle generic signatures.
-    pub fn execute_func<Args: FunctionArgs<T> + TypeList, T: TypeList>(
-        &self,
-        func_idx: u32,
-        args: Args,
-    ) -> Result<T, ExecutionError> {
-        let code_section = self
-            .translated_code_section
-            .as_ref()
-            .expect("no code section");
+    pub fn instantiate(mut self) -> ExecutableModule {
+        use std::alloc::{self, Layout};
 
-        if func_idx as usize >= self.types.func_ty_indicies.len() {
-            return Err(ExecutionError::FuncIndexOutOfBounds);
+        let slice = self
+            .table
+            .as_mut()
+            .map(|&mut (_, ref mut initial)| {
+                initial.shrink_to_fit();
+                let out = BoxSlice {
+                    ptr: initial.as_mut_ptr(),
+                    len: initial.len(),
+                };
+                mem::forget(mem::replace(initial, Default::default()));
+                out
+            })
+            .unwrap_or(BoxSlice {
+                ptr: std::ptr::NonNull::dangling().as_ptr(),
+                len: 0,
+            });
+
+        let mem_size = self.memory.map(|m| m.limits.initial).unwrap_or(0) as usize;
+        let layout = Layout::new::<VmCtx>()
+            .extend(Layout::array::<u8>(mem_size * WASM_PAGE_SIZE).unwrap())
+            .unwrap()
+            .0;
+
+        let ptr = unsafe { alloc::alloc_zeroed(layout) } as *mut VmCtx;
+
+        unsafe {
+            *ptr = VmCtx {
+                table: slice,
+                mem_size,
+            }
         }
 
-        let type_ = self.types.func_type(func_idx);
-
-        if (&type_.params[..], &type_.returns[..]) != (Args::TYPE_LIST, T::TYPE_LIST) {
-            return Err(ExecutionError::TypeMismatch);
+        ExecutableModule {
+            module: self,
+            context: Allocation { ptr, layout },
         }
-
-        let start_buf = code_section.func_start(func_idx as usize);
-
-        Ok(unsafe {
-            args.call(
-                start_buf,
-                self.memory
-                    .as_ref()
-                    .map(|b| b.as_ptr())
-                    .unwrap_or(std::ptr::null()),
-            )
-        })
     }
 
     pub fn disassemble(&self) {
@@ -152,24 +159,144 @@ impl TranslatedModule {
     }
 }
 
+struct Allocation<T> {
+    ptr: *mut T,
+    layout: std::alloc::Layout,
+}
+
+unsafe impl<T> Send for Allocation<T> where T: Send {}
+unsafe impl<T> Sync for Allocation<T> where T: Sync {}
+
+impl<T> Drop for Allocation<T> {
+    fn drop(&mut self) {
+        if mem::needs_drop::<T>() {
+            unsafe { std::ptr::drop_in_place::<T>(self.ptr) };
+        }
+
+        unsafe { std::alloc::dealloc(self.ptr as _, self.layout) };
+    }
+}
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum ExecutionError {
+    FuncIndexOutOfBounds,
+    TypeMismatch,
+}
+
+pub struct ExecutableModule {
+    module: TranslatedModule,
+    context: Allocation<VmCtx>,
+}
+
+impl ExecutableModule {
+    // For testing only.
+    // TODO: Handle generic signatures.
+    pub fn execute_func<Args: FunctionArgs<T> + TypeList, T: TypeList>(
+        &self,
+        func_idx: u32,
+        args: Args,
+    ) -> Result<T, ExecutionError> {
+        let module = &self.module;
+        let code_section = module
+            .translated_code_section
+            .as_ref()
+            .expect("no code section");
+
+        if func_idx as usize >= module.types.func_ty_indicies.len() {
+            return Err(ExecutionError::FuncIndexOutOfBounds);
+        }
+
+        let type_ = module.types.func_type(func_idx);
+
+        if (&type_.params[..], &type_.returns[..]) != (Args::TYPE_LIST, T::TYPE_LIST) {
+            return Err(ExecutionError::TypeMismatch);
+        }
+
+        let start_buf = code_section.func_start(func_idx as usize);
+
+        Ok(unsafe {
+            args.call(
+                Args::into_func(start_buf),
+                self.context.ptr as *const VmCtx as *const u8,
+            )
+        })
+    }
+
+    pub fn disassemble(&self) {
+        self.module.disassemble();
+    }
+}
+
+type FuncRef = unsafe extern "sysv64" fn();
+
+#[repr(C)]
+pub struct RuntimeFunc {
+    sig_hash: u32,
+    func_start: FuncRef,
+}
+
+#[repr(C)]
+struct BoxSlice<T> {
+    len: usize,
+    ptr: *mut T,
+}
+
+#[repr(C)]
+pub struct VmCtx {
+    table: BoxSlice<RuntimeFunc>,
+    mem_size: usize,
+}
+
+unsafe impl Send for VmCtx {}
+unsafe impl Sync for VmCtx {}
+
+impl VmCtx {
+    pub fn offset_of_memory() -> usize {
+        mem::size_of::<VmCtx>()
+    }
+}
+
+impl<T> Drop for BoxSlice<T> {
+    fn drop(&mut self) {
+        unsafe { Vec::from_raw_parts(self.ptr, self.len, self.len) };
+    }
+}
+
 #[derive(Default, Debug)]
 pub struct FuncTyStore {
     types: Vec<FuncType>,
     func_ty_indicies: Vec<u32>,
 }
 
+const WASM_PAGE_SIZE: usize = 65_536;
+
 impl FuncTyStore {
+    pub fn func_count(&self) -> usize {
+        self.func_ty_indicies.len()
+    }
+
+    pub fn func_type_index(&self, func_idx: u32) -> u32 {
+        self.func_ty_indicies[func_idx as usize]
+    }
+
+    pub fn signature(&self, index: u32) -> &FuncType {
+        &self.types[index as usize]
+    }
+
     pub fn func_type(&self, func_idx: u32) -> &FuncType {
-        // TODO: This assumes that there is no imported functions.
-        let func_ty_idx = self.func_ty_indicies[func_idx as usize];
-        &self.types[func_ty_idx as usize]
+        // TODO: This assumes that there are no imported functions.
+        self.signature(self.func_type_index(func_idx))
     }
 
     // TODO: type of a global
 }
 
+pub fn translate(data: &[u8]) -> Result<ExecutableModule, Error> {
+    translate_only(data).map(|m| m.instantiate())
+}
+
 /// Translate from a slice of bytes holding a wasm module.
-pub fn translate(data: &[u8]) -> Result<TranslatedModule, Error> {
+pub fn translate_only(data: &[u8]) -> Result<TranslatedModule, Error> {
     let mut reader = ModuleReader::new(data)?;
     let mut output = TranslatedModule::default();
 
@@ -214,7 +341,9 @@ pub fn translate(data: &[u8]) -> Result<TranslatedModule, Error> {
     if let SectionCode::Table = section.code {
         let tables = section.get_table_section_reader()?;
-        translate_sections::table(tables)?;
+        let tables = translate_sections::table(tables)?;
+
+        assert!(tables.len() <= 1);
 
         reader.skip_custom_sections()?;
         if reader.eof() {
@@ -235,7 +364,7 @@ pub fn translate(data: &[u8]) -> Result<TranslatedModule, Error> {
     if !mem.is_empty() {
         let mem = mem[0];
         assert_eq!(Some(mem.limits.initial), mem.limits.maximum);
-        output.memory = Some(vec![0; mem.limits.initial as usize * 65_536]);
+        output.memory = Some(mem);
     }
 
     reader.skip_custom_sections()?;
diff --git a/src/tests.rs b/src/tests.rs
index c0fa89d0f7..3df0d6e852 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -1,7 +1,7 @@
-use super::{module::ExecutionError, translate, TranslatedModule};
+use super::{module::ExecutionError, translate, ExecutableModule};
 use wabt;
 
-fn translate_wat(wat: &str) -> TranslatedModule {
+fn translate_wat(wat: &str) -> ExecutableModule {
     let wasm = wabt::wat2wasm(wat).unwrap();
     let compiled = translate(&wasm).unwrap();
     compiled
@@ -20,18 +20,18 @@ fn empty() {
 }
 
 mod op32 {
-    use super::{translate_wat, TranslatedModule};
+    use super::{translate_wat, ExecutableModule};
 
     macro_rules! binop_test {
         ($op:ident, $func:expr) => {
             mod $op {
-                use super::{translate_wat, TranslatedModule};
+                use super::{translate_wat, ExecutableModule};
                 use std::sync::Once;
 
                 const OP: &str = stringify!($op);
 
                 lazy_static! {
-                    static ref AS_PARAMS: TranslatedModule = translate_wat(&format!(
+                    static ref AS_PARAMS: ExecutableModule = translate_wat(&format!(
                         "
                         (module (func (param i32) (param i32) (result i32)
                             (i32.{op} (get_local 0) (get_local 1))))
@@ -101,7 +101,7 @@ mod op32 {
 }
 
 mod op64 {
-    use super::{translate_wat, TranslatedModule};
+    use super::{translate_wat, ExecutableModule};
 
     macro_rules! binop_test {
         ($op:ident, $func:expr) => {
@@ -109,13 +109,13 @@ mod op64 {
         };
         ($op:ident, $func:expr, $retty:ident) => {
             mod $op {
-                use super::{translate_wat, TranslatedModule};
+                use super::{translate_wat, ExecutableModule};
 
                 const RETTY: &str = stringify!($retty);
                 const OP: &str = stringify!($op);
 
                 lazy_static! {
-                    static ref AS_PARAMS: TranslatedModule = translate_wat(&format!("
+                    static ref AS_PARAMS: ExecutableModule = translate_wat(&format!("
                         (module (func (param i64) (param i64) (result {retty})
                             (i64.{op} (get_local 0) (get_local 1))))
                     ", retty = RETTY, op = OP));
@@ -200,7 +200,7 @@ quickcheck! {
     "#;
 
     lazy_static! {
-        static ref TRANSLATED: TranslatedModule = translate_wat(CODE);
+        static ref TRANSLATED: ExecutableModule = translate_wat(CODE);
     }
 
     let out = TRANSLATED.execute_func::<(u32, u32), u32>(0, (a, b)).unwrap();
@@ -227,7 +227,7 @@ quickcheck! {
     "#;
 
     lazy_static! {
-        static ref TRANSLATED: TranslatedModule = translate_wat(CODE);
+        static ref TRANSLATED: ExecutableModule = translate_wat(CODE);
     }
 
     let out = TRANSLATED.execute_func::<(u32, u32), u32>(0, (a, b));
@@ -613,7 +613,7 @@ quickcheck! {
     }
 
     lazy_static! {
-        static ref TRANSLATED: TranslatedModule = {
+        static ref TRANSLATED: ExecutableModule = {
             let out = translate_wat(CODE);
             out.disassemble();
             out
@@ -871,6 +871,66 @@ fn nested_storage_calls() {
     assert_eq!(translated.execute_func::<(), i32>(0, ()), Ok(1));
 }
 
+#[test]
+fn call_indirect() {
+    const CODE: &str = r#"
+(module
+  (type $over-i64 (func (param i64) (result i64)))
+
+  (table anyfunc
+    (elem
+      $dispatch $fac $fib
+    )
+  )
+
+  (func $dispatch (param i32 i64) (result i64)
+    (call_indirect (type $over-i64) (get_local 1) (get_local 0))
+  )
+
+  (func $fac (type $over-i64)
+    (if (result i64) (i64.eqz (get_local 0))
+      (then (i64.const 1))
+      (else
+        (i64.mul
+          (get_local 0)
+          (call_indirect (type $over-i64)
+            (i64.sub (get_local 0) (i64.const 1))
+            (i32.const 1)
+          )
+        )
+      )
+    )
+  )
+
+  (func $fib (type $over-i64)
+    (if (result i64) (i64.le_u (get_local 0) (i64.const 1))
+      (then (i64.const 1))
+      (else
+        (i64.add
+          (call_indirect (type $over-i64)
+            (i64.sub (get_local 0) (i64.const 2))
+            (i32.const 2)
+          )
+          (call_indirect (type $over-i64)
+            (i64.sub (get_local 0) (i64.const 1))
+            (i32.const 2)
+          )
+        )
+      )
+    )
+  )
+)"#;
+
+    let wasm = wabt::wat2wasm(CODE).unwrap();
+    let module = translate(&wasm).unwrap();
+
+    module.disassemble();
+
+    assert_eq!(module.execute_func::<(i32, i64), i64>(0, (1, 10)).unwrap(), 3628800);
+    assert_eq!(module.execute_func::<(i32, i64), i64>(0, (2, 10)).unwrap(), 89);
+}
+
 #[bench]
 fn bench_fibonacci_compile(b: &mut test::Bencher) {
     let wasm = wabt::wat2wasm(FIBONACCI).unwrap();
diff --git a/src/translate_sections.rs b/src/translate_sections.rs
index 1eaf100f15..3e93fb64f2 100644
--- a/src/translate_sections.rs
+++ b/src/translate_sections.rs
@@ -7,7 +7,8 @@ use wasmparser::{
     CodeSectionReader, Data, DataSectionReader, Element, ElementSectionReader, Export,
     ExportSectionReader, ExternalKind, FuncType, FunctionSectionReader, Global,
     GlobalSectionReader, GlobalType, Import, ImportSectionEntryType, ImportSectionReader,
-    MemorySectionReader, MemoryType, Operator, TableSectionReader, Type, TypeSectionReader,
+    MemorySectionReader, MemoryType, Operator, TableSectionReader, TableType, Type,
+    TypeSectionReader,
 };
 
 /// Parses the Type section of the wasm module.
@@ -35,11 +36,11 @@ pub fn function(functions: FunctionSectionReader) -> Result<Vec<u32>, Error> {
 }
 
 /// Parses the Table section of the wasm module.
-pub fn table(tables: TableSectionReader) -> Result<(), Error> {
-    for entry in tables {
-        entry?; // TODO
-    }
-    Ok(())
+pub fn table(tables: TableSectionReader) -> Result<Vec<TableType>, Error> {
+    tables
+        .into_iter()
+        .map(|r| r.map_err(Into::into))
+        .collect()
 }
 
 /// Parses the Memory section of the wasm module.
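
The interesting codegen decision above is in `Context::call_indirect`: instead of loading a function pointer from the table and comparing a signature hash at runtime, it emits one `cmp`/`je` pair per function whose type index matches the instruction's type index, and falls through to a trap (see the "bitsets like GCC" TODO). A rough model of that control flow in plain Rust; the names here are invented for the sketch, not part of the diff:

    // Model of the dispatch chain call_indirect emits. `valid_indexes`
    // stands in for the `func_type_index` filter in function_body.rs and
    // `funcs` for the resolved `func_starts` entries.
    fn call_indirect_model(
        valid_indexes: &[u32],
        funcs: &[fn(i64) -> i64],
        callee: u32,
        arg: i64,
    ) -> Result<i64, &'static str> {
        for &index in valid_indexes {
            // One `cmp Rd(callee), index` / `je` pair per candidate.
            if callee == index {
                return Ok(funcs[index as usize](arg));
            }
        }
        // No candidate matched: the generated code reaches `self.trap()`.
        Err("trap: bad indirect call target")
    }

    fn main() {
        let double: fn(i64) -> i64 = |x| x * 2;
        let square: fn(i64) -> i64 = |x| x * x;
        let funcs = [double, square];
        assert_eq!(call_indirect_model(&[0, 1], &funcs, 1, 9), Ok(81));
        assert!(call_indirect_model(&[0, 1], &funcs, 7, 9).is_err());
    }

This trades code size for never having to dereference the table at runtime; the cost is linear in the number of type-compatible functions, which is why the TODO suggests a bitset check instead.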
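The load/store macros now fold `VmCtx::offset_of_memory()` into every memory operand because `instantiate` places linear memory in the same allocation as the `VmCtx` header, built with `Layout::extend` (hence the new `alloc_layout_extra` feature gate in lib.rs). A standalone sketch of the layout invariant this relies on; the struct below is a same-shape stand-in, not the real type:

    use std::alloc::Layout;
    use std::mem;

    // Stand-in mirroring the shape of the VmCtx in this diff: a table
    // header (len + ptr) followed by mem_size.
    #[repr(C)]
    struct VmCtx {
        table_len: usize,
        table_ptr: *mut u8,
        mem_size: usize,
    }

    const WASM_PAGE_SIZE: usize = 65_536;

    fn main() {
        let mem_size = 2; // pages, from the module's memory section

        // Same computation as `TranslatedModule::instantiate`: the header
        // layout extended by the linear-memory byte array.
        let (whole, mem_offset) = Layout::new::<VmCtx>()
            .extend(Layout::array::<u8>(mem_size * WASM_PAGE_SIZE).unwrap())
            .unwrap();

        // `offset_of_memory` can simply return size_of::<VmCtx>() because
        // a [u8] array has alignment 1, so `extend` adds no padding.
        assert_eq!(mem_offset, mem::size_of::<VmCtx>());
        assert!(whole.size() >= mem::size_of::<VmCtx>() + mem_size * WASM_PAGE_SIZE);
    }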
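End to end, `translate` now compiles and instantiates in one step, while `translate_only` stops at a `TranslatedModule` for callers that want to delay allocating the context. A minimal caller sketch, assuming the crate is linked under the name `lightbeam` (the tests above reach it as `super`, so the crate name is an assumption here):

    extern crate lightbeam; // assumed crate name
    extern crate wabt;

    use lightbeam::ExecutableModule;

    fn main() {
        let wat = "(module
                      (func (param i32) (param i32) (result i32)
                        (i32.add (get_local 0) (get_local 1))))";
        let wasm = wabt::wat2wasm(wat).unwrap();

        // `translate` = `translate_only` + `instantiate`: the returned
        // ExecutableModule owns the VmCtx allocation that the JIT'd code
        // receives as its hidden first argument.
        let module: ExecutableModule = lightbeam::translate(&wasm).unwrap();

        // Index and signature are checked before the transmute, so a bad
        // call yields an ExecutionError instead of undefined behaviour.
        let sum = module.execute_func::<(i32, i32), i32>(0, (5, 37)).unwrap();
        assert_eq!(sum, 42);
    }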