diff --git a/src/backend.rs b/src/backend.rs index 325502d1d4..25b76ba360 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -9,7 +9,7 @@ use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, Executab use error::Error; use std::{iter, mem}; -use module::VmCtx; +use module::{RuntimeFunc, VmCtx}; /// Size of a pointer on the target in bytes. const WORD_SIZE: u32 = 8; @@ -45,6 +45,62 @@ const R14: u8 = 14; const R15: u8 = 15; const NUM_GPRS: u8 = 16; +extern "sysv64" fn println(len: u64, args: *const u8) { + println!("{}", unsafe { + std::str::from_utf8_unchecked(std::slice::from_raw_parts(args, len as usize)) + }); +} + +macro_rules! asm_println { + ($asm:expr, $($args:tt)*) => {{ + use std::mem; + + let mut args = format!($($args)*).into_bytes(); + + let len = args.len(); + let ptr = args.as_mut_ptr(); + mem::forget(args); + + dynasm!($asm + ; push rdi + ; push rsi + ; push rdx + ; push rcx + ; push r8 + ; push r9 + ; push r10 + ; push r11 + + ; mov rax, QWORD println as *const u8 as i64 + ; mov rdi, QWORD len as i64 + ; mov rsi, QWORD ptr as i64 + + ; mov r11, rsp + ; and r11, 0b1111 + ; test r11, r11 + ; jnz >with_adjusted_stack_ptr + + ; call rax + ; jmp >pop_rest + + ; with_adjusted_stack_ptr: + ; push 1 + ; call rax + ; pop r11 + + ; pop_rest: + ; pop r11 + ; pop r10 + ; pop r9 + ; pop r8 + ; pop rcx + ; pop rdx + ; pop rsi + ; pop rdi + ); + }} +} + impl GPRs { fn take(&mut self) -> GPR { let lz = self.bits.trailing_zeros(); @@ -1381,7 +1437,7 @@ impl Context<'_> { Value::Local(loc) => StackValue::Local(loc), Value::Immediate(i) => StackValue::Immediate(i), Value::Temp(gpr) => { - if self.block_state.regs.free_scratch() >= 2 { + if self.block_state.regs.free_scratch() >= 3 { StackValue::Temp(gpr) } else { self.block_state.depth.reserve(1); @@ -1489,7 +1545,6 @@ impl Context<'_> { } else { (self.block_state.regs.take_scratch_gpr(), true) }; - let offset = self.adjusted_offset(offset); dynasm!(self.asm ; mov Rq(reg), [rsp + offset] ); @@ -2088,54 +2143,63 @@ impl Context<'_> { self.push(Value::Temp(RAX)); } - pub fn call_indirect( - &mut self, - valid_indexes: impl IntoIterator, - arg_arity: u32, - return_arity: u32, - ) { + pub fn call_indirect(&mut self, signature_hash: u32, arg_arity: u32, return_arity: u32) { debug_assert!( return_arity == 0 || return_arity == 1, "We don't support multiple return yet" ); let callee = self.pop(); - let (callee, callee_needs_release) = self.into_reg(callee); + let callee = self.into_temp_reg(callee); - let vmctx = StackValue::Local(self.block_state.locals.vmctx_index()); - let count = self.block_state.stack.len(); + let vmctx_idx = self.block_state.locals.vmctx_index(); + let (vmctx_reg, should_release_vmctx_reg) = self.into_reg(Value::Local(vmctx_idx)); - let label = self.create_label(); - let index_reg = self.block_state.regs.take_scratch_gpr(); + let signature_matches = self.create_label(); + let temp0 = self.block_state.regs.take_scratch_gpr(); + let temp1 = self.block_state.regs.take_scratch_gpr(); - // TODO: Generate faster check using bitsets like GCC does? - for index in valid_indexes { - dynasm!(self.asm - ; lea Rq(index_reg), [=>self.func_starts[index as usize].1] - ; cmp Rd(callee), index as i32 - ; je =>label.0 - ); - } + dynasm!(self.asm + ; imul Rq(callee), Rq(callee), mem::size_of::() as i32 + ; mov Rq(temp0), [Rq(vmctx_reg) + VmCtx::offset_of_funcs_ptr() as i32] + ; mov Rd(temp1), [ + Rq(temp0) + + Rq(callee) + + RuntimeFunc::offset_of_sig_hash() as i32 + ] + ; cmp Rd(temp1), signature_hash as i32 + ; je =>signature_matches.0 + ); self.trap(); - self.define_label(label); - if callee_needs_release { - self.block_state.regs.release_scratch_gpr(callee); - } + self.define_label(signature_matches); + self.block_state.regs.release_scratch_gpr(temp1); // TODO: I believe that this can't cause quadratic runtime but I'm not // certain. + let vmctx = StackValue::Local(vmctx_idx); + + let count = self.block_state.stack.len(); self.block_state .stack .insert(count - arg_arity as usize, vmctx); let cleanup = self.pass_outgoing_args(arg_arity + 1, return_arity, true); dynasm!(self.asm - ; call Rq(index_reg) + ; call QWORD [ + Rq(temp0) + + Rq(callee) + + RuntimeFunc::offset_of_func_start() as i32 + ] ); - self.block_state.regs.release_scratch_gpr(index_reg); + self.block_state.regs.release_scratch_gpr(temp0); + self.block_state.regs.release_scratch_gpr(callee); + + if should_release_vmctx_reg { + self.block_state.regs.release_scratch_gpr(vmctx_reg); + } self.post_call_cleanup(cleanup); self.push_function_return(return_arity); @@ -2176,7 +2240,12 @@ impl Context<'_> { let arguments = arguments + 1; let (reg_args, locals_in_gprs) = ARGS_IN_GPRS.split_at((arguments as usize).min(ARGS_IN_GPRS.len())); - let reg_locals = &locals_in_gprs[..(locals as usize).min(locals_in_gprs.len())]; + let (reg_locals, temps) = + locals_in_gprs.split_at((locals as usize).min(locals_in_gprs.len())); + + for temp in temps { + self.block_state.regs.release_scratch_gpr(*temp); + } // We need space to store the register arguments if we need to call a function // and overwrite these registers so we add `reg_args.len()` diff --git a/src/function_body.rs b/src/function_body.rs index 5415f7d6f6..0289f3bcb7 100644 --- a/src/function_body.rs +++ b/src/function_body.rs @@ -1,6 +1,6 @@ use backend::*; use error::Error; -use module::FuncTyStore; +use module::{FuncTyStore, quickhash}; use wasmparser::{FunctionBody, Operator, Type}; // TODO: Use own declared `Type` enum. @@ -447,8 +447,7 @@ pub fn translate( // TODO: this implementation assumes that this function is locally defined. ctx.call_indirect( - (0..translation_ctx.func_count() as u32) - .filter(|i| translation_ctx.func_type_index(*i) == index), + quickhash(callee_ty) as u32, callee_ty.params.len() as u32, callee_ty.returns.len() as u32, ); diff --git a/src/module.rs b/src/module.rs index 8702eaef58..acc5b2caf1 100644 --- a/src/module.rs +++ b/src/module.rs @@ -99,11 +99,11 @@ pub struct TranslatedModule { types: FuncTyStore, // TODO: Should we wrap this in a `Mutex` so that calling functions from multiple // threads doesn't cause data races? - table: Option<(TableType, Vec)>, + table: Option<(TableType, Vec)>, memory: Option, } -fn quickhash(h: H) -> u64 { +pub fn quickhash(h: H) -> u64 { let mut hasher = std::collections::hash_map::DefaultHasher::new(); h.hash(&mut hasher); hasher.finish() @@ -113,22 +113,41 @@ impl TranslatedModule { pub fn instantiate(mut self) -> ExecutableModule { use std::alloc::{self, Layout}; - let slice = self - .table - .as_mut() - .map(|&mut (_, ref mut initial)| { - initial.shrink_to_fit(); - let out = BoxSlice { - ptr: initial.as_mut_ptr(), - len: initial.len(), - }; - mem::forget(mem::replace(initial, Default::default())); - out - }) - .unwrap_or(BoxSlice { - ptr: std::ptr::NonNull::dangling().as_ptr(), - len: 0, - }); + let slice = { + let code_section = self + .translated_code_section + .as_ref() + .expect("We don't currently support a table section without a code section"); + let types = &self.types; + + self.table + .as_mut() + .map(|&mut (_, ref mut idxs)| { + let mut initial = idxs + .iter() + .map(|i| { + let start = code_section.func_start(*i as _); + let ty = types.func_type(*i); + + RuntimeFunc { + func_start: start, + sig_hash: quickhash(ty) as u32, + } + }) + .collect::>(); + initial.shrink_to_fit(); + let out = BoxSlice { + ptr: initial.as_mut_ptr(), + len: initial.len(), + }; + mem::forget(initial); + out + }) + .unwrap_or(BoxSlice { + ptr: std::ptr::NonNull::dangling().as_ptr(), + len: 0, + }) + }; let mem_size = self.memory.map(|m| m.limits.initial).unwrap_or(0) as usize; let (layout, _mem_offset) = Layout::new::() @@ -138,6 +157,10 @@ impl TranslatedModule { let ctx = if mem_size > 0 || slice.len > 0 { let ptr = unsafe { alloc::alloc_zeroed(layout) } as *mut VmCtx; + if ptr.is_null() { + alloc::handle_alloc_error(layout); + } + unsafe { *ptr = VmCtx { table: slice, @@ -235,15 +258,23 @@ impl ExecutableModule { } } -type FuncRef = unsafe extern "sysv64" fn(); +type FuncRef = *const u8; -#[repr(C)] pub struct RuntimeFunc { sig_hash: u32, func_start: FuncRef, } -#[repr(C)] +impl RuntimeFunc { + pub fn offset_of_sig_hash() -> usize { + offset_of!(Self, sig_hash) + } + + pub fn offset_of_func_start() -> usize { + offset_of!(Self, func_start) + } +} + struct BoxSlice { len: usize, ptr: *mut T, @@ -283,10 +314,6 @@ pub struct FuncTyStore { const WASM_PAGE_SIZE: usize = 65_536; impl FuncTyStore { - pub fn func_count(&self) -> usize { - self.func_ty_indicies.len() - } - pub fn func_type_index(&self, func_idx: u32) -> u32 { self.func_ty_indicies[func_idx as usize] } @@ -311,6 +338,7 @@ pub fn translate(data: &[u8]) -> Result { pub fn translate_only(data: &[u8]) -> Result { let mut reader = ModuleReader::new(data)?; let mut output = TranslatedModule::default(); + let mut table = None; reader.skip_custom_sections()?; if reader.eof() { @@ -353,10 +381,12 @@ pub fn translate_only(data: &[u8]) -> Result { if let SectionCode::Table = section.code { let tables = section.get_table_section_reader()?; - let tables = translate_sections::table(tables)?; + let mut tables = translate_sections::table(tables)?; assert!(tables.len() <= 1); + table = tables.drain(..).next(); + reader.skip_custom_sections()?; if reader.eof() { return Ok(output); @@ -421,7 +451,12 @@ pub fn translate_only(data: &[u8]) -> Result { if let SectionCode::Element = section.code { let elements = section.get_element_section_reader()?; - translate_sections::element(elements)?; + let elements = translate_sections::element(elements)?; + + output.table = Some(( + table.expect("Element section with no table section"), + elements, + )); reader.skip_custom_sections()?; if reader.eof() { diff --git a/src/tests.rs b/src/tests.rs index 7f28b791ab..f08d66291a 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -268,24 +268,6 @@ mod op64 { binop_test!(ge_s, |a, b| if a >= b { 1 } else { 0 }, i32); } -quickcheck! { - fn relop_eq(a: u32, b: u32) -> bool { - static CODE: &str = r#" - (module - (func (param i32) (param i32) (result i32) (i32.eq (get_local 0) (get_local 1))) - ) - "#; - - lazy_static! { - static ref TRANSLATED: ExecutableModule = translate_wat(CODE); - } - - let out = TRANSLATED.execute_func::<(u32, u32), u32>(0, (a, b)).unwrap(); - - (a == b) == (out == 1) - } -} - quickcheck! { fn if_then_else(a: u32, b: u32) -> bool { const CODE: &str = r#" @@ -1003,7 +985,7 @@ fn call_indirect() { (table anyfunc (elem - $dispatch $fac $fib + $fac $fib ) ) @@ -1019,7 +1001,7 @@ fn call_indirect() { (get_local 0) (call_indirect (type $over-i64) (i64.sub (get_local 0) (i64.const 1)) - (i32.const 1) + (i32.const 0) ) ) ) @@ -1033,11 +1015,11 @@ fn call_indirect() { (i64.add (call_indirect (type $over-i64) (i64.sub (get_local 0) (i64.const 2)) - (i32.const 2) + (i32.const 1) ) (call_indirect (type $over-i64) (i64.sub (get_local 0) (i64.const 1)) - (i32.const 2) + (i32.const 1) ) ) ) @@ -1051,11 +1033,11 @@ fn call_indirect() { module.disassemble(); assert_eq!( - module.execute_func::<(i32, i64), i64>(0, (1, 10)).unwrap(), + module.execute_func::<(i32, i64), i64>(0, (0, 10)).unwrap(), 3628800 ); assert_eq!( - module.execute_func::<(i32, i64), i64>(0, (2, 10)).unwrap(), + module.execute_func::<(i32, i64), i64>(0, (1, 10)).unwrap(), 89 ); } diff --git a/src/translate_sections.rs b/src/translate_sections.rs index 3e93fb64f2..bfd342c397 100644 --- a/src/translate_sections.rs +++ b/src/translate_sections.rs @@ -37,10 +37,7 @@ pub fn function(functions: FunctionSectionReader) -> Result, Error> { /// Parses the Table section of the wasm module. pub fn table(tables: TableSectionReader) -> Result, Error> { - tables - .into_iter() - .map(|r| r.map_err(Into::into)) - .collect() + tables.into_iter().map(|r| r.map_err(Into::into)).collect() } /// Parses the Memory section of the wasm module. @@ -74,11 +71,37 @@ pub fn start(_index: u32) -> Result<(), Error> { } /// Parses the Element section of the wasm module. -pub fn element(elements: ElementSectionReader) -> Result<(), Error> { +pub fn element(elements: ElementSectionReader) -> Result, Error> { + let mut out = Vec::new(); + for entry in elements { - entry?; // TODO + let entry = entry?; + + assert_eq!(entry.table_index, 0); + let offset = { + let mut reader = entry.init_expr.get_operators_reader(); + let out = match reader.read() { + Ok(Operator::I32Const { value }) => value, + _ => panic!("We only support i32.const table init expressions right now"), + }; + + //reader.ensure_end()?; + + out + }; + + assert_eq!(offset, out.len() as i32); + + let elements = entry + .items + .get_items_reader()? + .into_iter() + .collect::, _>>()?; + + out.extend(elements); } - Ok(()) + + Ok(out) } /// Parses the Code section of the wasm module.