diff --git a/README.md b/README.md
index 8900d4864b..a393542e62 100644
--- a/README.md
+++ b/README.md
@@ -165,7 +165,7 @@ Now obviously I'm not advocating for replacing FireFox's optimising compiler wit
 
 ## Specification compliance
 
-It's hard to judge, since each test in the spec testsuite covers a wide range of features (to check their interactions), but currently 62 out of 74 of the spec suite tests pass when run in Wasmtime with Lightbeam as a backend. Here's the full test output:
+It's hard to judge, since each test in the spec testsuite covers a wide range of features (to check their interactions), but currently 65 out of 74 of the spec suite tests pass when run in Wasmtime with Lightbeam as a backend. Here's the full test output:
 
 ```
 running 74 tests
@@ -178,17 +178,17 @@ test spec_testsuite::br_if ... ok
 test spec_testsuite::address ... ok
 test spec_testsuite::comments ... ok
 test spec_testsuite::const_ ... ok
-test spec_testsuite::conversions ... FAILED
-test spec_testsuite::custom ... ok
-test spec_testsuite::custom_section ... ok
 test spec_testsuite::call ... ok
-test spec_testsuite::br_table ... FAILED
+test spec_testsuite::custom ... ok
+test spec_testsuite::conversions ... FAILED
+test spec_testsuite::custom_section ... ok
 test spec_testsuite::data ... ok
-test spec_testsuite::exports ... ok
+test spec_testsuite::br_table ... FAILED
 test spec_testsuite::elem ... ok
+test spec_testsuite::call_indirect ... ok
+test spec_testsuite::exports ... ok
 test spec_testsuite::endianness ... ok
 test spec_testsuite::f32_bitwise ... ok
-test spec_testsuite::call_indirect ... ok
 test spec_testsuite::f64_bitwise ... ok
 test spec_testsuite::f32_cmp ... ok
 test spec_testsuite::fac ... ok
@@ -197,38 +197,38 @@ test spec_testsuite::f32 ... ok
 test spec_testsuite::float_memory ... ok
 test spec_testsuite::f64_cmp ... ok
 test spec_testsuite::forward ... ok
-test spec_testsuite::float_literals ... ok
-test spec_testsuite::func_ptrs ... FAILED
 test spec_testsuite::float_misc ... ok
+test spec_testsuite::func_ptrs ... ok
+test spec_testsuite::float_literals ... ok
 test spec_testsuite::get_local ... FAILED
-test spec_testsuite::float_exprs ... FAILED
 test spec_testsuite::func ... ok
+test spec_testsuite::float_exprs ... FAILED
 test spec_testsuite::globals ... ok
-test spec_testsuite::imports ... FAILED
-test spec_testsuite::inline_module ... ok
 test spec_testsuite::i32 ... ok
-test spec_testsuite::i64 ... ok
+test spec_testsuite::inline_module ... ok
 test spec_testsuite::if_ ... ok
-test spec_testsuite::int_literals ... ok
+test spec_testsuite::i64 ... ok
+test spec_testsuite::imports ... ok
 test spec_testsuite::labels ... ok
+test spec_testsuite::int_literals ... ok
 test spec_testsuite::linking ... ok
 test spec_testsuite::int_exprs ... ok
 test spec_testsuite::loop_ ... ok
 test spec_testsuite::memory_redundancy ... ok
-test spec_testsuite::memory_trap ... FAILED
-test spec_testsuite::memory_grow ... FAILED
 test spec_testsuite::left_to_right ... ok
 test spec_testsuite::memory ... ok
+test spec_testsuite::memory_trap ... ok
 test spec_testsuite::resizing ... ok
+test spec_testsuite::memory_grow ... ok
 test spec_testsuite::return_minimal ... ok
 test spec_testsuite::select ... ok
 test spec_testsuite::return_ ... ok
+test spec_testsuite::set_local ... FAILED
 test spec_testsuite::skip_stack_guard_page ... FAILED
 test spec_testsuite::nop ... ok
-test spec_testsuite::set_local ... FAILED
+test spec_testsuite::start ... ok
 test spec_testsuite::store_retval ... ok
 test spec_testsuite::stack ... ok
-test spec_testsuite::start ... ok
 test spec_testsuite::token ... ok
 test spec_testsuite::switch ... ok
 test spec_testsuite::type_ ... ok
@@ -240,11 +240,11 @@ test spec_testsuite::utf8_custom_section_id ... ok
 test spec_testsuite::utf8_import_field ... ok
 test spec_testsuite::utf8_import_module ... ok
 test spec_testsuite::utf8_invalid_encoding ... ok
-test spec_testsuite::unreachable ... ok
 test spec_testsuite::tee_local ... FAILED
+test spec_testsuite::unreachable ... ok
 test spec_testsuite::names ... ok
 
-test result: FAILED. 61 passed; 14 failed; 0 ignored; 0 measured; 0 filtered out
+test result: FAILED. 65 passed; 9 failed; 0 ignored; 0 measured; 0 filtered out
 ```
 
 ## Getting involved
diff --git a/src/backend.rs b/src/backend.rs
index e81fa2d08a..e9c767fe45 100644
--- a/src/backend.rs
+++ b/src/backend.rs
@@ -406,14 +406,14 @@ impl Registers {
 }
 
 #[derive(Debug, Clone)]
-pub struct CallingConvention {
+pub struct BlockCallingConvention {
     pub stack_depth: StackDepth,
     pub arguments: Vec,
 }
 
-impl CallingConvention {
+impl BlockCallingConvention {
     pub fn function_start(args: impl IntoIterator) -> Self {
-        CallingConvention {
+        BlockCallingConvention {
             // We start and return the function with stack depth 1 since we must
             // allow space for the saved return address.
             stack_depth: StackDepth(1),
@@ -941,6 +941,7 @@ macro_rules! conversion {
                     dynasm!(self.asm
                         ; $instr $out_reg_ty(temp.$out_reg_fn().unwrap()), [rsp + offset]
                     );
+                    ValueLocation::Reg(temp)
                 }
                 ValueLocation::Reg(_) => {
@@ -951,6 +952,7 @@
                     dynasm!(self.asm
                         ; $instr $out_reg_ty(temp.$out_reg_fn().unwrap()), $in_reg_ty(reg.$in_reg_fn().unwrap())
                     );
+                    ValueLocation::Reg(temp)
                 }
             };
@@ -1856,12 +1858,12 @@ macro_rules! store {
                 }
             };
             dynasm!(ctx.asm
-                ; cmp [
+                ; cmp Rq(addr_reg.rq().unwrap()), [
                     Rq(reg.unwrap_or(vmctx).rq().unwrap()) +
                         mem_offset +
                         ctx.module_context.vmmemory_definition_current_length() as i32
-                ], Rq(addr_reg.rq().unwrap())
-                ; jna =>trap_label.0
+                ]
+                ; jae =>trap_label.0
             );
             ctx.block_state.regs.release(addr_reg);
         }
@@ -1879,7 +1881,6 @@
             }
             let src = $match_offset(ctx, mem_ptr_reg, runtime_offset, offset, src);
             ctx.block_state.regs.release(mem_ptr_reg);
-            ctx.block_state.regs.release(src);
         }
 
@@ -2300,6 +2301,7 @@ impl<'module, M: ModuleContext> Context<'module, M> {
                     ; pop Rq(trash.rq().unwrap())
                 );
             }
+            self.block_state.regs.release(trash);
         }
         self.block_state.depth = depth;
     }
@@ -2320,7 +2322,7 @@ impl<'module, M: ModuleContext> Context<'module, M> {
         }
     }
 
-    pub fn pass_block_args(&mut self, cc: &CallingConvention) {
+    pub fn pass_block_args(&mut self, cc: &BlockCallingConvention) {
         let args = &cc.arguments;
         for (remaining, &dst) in args
             .iter()
@@ -2347,7 +2349,7 @@ impl<'module, M: ModuleContext> Context<'module, M> {
 
     /// Puts all stack values into "real" locations so that they can i.e. be set to different
     /// values on different iterations of a loop
-    pub fn serialize_args(&mut self, count: u32) -> CallingConvention {
+    pub fn serialize_args(&mut self, count: u32) -> BlockCallingConvention {
         let mut out = Vec::with_capacity(count as _);
 
         // TODO: We can make this more efficient now that `pop` isn't so complicated
@@ -2362,24 +2364,39 @@ impl<'module, M: ModuleContext> Context<'module, M> {
         out.reverse();
 
-        CallingConvention {
+        BlockCallingConvention {
             stack_depth: self.block_state.depth,
             arguments: out,
         }
     }
 
     pub fn get_global(&mut self, global_idx: u32) {
-        let offset = self.module_context.vmctx_vmglobal_definition(
+        let (reg, offset) = self.module_context
             .defined_global_index(global_idx)
-            .expect("TODO: Support imported globals"),
-        );
+            .map(|defined_global_index| {
+                (None, self.module_context
+                    .vmctx_vmglobal_definition(defined_global_index))
+            })
+            .unwrap_or_else(|| {
+                let reg = self.block_state.regs.take(I64);
+
+                dynasm!(self.asm
+                    ; mov Rq(reg.rq().unwrap()), [
+                        Rq(VMCTX) +
+                            self.module_context.vmctx_vmglobal_import_from(global_idx) as i32
+                    ]
+                );
+
+                (Some(reg), 0)
+            });
         let out = self.block_state.regs.take(GPRType::Rq);
+        let vmctx = GPR::Rq(VMCTX);
 
-        // We always use `Rq` (even for floats) since the globals are not necessarily aligned to 128 bits
+        // TODO: Are globals necessarily aligned to 128 bits? We can load directly to an XMM reg if so
        dynasm!(self.asm
-            ; mov Rq(out.rq().unwrap()), [Rq(VMCTX) + offset as i32]
+            ; mov Rq(out.rq().unwrap()), [Rq(reg.unwrap_or(vmctx).rq().unwrap()) + offset as i32]
         );
 
         self.push(ValueLocation::Reg(out));
@@ -2561,7 +2578,7 @@ impl<'module, M: ModuleContext> Context<'module, M> {
         self.block_state.depth = state.depth;
     }
 
-    pub fn apply_cc(&mut self, cc: &CallingConvention) {
+    pub fn apply_cc(&mut self, cc: &BlockCallingConvention) {
         let stack = cc.arguments.iter();
 
         self.block_state.stack = Vec::with_capacity(stack.size_hint().0);
@@ -4010,7 +4027,15 @@ impl<'module, M: ModuleContext> Context<'module, M> {
         args: impl IntoIterator,
         rets: impl IntoIterator,
     ) {
-        self.pass_outgoing_args(&arg_locs(args));
+        self.block_state.depth.reserve(1);
+        dynasm!(self.asm
+            ; push Rq(VMCTX)
+        );
+        let depth = self.block_state.depth.clone();
+
+        let locs = arg_locs(args);
+
+        self.pass_outgoing_args(&locs);
         // 2 bytes for the 64-bit `mov` opcode + register ident, the rest is the immediate
         self.reloc_sink.reloc_external(
             (self.asm.offset().0
@@ -4028,8 +4053,20 @@ impl<'module, M: ModuleContext> Context<'module, M> {
             ; mov Rq(temp.rq().unwrap()), QWORD 0xdeadbeefdeadbeefu64 as i64
             ; call Rq(temp.rq().unwrap())
         );
+        self.block_state.regs.release(temp);
+
+        for i in locs {
+            self.free_value(i.into());
+        }
+
         self.push_function_returns(rets);
+
+        self.set_stack_depth(depth);
+        dynasm!(self.asm
+            ; pop Rq(VMCTX)
+        );
+        self.block_state.depth.free(1);
     }
 
     // TODO: Other memory indices
@@ -4132,7 +4169,12 @@ impl<'module, M: ModuleContext> Context<'module, M> {
             })
             .max()
             .unwrap_or(0);
-        let depth = self.block_state.depth.0 + total_stack_space;
+        let mut depth = self.block_state.depth.0 + total_stack_space;
+
+        if depth & 1 != 0 {
+            self.set_stack_depth(StackDepth(self.block_state.depth.0 + 1));
+            depth += 1;
+        }
 
         let mut pending = Vec::<(ValueLocation, ValueLocation)>::new();
@@ -4168,13 +4210,8 @@ impl<'module, M: ModuleContext> Context<'module, M> {
             }
         }
 
-        let mut try_count = 10;
         while !pending.is_empty() {
-            try_count -= 1;
-
-            if try_count == 0 {
-                unimplemented!("We can't handle cycles in the register allocation right now");
-            }
+            let start_len = pending.len();
 
             for (src, dst) in mem::replace(&mut pending, vec![]) {
                 if let ValueLocation::Reg(r) = dst {
@@ -4188,6 +4225,13 @@
                 self.copy_value(&src, &mut { dst });
                 self.free_value(src);
             }
+
+            if pending.len() == start_len {
+                unimplemented!(
+                    "We can't handle cycles in the register allocator: {:?}",
+                    pending
+                );
+            }
         }
 
         self.set_stack_depth(StackDepth(depth));
@@ -4382,6 +4426,8 @@ impl<'module, M: ModuleContext> Context<'module, M> {
             ; call Rq(callee.rq().unwrap())
         );
 
+        self.block_state.regs.release(callee);
+
         for i in locs {
             self.free_value(i.into());
         }
@@ -4400,7 +4446,8 @@ impl<'module, M: ModuleContext> Context<'module, M> {
     /// Writes the function prologue and stores the arguments as locals
     pub fn start_function(&mut self, params: impl IntoIterator) {
         let locs = Vec::from_iter(arg_locs(params));
-        self.apply_cc(&CallingConvention::function_start(locs));
+
+        self.apply_cc(&BlockCallingConvention::function_start(locs));
     }
 
     pub fn ret(&mut self) {
@@ -4443,7 +4490,7 @@ impl<'module, M: ModuleContext> Context<'module, M> {
                 self.align(16);
                 self.define_label(l);
                 dynasm!(self.asm
-                    ; .dword -2147483648
+                    ; .dword SIGN_MASK_F32 as i32
                 );
                 self.labels.neg_const_f32 = Some(Pending::defined(l));
             }
@@ -4457,8 +4504,7 @@ impl<'module, M: ModuleContext> Context<'module, M> {
                 self.align(16);
                 self.define_label(l);
                 dynasm!(self.asm
-                    ; .dword 0
-                    ; .dword -2147483648
+                    ; .qword SIGN_MASK_F64 as i64
                 );
                 self.labels.neg_const_f64 = Some(Pending::defined(l));
             }
@@ -4472,7 +4518,7 @@ impl<'module, M: ModuleContext> Context<'module, M> {
                 self.align(16);
                 self.define_label(l);
                 dynasm!(self.asm
-                    ; .dword 2147483647
+                    ; .dword (!SIGN_MASK_F32) as i32
                 );
                 self.labels.abs_const_f32 = Some(Pending::defined(l));
             }
@@ -4486,7 +4532,7 @@ impl<'module, M: ModuleContext> Context<'module, M> {
                 self.align(16);
                 self.define_label(l);
                 dynasm!(self.asm
-                    ; .qword 9223372036854775807
+                    ; .qword (!SIGN_MASK_F64) as i64
                 );
                 self.labels.abs_const_f64 = Some(Pending::defined(l));
             }
diff --git a/src/function_body.rs b/src/function_body.rs
index bc077a17b6..a357c3400f 100644
--- a/src/function_body.rs
+++ b/src/function_body.rs
@@ -10,7 +10,7 @@ use std::{collections::HashMap, convert::TryInto, hash::Hash};
 
 #[derive(Debug)]
 struct Block {
     label: BrTarget