diff --git a/Cargo.toml b/Cargo.toml index 5e8c250527..3b95e3f59b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ readme = "README.md" categories = ["wasm"] keywords = ["webassembly", "wasm", "compile", "compiler", "jit"] publish = false +edition = "2018" [dependencies] smallvec = "0.6" diff --git a/README.md b/README.md index dee79b196c..0c61588199 100644 --- a/README.md +++ b/README.md @@ -6,86 +6,88 @@ Lightbeam is an optimising one-pass streaming compiler for WebAssembly, intended ## Specification compliance -It's hard to judge, since each test in the spec testsuite covers a wide range of features (to check their interactions), but currently 28 out of 77 of the spec suite tests pass when run in Wasmtime with Lightbeam as a backend. Here's the full test output: +It's hard to judge, since each test in the spec testsuite covers a wide range of features (to check their interactions), but currently 31 out of 77 of the spec suite tests pass when run in Wasmtime with Lightbeam as a backend. Here's the full test output: ``` running 76 tests -test misc_testsuite::stack_overflow ... ok -test spec_testsuite::align ... FAILED -test spec_testsuite::binary ... ok -test misc_testsuite::misc_traps ... ok -test spec_testsuite::address ... FAILED -test spec_testsuite::br_if ... FAILED -test spec_testsuite::break_drop ... ok -test spec_testsuite::block ... FAILED -test spec_testsuite::call ... FAILED -test spec_testsuite::comments ... ok -test spec_testsuite::call_indirect ... FAILED -test spec_testsuite::const_ ... ok -test spec_testsuite::custom ... ok -test spec_testsuite::custom_section ... ok -test spec_testsuite::data ... ok -test spec_testsuite::elem ... FAILED -test spec_testsuite::conversions ... FAILED -test spec_testsuite::endianness ... FAILED -test spec_testsuite::exports ... ok -test spec_testsuite::f32_bitwise ... FAILED -test spec_testsuite::br ... ok -test spec_testsuite::br_table ... FAILED -test spec_testsuite::f32 ... FAILED -test spec_testsuite::f64_bitwise ... FAILED -test spec_testsuite::f32_cmp ... FAILED -test spec_testsuite::fac ... ok -test spec_testsuite::f64 ... FAILED -test spec_testsuite::float_memory ... FAILED -test spec_testsuite::float_literals ... FAILED -test spec_testsuite::forward ... ok -test spec_testsuite::func ... FAILED -test spec_testsuite::func_ptrs ... FAILED -test spec_testsuite::float_misc ... FAILED -test spec_testsuite::get_local ... FAILED -test spec_testsuite::globals ... FAILED -test spec_testsuite::i64 ... FAILED -test spec_testsuite::i32 ... FAILED -test spec_testsuite::float_exprs ... FAILED -test spec_testsuite::inline_module ... ok -test spec_testsuite::if_ ... FAILED -test spec_testsuite::imports ... FAILED -test spec_testsuite::int_exprs ... FAILED -test spec_testsuite::left_to_right ... FAILED -test spec_testsuite::linking ... FAILED -test spec_testsuite::labels ... ok -test spec_testsuite::int_literals ... ok -test spec_testsuite::loop_ ... FAILED -test spec_testsuite::f64_cmp ... FAILED -test spec_testsuite::memory ... FAILED -test spec_testsuite::memory_redundancy ... FAILED -test spec_testsuite::memory_grow ... FAILED -test spec_testsuite::resizing ... FAILED -test spec_testsuite::memory_trap ... FAILED -test spec_testsuite::return_minimal ... ok -test spec_testsuite::nop ... FAILED -test spec_testsuite::set_local ... FAILED -test spec_testsuite::select ... FAILED -test spec_testsuite::stack ... FAILED -test spec_testsuite::start ... FAILED -test spec_testsuite::store_retval ... ok -test spec_testsuite::switch ... FAILED -test spec_testsuite::tee_local ... FAILED -test spec_testsuite::token ... ok -test spec_testsuite::traps ... FAILED -test spec_testsuite::skip_stack_guard_page ... FAILED -test spec_testsuite::type_ ... ok -test spec_testsuite::typecheck ... ok -test spec_testsuite::unreached_invalid ... ok -test spec_testsuite::unwind ... FAILED -test spec_testsuite::return_ ... ok +test misc_testsuite::stack_overflow ... ok +test misc_testsuite::misc_traps ... ok +test spec_testsuite::binary ... ok +test spec_testsuite::align ... FAILED +test spec_testsuite::block ... FAILED +test spec_testsuite::br_if ... FAILED +test spec_testsuite::break_drop ... ok +test spec_testsuite::call ... FAILED +test spec_testsuite::call_indirect ... FAILED +test spec_testsuite::comments ... ok +test spec_testsuite::address ... FAILED +test spec_testsuite::const_ ... ok +test spec_testsuite::custom ... ok +test spec_testsuite::custom_section ... ok +test spec_testsuite::data ... ok +test spec_testsuite::elem ... FAILED +test spec_testsuite::conversions ... FAILED +test spec_testsuite::endianness ... FAILED +test spec_testsuite::br ... ok +test spec_testsuite::exports ... ok +test spec_testsuite::f32_bitwise ... FAILED +test spec_testsuite::br_table ... FAILED +test spec_testsuite::f64_bitwise ... FAILED +test spec_testsuite::f32 ... FAILED +test spec_testsuite::f32_cmp ... FAILED +test spec_testsuite::fac ... ok +test spec_testsuite::float_literals ... FAILED +test spec_testsuite::f64 ... FAILED +test spec_testsuite::float_misc ... FAILED +test spec_testsuite::forward ... ok +test spec_testsuite::f64_cmp ... FAILED +test spec_testsuite::func_ptrs ... FAILED +test spec_testsuite::get_local ... FAILED +test spec_testsuite::float_memory ... ok +test spec_testsuite::globals ... FAILED +test spec_testsuite::float_exprs ... FAILED +test spec_testsuite::i64 ... FAILED +test spec_testsuite::i32 ... FAILED +test spec_testsuite::imports ... FAILED +test spec_testsuite::inline_module ... ok +test spec_testsuite::if_ ... FAILED +test spec_testsuite::int_exprs ... FAILED +test spec_testsuite::labels ... ok +test spec_testsuite::left_to_right ... FAILED +test spec_testsuite::int_literals ... ok +test spec_testsuite::linking ... FAILED +test spec_testsuite::func ... FAILED +test spec_testsuite::memory_grow ... FAILED +test spec_testsuite::loop_ ... FAILED +test spec_testsuite::memory_redundancy ... ok +test spec_testsuite::memory ... FAILED +test spec_testsuite::memory_trap ... FAILED +test spec_testsuite::resizing ... FAILED +test spec_testsuite::nop ... FAILED +test spec_testsuite::return_minimal ... ok +test spec_testsuite::set_local ... FAILED +test spec_testsuite::select ... FAILED +test spec_testsuite::stack ... FAILED +test spec_testsuite::start ... FAILED +test spec_testsuite::store_retval ... ok +test spec_testsuite::skip_stack_guard_page ... FAILED +test spec_testsuite::switch ... ok +test spec_testsuite::token ... ok +test spec_testsuite::tee_local ... FAILED +test spec_testsuite::type_ ... ok +test spec_testsuite::traps ... FAILED +test spec_testsuite::typecheck ... ok +test spec_testsuite::unreached_invalid ... ok +test spec_testsuite::unwind ... FAILED test spec_testsuite::utf8_custom_section_id ... ok -test spec_testsuite::utf8_import_field ... ok -test spec_testsuite::utf8_import_module ... ok -test spec_testsuite::utf8_invalid_encoding ... ok -test spec_testsuite::unreachable ... ok -test spec_testsuite::names ... FAILED +test spec_testsuite::utf8_import_field ... ok +test spec_testsuite::utf8_import_module ... ok +test spec_testsuite::utf8_invalid_encoding ... ok +test spec_testsuite::return_ ... ok +test spec_testsuite::unreachable ... ok +test spec_testsuite::names ... FAILED + +test result: FAILED. 31 passed; 45 failed; 0 ignored; 0 measured; 0 filtered out ``` ## Getting involved diff --git a/src/backend.rs b/src/backend.rs index 171388e9c1..a26d738fd9 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -1,13 +1,13 @@ #![allow(dead_code)] // for now -use microwasm::{BrTarget, SignlessType, Type, F32, F64, I32, I64}; +use crate::microwasm::{BrTarget, SignlessType, Type, F32, F64, I32, I64}; use self::registers::*; +use crate::error::Error; +use crate::microwasm::Value; +use crate::module::{ModuleContext, RuntimeFunc}; use dynasmrt::x64::Assembler; use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, ExecutableBuffer}; -use error::Error; -use microwasm::Value; -use module::{ModuleContext, RuntimeFunc}; use std::{ iter::{self, FromIterator}, mem, @@ -372,8 +372,8 @@ impl Registers { #[derive(Debug, Clone)] pub struct CallingConvention { - stack_depth: StackDepth, - arguments: Vec, + pub stack_depth: StackDepth, + pub arguments: Vec, } impl CallingConvention { @@ -570,7 +570,7 @@ impl TranslatedCodeSection { } pub fn disassemble(&self) { - ::disassemble::disassemble(&*self.exec_buf).unwrap(); + crate::disassemble::disassemble(&*self.exec_buf).unwrap(); } } @@ -1275,8 +1275,8 @@ macro_rules! binop { } macro_rules! load { - (@inner $name:ident, $reg_ty:ident, $emit_fn:expr) => { - pub fn $name(&mut self, ty: impl Into, offset: u32) -> Result<(), Error> { + (@inner $name:ident, $rtype:expr, $reg_ty:ident, $emit_fn:expr) => { + pub fn $name(&mut self, offset: u32) { fn load_to_reg<_M: ModuleContext>( ctx: &mut Context<_M>, dst: GPR, @@ -1291,11 +1291,9 @@ macro_rules! load { ctx.block_state.regs.release(mem_ptr_reg); } - assert!(offset <= i32::max_value() as u32); - let base = self.pop(); - let temp = self.block_state.regs.take(ty); + let temp = self.block_state.regs.take($rtype); match base { ValueLocation::Immediate(i) => { @@ -1309,54 +1307,54 @@ macro_rules! load { } self.push(ValueLocation::Reg(temp)); - - Ok(()) } }; - ($name:ident, $reg_ty:ident, NONE) => { + ($name:ident, $rtype:expr, $reg_ty:ident, NONE, $rq_instr:ident, $ty:ident) => { load!(@inner $name, + $rtype, $reg_ty, |ctx: &mut Context<_>, dst: GPR, mem_ptr_reg: GPR, runtime_offset: Result, offset: i32| { match runtime_offset { Ok(imm) => { dynasm!(ctx.asm - ; mov $reg_ty(dst.rq().unwrap()), [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm] + ; $rq_instr $reg_ty(dst.rq().unwrap()), $ty [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm] ); } Err(offset_reg) => { dynasm!(ctx.asm - ; mov $reg_ty(dst.rq().unwrap()), [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset] + ; $rq_instr $reg_ty(dst.rq().unwrap()), $ty [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset] ); } } } ); }; - ($name:ident, $reg_ty:ident, $xmm_instr:ident) => { + ($name:ident, $rtype:expr, $reg_ty:ident, $xmm_instr:ident, $rq_instr:ident, $ty:ident) => { load!(@inner $name, + $rtype, $reg_ty, |ctx: &mut Context<_>, dst: GPR, mem_ptr_reg: GPR, runtime_offset: Result, offset: i32| { match (dst, runtime_offset) { (GPR::Rq(r), Ok(imm)) => { dynasm!(ctx.asm - ; mov $reg_ty(r), [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm] + ; $rq_instr $reg_ty(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm] ); } (GPR::Rx(r), Ok(imm)) => { dynasm!(ctx.asm - ; $xmm_instr Rx(r), [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm] + ; $xmm_instr Rx(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm] ); } (GPR::Rq(r), Err(offset_reg)) => { dynasm!(ctx.asm - ; mov $reg_ty(r), [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset] + ; $rq_instr $reg_ty(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset] ); } (GPR::Rx(r), Err(offset_reg)) => { dynasm!(ctx.asm - ; $xmm_instr Rx(r), [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset] + ; $xmm_instr Rx(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset] ); } } @@ -1367,7 +1365,7 @@ macro_rules! load { macro_rules! store { (@inner $name:ident, $int_reg_ty:ident, $match_offset:expr, $size:ident) => { - pub fn $name(&mut self, offset: u32) -> Result<(), Error> { + pub fn $name(&mut self, offset: u32) { fn store_from_reg<_M: ModuleContext>( ctx: &mut Context<_M>, src: GPR, @@ -1401,8 +1399,6 @@ macro_rules! store { self.block_state.regs.release(gpr); } } - - Ok(()) } }; ($name:ident, $int_reg_ty:ident, NONE, $size:ident) => { @@ -1424,7 +1420,7 @@ macro_rules! store { ); } } - + src_reg }, $size @@ -1496,11 +1492,11 @@ impl TryInto for i64 { #[derive(Debug, Clone)] pub struct VirtualCallingConvention { - stack: Stack, - depth: StackDepth, + pub stack: Stack, + pub depth: StackDepth, } -impl Context<'_, M> { +impl<'module, M: ModuleContext> Context<'module, M> { pub fn debug(&mut self, d: std::fmt::Arguments) { asm_println!(self.asm, "{}", d); } @@ -1628,10 +1624,11 @@ impl Context<'_, M> { /// Pops i32 predicate and branches to the specified label /// if the predicate is equal to zero. - pub fn br_if_false(&mut self, label: Label, f: impl FnOnce(&mut Self)) { + pub fn br_if_false(&mut self, target: impl Into>, pass_args: impl FnOnce(&mut Self)) { let val = self.pop(); + let label = target.into().label().map(|c| *c).unwrap_or_else(|| self.ret_label()); - f(self); + pass_args(self); let predicate = self.into_reg(I32, val); @@ -1645,10 +1642,11 @@ impl Context<'_, M> { /// Pops i32 predicate and branches to the specified label /// if the predicate is not equal to zero. - pub fn br_if_true(&mut self, label: Label, f: impl FnOnce(&mut Self)) { + pub fn br_if_true(&mut self, target: impl Into>, pass_args: impl FnOnce(&mut Self)) { let val = self.pop(); + let label = target.into().label().map(|c| *c).unwrap_or_else(|| self.ret_label()); - f(self); + pass_args(self); let predicate = self.into_reg(I32, val); @@ -1661,10 +1659,13 @@ impl Context<'_, M> { } /// Branch unconditionally to the specified label. - pub fn br(&mut self, label: Label) { - dynasm!(self.asm - ; jmp =>label.0 - ); + pub fn br(&mut self, label: impl Into>) { + match label.into() { + BrTarget::Return => self.ret(), + BrTarget::Label(label) => dynasm!(self.asm + ; jmp =>label.0 + ), + } } /// If `default` is `None` then the default is just continuing execution @@ -1802,6 +1803,7 @@ impl Context<'_, M> { pub fn serialize_args(&mut self, count: u32) -> CallingConvention { let mut out = Vec::with_capacity(count as _); + // TODO: We can make this more efficient now that `pop` isn't so complicated for _ in 0..count { let val = self.pop(); // TODO: We can use stack slots for values already on the stack but we @@ -1820,26 +1822,30 @@ impl Context<'_, M> { } fn immediate_to_reg(&mut self, reg: GPR, val: Value) { - match reg { - GPR::Rq(r) => { - let val = val.as_bytes(); - if (val as u64) <= u32::max_value() as u64 { - dynasm!(self.asm - ; mov Rd(r), val as i32 - ); - } else { - dynasm!(self.asm - ; mov Rq(r), QWORD val - ); + if val.as_bytes() == 0 { + self.zero_reg(reg); + } else { + match reg { + GPR::Rq(r) => { + let val = val.as_bytes(); + if (val as u64) <= u32::max_value() as u64 { + dynasm!(self.asm + ; mov Rd(r), val as i32 + ); + } else { + dynasm!(self.asm + ; mov Rq(r), QWORD val + ); + } + } + GPR::Rx(r) => { + let temp = self.block_state.regs.take(I64); + self.immediate_to_reg(temp, val); + dynasm!(self.asm + ; movq Rx(r), Rq(temp.rq().unwrap()) + ); + self.block_state.regs.release(temp); } - } - GPR::Rx(r) => { - let temp = self.block_state.regs.take(I64); - self.immediate_to_reg(temp, val); - dynasm!(self.asm - ; movq Rx(r), Rq(temp.rq().unwrap()) - ); - self.block_state.regs.release(temp); } } } @@ -1990,10 +1996,22 @@ impl Context<'_, M> { self.block_state.depth = cc.stack_depth; } - load!(load8, Rb, NONE); - load!(load16, Rw, NONE); - load!(load32, Rd, movd); - load!(load64, Rq, movq); + load!(i32_load, GPRType::Rq, Rd, movd, mov, DWORD); + load!(i64_load, GPRType::Rq, Rq, movq, mov, QWORD); + load!(f32_load, GPRType::Rx, Rd, movd, mov, DWORD); + load!(f64_load, GPRType::Rx, Rq, movq, mov, QWORD); + + load!(i32_load8_u, GPRType::Rq, Rd, NONE, movzx, BYTE); + load!(i32_load8_s, GPRType::Rq, Rd, NONE, movsx, BYTE); + load!(i32_load16_u, GPRType::Rq, Rd, NONE, movzx, WORD); + load!(i32_load16_s, GPRType::Rq, Rd, NONE, movsx, WORD); + + load!(i64_load8_u, GPRType::Rq, Rq, NONE, movzx, BYTE); + load!(i64_load8_s, GPRType::Rq, Rq, NONE, movsx, BYTE); + load!(i64_load16_u, GPRType::Rq, Rq, NONE, movzx, WORD); + load!(i64_load16_s, GPRType::Rq, Rq, NONE, movsx, WORD); + load!(i64_load32_u, GPRType::Rq, Rd, movd, mov, DWORD); + load!(i64_load32_s, GPRType::Rq, Rq, NONE, movsxd, DWORD); store!(store8, Rb, NONE, DWORD); store!(store16, Rw, NONE, QWORD); @@ -2072,10 +2090,11 @@ impl Context<'_, M> { /// Puts this value into a register so that it can be efficiently read // TODO: We should allow choosing which reg type we want to allocate here (Rx/Rq) fn into_reg(&mut self, ty: impl Into>, val: ValueLocation) -> GPR { + let ty = ty.into(); match val { - ValueLocation::Reg(r) if ty.into().map(|t| t == r.type_()).unwrap_or(true) => r, + ValueLocation::Reg(r) if ty.map(|t| t == r.type_()).unwrap_or(true) => r, val => { - let scratch = self.block_state.regs.take(ty.into().unwrap_or(GPRType::Rq)); + let scratch = self.block_state.regs.take(ty.unwrap_or(GPRType::Rq)); self.copy_value(&val, &mut ValueLocation::Reg(scratch)); self.free_value(val); @@ -2155,6 +2174,82 @@ impl Context<'_, M> { unop!(i64_clz, lzcnt, Rq, u64, |a: u64| a.leading_zeros() as u64); unop!(i32_ctz, tzcnt, Rd, u32, u32::trailing_zeros); unop!(i64_ctz, tzcnt, Rq, u64, |a: u64| a.trailing_zeros() as u64); + + pub fn i32_extend_u(&mut self) { + let val = self.pop(); + + self.free_value(val); + let new_reg = self.block_state.regs.take(I64); + + let out = if let ValueLocation::Immediate(imm) = val { + self.block_state.regs.release(new_reg); + ValueLocation::Immediate((imm.as_i32().unwrap() as u32 as u64).into()) + } else { + match val { + ValueLocation::Reg(GPR::Rx(rxreg)) => { + dynasm!(self.asm + ; movd Rd(new_reg.rq().unwrap()), Rx(rxreg) + ); + } + ValueLocation::Reg(GPR::Rq(rqreg)) => { + dynasm!(self.asm + ; mov Rd(new_reg.rq().unwrap()), Rd(rqreg) + ); + } + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + + dynasm!(self.asm + ; mov Rd(new_reg.rq().unwrap()), [rsp + offset] + ); + } + _ => unreachable!(), + } + + ValueLocation::Reg(new_reg) + }; + + self.push(out); + } + + pub fn i32_extend_s(&mut self) { + let val = self.pop(); + + self.free_value(val); + let new_reg = self.block_state.regs.take(I64); + + let out = if let ValueLocation::Immediate(imm) = val { + self.block_state.regs.release(new_reg); + ValueLocation::Immediate((imm.as_i32().unwrap() as i64).into()) + } else { + match val { + ValueLocation::Reg(GPR::Rx(rxreg)) => { + dynasm!(self.asm + ; movd Rd(new_reg.rq().unwrap()), Rx(rxreg) + ; movsxd Rq(new_reg.rq().unwrap()), Rd(new_reg.rq().unwrap()) + ); + } + ValueLocation::Reg(GPR::Rq(rqreg)) => { + dynasm!(self.asm + ; movsxd Rq(new_reg.rq().unwrap()), Rd(rqreg) + ); + } + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + + dynasm!(self.asm + ; movsxd Rq(new_reg.rq().unwrap()), DWORD [rsp + offset] + ); + } + _ => unreachable!(), + } + + ValueLocation::Reg(new_reg) + }; + + self.push(out); + } + unop!(i32_popcnt, popcnt, Rd, u32, u32::count_ones); unop!(i64_popcnt, popcnt, Rq, u64, |a: u64| a.count_ones() as u64); @@ -2260,7 +2355,11 @@ impl Context<'_, M> { divisor: ValueLocation, quotient: ValueLocation, do_div: impl FnOnce(&mut Self, ValueLocation), - ) -> (ValueLocation, ValueLocation, Option) { + ) -> ( + ValueLocation, + ValueLocation, + impl Iterator + Clone + 'module, + ) { let divisor = if ValueLocation::Reg(RAX) == divisor { let new_reg = self.block_state.regs.take(I32); self.copy_value(&divisor, &mut ValueLocation::Reg(new_reg)); @@ -2274,6 +2373,7 @@ impl Context<'_, M> { self.free_value(quotient); let should_save_rax = !self.block_state.regs.is_free(RAX); + let should_save_rdx = !self.block_state.regs.is_free(RDX); if let ValueLocation::Reg(r) = quotient { self.block_state.regs.mark_used(r); @@ -2289,16 +2389,42 @@ impl Context<'_, M> { None }; - do_div(self, divisor); + let saved_rdx = if should_save_rdx { + let new_reg = self.block_state.regs.take(I32); + dynasm!(self.asm + ; mov Rq(new_reg.rq().unwrap()), rdx + ); + Some(new_reg) + } else { + None + }; - (divisor, ValueLocation::Reg(RAX), saved_rax) + dynasm!(self.asm + ; cdq + ); + + do_div(self, divisor); + self.block_state.regs.mark_used(RAX); + + ( + divisor, + ValueLocation::Reg(RAX), + saved_rax + .map(|s| (s, RAX)) + .into_iter() + .chain(saved_rdx.map(|s| (s, RDX))), + ) } fn i32_full_div_u( &mut self, divisor: ValueLocation, quotient: ValueLocation, - ) -> (ValueLocation, ValueLocation, Option) { + ) -> ( + ValueLocation, + ValueLocation, + impl Iterator + Clone + 'module, + ) { self.i32_full_div(divisor, quotient, |this, divisor| match divisor { ValueLocation::Stack(offset) => { let offset = this.adjusted_offset(offset); @@ -2319,7 +2445,11 @@ impl Context<'_, M> { &mut self, divisor: ValueLocation, quotient: ValueLocation, - ) -> (ValueLocation, ValueLocation, Option) { + ) -> ( + ValueLocation, + ValueLocation, + impl Iterator + Clone + 'module, + ) { self.i32_full_div(divisor, quotient, |this, divisor| match divisor { ValueLocation::Stack(offset) => { let offset = this.adjusted_offset(offset); @@ -2336,6 +2466,14 @@ impl Context<'_, M> { }) } + fn cleanup_gprs(&mut self, gprs: impl Iterator) { + for (src, dst) in gprs { + self.copy_value(&ValueLocation::Reg(src), &mut ValueLocation::Reg(dst)); + self.block_state.regs.release(src); + self.block_state.regs.mark_used(dst); + } + } + // TODO: Fast div using mul for constant divisor? It looks like LLVM doesn't do that for us when // emitting Wasm. pub fn i32_div_u(&mut self) { @@ -2355,15 +2493,10 @@ impl Context<'_, M> { return; } - let (div, rem, saved_rax) = self.i32_full_div_u(divisor, quotient); + let (div, rem, saved) = self.i32_full_div_u(divisor, quotient); self.free_value(rem); - - if let Some(saved) = saved_rax { - self.copy_value(&ValueLocation::Reg(saved), &mut ValueLocation::Reg(RAX)); - self.block_state.regs.release(saved); - self.block_state.regs.mark_used(RAX); - } + self.cleanup_gprs(saved); self.push(div); } @@ -2384,21 +2517,23 @@ impl Context<'_, M> { return; } - let (div, rem, saved_rax) = self.i32_full_div_u(divisor, quotient); + let (div, rem, saved) = self.i32_full_div_u(divisor, quotient); self.free_value(div); - let rem = if let Some(saved) = saved_rax { - let new_gpr = self.block_state.regs.take(I32); - self.copy_value(&ValueLocation::Reg(RAX), &mut ValueLocation::Reg(new_gpr)); - self.copy_value(&ValueLocation::Reg(saved), &mut ValueLocation::Reg(RAX)); - self.block_state.regs.release(saved); - ValueLocation::Reg(new_gpr) + let rem = if saved.clone().any(|(_, dst)| dst == RAX) { + let new = self.block_state.regs.take(I32); + dynasm!(self.asm + ; mov Rq(new.rq().unwrap()), rax + ); + new } else { - rem + RAX }; - self.push(rem); + self.cleanup_gprs(saved); + + self.push(ValueLocation::Reg(rem)); } pub fn i32_rem_s(&mut self) { @@ -2415,21 +2550,23 @@ impl Context<'_, M> { return; } - let (div, rem, saved_rax) = self.i32_full_div_s(divisor, quotient); + let (div, rem, saved) = self.i32_full_div_s(divisor, quotient); self.free_value(div); - let rem = if let Some(saved) = saved_rax { - let new_gpr = self.block_state.regs.take(I32); - self.copy_value(&ValueLocation::Reg(RAX), &mut ValueLocation::Reg(new_gpr)); - self.copy_value(&ValueLocation::Reg(saved), &mut ValueLocation::Reg(RAX)); - self.block_state.regs.release(saved); - ValueLocation::Reg(new_gpr) + let rem = if saved.clone().any(|(_, dst)| dst == RAX) { + let new = self.block_state.regs.take(I32); + dynasm!(self.asm + ; mov Rq(new.rq().unwrap()), rax + ); + new } else { - rem + RAX }; - self.push(rem); + self.cleanup_gprs(saved); + + self.push(ValueLocation::Reg(rem)); } // TODO: Fast div using mul for constant divisor? It looks like LLVM doesn't do that for us when @@ -2451,15 +2588,10 @@ impl Context<'_, M> { return; } - let (div, rem, saved_rax) = self.i32_full_div_s(divisor, quotient); - + let (div, rem, saved) = self.i32_full_div_s(divisor, quotient); self.free_value(rem); - if let Some(saved) = saved_rax { - self.copy_value(&ValueLocation::Reg(saved), &mut ValueLocation::Reg(RAX)); - self.block_state.regs.release(saved); - self.block_state.regs.mark_used(RAX); - } + self.cleanup_gprs(saved); self.push(div); } @@ -2467,106 +2599,124 @@ impl Context<'_, M> { // `i32_mul` needs to be separate because the immediate form of the instruction // has a different syntax to the immediate form of the other instructions. pub fn i32_mul(&mut self) { - let op0 = self.pop(); - let op1 = self.pop(); + let right = self.pop(); + let left = self.pop(); - if let Some(i1) = op1.immediate() { - if let Some(i0) = op0.immediate() { + if let Some(right) = right.immediate() { + if let Some(left) = left.immediate() { self.push(ValueLocation::Immediate( - i32::wrapping_mul(i1.as_i32().unwrap(), i0.as_i32().unwrap()).into(), + i32::wrapping_mul(right.as_i32().unwrap(), left.as_i32().unwrap()).into(), )); return; } } - let (op1, op0) = match op1 { - ValueLocation::Reg(_) => (self.into_temp_reg(I32, op1), op0), + let (left, right) = match left { + ValueLocation::Reg(_) => (left, right), _ => { - if op0.immediate().is_some() { - (self.into_temp_reg(I32, op1), op0) + if right.immediate().is_some() { + (left, right) } else { - (self.into_temp_reg(I32, op0), op1) + (right, left) } } }; - match op0 { + let out = match right { ValueLocation::Reg(reg) => { + let left = self.into_temp_reg(I32, left); dynasm!(self.asm - ; imul Rd(op1.rq().unwrap()), Rd(reg.rq().unwrap()) + ; imul Rd(left.rq().unwrap()), Rd(reg.rq().unwrap()) ); + left } ValueLocation::Stack(offset) => { let offset = self.adjusted_offset(offset); + + let left = self.into_temp_reg(I32, left); dynasm!(self.asm - ; imul Rd(op1.rq().unwrap()), [rsp + offset] + ; imul Rd(left.rq().unwrap()), [rsp + offset] ); + left } ValueLocation::Immediate(i) => { + let left = self.into_reg(I32, left); + self.block_state.regs.release(left); + let new_reg = self.block_state.regs.take(I32); dynasm!(self.asm - ; imul Rd(op1.rq().unwrap()), Rd(op1.rq().unwrap()), i.as_i32().unwrap() + ; imul Rd(new_reg.rq().unwrap()), Rd(left.rq().unwrap()), i.as_i32().unwrap() ); + new_reg } - } + }; - self.push(ValueLocation::Reg(op1)); - self.free_value(op0); + self.push(ValueLocation::Reg(out)); + self.free_value(right); } // `i64_mul` needs to be separate because the immediate form of the instruction // has a different syntax to the immediate form of the other instructions. pub fn i64_mul(&mut self) { - let op0 = self.pop(); - let op1 = self.pop(); + let right = self.pop(); + let left = self.pop(); - if let Some(i1) = op1.imm_i64() { - if let Some(i0) = op0.imm_i64() { - self.block_state - .stack - .push(ValueLocation::Immediate(i64::wrapping_mul(i1, i0).into())); + if let Some(right) = right.immediate() { + if let Some(left) = left.immediate() { + self.push(ValueLocation::Immediate( + i64::wrapping_mul(right.as_i64().unwrap(), left.as_i64().unwrap()).into(), + )); return; } } - let (op1, op0) = match op1 { - ValueLocation::Reg(_) => (self.into_temp_reg(I64, op1), op0), + let (left, right) = match left { + ValueLocation::Reg(_) => (left, right), _ => { - if op0.immediate().is_some() { - (self.into_temp_reg(I64, op1), op0) + if right.immediate().is_some() { + (left, right) } else { - (self.into_temp_reg(I64, op0), op1) + (right, left) } } }; - match op0 { + let out = match right { ValueLocation::Reg(reg) => { + let left = self.into_temp_reg(I64, left); dynasm!(self.asm - ; imul Rq(op1.rq().unwrap()), Rq(reg.rq().unwrap()) + ; imul Rq(left.rq().unwrap()), Rq(reg.rq().unwrap()) ); + left } ValueLocation::Stack(offset) => { let offset = self.adjusted_offset(offset); + + let left = self.into_temp_reg(I64, left); dynasm!(self.asm - ; imul Rq(op1.rq().unwrap()), [rsp + offset] + ; imul Rq(left.rq().unwrap()), [rsp + offset] ); + left } ValueLocation::Immediate(i) => { - let i = i.as_int().unwrap(); + let left = self.into_reg(I64, left); + self.block_state.regs.release(left); + let new_reg = self.block_state.regs.take(I64); + + let i = i.as_i64().unwrap(); if let Some(i) = i.try_into() { dynasm!(self.asm - ; imul Rq(op1.rq().unwrap()), Rq(op1.rq().unwrap()), i + ; imul Rq(new_reg.rq().unwrap()), Rq(left.rq().unwrap()), i ); } else { - unimplemented!(concat!( - "Unsupported `imul` with large 64-bit immediate operand" - )); + unimplemented!(); } - } - } - self.push(ValueLocation::Reg(op1)); - self.free_value(op0); + new_reg + } + }; + + self.push(ValueLocation::Reg(out)); + self.free_value(right); } pub fn select(&mut self) { @@ -2642,6 +2792,20 @@ impl Context<'_, M> { self.push(ValueLocation::Immediate(imm)); } + pub fn memory_size(&mut self) { + let tmp = self.block_state.regs.take(I32); + + // 16 is log2(64KiB as bytes) + dynasm!(self.asm + ; mov Rd(tmp.rq().unwrap()), [ + rdi + self.module_context.offset_of_memory_len() as i32 + ] + ; shr Rd(tmp.rq().unwrap()), 16 + ); + + self.push(ValueLocation::Reg(tmp)); + } + // TODO: Use `ArrayVec`? // TODO: This inefficiently duplicates registers but it's not really possible // to double up stack space right now. diff --git a/src/disassemble.rs b/src/disassemble.rs index e846d12341..0977c016ea 100644 --- a/src/disassemble.rs +++ b/src/disassemble.rs @@ -1,5 +1,5 @@ +use crate::error::Error; use capstone::prelude::*; -use error::Error; use std::fmt::Write; #[allow(dead_code)] diff --git a/src/function_body.rs b/src/function_body.rs index a7c1efb06a..6ae0efff42 100644 --- a/src/function_body.rs +++ b/src/function_body.rs @@ -79,9 +79,17 @@ where L: Hash + Clone + Eq, Operator: std::fmt::Display, { + fn drop_elements(stack: &mut Vec, depths: std::ops::RangeInclusive) { + let real_range = + stack.len() - 1 - *depths.end() as usize..=stack.len() - 1 - *depths.start() as usize; + + stack.drain(real_range); + } + let func_type = session.module_context.defined_func_type(func_idx); let mut body = body.into_iter().peekable(); + let module_context = &*session.module_context; let ctx = &mut session.new_context(func_idx); let params = func_type @@ -241,7 +249,7 @@ where } } Operator::BrIf { then, else_ } => { - let (then_block, else_block) = blocks.pair_mut(&then, &else_); + let (then_block, else_block) = blocks.pair_mut(&then.target, &else_.target); // TODO: If actual_num_callers == num_callers then we can remove this block from the hashmap. // This frees memory and acts as a kind of verification that `num_callers` is set // correctly. It doesn't help for loops and block ends generated from Wasm. @@ -251,9 +259,6 @@ where let then_block_parts = (then_block.is_next, then_block.label); let else_block_parts = (else_block.is_next, else_block.label); - // TODO: Use "compatible" cc - assert_eq!(then_block.params, else_block.params); - // TODO: The blocks should have compatible (one must be subset of other?) calling // conventions or else at least one must have no calling convention. This // should always be true for converting from WebAssembly AIUI. @@ -272,16 +277,37 @@ where ctx.pass_block_args(cc); } (ref mut then_cc @ None, ref mut else_cc @ None) => { + let max_params = then_block.params.max(else_block.params); let cc = if then_block_should_serialize_args { - Some(Left(ctx.serialize_args(then_block.params))) + Some(Left(ctx.serialize_args(max_params))) } else if else_block_should_serialize_args { - Some(Left(ctx.serialize_args(else_block.params))) + Some(Left(ctx.serialize_args(max_params))) } else { Some(Right(ctx.virtual_calling_convention())) }; - **then_cc = cc.clone(); - **else_cc = cc; + **then_cc = { + let mut cc = cc.clone(); + if let (Some(cc), Some(to_drop)) = (cc.as_mut(), then.to_drop.clone()) + { + match cc { + Left(cc) => drop_elements(&mut cc.arguments, to_drop), + Right(cc) => drop_elements(&mut cc.stack, to_drop), + } + } + cc + }; + **else_cc = { + let mut cc = cc; + if let (Some(cc), Some(to_drop)) = (cc.as_mut(), else_.to_drop.clone()) + { + match cc { + Left(cc) => drop_elements(&mut cc.arguments, to_drop), + Right(cc) => drop_elements(&mut cc.stack, to_drop), + } + } + cc + }; } _ => unimplemented!( "Can't pass different params to different sides of `br_if` yet" @@ -290,13 +316,13 @@ where }; match (then_block_parts, else_block_parts) { - ((true, _), (false, BrTarget::Label(else_))) => { + ((true, _), (false, else_)) => { ctx.br_if_false(else_, f); } - ((false, BrTarget::Label(then)), (true, _)) => { + ((false, then), (true, _)) => { ctx.br_if_true(then, f); } - ((false, BrTarget::Label(then)), (false, BrTarget::Label(else_))) => { + ((false, then), (false, else_)) => { ctx.br_if_true(then, f); ctx.br(else_); } @@ -307,16 +333,13 @@ where use itertools::Itertools; let (def, params) = { - let def = &blocks[&default]; - ( - if def.is_next { None } else { Some(def.label) }, - def.params, - ) + let def = &blocks[&default.target]; + (if def.is_next { None } else { Some(def.label) }, def.params) }; let target_labels = targets .iter() - .map(|target| blocks[target].label) + .map(|target| blocks[&target.target].label) .collect::>(); ctx.br_table(target_labels, def, |ctx| { @@ -324,7 +347,7 @@ where let mut max_num_callers = Some(0); for target in targets.iter().chain(std::iter::once(&default)).unique() { - let block = blocks.get_mut(target).unwrap(); + let block = blocks.get_mut(&target.target).unwrap(); block.actual_num_callers += 1; if block.calling_convention.is_some() { @@ -334,13 +357,15 @@ where if let Some(max) = max_num_callers { max_num_callers = block.num_callers.map(|n| max.max(n)); + } else { + max_num_callers = block.num_callers; } } if let Some(Left(cc)) = &cc { ctx.pass_block_args(cc); } - + let cc = cc.unwrap_or_else(|| if max_num_callers == Some(1) { Right(ctx.virtual_calling_convention()) @@ -350,8 +375,15 @@ where ); for target in targets.iter().chain(std::iter::once(&default)).unique() { - let block = blocks.get_mut(target).unwrap(); - block.calling_convention = Some(cc.clone()); + let block = blocks.get_mut(&target.target).unwrap(); + let mut cc = cc.clone(); + if let Some(to_drop) = target.to_drop.clone() { + match &mut cc { + Left(cc) => drop_elements(&mut cc.arguments, to_drop), + Right(cc) => drop_elements(&mut cc.stack, to_drop), + } + } + block.calling_convention = Some(cc); } }); } @@ -429,34 +461,77 @@ where Operator::Le(SF64) => ctx.f64_le(), Operator::Drop(range) => ctx.drop(range), Operator::Const(val) => ctx.const_(val), - Operator::Load8 { ty: _, memarg } => ctx.load8(GPRType::Rq, memarg.offset)?, - Operator::Load16 { ty: _, memarg } => ctx.load16(GPRType::Rq, memarg.offset)?, - Operator::Load { ty: ty @ I32, memarg } | Operator::Load { ty: ty @ F32, memarg } => ctx.load32(ty, memarg.offset)?, - Operator::Load { ty: ty @ I64, memarg } | Operator::Load { ty: ty @ F64, memarg } => ctx.load64(ty, memarg.offset)?, - Operator::Store8 { ty: _, memarg } => { - ctx.store8(memarg.offset)? - } - Operator::Store16 { ty: _, memarg } => { - ctx.store16(memarg.offset)? - } - Operator::Store32 { memarg } => { - ctx.store32(memarg.offset)? - } + Operator::I32WrapFromI64 => {} + Operator::Extend { + sign: Signedness::Unsigned, + } => ctx.i32_extend_u(), + Operator::Extend { + sign: Signedness::Signed, + } => ctx.i32_extend_s(), + Operator::Load8 { + ty: sint::U32, + memarg, + } => ctx.i32_load8_u(memarg.offset), + Operator::Load16 { + ty: sint::U32, + memarg, + } => ctx.i32_load16_u(memarg.offset), + Operator::Load8 { + ty: sint::I32, + memarg, + } => ctx.i32_load8_s(memarg.offset), + Operator::Load16 { + ty: sint::I32, + memarg, + } => ctx.i32_load16_s(memarg.offset), + Operator::Load8 { + ty: sint::U64, + memarg, + } => ctx.i64_load8_u(memarg.offset), + Operator::Load16 { + ty: sint::U64, + memarg, + } => ctx.i64_load16_u(memarg.offset), + Operator::Load8 { + ty: sint::I64, + memarg, + } => ctx.i64_load8_s(memarg.offset), + Operator::Load16 { + ty: sint::I64, + memarg, + } => ctx.i64_load16_s(memarg.offset), + Operator::Load32 { + sign: Signedness::Unsigned, + memarg, + } => ctx.i64_load32_u(memarg.offset), + Operator::Load32 { + sign: Signedness::Signed, + memarg, + } => ctx.i64_load32_s(memarg.offset), + Operator::Load { ty: I32, memarg } => ctx.i32_load(memarg.offset), + Operator::Load { ty: F32, memarg } => ctx.f32_load(memarg.offset), + Operator::Load { ty: I64, memarg } => ctx.i64_load(memarg.offset), + Operator::Load { ty: F64, memarg } => ctx.f64_load(memarg.offset), + Operator::Store8 { ty: _, memarg } => ctx.store8(memarg.offset), + Operator::Store16 { ty: _, memarg } => ctx.store16(memarg.offset), + Operator::Store32 { memarg } => ctx.store32(memarg.offset), Operator::Store { ty: I32, memarg } | Operator::Store { ty: F32, memarg } => { - ctx.store32(memarg.offset)? + ctx.store32(memarg.offset) } Operator::Store { ty: I64, memarg } | Operator::Store { ty: F64, memarg } => { - ctx.store64(memarg.offset)? + ctx.store64(memarg.offset) } Operator::Select => { ctx.select(); } + Operator::MemorySize { reserved: _ } => { + ctx.memory_size(); + } Operator::Call { function_index } => { - let function_index = session - .module_context + let function_index = module_context .defined_func_index(function_index) .expect("We don't support host calls yet"); - let callee_ty = session.module_context.func_type(function_index); + let callee_ty = module_context.func_type(function_index); // TODO: this implementation assumes that this function is locally defined. ctx.call_direct( @@ -471,7 +546,7 @@ where } => { assert_eq!(table_index, 0); - let callee_ty = session.module_context.signature(type_index); + let callee_ty = module_context.signature(type_index); // TODO: this implementation assumes that this function is locally defined. diff --git a/src/lib.rs b/src/lib.rs index d431826bce..42bbd88424 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -44,6 +44,6 @@ mod translate_sections; #[cfg(test)] mod tests; -pub use backend::CodeGenSession; -pub use function_body::translate_wasm as translate_function; -pub use module::{translate, ExecutableModule, ModuleContext, Signature, TranslatedModule}; +pub use crate::backend::CodeGenSession; +pub use crate::function_body::translate_wasm as translate_function; +pub use crate::module::{translate, ExecutableModule, ModuleContext, Signature, TranslatedModule}; diff --git a/src/microwasm.rs b/src/microwasm.rs index 0c8b54512a..af31a2c791 100644 --- a/src/microwasm.rs +++ b/src/microwasm.rs @@ -1,5 +1,4 @@ use crate::module::{ModuleContext, SigType, Signature}; -use cranelift_codegen::ir::Signature as CraneliftSignature; use smallvec::SmallVec; use std::{ convert::TryFrom, @@ -310,8 +309,8 @@ impl TryFrom for SignlessType { #[derive(Debug, Clone)] pub struct BrTable { - pub targets: Vec>, - pub default: BrTarget, + pub targets: Vec>, + pub default: BrTargetDrop, } #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] @@ -347,6 +346,12 @@ impl BrTarget { } } +impl From for BrTarget { + fn from(other: L) -> Self { + BrTarget::Label(other) + } +} + impl fmt::Display for BrTarget { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { @@ -367,6 +372,40 @@ impl fmt::Display for BrTarget<&str> { } } +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub struct BrTargetDrop { + pub target: BrTarget, + pub to_drop: Option>, +} + +impl From> for BrTargetDrop { + fn from(other: BrTarget) -> Self { + BrTargetDrop { + target: other, + to_drop: None, + } + } +} + +impl fmt::Display for BrTargetDrop +where + BrTarget: fmt::Display, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if let Some(drop) = &self.to_drop { + write!( + f, + "({}, drop {}..={})", + self.target, + drop.start(), + drop.end() + ) + } else { + write!(f, "{}", self.target) + } + } +} + // TODO: Explicit VmCtx? #[derive(Debug, Clone)] pub enum Operator