#![allow(dead_code)] // for now

use microwasm::{BrTarget, SignlessType, Type, F32, F64, I32, I64};

use self::registers::*;
use dynasmrt::x64::Assembler;
use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, ExecutableBuffer};
use error::Error;
use microwasm::{Ieee32, Ieee64, Value};
use module::{ModuleContext, RuntimeFunc};
use std::{
    iter::{self, FromIterator},
    mem,
    ops::RangeInclusive,
};

/// Size of a pointer on the target in bytes.
const WORD_SIZE: u32 = 8;

type RegId = u8;

#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)]
pub enum GPR {
    Rq(RegId),
    Rx(RegId),
}

#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)]
pub enum GPRType {
    Rq,
    Rx,
}

impl From<SignlessType> for GPRType {
    fn from(other: SignlessType) -> GPRType {
        match other {
            I32 | I64 => GPRType::Rq,
            F32 | F64 => GPRType::Rx,
        }
    }
}

impl From<SignlessType> for Option<GPRType> {
    fn from(other: SignlessType) -> Self {
        Some(other.into())
    }
}

impl GPR {
    fn type_(&self) -> GPRType {
        match self {
            GPR::Rq(_) => GPRType::Rq,
            GPR::Rx(_) => GPRType::Rx,
        }
    }

    fn rq(self) -> Option<RegId> {
        match self {
            GPR::Rq(r) => Some(r),
            GPR::Rx(_) => None,
        }
    }

    fn rx(self) -> Option<RegId> {
        match self {
            GPR::Rx(r) => Some(r),
            GPR::Rq(_) => None,
        }
    }
}

pub fn arg_locs(types: impl IntoIterator<Item = SignlessType>) -> Vec<CCLoc> {
    let types = types.into_iter();
    let mut out = Vec::with_capacity(types.size_hint().0);
    // TODO: VmCtx is in the first register
    let mut int_gpr_iter = INTEGER_ARGS_IN_GPRS.into_iter();
    let mut float_gpr_iter = FLOAT_ARGS_IN_GPRS.into_iter();
    let mut stack_idx = 0;

    for ty in types {
        match ty {
            I32 | I64 => out.push(int_gpr_iter.next().map(|&r| CCLoc::Reg(r)).unwrap_or_else(
                || {
                    let out = CCLoc::Stack(stack_idx);
                    stack_idx += 1;
                    out
                },
            )),
            F32 | F64 => out.push(
                float_gpr_iter
                    .next()
                    .map(|&r| CCLoc::Reg(r))
                    .expect("Float args on stack not yet supported"),
            ),
        }
    }

    out
}

pub fn ret_locs(types: impl IntoIterator<Item = SignlessType>) -> Vec<CCLoc> {
    let types = types.into_iter();
    let mut out = Vec::with_capacity(types.size_hint().0);
    // TODO: VmCtx is in the first register
    let mut int_gpr_iter = INTEGER_RETURN_GPRS.into_iter();
    let mut float_gpr_iter = FLOAT_RETURN_GPRS.into_iter();

    for ty in types {
        match ty {
            I32 | I64 => out.push(CCLoc::Reg(
                *int_gpr_iter
                    .next()
                    .expect("We don't support stack returns yet"),
            )),
            F32 | F64 => out.push(CCLoc::Reg(
                *float_gpr_iter
                    .next()
                    .expect("We don't support stack returns yet"),
            )),
        }
    }

    out
}
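// A minimal illustrative check of the assignment rules above (added as a
// sketch, not part of the original backend; the module name is ours): the
// first integer argument lands in RSI because RDI is reserved for the VmCtx
// pointer, and the sixth integer argument spills to the stack.
#[cfg(test)]
mod calling_convention_sketch {
    use super::*;

    #[test]
    fn integer_args_fill_registers_then_spill() {
        let locs = arg_locs(vec![I32, I64, I32, I32, I32, I32]);
        assert_eq!(locs[0], CCLoc::Reg(RSI));
        assert_eq!(locs[4], CCLoc::Reg(R9));
        // Only five integer registers are available, so the sixth argument
        // goes to the first stack slot.
        assert_eq!(locs[5], CCLoc::Stack(0));
    }

    #[test]
    fn float_args_use_xmm_registers() {
        let locs = arg_locs(vec![F32, F64]);
        assert_eq!(locs, vec![CCLoc::Reg(XMM0), CCLoc::Reg(XMM1)]);
    }
}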
#[derive(Debug, Copy, Clone)]
struct GPRs {
    /// A bitset of free registers: a set bit means the register with that
    /// index is available to be taken.
    bits: u16,
}

impl GPRs {
    fn new() -> Self {
        Self { bits: 0 }
    }
}

pub mod registers {
    use super::{RegId, GPR};

    pub mod rq {
        use super::RegId;

        pub const RAX: RegId = 0;
        pub const RCX: RegId = 1;
        pub const RDX: RegId = 2;
        pub const RBX: RegId = 3;
        pub const RSP: RegId = 4;
        pub const RBP: RegId = 5;
        pub const RSI: RegId = 6;
        pub const RDI: RegId = 7;
        pub const R8: RegId = 8;
        pub const R9: RegId = 9;
        pub const R10: RegId = 10;
        pub const R11: RegId = 11;
        pub const R12: RegId = 12;
        pub const R13: RegId = 13;
        pub const R14: RegId = 14;
        pub const R15: RegId = 15;
    }

    pub const RAX: GPR = GPR::Rq(self::rq::RAX);
    pub const RCX: GPR = GPR::Rq(self::rq::RCX);
    pub const RDX: GPR = GPR::Rq(self::rq::RDX);
    pub const RBX: GPR = GPR::Rq(self::rq::RBX);
    pub const RSP: GPR = GPR::Rq(self::rq::RSP);
    pub const RBP: GPR = GPR::Rq(self::rq::RBP);
    pub const RSI: GPR = GPR::Rq(self::rq::RSI);
    pub const RDI: GPR = GPR::Rq(self::rq::RDI);
    pub const R8: GPR = GPR::Rq(self::rq::R8);
    pub const R9: GPR = GPR::Rq(self::rq::R9);
    pub const R10: GPR = GPR::Rq(self::rq::R10);
    pub const R11: GPR = GPR::Rq(self::rq::R11);
    pub const R12: GPR = GPR::Rq(self::rq::R12);
    pub const R13: GPR = GPR::Rq(self::rq::R13);
    pub const R14: GPR = GPR::Rq(self::rq::R14);
    pub const R15: GPR = GPR::Rq(self::rq::R15);
    pub const XMM0: GPR = GPR::Rx(0);
    pub const XMM1: GPR = GPR::Rx(1);
    pub const XMM2: GPR = GPR::Rx(2);
    pub const XMM3: GPR = GPR::Rx(3);
    pub const XMM4: GPR = GPR::Rx(4);
    pub const XMM5: GPR = GPR::Rx(5);
    pub const XMM6: GPR = GPR::Rx(6);
    pub const XMM7: GPR = GPR::Rx(7);
    pub const XMM8: GPR = GPR::Rx(8);
    pub const XMM9: GPR = GPR::Rx(9);
    pub const XMM10: GPR = GPR::Rx(10);
    pub const XMM11: GPR = GPR::Rx(11);
    pub const XMM12: GPR = GPR::Rx(12);
    pub const XMM13: GPR = GPR::Rx(13);
    pub const XMM14: GPR = GPR::Rx(14);
    pub const XMM15: GPR = GPR::Rx(15);

    pub const NUM_GPRS: u8 = 16;
}

/// Debug helper called from generated code by `asm_println!`.
extern "sysv64" fn println(len: u64, args: *const u8) {
    println!("{}", unsafe {
        std::str::from_utf8_unchecked(std::slice::from_raw_parts(args, len as usize))
    });
}

#[allow(unused_macros)]
macro_rules! asm_println {
    ($asm:expr) => {asm_println!($asm,)};
    ($asm:expr, $($args:tt)*) => {{
        use std::mem;

        let mut args = format!($($args)*).into_bytes();
        let len = args.len();
        let ptr = args.as_mut_ptr();
        // Deliberately leak the formatted string: the generated code may run
        // long after this scope has ended.
        mem::forget(args);

        dynasm!($asm
            ; push rdi
            ; push rsi
            ; push rdx
            ; push rcx
            ; push r8
            ; push r9
            ; push r10
            ; push r11

            ; mov rax, QWORD println as *const u8 as i64
            ; mov rdi, QWORD len as i64
            ; mov rsi, QWORD ptr as i64

            // The SysV ABI requires a 16-byte-aligned stack pointer at the
            // call, so push one extra word first if we're misaligned.
            ; test rsp, 0b1111
            ; jnz >with_adjusted_stack_ptr

            ; call rax
            ; jmp >pop_rest

            ; with_adjusted_stack_ptr:
            ; push 1
            ; call rax
            // Discard the alignment padding pushed above.
            ; pop r11

            ; pop_rest:
            ; pop r11
            ; pop r10
            ; pop r9
            ; pop r8
            ; pop rcx
            ; pop rdx
            ; pop rsi
            ; pop rdi
        );
    }}
}

impl GPRs {
    fn take(&mut self) -> RegId {
        // The lowest set bit is the lowest-numbered free register.
        let lz = self.bits.trailing_zeros();
        debug_assert!(lz < 16, "ran out of free GPRs");
        let gpr = lz as RegId;
        self.mark_used(gpr);
        gpr
    }

    fn mark_used(&mut self, gpr: RegId) {
        self.bits &= !(1 << gpr as u16);
    }

    fn release(&mut self, gpr: RegId) {
        debug_assert!(
            !self.is_free(gpr),
            "released register {} was already free",
            gpr
        );
        self.bits |= 1 << gpr;
    }

    fn free_count(&self) -> u32 {
        self.bits.count_ones()
    }

    fn is_free(&self, gpr: RegId) -> bool {
        (self.bits & (1 << gpr)) != 0
    }
}
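// An illustrative sketch (not part of the original code; the module name is
// ours) of the bitset semantics above: a set bit means "free", `take` hands
// out the lowest-numbered free register, and `release` returns one to the
// pool.
#[cfg(test)]
mod gprs_sketch {
    use super::*;

    #[test]
    fn take_returns_lowest_free_register() {
        let mut gprs = GPRs::new(); // bits = 0: everything in use
        gprs.release(rq::RAX); // bits = 0b001: RAX free
        gprs.release(rq::RDX); // bits = 0b101: RAX and RDX free
        assert_eq!(gprs.take(), rq::RAX); // lowest set bit wins
        assert_eq!(gprs.take(), rq::RDX);
        assert_eq!(gprs.free_count(), 0);
    }
}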
#[derive(Debug, Copy, Clone)]
pub struct Registers {
    /// Registers at 64 bits and below (al/ah/ax/eax/rax, for example)
    scratch_64: (GPRs, [u8; NUM_GPRS as usize]),
    /// Registers at 128 bits (xmm0, for example)
    scratch_128: (GPRs, [u8; NUM_GPRS as usize]),
}

impl Default for Registers {
    fn default() -> Self {
        Self::new()
    }
}

impl Registers {
    pub fn new() -> Self {
        let mut result = Self {
            scratch_64: (GPRs::new(), [1; NUM_GPRS as _]),
            scratch_128: (GPRs::new(), [1; NUM_GPRS as _]),
        };

        // Give ourselves a few scratch registers to work with, for now.
        for &scratch in SCRATCH_REGS {
            result.release(scratch);
        }

        result
    }

    fn scratch_counts_mut(&mut self, gpr: GPR) -> (u8, &mut (GPRs, [u8; NUM_GPRS as usize])) {
        match gpr {
            GPR::Rq(r) => (r, &mut self.scratch_64),
            GPR::Rx(r) => (r, &mut self.scratch_128),
        }
    }

    fn scratch_counts(&self, gpr: GPR) -> (u8, &(GPRs, [u8; NUM_GPRS as usize])) {
        match gpr {
            GPR::Rq(r) => (r, &self.scratch_64),
            GPR::Rx(r) => (r, &self.scratch_128),
        }
    }

    pub fn mark_used(&mut self, gpr: GPR) {
        let (gpr, scratch_counts) = self.scratch_counts_mut(gpr);
        scratch_counts.0.mark_used(gpr);
        scratch_counts.1[gpr as usize] += 1;
    }

    pub fn num_usages(&self, gpr: GPR) -> u8 {
        let (gpr, scratch_counts) = self.scratch_counts(gpr);
        scratch_counts.1[gpr as usize]
    }

    pub fn take(&mut self, ty: impl Into<GPRType>) -> GPR {
        let (mk_gpr, scratch_counts) = match ty.into() {
            GPRType::Rq => (GPR::Rq as fn(_) -> _, &mut self.scratch_64),
            GPRType::Rx => (GPR::Rx as fn(_) -> _, &mut self.scratch_128),
        };

        let out = scratch_counts.0.take();
        scratch_counts.1[out as usize] += 1;
        mk_gpr(out)
    }

    pub fn release(&mut self, gpr: GPR) {
        let (gpr, scratch_counts) = self.scratch_counts_mut(gpr);
        let c = &mut scratch_counts.1[gpr as usize];
        *c -= 1;
        if *c == 0 {
            scratch_counts.0.release(gpr);
        }
    }

    pub fn is_free(&self, gpr: GPR) -> bool {
        let (gpr, scratch_counts) = self.scratch_counts(gpr);
        scratch_counts.0.is_free(gpr)
    }

    pub fn free_64(&self) -> u32 {
        self.scratch_64.0.free_count()
    }

    pub fn free_128(&self) -> u32 {
        self.scratch_128.0.free_count()
    }
}
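// A sketch (added for illustration; the module name is ours) of the
// usage-count behaviour above: a register taken once and then marked used a
// second time must be released twice before it returns to the free pool.
#[cfg(test)]
mod registers_sketch {
    use super::*;

    #[test]
    fn register_freed_only_after_last_release() {
        let mut regs = Registers::new();
        let r = regs.take(GPRType::Rq);
        regs.mark_used(r); // second usage of the same register
        assert_eq!(regs.num_usages(r), 2);
        regs.release(r);
        assert!(!regs.is_free(r)); // one usage still outstanding
        regs.release(r);
        assert!(regs.is_free(r));
    }
}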
#[derive(Debug, Clone)]
pub struct CallingConvention {
    stack_depth: StackDepth,
    arguments: Vec<CCLoc>,
}

impl CallingConvention {
    pub fn function_start(args: impl IntoIterator<Item = CCLoc>) -> Self {
        CallingConvention {
            // We start and return the function with stack depth 1 since we must
            // allow space for the saved return address.
            stack_depth: StackDepth(1),
            arguments: Vec::from_iter(args),
        }
    }
}

// TODO: Combine this with `ValueLocation`?
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum CCLoc {
    /// Value exists in a register.
    Reg(GPR),
    /// Value exists on the stack.
    Stack(i32),
}

// TODO: Allow pushing condition codes to stack? We'd have to immediately
//       materialise them into a register if anything is pushed above them.
/// Describes the location of a value.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ValueLocation {
    /// Value exists in a register.
    Reg(GPR),
    /// Value exists on the stack. Note that this offset is from the rsp as it
    /// was when we entered the function.
    Stack(i32),
    /// Value is a literal.
    Immediate(Value),
}

impl From<CCLoc> for ValueLocation {
    fn from(other: CCLoc) -> Self {
        match other {
            CCLoc::Reg(r) => ValueLocation::Reg(r),
            CCLoc::Stack(o) => ValueLocation::Stack(o),
        }
    }
}

impl ValueLocation {
    fn immediate(self) -> Option<Value> {
        match self {
            ValueLocation::Immediate(i) => Some(i),
            _ => None,
        }
    }

    fn imm_i32(self) -> Option<i32> {
        self.immediate().and_then(Value::as_i32)
    }

    fn imm_i64(self) -> Option<i64> {
        self.immediate().and_then(Value::as_i64)
    }

    fn imm_f32(self) -> Option<Ieee32> {
        self.immediate().and_then(Value::as_f32)
    }

    fn imm_f64(self) -> Option<Ieee64> {
        self.immediate().and_then(Value::as_f64)
    }
}

// TODO: This assumes the System-V calling convention only.
// In the System-V calling convention the first six integer arguments are
// passed in registers and any remaining arguments are passed on the stack.
// RDI is reserved for the VmCtx pointer, which leaves five integer registers
// for Wasm arguments.
const INTEGER_ARGS_IN_GPRS: &[GPR] = &[RSI, RDX, RCX, R8, R9];
const INTEGER_RETURN_GPRS: &[GPR] = &[RAX, RDX];
const FLOAT_ARGS_IN_GPRS: &[GPR] = &[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7];
const FLOAT_RETURN_GPRS: &[GPR] = &[XMM0, XMM1];
// List of scratch registers taken from https://wiki.osdev.org/System_V_ABI
const SCRATCH_REGS: &[GPR] = &[
    RSI, RDX, RCX, R8, R9, RAX, R10, R11, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8,
    XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
];
const VMCTX: RegId = rq::RDI;

#[must_use]
#[derive(Debug, Clone)]
pub struct FunctionEnd {
    should_generate_epilogue: bool,
}

pub struct CodeGenSession<'a, M> {
    assembler: Assembler,
    pub module_context: &'a M,
    func_starts: Vec<(Option<AssemblyOffset>, DynamicLabel)>,
}

impl<'a, M> CodeGenSession<'a, M> {
    pub fn new(func_count: u32, module_context: &'a M) -> Self {
        let mut assembler = Assembler::new().unwrap();
        let func_starts = iter::repeat_with(|| (None, assembler.new_dynamic_label()))
            .take(func_count as usize)
            .collect::<Vec<_>>();

        CodeGenSession {
            assembler,
            func_starts,
            module_context,
        }
    }

    pub fn new_context(&mut self, func_idx: u32) -> Context<'_, M> {
        {
            let func_start = &mut self.func_starts[func_idx as usize];

            // At this point we know the exact start address of this function. Save it
            // and define the dynamic label at this location.
            func_start.0 = Some(self.assembler.offset());
            self.assembler.dynamic_label(func_start.1);
        }

        Context {
            asm: &mut self.assembler,
            func_starts: &self.func_starts,
            labels: Default::default(),
            block_state: Default::default(),
            module_context: self.module_context,
        }
    }

    pub fn into_translated_code_section(self) -> Result<TranslatedCodeSection, Error> {
        let exec_buf = self
            .assembler
            .finalize()
            .map_err(|_asm| Error::Assembler("assembler error".to_owned()))?;
        let func_starts = self
            .func_starts
            .iter()
            .map(|(offset, _)| offset.unwrap())
            .collect::<Vec<_>>();

        Ok(TranslatedCodeSection {
            exec_buf,
            func_starts,
            // TODO
            relocatable_accesses: vec![],
        })
    }
}

#[derive(Debug)]
struct RelocateAddress {
    reg: Option<GPR>,
    imm: usize,
}

#[derive(Debug)]
struct RelocateAccess {
    position: AssemblyOffset,
    dst_reg: GPR,
    address: RelocateAddress,
}

#[derive(Debug)]
pub struct UninitializedCodeSection(TranslatedCodeSection);

#[derive(Debug)]
pub struct TranslatedCodeSection {
    exec_buf: ExecutableBuffer,
    func_starts: Vec<AssemblyOffset>,
    relocatable_accesses: Vec<RelocateAccess>,
}

impl TranslatedCodeSection {
    pub fn func_start(&self, idx: usize) -> *const u8 {
        let offset = self.func_starts[idx];
        self.exec_buf.ptr(offset)
    }

    pub fn func_range(&self, idx: usize) -> std::ops::Range<usize> {
        let end = self
            .func_starts
            .get(idx + 1)
            .map(|i| i.0)
            .unwrap_or(self.exec_buf.len());

        self.func_starts[idx].0..end
    }

    pub fn funcs<'a>(&'a self) -> impl Iterator<Item = std::ops::Range<usize>> + 'a {
        (0..self.func_starts.len()).map(move |i| self.func_range(i))
    }

    pub fn buffer(&self) -> &[u8] {
        &*self.exec_buf
    }

    pub fn disassemble(&self) {
        ::disassemble::disassemble(&*self.exec_buf).unwrap();
    }
}

/// A value on the logical stack. The logical stack is the value stack as it
/// is visible to the WebAssembly, whereas the physical stack is the stack as
/// it exists on the machine (i.e. as offsets in memory relative to `rsp`).
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum StackValue {
    /// This value has a "real" location, either in a register, on the stack,
    /// in an immediate, etc.
    Value(ValueLocation),
    /// This value is on the physical stack and so should be accessed
    /// with the `pop` instruction.
    // TODO: This complicates a lot of our code, it'd be great if we could get rid of it.
    Pop,
}
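// A small sketch (added for illustration; the module name is ours): `Pop`
// has no addressable location, so `location()` (defined below) yields
// `None`, while a register-resident value reports its `ValueLocation`.
#[cfg(test)]
mod stack_value_sketch {
    use super::*;

    #[test]
    fn pop_has_no_addressable_location() {
        assert_eq!(StackValue::Pop.location(), None);
        assert_eq!(
            StackValue::Value(ValueLocation::Reg(RAX)).location(),
            Some(ValueLocation::Reg(RAX))
        );
    }
}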
impl StackValue {
    /// Returns the location that this value can be accessed at, if possible.
    /// If this value is `Pop`, it can only be accessed by popping the
    /// physical stack, so this function returns `None`.
    ///
    /// Of course, we could calculate the location of the value on the
    /// physical stack, but that would be unnecessary computation for
    /// our use cases.
    fn location(&self) -> Option<ValueLocation> {
        match *self {
            StackValue::Value(loc) => Some(loc),
            StackValue::Pop => None,
        }
    }
}

#[derive(Debug, Default, Clone)]
pub struct BlockState {
    stack: Stack,
    depth: StackDepth,
    regs: Registers,
}

type Stack = Vec<StackValue>;

pub enum MemoryAccessMode {
    /// This is slower than using `Unchecked` mode, but works in
    /// any scenario (the most important scenario being when we're
    /// running on a system that can't address much more memory than
    /// the Wasm itself can).
    Checked,
    /// This means that checks are _not emitted by the compiler_!
    /// If you're using WebAssembly to run untrusted code, you
    /// _must_ delegate bounds checking somehow (probably by
    /// allocating 2^33 bytes of memory with the second half set
    /// to unreadable/unwritable/unexecutable).
    Unchecked,
}

// TODO: We can share one trap/constant for all functions by reusing this struct
#[derive(Default)]
struct Labels {
    trap: Option