diff --git a/Cargo.toml b/Cargo.toml
index a6e3028e0a..2881b13085 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,6 +9,7 @@ keywords = ["webassembly", "wasm", "compile", "compiler", "jit"]
 publish = false
 
 [dependencies]
+arrayvec = "0.4"
 dynasm = "0.2.3"
 dynasmrt = "0.2.3"
 wasmparser = "0.21.6"
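[Note: illustrative sketch, not part of the diff. `arrayvec` 0.4 parameterizes `ArrayVec` by its backing array, so the capacity is a compile-time constant and the collection never touches the heap. Assuming 0.4's `ArrayVec<[T; N]>` form and its `try_push` API, the `Vec`-like usage this change relies on looks like this:]

extern crate arrayvec;

use arrayvec::ArrayVec;

fn main() {
    // The capacity is part of the type, so loop bounds over an `ArrayVec`
    // are hard constants rather than runtime-dependent lengths.
    let mut regs: ArrayVec<[u8; 6]> = ArrayVec::new();
    regs.push(1);
    regs.push(2);
    assert_eq!(regs.capacity(), 6);
    // A full `ArrayVec` cannot grow; `try_push` reports the failure instead
    // of reallocating the way `Vec::push` would.
    assert!(regs.try_push(3).is_ok());
    for r in &regs {
        println!("{}", r);
    }
}

[This is what justifies treating iteration over `register_arguments` below as effectively constant-time: its loop bound is `ARGS_IN_GPRS.len()`, a small compile-time constant. The `const_slice_len` nightly feature enabled in src/lib.rs is what allows calling `.len()` on that constant slice in the array type.]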
diff --git a/src/backend.rs b/src/backend.rs
index 1c1dc144ec..f01eeecdf1 100644
--- a/src/backend.rs
+++ b/src/backend.rs
@@ -1,5 +1,9 @@
 #![allow(dead_code)] // for now
 
+// Since we want this to be linear-time, we never want to iterate over a `Vec`. `ArrayVec`s have a hard,
+// small maximum size and so we can consider iterating over them to be essentially constant-time.
+use arrayvec::ArrayVec;
+
 use dynasmrt::x64::Assembler;
 use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, ExecutableBuffer};
 use error::Error;
@@ -166,7 +170,7 @@ impl CodeGenSession {
             asm: &mut self.assembler,
             func_starts: &self.func_starts,
             block_state: Default::default(),
-            locals: Default::default(),
+            original_locals: Default::default(),
         }
     }
 
@@ -222,7 +226,7 @@ impl Value {
     fn location(&self, locals: &Locals) -> ValueLocation {
         match *self {
-            Value::Local(loc) => local_location(locals, loc),
+            Value::Local(loc) => locals.get(loc),
             Value::Temp(reg) => ValueLocation::Reg(reg),
             Value::Immediate(reg) => ValueLocation::Immediate(reg),
         }
     }
@@ -240,7 +244,7 @@ enum StackValue {
 impl StackValue {
     fn location(&self, locals: &Locals) -> Option<ValueLocation> {
         match *self {
-            StackValue::Local(loc) => Some(local_location(locals, loc)),
+            StackValue::Local(loc) => Some(locals.get(loc)),
             StackValue::Immediate(i) => Some(ValueLocation::Immediate(i)),
             StackValue::Temp(reg) => Some(ValueLocation::Reg(reg)),
             StackValue::Pop => None,
@@ -248,10 +252,30 @@
     }
 }
 
-#[derive(Default)]
+#[derive(Default, Clone)]
 struct Locals {
-    // TODO: Use `ArrayVec` since we have a hard maximum (the number of registers)
-    locs: Vec<ValueLocation>,
+    register_arguments: ArrayVec<[ValueLocation; ARGS_IN_GPRS.len()]>,
+    num_stack_args: u32,
+    num_local_stack_slots: u32,
+}
+
+impl Locals {
+    fn get(&self, index: u32) -> ValueLocation {
+        self.register_arguments
+            .get(index as usize)
+            .cloned()
+            .unwrap_or_else(|| {
+                let stack_index = index - self.register_arguments.len() as u32;
+                if stack_index < self.num_stack_args {
+                    ValueLocation::Stack(
+                        ((stack_index + self.num_local_stack_slots + 2) * WORD_SIZE) as _,
+                    )
+                } else {
+                    let stack_index = stack_index - self.num_stack_args;
+                    ValueLocation::Stack((stack_index * WORD_SIZE) as _)
+                }
+            })
+    }
 }
 
 #[derive(Default, Clone)]
@@ -259,22 +283,16 @@
 pub struct BlockState {
     stack: Stack,
     pub depth: StackDepth,
     regs: Registers,
+    /// This is the _current_ locals, since we can shuffle them about during function calls.
+    /// We will restore this to be the same state as the `Locals` in `Context` at the end
+    /// of a block.
+    locals: Locals,
 }
 
 fn adjusted_offset(ctx: &mut Context, offset: i32) -> i32 {
     (ctx.block_state.depth.0 * WORD_SIZE) as i32 + offset
 }
 
-fn local_location(locals: &Locals, index: u32) -> ValueLocation {
-    locals
-        .locs
-        .get(index as usize)
-        .cloned()
-        .unwrap_or(ValueLocation::Stack(
-            (index.saturating_sub(ARGS_IN_GPRS.len() as u32) * WORD_SIZE) as _,
-        ))
-}
-
 type Stack = Vec<StackValue>;
 
@@ -282,7 +300,7 @@ pub struct Context<'a> {
     pub asm: &'a mut Assembler,
     func_starts: &'a Vec<(Option<AssemblyOffset>, DynamicLabel)>,
     /// Each push and pop on the value stack increments or decrements this value by 1 respectively.
     block_state: BlockState,
-    locals: Locals,
+    original_locals: Locals,
 }
 
 impl<'a> Context<'a> {}
@@ -323,42 +341,36 @@ pub fn current_block_state(ctx: &Context) -> BlockState {
 }
 
 pub fn return_from_block(ctx: &mut Context) {
-    if let Some(loc) = ctx.block_state.stack.last().unwrap().location(&ctx.locals) {
-        match loc {
-            ValueLocation::Reg(r) => {
-                dynasm!(ctx.asm
-                    ; push Rq(r)
-                );
-            }
-            ValueLocation::Stack(offset) => {
-                let offset = adjusted_offset(ctx, offset);
-                dynasm!(ctx.asm
-                    ; push QWORD [rsp + offset]
-                );
-            }
-            ValueLocation::Immediate(imm) => {
-                dynasm!(ctx.asm
-                    ; push imm
-                );
-            }
-        }
-    }
-    // If `location` is `None` then we don't need to do anything.
+    free_return_register(ctx, 1);
+    pop_i32_into(ctx, ValueLocation::Reg(RAX))
 }
 
 pub fn push_block_return_value(ctx: &mut Context) {
-    ctx.block_state.depth.reserve(1);
-    ctx.block_state.stack.push(StackValue::Pop);
+    ctx.block_state.stack.push(StackValue::Temp(RAX));
 }
 
-pub fn restore_block_state(ctx: &mut Context, block_state: BlockState) {
-    ctx.block_state = block_state;
+pub fn end_block(ctx: &mut Context, parent_block_state: BlockState) {
+    restore_locals(ctx);
+    ctx.block_state = parent_block_state;
 }
 
 pub fn push_return_value(ctx: &mut Context) {
    ctx.block_state.stack.push(StackValue::Temp(RAX));
 }
 
+fn restore_locals(ctx: &mut Context) {
+    for (src, dst) in ctx
+        .block_state
+        .locals
+        .register_arguments
+        .clone()
+        .iter()
+        .zip(&ctx.original_locals.register_arguments.clone())
+    {
+        copy_value(ctx, *src, *dst);
+    }
+}
+
 fn push_i32(ctx: &mut Context, value: Value) {
     let stack_loc = match value {
         Value::Local(loc) => StackValue::Local(loc),
@@ -421,7 +433,8 @@ fn pop_i32_into(ctx: &mut Context, dst: ValueLocation) {
         }
     };
 
-    let src = to_move.location(&ctx.locals);
+    let src = to_move.location(&ctx.block_state.locals);
+    println!("{:?}, {:?}", src, dst);
     copy_value(ctx, src, dst);
     free_val(ctx, to_move);
 }
@@ -435,7 +448,7 @@ fn free_val(ctx: &mut Context, val: Value) {
 
 /// Puts this value into a register so that it can be efficiently read
 fn into_reg(ctx: &mut Context, val: Value) -> GPR {
-    match val.location(&ctx.locals) {
+    match val.location(&ctx.block_state.locals) {
         ValueLocation::Stack(offset) => {
             let offset = adjusted_offset(ctx, offset);
             let scratch = ctx.block_state.regs.take_scratch_gpr();
@@ -462,7 +475,7 @@ fn into_temp_reg(ctx: &mut Context, val: Value) -> GPR {
         Value::Local(loc) => {
             let scratch = ctx.block_state.regs.take_scratch_gpr();
 
-            match local_location(&ctx.locals, loc) {
+            match ctx.block_state.locals.get(loc) {
                 ValueLocation::Stack(offset) => {
                     let offset = adjusted_offset(ctx, offset);
                     dynasm!(ctx.asm
@@ -512,7 +525,7 @@
                 _ => (into_temp_reg(ctx, op0), op1),
             };
 
-            match op0.location(&ctx.locals) {
+            match op0.location(&ctx.block_state.locals) {
                 ValueLocation::Reg(reg) => {
                     dynasm!(ctx.asm
                         ; $instr Rd(op1), Rd(reg)
@@ -538,12 +551,14 @@ macro_rules! commutative_binop {
     }
 }
 
-commutative_binop!(i32_add, add, |a, b| a + b);
+commutative_binop!(i32_add, add, i32::wrapping_add);
 commutative_binop!(i32_and, and, |a, b| a & b);
 commutative_binop!(i32_or, or, |a, b| a | b);
 commutative_binop!(i32_xor, xor, |a, b| a ^ b);
-commutative_binop!(i32_mul, imul, |a, b| a * b);
+commutative_binop!(i32_mul, imul, i32::wrapping_mul);
 
+// `sub` is not commutative, so we have to handle it differently (we _must_ use the `op1`
+// temp register as the output)
 pub fn i32_sub(ctx: &mut Context) {
     let op0 = pop_i32(ctx);
     let op1 = pop_i32(ctx);
@@ -556,7 +571,7 @@ pub fn i32_sub(ctx: &mut Context) {
     }
 
     let op1 = into_temp_reg(ctx, op1);
-    match op0.location(&ctx.locals) {
+    match op0.location(&ctx.block_state.locals) {
         ValueLocation::Reg(reg) => {
             dynasm!(ctx.asm
                 ; sub Rd(op1), Rd(reg)
@@ -588,8 +603,18 @@ pub fn get_local_i32(ctx: &mut Context, local_idx: u32) {
 // back into registers here.
 pub fn set_local_i32(ctx: &mut Context, local_idx: u32) {
     let val = pop_i32(ctx);
-    let val_loc = val.location(&ctx.locals);
-    let dst_loc = local_location(&ctx.locals, local_idx);
+    let val_loc = val.location(&ctx.block_state.locals);
+    let dst_loc = ctx.original_locals.get(local_idx);
+
+    if let Some(cur) = ctx
+        .block_state
+        .locals
+        .register_arguments
+        .get_mut(local_idx as usize)
+    {
+        *cur = dst_loc;
+    }
+
     copy_value(ctx, val_loc, dst_loc);
     free_val(ctx, val);
 }
@@ -604,7 +629,7 @@ pub fn relop_eq_i32(ctx: &mut Context) {
     let result = ctx.block_state.regs.take_scratch_gpr();
 
     if let Some(i) = left.immediate() {
-        match right.location(&ctx.locals) {
+        match right.location(&ctx.block_state.locals) {
             ValueLocation::Stack(offset) => {
                 let offset = adjusted_offset(ctx, offset);
                 dynasm!(ctx.asm
@@ -629,7 +654,7 @@ pub fn relop_eq_i32(ctx: &mut Context) {
         }
     } else {
         let lreg = into_reg(ctx, left);
-        match right.location(&ctx.locals) {
+        match right.location(&ctx.block_state.locals) {
             ValueLocation::Stack(offset) => {
                 let offset = adjusted_offset(ctx, offset);
                 dynasm!(ctx.asm
@@ -733,7 +758,7 @@ fn copy_value(ctx: &mut Context, src: ValueLocation, dst: ValueLocation) {
 
 #[must_use]
 pub struct CallCleanup {
-    restore_registers: Vec<GPR>,
+    restore_registers: ArrayVec<[GPR; SCRATCH_REGS.len()]>,
     stack_depth: i32,
 }
 
@@ -748,15 +773,16 @@ fn free_arg_registers(ctx: &mut Context, count: u32) {
         return;
     }
 
-    for i in 0..ctx.locals.locs.len() {
-        match ctx.locals.locs[i] {
+    // This is bound to the maximum size of the `ArrayVec` and so preserves linear runtime
+    for i in 0..ctx.block_state.locals.register_arguments.len() {
+        match ctx.block_state.locals.register_arguments[i] {
             ValueLocation::Reg(reg) => {
                 if ARGS_IN_GPRS.contains(&reg) {
                     let offset = adjusted_offset(ctx, (i as u32 * WORD_SIZE) as _);
                     dynasm!(ctx.asm
                         ; mov [rsp + offset], Rq(reg)
                     );
-                    ctx.locals.locs[i] = ValueLocation::Stack(offset);
+                    ctx.block_state.locals.register_arguments[i] = ValueLocation::Stack(offset);
                 }
             }
             _ => {}
@@ -770,7 +796,7 @@ fn free_return_register(ctx: &mut Context, count: u32) {
     }
 
     for stack_val in &mut ctx.block_state.stack {
-        match stack_val.location(&ctx.locals) {
+        match stack_val.location(&ctx.block_state.locals) {
             // For now it's impossible for a local to be in RAX but that might be
             // possible in the future, so we check both cases.
             Some(ValueLocation::Reg(RAX)) => {
@@ -787,8 +813,8 @@
 // TODO: Use `ArrayVec`?
 /// Saves volatile (i.e. caller-saved) registers before a function call, if they are used.
-fn save_volatile(ctx: &mut Context) -> Vec<GPR> {
-    let mut out = vec![];
+fn save_volatile(ctx: &mut Context) -> ArrayVec<[GPR; SCRATCH_REGS.len()]> {
+    let mut out = ArrayVec::new();
 
     // TODO: If there are no `StackValue::Pop`s that need to be popped
     // before we reach our `Temp` value, we can set the `StackValue`
@@ -811,11 +837,6 @@ fn save_volatile(ctx: &mut Context) -> ArrayVec<[GPR; SCRATCH_REGS.len()]> {
 fn pass_outgoing_args(ctx: &mut Context, arity: u32) -> CallCleanup {
     let num_stack_args = (arity as usize).saturating_sub(ARGS_IN_GPRS.len()) as i32;
 
-    let out = CallCleanup {
-        stack_depth: num_stack_args,
-        restore_registers: save_volatile(ctx),
-    };
-
     // We pop stack arguments first - arguments are RTL
     if num_stack_args > 0 {
         let size = num_stack_args * WORD_SIZE as i32;
@@ -847,7 +868,10 @@ fn pass_outgoing_args(ctx: &mut Context, arity: u32) -> CallCleanup {
         pop_i32_into(ctx, ValueLocation::Reg(*reg));
     }
 
-    out
+    CallCleanup {
+        stack_depth: num_stack_args,
+        restore_registers: save_volatile(ctx),
+    }
 }
 
 /// Frees up the stack space used for stack-passed arguments and restores the value
@@ -901,29 +925,23 @@ pub fn start_function(ctx: &mut Context, arguments: u32, locals: u32) {
     // Align stack slots to the nearest even number. This is required
     // by x86-64 ABI.
     let aligned_stack_slots = (locals + 1) & !1;
-    let framesize: i32 = aligned_stack_slots as i32 * WORD_SIZE as i32;
+    let frame_size: i32 = aligned_stack_slots as i32 * WORD_SIZE as i32;
 
-    ctx.locals.locs = reg_args
-        .iter()
-        .cloned()
-        .map(ValueLocation::Reg)
-        .chain(
-            (0..arguments.saturating_sub(ARGS_IN_GPRS.len() as _))
-                // We add 2 here because 1 stack slot is used for the stack pointer and another is
-                // used for the return address. It's a magic number but there's not really a way
-                // around this.
-                .map(|arg_i| ValueLocation::Stack(((arg_i + 2) * WORD_SIZE) as i32 + framesize)),
-        )
-        .collect();
+    ctx.original_locals.register_arguments =
+        reg_args.iter().cloned().map(ValueLocation::Reg).collect();
+    ctx.original_locals.num_stack_args = arguments.saturating_sub(ARGS_IN_GPRS.len() as _);
+    ctx.original_locals.num_local_stack_slots = locals;
+    ctx.block_state.locals = ctx.original_locals.clone();
 
     dynasm!(ctx.asm
         ; push rbp
         ; mov rbp, rsp
     );
 
-    if framesize > 0 {
+    // ctx.block_state.depth.reserve(aligned_stack_slots - locals);
+    if frame_size > 0 {
         dynasm!(ctx.asm
-            ; sub rsp, framesize
+            ; sub rsp, frame_size
         );
     }
 }
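[Note: illustrative sketch, not part of the diff. The constants `WORD_SIZE = 8` and a six-register SysV argument count are assumptions, and `stack_offset` is a hypothetical standalone mirror of the arithmetic that `Locals::get` and `start_function` above must agree on: stack-passed arguments live above the saved rbp and the return address, which is where the `+ 2` comes from, while ordinary locals start at the (depth-adjusted) stack pointer.]

const WORD_SIZE: u32 = 8;
const NUM_REG_ARGS: u32 = 6; // rdi, rsi, rdx, rcx, r8, r9 on SysV x86-64

// Offset for a local whose index is past the register arguments.
fn stack_offset(index: u32, num_stack_args: u32, num_local_stack_slots: u32) -> u32 {
    let stack_index = index - NUM_REG_ARGS;
    if stack_index < num_stack_args {
        // A stack-passed argument: skip the local stack slots plus the two
        // words holding the caller's saved rbp and the return address.
        (stack_index + num_local_stack_slots + 2) * WORD_SIZE
    } else {
        // An ordinary local: slot 0 sits at the bottom of the frame.
        (stack_index - num_stack_args) * WORD_SIZE
    }
}

fn main() {
    // The 7th local is the 1st stack-passed argument; with 4 local slots
    // it lives at (0 + 4 + 2) * 8 = 48 bytes above the adjusted rsp.
    assert_eq!(stack_offset(6, 2, 4), 48);
    // The 9th local is the 1st non-argument local, at the frame's bottom.
    assert_eq!(stack_offset(8, 2, 4), 0);
}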
diff --git a/src/function_body.rs b/src/function_body.rs
index d35bf3d05b..7a835fe4e2 100644
--- a/src/function_body.rs
+++ b/src/function_body.rs
@@ -158,7 +158,7 @@ pub fn translate(
             // 0 it will branch here.
             // After that reset stack depth to the value before entering `if` block.
             define_label(&mut ctx, if_not);
-            restore_block_state(&mut ctx, block_state.clone());
+            end_block(&mut ctx, block_state.clone());
 
             // Carry over the `end_label`, so it will be resolved when the corresponding `end`
             // is encountered.
@@ -199,7 +199,7 @@ pub fn translate(
                 prepare_return_value(&mut ctx);
             }
 
-            restore_block_state(&mut ctx, control_frame.block_state);
+            end_block(&mut ctx, control_frame.block_state);
             push_block_return_value(&mut ctx);
         }
         Operator::I32Eq => relop_eq_i32(&mut ctx),
diff --git a/src/lib.rs b/src/lib.rs
index ea1a4697d1..b725112611 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,8 +1,9 @@
-#![feature(plugin, test)]
+#![feature(plugin, test, const_slice_len)]
 #![plugin(dynasm)]
 
 extern crate test;
 
+extern crate arrayvec;
 extern crate capstone;
 extern crate failure;
 extern crate wasmparser;
diff --git a/src/tests.rs b/src/tests.rs
index d9368f10b7..5c17de5008 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -250,6 +250,7 @@ fn function_write_args_spill_to_stack() {
     assert_eq!(
         {
             let translated = translate_wat(code);
+            translated.disassemble();
             let out: u32 =
                 unsafe { translated.execute_func(0, (11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)) };
             out
@@ -330,6 +331,7 @@ fn fib() {
     }
 
     let translated = translate_wat(FIBONACCI);
+    translated.disassemble();
 
     for x in 0..30 {
         unsafe {
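[Note: illustrative, not part of the diff. The closures passed to `commutative_binop!` in src/backend.rs fold two immediate operands at compile time (judging from the macro's immediate-immediate case). Wasm's `i32.add` and `i32.mul` are defined to wrap on overflow, while Rust's plain `+` and `*` panic on overflow in debug builds, so folding something like `i32::MAX + 1` with `|a, b| a + b` could abort the compiler. The switch to `i32::wrapping_add`/`i32::wrapping_mul` matches wasm's semantics:]

fn main() {
    let a = i32::max_value();
    // `a + 1` here would panic in a debug build; wrapping_add wraps
    // around to i32::MIN, exactly as wasm's `i32.add` requires.
    assert_eq!(a.wrapping_add(1), i32::min_value());
    assert_eq!((-2i32).wrapping_mul(i32::max_value()), 2);
}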