From 1eebc65c9ebf1ca266e0e82baa312f6598c22153 Mon Sep 17 00:00:00 2001
From: Jef
Date: Fri, 11 Jan 2019 15:20:32 +0100
Subject: [PATCH] First baby steps to supporting memory accesses

Currently we don't actually sandbox the memory at all, so you can do
evil things like read and write the host's memory. We also don't yet
support growing memory, or the cranelift-compatible ABI that passes the
memory offset as an argument. We also always allocate the buffer
immediately when encountering a memory section. There is preliminary
support for translating with a placeholder buffer whose real offset can
be patched in later using relocations (returning a different output
type when doing so), but I haven't written the code that actually
applies the relocations, so that path doesn't work yet.
---
 src/backend.rs            | 2665 +++++++++++++++++++++----------------
 src/error.rs              |    6 +
 src/function_body.rs      |  191 +--
 src/lib.rs                |    2 +-
 src/module.rs             |   20 +-
 src/tests.rs              |   67 +-
 src/translate_sections.rs |   27 +-
 7 files changed, 1741 insertions(+), 1237 deletions(-)

diff --git a/src/backend.rs b/src/backend.rs
index 589709fd05..fe006265b7 100644
--- a/src/backend.rs
+++ b/src/backend.rs
@@ -57,7 +57,11 @@ impl GPRs {
     }

     fn release(&mut self, gpr: GPR) {
-        debug_assert!(!self.is_free(gpr), "released register was already free",);
+        debug_assert!(
+            !self.is_free(gpr),
+            "released register {} was already free",
+            gpr
+        );
         self.bits |= 1 << gpr;
     }

@@ -139,13 +143,93 @@ const ARGS_IN_GPRS: &[GPR] = &[RDI, RSI, RDX, RCX, R8, R9];
 // List of scratch registers taken from https://wiki.osdev.org/System_V_ABI
 const SCRATCH_REGS: &[GPR] = &[RAX, R10, R11];

-pub struct CodeGenSession {
-    assembler: Assembler,
-    func_starts: Vec<(Option<AssemblyOffset>, DynamicLabel)>,
+#[must_use]
+pub struct Function {
+    should_generate_epilogue: bool,
 }

-impl CodeGenSession {
+/// The memory for this module has already been allocated.
+pub struct HasMemory;
+/// This module has a memory section, but it has not yet been allocated. In this case
+/// we just generate dummy values that we can overwrite later.
+pub struct DummyMemory;
+/// This module has no memory section at all, and should error if a load or store is encountered.
+pub struct NoMemory;
+
+pub trait Memory {
+    type Ref: Clone;
+    type OutputCodeSection;
+    type Error;
+
+    fn output_from_code_section(section: TranslatedCodeSection) -> Self::OutputCodeSection;
+    fn offset(base: &Self::Ref, offset: u32) -> Result<i64, Self::Error>;
+}
+
+impl Memory for NoMemory {
+    type Ref = ();
+    type OutputCodeSection = TranslatedCodeSection;
+    type Error = Error;
+
+    fn output_from_code_section(section: TranslatedCodeSection) -> Self::OutputCodeSection {
+        section
+    }
+
+    fn offset(_: &(), _: u32) -> Result<i64, Self::Error> {
+        Err(Error::Input(format!(
+            "Unexpected load or store encountered - this module has no memory section!"
+        )))
+    }
+}
+
+impl Memory for HasMemory {
+    type Ref = *mut u8;
+    type OutputCodeSection = TranslatedCodeSection;
+    type Error = !;
+
+    fn output_from_code_section(section: TranslatedCodeSection) -> Self::OutputCodeSection {
+        section
+    }
+
+    fn offset(&base: &Self::Ref, offset: u32) -> Result<i64, Self::Error> {
+        Ok(base as i64 + offset as i64)
+    }
+}
+
+impl Memory for DummyMemory {
+    type Ref = ();
+    type OutputCodeSection = UninitializedCodeSection;
+    type Error = !;
+
+    fn output_from_code_section(section: TranslatedCodeSection) -> Self::OutputCodeSection {
+        UninitializedCodeSection(section)
+    }
+
+    fn offset(_: &(), _: u32) -> Result<i64, Self::Error> {
+        Ok(i64::max_value())
+    }
+}
+
+pub struct CodeGenSession<T: Memory> {
+    assembler: Assembler,
+    func_starts: Vec<(Option<AssemblyOffset>, DynamicLabel)>,
+    memory_base: T::Ref,
+    _phantom: std::marker::PhantomData<T>,
+}
+
+impl<T> CodeGenSession<T>
+where
+    T: Memory<Ref = ()>,
+{
     pub fn new(func_count: u32) -> Self {
+        Self::with_memory(func_count, ())
+    }
+}
+
+impl<T> CodeGenSession<T>
+where
+    T: Memory,
+{
+    pub fn with_memory(func_count: u32, memory_base: T::Ref) -> Self {
         let mut assembler = Assembler::new().unwrap();
         let func_starts = iter::repeat_with(|| (None, assembler.new_dynamic_label()))
             .take(func_count as usize)
@@ -154,10 +238,14 @@ impl CodeGenSession {
         CodeGenSession {
             assembler,
             func_starts,
+            memory_base,
+            _phantom: Default::default(),
         }
     }
+}

-    pub fn new_context(&mut self, func_idx: u32) -> Context {
+impl<T: Memory> CodeGenSession<T> {
+    pub fn new_context(&mut self, func_idx: u32) -> Context<T> {
         {
             let func_start = &mut self.func_starts[func_idx as usize];

@@ -169,12 +257,14 @@ impl CodeGenSession {

         Context {
             asm: &mut self.assembler,
+            memory_base: self.memory_base.clone(),
             func_starts: &self.func_starts,
             block_state: Default::default(),
+            _phantom: Default::default(),
         }
     }

-    pub fn into_translated_code_section(self) -> Result<TranslatedCodeSection, Error> {
+    pub fn into_translated_code_section(self) -> Result<T::OutputCodeSection, Error> {
         let exec_buf = self
             .assembler
             .finalize()
@@ -184,17 +274,36 @@ impl CodeGenSession {
             .iter()
             .map(|(offset, _)| offset.unwrap())
             .collect::<Vec<_>>();
-        Ok(TranslatedCodeSection {
+        Ok(T::output_from_code_section(TranslatedCodeSection {
             exec_buf,
             func_starts,
-        })
+            // TODO
+            relocatable_accesses: vec![],
+        }))
     }
 }

+#[derive(Debug)]
+struct RelocateAddress {
+    reg: Option<GPR>,
+    imm: usize,
+}
+
+#[derive(Debug)]
+struct RelocateAccess {
+    position: AssemblyOffset,
+    dst_reg: GPR,
+    address: RelocateAddress,
+}
+
+#[derive(Debug)]
+pub struct UninitializedCodeSection(TranslatedCodeSection);
+
 #[derive(Debug)]
 pub struct TranslatedCodeSection {
     exec_buf: ExecutableBuffer,
     func_starts: Vec<AssemblyOffset>,
+    relocatable_accesses: Vec<RelocateAccess>,
 }

 impl TranslatedCodeSection {
@@ -289,44 +398,41 @@ pub struct BlockState {
     return_register: Option<GPR>,
     regs: Registers,
     /// This is the _current_ locals, since we can shuffle them about during function calls.
-    /// We will restore this to be the same state as the `Locals` in `Context` at the end
+    /// We will restore this to be the same state as the `Locals` in `Context` at the end
     /// of a block.
     locals: Locals,
     parent_locals: Locals,
 }

-fn adjusted_offset(ctx: &mut Context, offset: i32) -> i32 {
-    (ctx.block_state.depth.0 * WORD_SIZE) as i32 + offset
-}
-
 type Stack = Vec<StackValue>;

-pub struct Context<'a> {
+pub enum MemoryAccessMode {
+    /// This is slower than using `Unchecked` mode, but works in
+    /// any scenario - the most important case being a system that
+    /// can't index more memory than the compiled Wasm can address.
+    Checked,
+    /// This means that checks are _not emitted by the compiler_!
+    /// If you're using WebAssembly to run untrusted code, you
+    /// _must_ delegate bounds checking somehow (probably by
+    /// allocating 2^33 bytes of memory with the second half set
+    /// to unreadable/unwritable/non-executable).
+    Unchecked,
+}
+
+pub struct Context<'a, T: Memory> {
     asm: &'a mut Assembler,
     func_starts: &'a Vec<(Option<AssemblyOffset>, DynamicLabel)>,
     /// Each push and pop on the value stack increments or decrements this value by 1 respectively.
     block_state: BlockState,
+    memory_base: T::Ref,
+    _phantom: std::marker::PhantomData<T>,
 }

-impl<'a> Context<'a> {}
-
 /// Label in code.
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 pub struct Label(DynamicLabel);

-/// Create a new undefined label.
-pub fn create_label(ctx: &mut Context) -> Label {
-    Label(ctx.asm.new_dynamic_label())
-}
-
-/// Define the given label at the current position.
-///
-/// Multiple labels can be defined at the same position. However, a label
-/// can be defined only once.
-pub fn define_label(ctx: &mut Context, label: Label) {
-    ctx.asm.dynamic_label(label.0);
-}
-
 /// Offset from starting value of SP counted in words.
 #[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
 pub struct StackDepth(u32);

@@ -341,744 +447,27 @@ impl StackDepth {
     }
 }

-fn expand_stack(ctx: &mut Context, by: u32) {
-    use std::iter;
-
-    if by == 0 {
-        return;
-    }
-
-    let new_stack_size = (ctx.block_state.stack_map.len() + by as usize).next_power_of_two();
-    let additional_elements = new_stack_size - ctx.block_state.stack_map.len();
-    ctx.block_state
-        .stack_map
-        .extend(iter::repeat(false).take(additional_elements));
-
-    dynasm!(ctx.asm
-        ; sub rsp, additional_elements as i32
-    );
-}
-
-// TODO: Make this generic over `Vec` or `ArrayVec`?
-fn stack_slots(ctx: &mut Context, count: u32) -> Vec<i32> {
-    let mut out = Vec::with_capacity(count as usize);
-
-    let offset_if_taken = |(i, is_taken): (usize, bool)| {
-        if !is_taken {
-            Some(i as i32 * WORD_SIZE as i32)
-        } else {
-            None
-        }
-    };
-
-    out.extend(
-        ctx.block_state
-            .stack_map
-            .iter()
-            .cloned()
-            .enumerate()
-            .filter_map(offset_if_taken),
-    );
-
-    let remaining = count as usize - out.len();
-
-    if remaining > 0 {
-        expand_stack(ctx, remaining as u32);
-        out.extend(
-            ctx.block_state
-                .stack_map
-                .iter()
-                .cloned()
-                .enumerate()
-                .filter_map(offset_if_taken),
-        );
-    }
-
-    out
-}
-
-fn stack_slot(ctx: &mut Context) -> i32 {
-    if let Some(pos) = ctx
-        .block_state
-        .stack_map
-        .iter()
-        .position(|is_taken| !is_taken)
-    {
-        ctx.block_state.stack_map[pos] = true;
-        pos as i32 * WORD_SIZE as i32
-    } else {
-        expand_stack(ctx, 1);
-        stack_slot(ctx)
-    }
-}
-
-// We use `put` instead of `pop` since with `BrIf` it's possible
-// that the block will continue after returning.
-pub fn return_from_block(ctx: &mut Context, arity: u32, is_function_end: bool) {
-    // This should just be an optimisation, passing `false` should always result
-    // in correct code.
- if !is_function_end { - restore_locals(ctx); - } - - if arity == 0 { - return; - } - - let stack_top = *ctx.block_state.stack.last().expect("Stack is empty"); - if let Some(reg) = ctx.block_state.return_register { - put_stack_val_into(ctx, stack_top, ValueLocation::Reg(reg)); - } else { - let out_reg = match stack_top { - StackValue::Temp(r) => r, - other => { - let new_scratch = ctx.block_state.regs.take_scratch_gpr(); - put_stack_val_into(ctx, other, ValueLocation::Reg(new_scratch)); - new_scratch - } - }; - - ctx.block_state.return_register = Some(out_reg); - } -} - -pub fn start_block(ctx: &mut Context) -> BlockState { - use std::mem; - - // OPTIMISATION: We cannot use the parent's stack values (it is disallowed by the spec) - // so we start a new stack, using `mem::replace` to ensure that we never - // clone or deallocate anything. - // - // I believe that it would be possible to cause a compiler bomb if we did - // not do this, since cloning iterates over the whole `Vec`. - let out_stack = mem::replace(&mut ctx.block_state.stack, vec![]); - let mut current_state = ctx.block_state.clone(); - current_state.stack = out_stack; - - ctx.block_state.parent_locals = ctx.block_state.locals.clone(); - ctx.block_state.return_register = None; - current_state -} - -// To start the next subblock of a block (for `if..then..else..end`). -// The only difference is that choices we made in the first subblock -// (for now only the return register) must be maintained in the next -// subblocks. -pub fn reset_block(ctx: &mut Context, parent_block_state: BlockState) { - let return_reg = ctx.block_state.return_register; - - ctx.block_state = parent_block_state; - - ctx.block_state.return_register = return_reg; -} - -pub fn end_block(ctx: &mut Context, parent_block_state: BlockState) { - // TODO: This should currently never be called, but is important for if we want to - // have a more complex stack spilling scheme. 
- debug_assert_eq!( - ctx.block_state.depth, parent_block_state.depth, - "Imbalanced pushes and pops" - ); - if ctx.block_state.depth != parent_block_state.depth { - dynasm!(ctx.asm - ; add rsp, ((ctx.block_state.depth.0 - parent_block_state.depth.0) * WORD_SIZE) as i32 - ); - } - - let return_reg = ctx.block_state.return_register; - ctx.block_state = parent_block_state; - - if let Some(reg) = return_reg { - ctx.block_state.regs.mark_used(reg); - ctx.block_state.stack.push(StackValue::Temp(reg)); - } -} - -fn restore_locals(ctx: &mut Context) { - for (src, dst) in ctx - .block_state - .locals - .register_arguments - .clone() - .iter() - .zip(&ctx.block_state.parent_locals.register_arguments.clone()) - { - copy_value(ctx, *src, *dst); - } -} - -fn push(ctx: &mut Context, value: Value) { - let stack_loc = match value { - Value::Local(loc) => StackValue::Local(loc), - Value::Immediate(i) => StackValue::Immediate(i), - Value::Temp(gpr) => { - if ctx.block_state.regs.free_scratch() >= 1 { - StackValue::Temp(gpr) - } else { - ctx.block_state.depth.reserve(1); - // TODO: Proper stack allocation scheme - dynasm!(ctx.asm - ; push Rq(gpr) - ); - ctx.block_state.regs.release_scratch_gpr(gpr); - StackValue::Pop - } - } - }; - - ctx.block_state.stack.push(stack_loc); -} - -fn pop(ctx: &mut Context) -> Value { - match ctx.block_state.stack.pop().expect("Stack is empty") { - StackValue::Local(loc) => Value::Local(loc), - StackValue::Immediate(i) => Value::Immediate(i), - StackValue::Temp(reg) => Value::Temp(reg), - StackValue::Pop => { - ctx.block_state.depth.free(1); - let gpr = ctx.block_state.regs.take_scratch_gpr(); - dynasm!(ctx.asm - ; pop Rq(gpr) - ); - Value::Temp(gpr) - } - } -} - -/// Warning: this _will_ pop the runtime stack, but will _not_ pop the compile-time -/// stack. It's specifically for mid-block breaks like `Br` and `BrIf`. 
-fn put_stack_val_into(ctx: &mut Context, val: StackValue, dst: ValueLocation) { - let to_move = match val { - StackValue::Local(loc) => Value::Local(loc), - StackValue::Immediate(i) => Value::Immediate(i), - StackValue::Temp(reg) => Value::Temp(reg), - StackValue::Pop => { - ctx.block_state.depth.free(1); - match dst { - ValueLocation::Reg(r) => dynasm!(ctx.asm - ; pop Rq(r) - ), - ValueLocation::Stack(offset) => { - let offset = adjusted_offset(ctx, offset); - dynasm!(ctx.asm - ; pop QWORD [rsp + offset] - ) - } - ValueLocation::Immediate(_) => panic!("Tried to write to literal!"), - } - - // DO NOT DO A `copy_val` - return; - } - }; - - let src = to_move.location(&ctx.block_state.locals); - copy_value(ctx, src, dst); - if src != dst { - free_value(ctx, to_move); - } -} - -pub fn drop(ctx: &mut Context) { - match ctx.block_state.stack.pop().expect("Stack is empty") { - StackValue::Pop => { - ctx.block_state.depth.free(1); - dynasm!(ctx.asm - ; add rsp, WORD_SIZE as i32 - ); - } - StackValue::Temp(gpr) => free_value(ctx, Value::Temp(gpr)), - StackValue::Local(loc) => free_value(ctx, Value::Local(loc)), - StackValue::Immediate(imm) => free_value(ctx, Value::Immediate(imm)), - } -} - -fn pop_into(ctx: &mut Context, dst: ValueLocation) { - let val = ctx.block_state.stack.pop().expect("Stack is empty"); - put_stack_val_into(ctx, val, dst); -} - -fn free_value(ctx: &mut Context, val: Value) { - match val { - Value::Temp(reg) => ctx.block_state.regs.release_scratch_gpr(reg), - Value::Local(_) | Value::Immediate(_) => {} - } -} - -/// Puts this value into a register so that it can be efficiently read -fn into_reg(ctx: &mut Context, val: Value) -> GPR { - match val.location(&ctx.block_state.locals) { - ValueLocation::Stack(offset) => { - let offset = adjusted_offset(ctx, offset); - let scratch = ctx.block_state.regs.take_scratch_gpr(); - dynasm!(ctx.asm - ; mov Rq(scratch), [rsp + offset] - ); - scratch - } - ValueLocation::Immediate(i) => { - let scratch = ctx.block_state.regs.take_scratch_gpr(); - immediate_to_reg(ctx, scratch, i); - scratch - } - ValueLocation::Reg(reg) => reg, - } -} - -/// Puts this value into a temporary register so that operations -/// on that register don't write to a local. -fn into_temp_reg(ctx: &mut Context, val: Value) -> GPR { - match val { - Value::Local(loc) => { - let scratch = ctx.block_state.regs.take_scratch_gpr(); - - match ctx.block_state.locals.get(loc) { - ValueLocation::Stack(offset) => { - let offset = adjusted_offset(ctx, offset); - dynasm!(ctx.asm - ; mov Rq(scratch), [rsp + offset] - ); - } - ValueLocation::Reg(reg) => { - dynasm!(ctx.asm - ; mov Rq(scratch), Rq(reg) - ); - } - ValueLocation::Immediate(_) => { - panic!("We shouldn't be storing immediates in locals for now") - } - } - - scratch - } - Value::Immediate(i) => { - let scratch = ctx.block_state.regs.take_scratch_gpr(); - - immediate_to_reg(ctx, scratch, i); - - scratch - } - Value::Temp(reg) => reg, - } -} - -macro_rules! 
commutative_binop_i32 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - pub fn $name(ctx: &mut Context) { - let op0 = pop(ctx); - let op1 = pop(ctx); - - if let Some(i1) = op1.immediate() { - if let Some(i0) = op0.immediate() { - ctx.block_state.stack.push(StackValue::Immediate($const_fallback(i1 as i32, i0 as i32) as _)); - return; - } - } - - let (op1, op0) = match op1 { - Value::Temp(reg) => (reg, op0), - _ => if op0.immediate().is_some() { - (into_temp_reg(ctx, op1), op0) - } else { - (into_temp_reg(ctx, op0), op1) - } - }; - - match op0.location(&ctx.block_state.locals) { - ValueLocation::Reg(reg) => { - dynasm!(ctx.asm - ; $instr Rd(op1), Rd(reg) - ); - } - ValueLocation::Stack(offset) => { - let offset = adjusted_offset(ctx, offset); - dynasm!(ctx.asm - ; $instr Rd(op1), [rsp + offset] - ); - } - ValueLocation::Immediate(i) => { - dynasm!(ctx.asm - ; $instr Rd(op1), i as i32 - ); - } - } - - ctx.block_state.stack.push(StackValue::Temp(op1)); - free_value(ctx, op0); - } - } -} - -macro_rules! commutative_binop_i64 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - pub fn $name(ctx: &mut Context) { - let op0 = pop(ctx); - let op1 = pop(ctx); - - if let Some(i1) = op1.immediate() { - if let Some(i0) = op0.immediate() { - ctx.block_state.stack.push(StackValue::Immediate($const_fallback(i1, i0))); - return; - } - } - - let (op1, op0) = match op1 { - Value::Temp(reg) => (reg, op0), - _ => if op0.immediate().is_some() { - (into_temp_reg(ctx, op1), op0) - } else { - (into_temp_reg(ctx, op0), op1) - } - }; - - match op0.location(&ctx.block_state.locals) { - ValueLocation::Reg(reg) => { - dynasm!(ctx.asm - ; $instr Rq(op1), Rq(reg) - ); - } - ValueLocation::Stack(offset) => { - let offset = adjusted_offset(ctx, offset); - dynasm!(ctx.asm - ; $instr Rq(op1), [rsp + offset] - ); - } - ValueLocation::Immediate(i) => { - if let Some(i) = i.try_into() { - dynasm!(ctx.asm - ; $instr Rq(op1), i - ); - } else { - let scratch = ctx.block_state.regs.take_scratch_gpr(); - - dynasm!(ctx.asm - ; mov Rq(scratch), QWORD i - ; $instr Rq(op1), Rq(scratch) - ); - - ctx.block_state.regs.release_scratch_gpr(scratch); - } - } - } - - ctx.block_state.stack.push(StackValue::Temp(op1)); - free_value(ctx, op0); - } - } -} - -// TODO: Use `inc`/`dec` where possible? 
-commutative_binop_i32!(i32_add, add, |a, b| (a as i32).wrapping_add(b as i32)); -commutative_binop_i32!(i32_and, and, |a, b| a & b); -commutative_binop_i32!(i32_or, or, |a, b| a | b); -commutative_binop_i32!(i32_xor, xor, |a, b| a ^ b); - -commutative_binop_i64!(i64_add, add, i64::wrapping_add); -commutative_binop_i64!(i64_and, and, |a, b| a & b); -commutative_binop_i64!(i64_or, or, |a, b| a | b); -commutative_binop_i64!(i64_xor, xor, |a, b| a ^ b); - -trait TryInto { - fn try_into(self) -> Option; -} - -impl TryInto for i64 { - fn try_into(self) -> Option { - let min = i32::min_value() as i64; - let max = i32::max_value() as i64; - - if self > min && self < max { - Some(self as i32) - } else { - None - } - } -} - -// `sub` is not commutative, so we have to handle it differently (we _must_ use the `op1` -// temp register as the output) -pub fn i64_sub(ctx: &mut Context) { - let op0 = pop(ctx); - let op1 = pop(ctx); - - if let Some(i1) = op1.immediate() { - if let Some(i0) = op0.immediate() { - ctx.block_state.stack.push(StackValue::Immediate(i1 - i0)); - return; - } - } - - let op1 = into_temp_reg(ctx, op1); - match op0.location(&ctx.block_state.locals) { - ValueLocation::Reg(reg) => { - dynasm!(ctx.asm - ; sub Rq(op1), Rq(reg) - ); - } - ValueLocation::Stack(offset) => { - let offset = adjusted_offset(ctx, offset); - dynasm!(ctx.asm - ; sub Rq(op1), [rsp + offset] - ); - } - ValueLocation::Immediate(i) => { - if let Some(i) = i.try_into() { - dynasm!(ctx.asm - ; sub Rq(op1), i - ); - } else { - unimplemented!(concat!( - "Unsupported `sub` with large 64-bit immediate operand" - )); - } - } - } - - ctx.block_state.stack.push(StackValue::Temp(op1)); - free_value(ctx, op0); -} - -// `i64_mul` needs to be seperate because the immediate form of the instruction -// has a different syntax to the immediate form of the other instructions. 
-pub fn i64_mul(ctx: &mut Context) { - let op0 = pop(ctx); - let op1 = pop(ctx); - - if let Some(i1) = op1.immediate() { - if let Some(i0) = op0.immediate() { - ctx.block_state - .stack - .push(StackValue::Immediate(i64::wrapping_mul(i1, i0))); - return; - } - } - - let (op1, op0) = match op1 { - Value::Temp(reg) => (reg, op0), - _ => { - if op0.immediate().is_some() { - (into_temp_reg(ctx, op1), op0) - } else { - (into_temp_reg(ctx, op0), op1) - } - } - }; - - match op0.location(&ctx.block_state.locals) { - ValueLocation::Reg(reg) => { - dynasm!(ctx.asm - ; imul Rq(op1), Rq(reg) - ); - } - ValueLocation::Stack(offset) => { - let offset = adjusted_offset(ctx, offset); - dynasm!(ctx.asm - ; imul Rq(op1), [rsp + offset] - ); - } - ValueLocation::Immediate(i) => { - if let Some(i) = i.try_into() { - dynasm!(ctx.asm - ; imul Rq(op1), Rq(op1), i - ); - } else { - unimplemented!(concat!( - "Unsupported `imul` with large 64-bit immediate operand" - )); - } - } - } - - ctx.block_state.stack.push(StackValue::Temp(op1)); - free_value(ctx, op0); -} - -// `sub` is not commutative, so we have to handle it differently (we _must_ use the `op1` -// temp register as the output) -pub fn i32_sub(ctx: &mut Context) { - let op0 = pop(ctx); - let op1 = pop(ctx); - - if let Some(i1) = op1.immediate() { - if let Some(i0) = op0.immediate() { - ctx.block_state.stack.push(StackValue::Immediate(i1 - i0)); - return; - } - } - - let op1 = into_temp_reg(ctx, op1); - match op0.location(&ctx.block_state.locals) { - ValueLocation::Reg(reg) => { - dynasm!(ctx.asm - ; sub Rd(op1), Rd(reg) - ); - } - ValueLocation::Stack(offset) => { - let offset = adjusted_offset(ctx, offset); - dynasm!(ctx.asm - ; sub Rd(op1), [rsp + offset] - ); - } - ValueLocation::Immediate(i) => { - if i == 1 { - dynasm!(ctx.asm - ; dec Rd(op1) - ); - } else { - dynasm!(ctx.asm - ; sub Rd(op1), i as i32 - ); - } - } - } - - ctx.block_state.stack.push(StackValue::Temp(op1)); - free_value(ctx, op0); -} - -// `i32_mul` needs to be seperate because the immediate form of the instruction -// has a different syntax to the immediate form of the other instructions. -pub fn i32_mul(ctx: &mut Context) { - let op0 = pop(ctx); - let op1 = pop(ctx); - - if let Some(i1) = op1.immediate() { - if let Some(i0) = op0.immediate() { - ctx.block_state.stack.push(StackValue::Immediate( - i32::wrapping_mul(i1 as i32, i0 as i32) as _, - )); - return; - } - } - - let (op1, op0) = match op1 { - Value::Temp(reg) => (reg, op0), - _ => { - if op0.immediate().is_some() { - (into_temp_reg(ctx, op1), op0) - } else { - (into_temp_reg(ctx, op0), op1) - } - } - }; - - match op0.location(&ctx.block_state.locals) { - ValueLocation::Reg(reg) => { - dynasm!(ctx.asm - ; imul Rd(op1), Rd(reg) - ); - } - ValueLocation::Stack(offset) => { - let offset = adjusted_offset(ctx, offset); - dynasm!(ctx.asm - ; imul Rd(op1), [rsp + offset] - ); - } - ValueLocation::Immediate(i) => { - dynasm!(ctx.asm - ; imul Rd(op1), Rd(op1), i as i32 - ); - } - } - - ctx.block_state.stack.push(StackValue::Temp(op1)); - free_value(ctx, op0); -} - -pub fn get_local_i32(ctx: &mut Context, local_idx: u32) { - push(ctx, Value::Local(local_idx)); -} - -// TODO: We can put locals that were spilled to the stack -// back into registers here. 
-pub fn set_local_i32(ctx: &mut Context, local_idx: u32) { - let val = pop(ctx); - let val_loc = val.location(&ctx.block_state.locals); - let dst_loc = ctx.block_state.parent_locals.get(local_idx); - - materialize_local(ctx, local_idx); - - if let Some(cur) = ctx - .block_state - .locals - .register_arguments - .get_mut(local_idx as usize) - { - *cur = dst_loc; - } - - copy_value(ctx, val_loc, dst_loc); - free_value(ctx, val); -} - -fn materialize_local(ctx: &mut Context, local_idx: u32) { - // TODO: With real stack allocation we can make this constant-time. We can have a kind of - // on-the-fly SSA transformation where we mark each `StackValue::Local` with an ID - // that increases with each assignment (this can be stored in block state and so - // is reset when the block ends). We then refcount the storage associated with each - // "value ID" and in `pop` we free up slots whose refcount hits 0. This means we - // can have even cleaner assembly than we currently do while giving us back - // linear runtime. - for index in (0..ctx.block_state.stack.len()).rev() { - match ctx.block_state.stack[index] { - // For now it's impossible for a local to be in RAX but that might be - // possible in the future, so we check both cases. - StackValue::Local(i) if i == local_idx => { - ctx.block_state.depth.reserve(1); - ctx.block_state.stack[index] = StackValue::Pop; - match ctx.block_state.locals.get(local_idx) { - ValueLocation::Reg(r) => dynasm!(ctx.asm - ; push Rq(r) - ), - ValueLocation::Stack(offset) => { - let offset = adjusted_offset(ctx, offset); - dynasm!(ctx.asm - ; push QWORD [rsp + offset] - ) - } - _ => unreachable!(), - } - } - StackValue::Pop => { - // We don't need to fail if the `Pop` is lower in the stack than the last instance of this - // local, but we might as well fail for now since we want to reimplement this using proper - // stack allocation anyway. - panic!("Tried to materialize local but the stack already contains elements"); - } - _ => {} - } - } -} - -pub fn literal_i32(ctx: &mut Context, imm: i32) { - push(ctx, Value::Immediate(imm as _)); -} - -pub fn literal_i64(ctx: &mut Context, imm: i64) { - push(ctx, Value::Immediate(imm)); -} - macro_rules! cmp_i32 { ($name:ident, $instr:ident, $reverse_instr:ident, $const_fallback:expr) => { - pub fn $name(ctx: &mut Context) { - let right = pop(ctx); - let left = pop(ctx); + pub fn $name(&mut self) { + let right = self.pop(); + let left = self.pop(); let out = if let Some(i) = left.immediate() { - match right.location(&ctx.block_state.locals) { + match right.location(&self.block_state.locals) { ValueLocation::Stack(offset) => { - let result = ctx.block_state.regs.take_scratch_gpr(); - let offset = adjusted_offset(ctx, offset); - dynasm!(ctx.asm + let result = self.block_state.regs.take_scratch_gpr(); + let offset = self.adjusted_offset(offset); + dynasm!(self.asm ; xor Rd(result), Rd(result) ; cmp DWORD [rsp + offset], i as i32 - ; $instr Rb(result) + ; $reverse_instr Rb(result) ); Value::Temp(result) } ValueLocation::Reg(rreg) => { - let result = ctx.block_state.regs.take_scratch_gpr(); - dynasm!(ctx.asm + let result = self.block_state.regs.take_scratch_gpr(); + dynasm!(self.asm ; xor Rd(result), Rd(result) ; cmp Rd(rreg), i as i32 ; $reverse_instr Rb(result) @@ -1090,27 +479,27 @@ macro_rules! 
cmp_i32 { } } } else { - let lreg = into_reg(ctx, left); - let result = ctx.block_state.regs.take_scratch_gpr(); + let lreg = self.into_reg(left); + let result = self.block_state.regs.take_scratch_gpr(); - match right.location(&ctx.block_state.locals) { + match right.location(&self.block_state.locals) { ValueLocation::Stack(offset) => { - let offset = adjusted_offset(ctx, offset); - dynasm!(ctx.asm + let offset = self.adjusted_offset(offset); + dynasm!(self.asm ; xor Rd(result), Rd(result) ; cmp Rd(lreg), [rsp + offset] ; $instr Rb(result) ); } ValueLocation::Reg(rreg) => { - dynasm!(ctx.asm + dynasm!(self.asm ; xor Rd(result), Rd(result) ; cmp Rd(lreg), Rd(rreg) ; $instr Rb(result) ); } ValueLocation::Immediate(i) => { - dynasm!(ctx.asm + dynasm!(self.asm ; xor Rd(result), Rd(result) ; cmp Rd(lreg), i as i32 ; $instr Rb(result) @@ -1118,32 +507,37 @@ macro_rules! cmp_i32 { } } + if left != Value::Temp(lreg) && !self.block_state.regs.is_free(lreg) { + self.block_state.regs.release_scratch_gpr(lreg); + } + Value::Temp(result) }; - push(ctx, out); - free_value(ctx, left); - free_value(ctx, right); + self.free_value(left); + self.free_value(right); + + self.push(out); } } } macro_rules! cmp_i64 { ($name:ident, $instr:ident, $reverse_instr:ident, $const_fallback:expr) => { - pub fn $name(ctx: &mut Context) { - let right = pop(ctx); - let left = pop(ctx); + pub fn $name(&mut self) { + let right = self.pop(); + let left = self.pop(); let out = if let Some(i) = left.immediate() { - match right.location(&ctx.block_state.locals) { + match right.location(&self.block_state.locals) { ValueLocation::Stack(offset) => { - let result = ctx.block_state.regs.take_scratch_gpr(); - let offset = adjusted_offset(ctx, offset); + let result = self.block_state.regs.take_scratch_gpr(); + let offset = self.adjusted_offset(offset); if let Some(i) = i.try_into() { - dynasm!(ctx.asm + dynasm!(self.asm ; xor Rd(result), Rd(result) ; cmp QWORD [rsp + offset], i - ; $instr Rb(result) + ; $reverse_instr Rb(result) ); } else { unimplemented!("Unsupported `cmp` with large 64-bit immediate operand"); @@ -1151,9 +545,9 @@ macro_rules! cmp_i64 { Value::Temp(result) } ValueLocation::Reg(rreg) => { - let result = ctx.block_state.regs.take_scratch_gpr(); + let result = self.block_state.regs.take_scratch_gpr(); if let Some(i) = i.try_into() { - dynasm!(ctx.asm + dynasm!(self.asm ; xor Rd(result), Rd(result) ; cmp Rq(rreg), i ; $reverse_instr Rb(result) @@ -1168,20 +562,20 @@ macro_rules! cmp_i64 { } } } else { - let lreg = into_reg(ctx, left); - let result = ctx.block_state.regs.take_scratch_gpr(); + let lreg = self.into_reg(left); + let result = self.block_state.regs.take_scratch_gpr(); - match right.location(&ctx.block_state.locals) { + match right.location(&self.block_state.locals) { ValueLocation::Stack(offset) => { - let offset = adjusted_offset(ctx, offset); - dynasm!(ctx.asm + let offset = self.adjusted_offset(offset); + dynasm!(self.asm ; xor Rd(result), Rd(result) ; cmp Rq(lreg), [rsp + offset] ; $instr Rb(result) ); } ValueLocation::Reg(rreg) => { - dynasm!(ctx.asm + dynasm!(self.asm ; xor Rd(result), Rd(result) ; cmp Rq(lreg), Rq(rreg) ; $instr Rb(result) @@ -1189,7 +583,7 @@ macro_rules! cmp_i64 { } ValueLocation::Immediate(i) => { if let Some(i) = i.try_into() { - dynasm!(ctx.asm + dynasm!(self.asm ; xor Rd(result), Rd(result) ; cmp Rq(lreg), i ; $instr Rb(result) @@ -1200,403 +594,1428 @@ macro_rules! 
cmp_i64 { } } + if left != Value::Temp(lreg) && !self.block_state.regs.is_free(lreg) { + self.block_state.regs.release_scratch_gpr(lreg); + } + Value::Temp(result) }; - push(ctx, out); - free_value(ctx, left); - free_value(ctx, right); + self.free_value(left); + self.free_value(right); + self.push(out); } } } -cmp_i32!(i32_eq, sete, sete, |a, b| a == b); -cmp_i32!(i32_neq, setne, setne, |a, b| a != b); -// `dynasm-rs` inexplicably doesn't support setb but `setnae` (and `setc`) are synonymous -cmp_i32!(i32_lt_u, setnae, seta, |a, b| (a as u32) < (b as u32)); -cmp_i32!(i32_le_u, setbe, setae, |a, b| (a as u32) <= (b as u32)); -cmp_i32!(i32_gt_u, seta, setnae, |a, b| (a as u32) > (b as u32)); -cmp_i32!(i32_ge_u, setae, setna, |a, b| (a as u32) >= (b as u32)); -cmp_i32!(i32_lt_s, setl, setnle, |a, b| a < b); -cmp_i32!(i32_le_s, setle, setnl, |a, b| a <= b); -cmp_i32!(i32_gt_s, setg, setnge, |a, b| a > b); -cmp_i32!(i32_ge_s, setge, setng, |a, b| a >= b); - -cmp_i64!(i64_eq, sete, sete, |a, b| a == b); -cmp_i64!(i64_neq, setne, setne, |a, b| a != b); -// `dynasm-rs` inexplicably doesn't support setb but `setnae` (and `setc`) are synonymous -cmp_i64!(i64_lt_u, setnae, seta, |a, b| (a as u64) < (b as u64)); -cmp_i64!(i64_le_u, setbe, setae, |a, b| (a as u64) <= (b as u64)); -cmp_i64!(i64_gt_u, seta, setnae, |a, b| (a as u64) > (b as u64)); -cmp_i64!(i64_ge_u, setae, setna, |a, b| (a as u64) >= (b as u64)); -cmp_i64!(i64_lt_s, setl, setnle, |a, b| a < b); -cmp_i64!(i64_le_s, setle, setnl, |a, b| a <= b); -cmp_i64!(i64_gt_s, setg, setnge, |a, b| a > b); -cmp_i64!(i64_ge_s, setge, setng, |a, b| a >= b); - -/// Pops i32 predicate and branches to the specified label -/// if the predicate is equal to zero. -pub fn jump_if_false(ctx: &mut Context, label: Label) { - let val = pop(ctx); - let predicate = into_temp_reg(ctx, val); - dynasm!(ctx.asm - ; test Rd(predicate), Rd(predicate) - ; je =>label.0 - ); - ctx.block_state.regs.release_scratch_gpr(predicate); -} - -/// Branch unconditionally to the specified label. 
-pub fn br(ctx: &mut Context, label: Label) { - dynasm!(ctx.asm - ; jmp =>label.0 - ); -} - -fn immediate_to_reg(ctx: &mut Context, reg: GPR, val: i64) { - if (val as u64) <= u32::max_value() as u64 { - dynasm!(ctx.asm - ; mov Rd(reg), val as i32 - ); - } else { - dynasm!(ctx.asm - ; mov Rq(reg), QWORD val - ); - } -} - -fn copy_value(ctx: &mut Context, src: ValueLocation, dst: ValueLocation) { - match (src, dst) { - (ValueLocation::Stack(in_offset), ValueLocation::Stack(out_offset)) => { - let in_offset = adjusted_offset(ctx, in_offset); - let out_offset = adjusted_offset(ctx, out_offset); - if in_offset != out_offset { - let gpr = ctx.block_state.regs.take_scratch_gpr(); - dynasm!(ctx.asm - ; mov Rq(gpr), [rsp + in_offset] - ; mov [rsp + out_offset], Rq(gpr) - ); - ctx.block_state.regs.release_scratch_gpr(gpr); - } - } - (ValueLocation::Reg(in_reg), ValueLocation::Stack(out_offset)) => { - let out_offset = adjusted_offset(ctx, out_offset); - dynasm!(ctx.asm - ; mov [rsp + out_offset], Rq(in_reg) - ); - } - (ValueLocation::Immediate(i), ValueLocation::Stack(out_offset)) => { - let out_offset = adjusted_offset(ctx, out_offset); - if (i as u64) <= u32::max_value() as u64 { - dynasm!(ctx.asm - ; mov DWORD [rsp + out_offset], i as i32 - ); - } else { - let scratch = ctx.block_state.regs.take_scratch_gpr(); - - dynasm!(ctx.asm - ; mov Rq(scratch), QWORD i - ; mov [rsp + out_offset], Rq(scratch) - ); - - ctx.block_state.regs.release_scratch_gpr(scratch); - } - } - (ValueLocation::Stack(in_offset), ValueLocation::Reg(out_reg)) => { - let in_offset = adjusted_offset(ctx, in_offset); - dynasm!(ctx.asm - ; mov Rq(out_reg), [rsp + in_offset] - ); - } - (ValueLocation::Reg(in_reg), ValueLocation::Reg(out_reg)) => { - if in_reg != out_reg { - dynasm!(ctx.asm - ; mov Rq(out_reg), Rq(in_reg) - ); - } - } - (ValueLocation::Immediate(i), ValueLocation::Reg(out_reg)) => { - immediate_to_reg(ctx, out_reg, i); - } - // TODO: Have separate `ReadLocation` and `WriteLocation`? - (_, ValueLocation::Immediate(_)) => panic!("Tried to copy to an immediate value!"), - } -} - #[must_use] pub struct CallCleanup { restore_registers: ArrayVec<[GPR; SCRATCH_REGS.len()]>, stack_depth: i32, } -/// Make sure that any argument registers that will be used by the call are free -/// by storing them to the stack. -/// -/// Unfortunately, we can't elide this store if we're just passing arguments on -/// because these registers are caller-saved and so the callee can use them as -/// scratch space. -fn free_arg_registers(ctx: &mut Context, count: u32) { - if count == 0 { - return; - } +macro_rules! 
commutative_binop_i32 { + ($name:ident, $instr:ident, $const_fallback:expr) => { + pub fn $name(&mut self) { + let op0 = self.pop(); + let op1 = self.pop(); - // This is bound to the maximum size of the `ArrayVec` amd so can be considered to have constant - // runtime - for i in 0..ctx.block_state.locals.register_arguments.len() { - match ctx.block_state.locals.register_arguments[i] { - ValueLocation::Reg(reg) => { - if ARGS_IN_GPRS.contains(®) { - let dst = ValueLocation::Stack( - ((ctx.block_state.locals.num_local_stack_slots - 1 - i as u32) * WORD_SIZE) - as _, - ); - copy_value(ctx, ValueLocation::Reg(reg), dst); - ctx.block_state.locals.register_arguments[i] = dst; + if let Some(i1) = op1.immediate() { + if let Some(i0) = op0.immediate() { + self.block_state.stack.push(StackValue::Immediate($const_fallback(i1 as i32, i0 as i32) as _)); + return; } } - _ => {} + + let (op1, op0) = match op1 { + Value::Temp(reg) => (reg, op0), + _ => if op0.immediate().is_some() { + (self.into_temp_reg(op1), op0) + } else { + (self.into_temp_reg(op0), op1) + } + }; + + match op0.location(&self.block_state.locals) { + ValueLocation::Reg(reg) => { + dynasm!(self.asm + ; $instr Rd(op1), Rd(reg) + ); + } + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + dynasm!(self.asm + ; $instr Rd(op1), [rsp + offset] + ); + } + ValueLocation::Immediate(i) => { + dynasm!(self.asm + ; $instr Rd(op1), i as i32 + ); + } + } + + self.free_value(op0); + self.push(Value::Temp(op1)); } } } -fn free_return_register(ctx: &mut Context, count: u32) { - if count == 0 { - return; - } +macro_rules! commutative_binop_i64 { + ($name:ident, $instr:ident, $const_fallback:expr) => { + pub fn $name(&mut self) { + let op0 = self.pop(); + let op1 = self.pop(); - free_register(ctx, RAX); + if let Some(i1) = op1.immediate() { + if let Some(i0) = op0.immediate() { + self.block_state.stack.push(StackValue::Immediate($const_fallback(i1, i0))); + return; + } + } + + let (op1, op0) = match op1 { + Value::Temp(reg) => (reg, op0), + _ => if op0.immediate().is_some() { + (self.into_temp_reg(op1), op0) + } else { + (self.into_temp_reg(op0), op1) + } + }; + + match op0.location(&self.block_state.locals) { + ValueLocation::Reg(reg) => { + dynasm!(self.asm + ; $instr Rq(op1), Rq(reg) + ); + } + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + dynasm!(self.asm + ; $instr Rq(op1), [rsp + offset] + ); + } + ValueLocation::Immediate(i) => { + if let Some(i) = i.try_into() { + dynasm!(self.asm + ; $instr Rq(op1), i + ); + } else { + let scratch = self.block_state.regs.take_scratch_gpr(); + + dynasm!(self.asm + ; mov Rq(scratch), QWORD i + ; $instr Rq(op1), Rq(scratch) + ); + + self.block_state.regs.release_scratch_gpr(scratch); + } + } + } + + self.free_value(op0); + self.push(Value::Temp(op1)); + } + } } -fn free_register(ctx: &mut Context, reg: GPR) { - let mut to_repush = 0; - let mut out = None; +macro_rules! 
load { + ($name:ident, $reg_ty:ident) => { + pub fn $name(&mut self, offset: u32) -> Result<(), T::Error> { + fn load_to_reg( + ctx: &mut Context, + reg: GPR, + (offset, gpr): (i64, Option) + ) { + let dst_components: (Result, _) = if let Some(offset) = offset.try_into() { + (Ok(offset), gpr) + } else { + (Err(ctx.into_temp_reg(Value::Immediate(offset))), gpr) + }; - if ctx.block_state.regs.is_free(reg) { - return; + match dst_components { + (Ok(offset), Some(offset_reg)) => { + dynasm!(ctx.asm + ; mov $reg_ty(reg), [offset + Rq(offset_reg)] + ); + } + (Ok(offset), None) => { + dynasm!(ctx.asm + ; mov $reg_ty(reg), [offset] + ); + } + (Err(left), Some(right)) => { + dynasm!(ctx.asm + ; mov $reg_ty(reg), [Rq(left) + Rq(right)] + ); + } + (Err(offset_reg), None) => { + dynasm!(ctx.asm + ; mov $reg_ty(reg), [Rq(offset_reg)] + ); + } + } + + if let Err(gpr) = dst_components.0 { + ctx.block_state.regs.release_scratch_gpr(gpr); + } + } + + let base = self.pop(); + let address = T::offset(&self.memory_base, offset)?; + + let temp = self.block_state.regs.take_scratch_gpr(); + + match base.location(&self.block_state.locals) { + // TODO: Do compilers (to wasm) actually emit load-with-immediate when doing + // constant loads? There isn't a `load` variant that _doesn't_ take a + // runtime parameter. + ValueLocation::Immediate(i) => { + let address = address + i as i32 as i64; + + load_to_reg(self, temp, (address, None)); + + // TODO: Push relocation + } + ValueLocation::Reg(gpr) => { + load_to_reg(self, temp, (address, Some(gpr))); + // TODO: Push relocation + } + ValueLocation::Stack(_) => { + let gpr = self.into_temp_reg(base); + load_to_reg(self, temp, (address, Some(gpr))); + self.block_state.regs.release_scratch_gpr(gpr); + // TODO: Push relocation + } + } + + self.free_value(base); + self.push(Value::Temp(temp)); + + Ok(()) + } + } +} + +macro_rules! 
store { + ($name:ident, $reg_ty:ident, $size:ident) => { + pub fn $name(&mut self, offset: u32) -> Result<(), T::Error> { + fn put_reg_in_address( + ctx: &mut Context, + src: GPR, + dst_components: (Result, Option), + ) { + match dst_components { + (Ok(offset), Some(offset_reg)) => { + dynasm!(ctx.asm + ; mov [offset + Rq(offset_reg)], $reg_ty(src) + ); + } + (Ok(offset), None) => { + dynasm!(ctx.asm + ; mov [offset], $reg_ty(src) + ); + } + (Err(left), Some(right)) => { + dynasm!(ctx.asm + ; mov [Rq(left) + Rq(right)], $reg_ty(src) + ); + } + (Err(offset_reg), None) => { + dynasm!(ctx.asm + ; mov [Rq(offset_reg)], $reg_ty(src) + ); + } + } + } + + fn put_in_address( + ctx: &mut Context, + src: Value, + (offset, gpr): (i64, Option) + ) { + let dst_components: (Result, _) = if let Some(offset) = offset.try_into() { + (Ok(offset), gpr) + } else { + (Err(ctx.into_temp_reg(Value::Immediate(offset))), gpr) + }; + + match src.location(&ctx.block_state.locals) { + ValueLocation::Immediate(i) => { + let imm: Result = if let Some(i) = i.try_into() { + Ok(i) + } else { + Err(ctx.into_temp_reg(Value::Immediate(i))) + }; + match (imm, dst_components) { + (Ok(val), (Ok(offset), Some(gpr))) => { + dynasm!(ctx.asm + ; mov $size [offset + Rq(gpr)], val + ); + } + (Ok(val), (Ok(offset), None)) => { + dynasm!(ctx.asm + ; mov $size [offset], val + ); + } + (Ok(val), (Err(left), Some(right))) => { + dynasm!(ctx.asm + ; mov $size [Rq(left) + Rq(right)], val + ); + } + (Ok(val), (Err(gpr), None)) => { + dynasm!(ctx.asm + ; mov $size [Rq(gpr)], val + ); + } + (Err(val_reg), (Ok(offset), Some(gpr))) => { + dynasm!(ctx.asm + ; mov [offset + Rq(gpr)], $reg_ty(val_reg) + ); + } + (Err(val_reg), (Ok(offset), None)) => { + dynasm!(ctx.asm + ; mov [offset], $reg_ty(val_reg) + ); + } + (Err(val_reg), (Err(left), Some(right))) => { + dynasm!(ctx.asm + ; mov [Rq(left) + Rq(right)], $reg_ty(val_reg) + ); + } + (Err(val_reg), (Err(gpr), None)) => { + dynasm!(ctx.asm + ; mov [Rq(gpr)], $reg_ty(val_reg) + ); + } + } + + if let Err(imm) = imm { + ctx.block_state.regs.release_scratch_gpr(imm); + } + } + ValueLocation::Reg(gpr) => { + put_reg_in_address(ctx, gpr, dst_components); + } + ValueLocation::Stack(_) => { + let gpr = ctx.into_temp_reg(src); + put_reg_in_address(ctx, gpr, dst_components); + ctx.block_state.regs.release_scratch_gpr(gpr); + } + } + + if let Err(gpr) = dst_components.0 { + ctx.block_state.regs.release_scratch_gpr(gpr); + } + } + + let value = self.pop(); + let base = self.pop(); + let address = T::offset(&self.memory_base, offset)?; + + match base.location(&self.block_state.locals) { + // TODO: Do compilers (to wasm) actually emit load-with-immediate when doing + // constant loads? There isn't a `load` variant that _doesn't_ take a + // runtime parameter. + ValueLocation::Immediate(i) => { + let address = address + i as i32 as i64; + + // TODO: Use 32-bit relative addressing? + // TODO: Are addresses stored in registers signed or unsigned and is it + // possible to map 2^63..2^64 such that it would matter? 
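+                    //       (Best guess while that TODO is open: dynasm encodes these
+                    //       operands with the full 64-bit registers, so only the bit
+                    //       pattern matters; signedness could only become observable if
+                    //       the host mapped the upper half of the address space, which
+                    //       mainstream OSes reserve for the kernel.)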
+ put_in_address(self, value, (address, None)); + + // TODO: Push relocation + } + ValueLocation::Reg(gpr) => { + put_in_address(self, value, (address, Some(gpr))); + + // TODO: Push relocation + } + ValueLocation::Stack(_) => { + let gpr = self.into_temp_reg(base); + put_in_address(self, value, (address, Some(gpr))); + self.block_state.regs.release_scratch_gpr(gpr); + // TODO: Push relocation + } + } + + self.free_value(value); + self.free_value(base); + + Ok(()) + } + } +} + +trait TryInto { + fn try_into(self) -> Option; +} + +impl TryInto for u64 { + fn try_into(self) -> Option { + let max = i64::max_value() as u64; + + if self <= max { + Some(self as i64) + } else { + None + } + } +} + +impl TryInto for i64 { + fn try_into(self) -> Option { + let min = i32::min_value() as i64; + let max = i32::max_value() as i64; + + if self >= min && self <= max { + Some(self as i32) + } else { + None + } + } +} + +impl Context<'_, T> { + /// Create a new undefined label. + pub fn create_label(&mut self) -> Label { + Label(self.asm.new_dynamic_label()) } - // TODO: With real stack allocation we can make this constant-time - for stack_val in ctx.block_state.stack.iter_mut().rev() { - match stack_val.location(&ctx.block_state.locals) { - // For now it's impossible for a local to be in RAX but that might be - // possible in the future, so we check both cases. - Some(ValueLocation::Reg(r)) if r == reg => { - *stack_val = StackValue::Pop; + fn adjusted_offset(&self, offset: i32) -> i32 { + (self.block_state.depth.0 * WORD_SIZE) as i32 + offset + } - out = Some(*stack_val); + cmp_i32!(i32_eq, sete, sete, |a, b| a == b); + cmp_i32!(i32_neq, setne, setne, |a, b| a != b); + // `dynasm-rs` inexplicably doesn't support setb but `setnae` (and `setc`) are synonymous + cmp_i32!(i32_lt_u, setnae, seta, |a, b| (a as u32) < (b as u32)); + cmp_i32!(i32_le_u, setbe, setae, |a, b| (a as u32) <= (b as u32)); + cmp_i32!(i32_gt_u, seta, setnae, |a, b| (a as u32) > (b as u32)); + cmp_i32!(i32_ge_u, setae, setna, |a, b| (a as u32) >= (b as u32)); + cmp_i32!(i32_lt_s, setl, setnle, |a, b| a < b); + cmp_i32!(i32_le_s, setle, setnl, |a, b| a <= b); + cmp_i32!(i32_gt_s, setg, setnge, |a, b| a > b); + cmp_i32!(i32_ge_s, setge, setng, |a, b| a >= b); - break; - } - Some(_) => {} - None => { - to_repush += 1; - } + cmp_i64!(i64_eq, sete, sete, |a, b| a == b); + cmp_i64!(i64_neq, setne, setne, |a, b| a != b); + // `dynasm-rs` inexplicably doesn't support setb but `setnae` (and `setc`) are synonymous + cmp_i64!(i64_lt_u, setnae, seta, |a, b| (a as u64) < (b as u64)); + cmp_i64!(i64_le_u, setbe, setae, |a, b| (a as u64) <= (b as u64)); + cmp_i64!(i64_gt_u, seta, setnae, |a, b| (a as u64) > (b as u64)); + cmp_i64!(i64_ge_u, setae, setna, |a, b| (a as u64) >= (b as u64)); + cmp_i64!(i64_lt_s, setl, setnle, |a, b| a < b); + cmp_i64!(i64_le_s, setle, setnl, |a, b| a <= b); + cmp_i64!(i64_gt_s, setg, setnge, |a, b| a > b); + cmp_i64!(i64_ge_s, setge, setng, |a, b| a >= b); + + /// Pops i32 predicate and branches to the specified label + /// if the predicate is equal to zero. + pub fn jump_if_false(&mut self, label: Label) { + let val = self.pop(); + let predicate = self.into_temp_reg(val); + dynasm!(self.asm + ; test Rd(predicate), Rd(predicate) + ; je =>label.0 + ); + self.block_state.regs.release_scratch_gpr(predicate); + } + + /// Branch unconditionally to the specified label. 
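+    ///
+    /// A sketch of the intended usage (marked `ignore` since a `Context` can
+    /// only be obtained internally via `CodeGenSession::new_context`):
+    ///
+    /// ```ignore
+    /// let skip = ctx.create_label(); // forward label, not yet defined
+    /// ctx.br(skip);                  // emits `jmp =>skip`
+    /// // ... code that is jumped over ...
+    /// ctx.define_label(skip);        // resolve the label here
+    /// ```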
+ pub fn br(&mut self, label: Label) { + dynasm!(self.asm + ; jmp =>label.0 + ); + } + + fn immediate_to_reg(&mut self, reg: GPR, val: i64) { + if (val as u64) <= u32::max_value() as u64 { + dynasm!(self.asm + ; mov Rd(reg), val as i32 + ); + } else { + dynasm!(self.asm + ; mov Rq(reg), QWORD val + ); } } - if let Some(out) = out { - match out { - StackValue::Temp(gpr) => { - dynasm!(ctx.asm - ; mov Rq(gpr), rax + fn copy_value(&mut self, src: ValueLocation, dst: ValueLocation) { + match (src, dst) { + (ValueLocation::Stack(in_offset), ValueLocation::Stack(out_offset)) => { + let in_offset = self.adjusted_offset(in_offset); + let out_offset = self.adjusted_offset(out_offset); + if in_offset != out_offset { + let gpr = self.block_state.regs.take_scratch_gpr(); + dynasm!(self.asm + ; mov Rq(gpr), [rsp + in_offset] + ; mov [rsp + out_offset], Rq(gpr) + ); + self.block_state.regs.release_scratch_gpr(gpr); + } + } + (ValueLocation::Reg(in_reg), ValueLocation::Stack(out_offset)) => { + let out_offset = self.adjusted_offset(out_offset); + dynasm!(self.asm + ; mov [rsp + out_offset], Rq(in_reg) ); } + (ValueLocation::Immediate(i), ValueLocation::Stack(out_offset)) => { + let out_offset = self.adjusted_offset(out_offset); + if (i as u64) <= u32::max_value() as u64 { + dynasm!(self.asm + ; mov DWORD [rsp + out_offset], i as i32 + ); + } else { + let scratch = self.block_state.regs.take_scratch_gpr(); + + dynasm!(self.asm + ; mov Rq(scratch), QWORD i + ; mov [rsp + out_offset], Rq(scratch) + ); + + self.block_state.regs.release_scratch_gpr(scratch); + } + } + (ValueLocation::Stack(in_offset), ValueLocation::Reg(out_reg)) => { + let in_offset = self.adjusted_offset(in_offset); + dynasm!(self.asm + ; mov Rq(out_reg), [rsp + in_offset] + ); + } + (ValueLocation::Reg(in_reg), ValueLocation::Reg(out_reg)) => { + if in_reg != out_reg { + dynasm!(self.asm + ; mov Rq(out_reg), Rq(in_reg) + ); + } + } + (ValueLocation::Immediate(i), ValueLocation::Reg(out_reg)) => { + self.immediate_to_reg(out_reg, i); + } + // TODO: Have separate `ReadLocation` and `WriteLocation`? + (_, ValueLocation::Immediate(_)) => panic!("Tried to copy to an immediate value!"), + } + } + + /// Define the given label at the current position. + /// + /// Multiple labels can be defined at the same position. However, a label + /// can be defined only once. + pub fn define_label(&mut self, label: Label) { + self.asm.dynamic_label(label.0); + } + + fn expand_stack(&mut self, by: u32) { + use std::iter; + + if by == 0 { + return; + } + + let new_stack_size = (self.block_state.stack_map.len() + by as usize).next_power_of_two(); + let additional_elements = new_stack_size - self.block_state.stack_map.len(); + self.block_state + .stack_map + .extend(iter::repeat(false).take(additional_elements)); + + dynasm!(self.asm + ; sub rsp, additional_elements as i32 + ); + } + + // TODO: Make this generic over `Vec` or `ArrayVec`? 
+ fn stack_slots(&mut self, count: u32) -> Vec { + let mut out = Vec::with_capacity(count as usize); + + let offset_if_taken = |(i, is_taken): (usize, bool)| { + if !is_taken { + Some(i as i32 * WORD_SIZE as i32) + } else { + None + } + }; + + out.extend( + self.block_state + .stack_map + .iter() + .cloned() + .enumerate() + .filter_map(offset_if_taken), + ); + + let remaining = count as usize - out.len(); + + if remaining > 0 { + self.expand_stack(remaining as u32); + out.extend( + self.block_state + .stack_map + .iter() + .cloned() + .enumerate() + .filter_map(offset_if_taken), + ); + } + + out + } + + fn stack_slot(&mut self) -> i32 { + if let Some(pos) = self + .block_state + .stack_map + .iter() + .position(|is_taken| !is_taken) + { + self.block_state.stack_map[pos] = true; + pos as i32 * WORD_SIZE as i32 + } else { + self.expand_stack(1); + self.stack_slot() + } + } + + // We use `put` instead of `pop` since with `BrIf` it's possible + // that the block will continue after returning. + pub fn return_from_block(&mut self, arity: u32, is_function_end: bool) { + // This should just be an optimisation, passing `false` should always result + // in correct code. + if !is_function_end { + self.restore_locals(); + } + + if arity == 0 { + return; + } + + let stack_top = *self.block_state.stack.last().expect("Stack is empty"); + if let Some(reg) = self.block_state.return_register { + self.put_stack_val_into(stack_top, ValueLocation::Reg(reg)); + } else { + let out_reg = match stack_top { + StackValue::Temp(r) => r, + other => { + let new_scratch = self.block_state.regs.take_scratch_gpr(); + self.put_stack_val_into(other, ValueLocation::Reg(new_scratch)); + new_scratch + } + }; + + self.block_state.return_register = Some(out_reg); + } + } + + pub fn start_block(&mut self) -> BlockState { + use std::mem; + + // OPTIMISATION: We cannot use the parent's stack values (it is disallowed by the spec) + // so we start a new stack, using `mem::replace` to ensure that we never + // clone or deallocate anything. + // + // I believe that it would be possible to cause a compiler bomb if we did + // not do this, since cloning iterates over the whole `Vec`. + let out_stack = mem::replace(&mut self.block_state.stack, vec![]); + let mut current_state = self.block_state.clone(); + current_state.stack = out_stack; + + self.block_state.parent_locals = self.block_state.locals.clone(); + self.block_state.return_register = None; + current_state + } + + // To start the next subblock of a block (for `if..then..else..end`). + // The only difference is that choices we made in the first subblock + // (for now only the return register) must be maintained in the next + // subblocks. + pub fn reset_block(&mut self, parent_block_state: BlockState) { + let return_reg = self.block_state.return_register; + + self.block_state = parent_block_state; + + self.block_state.return_register = return_reg; + } + + pub fn end_block(&mut self, parent_block_state: BlockState, func: impl FnOnce(&mut Self)) { + // TODO: This should currently never be called, but is important for if we want to + // have a more complex stack spilling scheme. 
+ debug_assert_eq!( + self.block_state.depth, parent_block_state.depth, + "Imbalanced pushes and pops" + ); + if self.block_state.depth != parent_block_state.depth { + dynasm!(self.asm + ; add rsp, ((self.block_state.depth.0 - parent_block_state.depth.0) * WORD_SIZE) as i32 + ); + } + + let return_reg = self.block_state.return_register; + self.block_state = parent_block_state; + + func(self); + + if let Some(reg) = return_reg { + self.block_state.regs.mark_used(reg); + self.block_state.stack.push(StackValue::Temp(reg)); + } + } + + fn restore_locals(&mut self) { + for (src, dst) in self + .block_state + .locals + .register_arguments + .clone() + .iter() + .zip(&self.block_state.parent_locals.register_arguments.clone()) + { + self.copy_value(*src, *dst); + } + } + + load!(i32_load, Rd); + load!(i64_load, Rq); + store!(i32_store, Rd, DWORD); + store!(i64_store, Rq, QWORD); + + fn push(&mut self, value: Value) { + let stack_loc = match value { + Value::Local(loc) => StackValue::Local(loc), + Value::Immediate(i) => StackValue::Immediate(i), + Value::Temp(gpr) => { + if self.block_state.regs.free_scratch() >= 1 { + StackValue::Temp(gpr) + } else { + self.block_state.depth.reserve(1); + // TODO: Proper stack allocation scheme + dynasm!(self.asm + ; push Rq(gpr) + ); + self.block_state.regs.release_scratch_gpr(gpr); + StackValue::Pop + } + } + }; + + self.block_state.stack.push(stack_loc); + } + + fn pop(&mut self) -> Value { + match self.block_state.stack.pop().expect("Stack is empty") { + StackValue::Local(loc) => Value::Local(loc), + StackValue::Immediate(i) => Value::Immediate(i), + StackValue::Temp(reg) => Value::Temp(reg), StackValue::Pop => { - ctx.block_state.depth.reserve(1); - // TODO: Ideally we should do proper stack allocation so we - // don't have to check this at all (i.e. order on the - // physical stack and order on the logical stack should - // be independent). - debug_assert_eq!(to_repush, 0); - dynasm!(ctx.asm + self.block_state.depth.free(1); + let gpr = self.block_state.regs.take_scratch_gpr(); + dynasm!(self.asm + ; pop Rq(gpr) + ); + Value::Temp(gpr) + } + } + } + + /// Warning: this _will_ pop the runtime stack, but will _not_ pop the compile-time + /// stack. It's specifically for mid-block breaks like `Br` and `BrIf`. 
+ fn put_stack_val_into(&mut self, val: StackValue, dst: ValueLocation) { + let to_move = match val { + StackValue::Local(loc) => Value::Local(loc), + StackValue::Immediate(i) => Value::Immediate(i), + StackValue::Temp(reg) => Value::Temp(reg), + StackValue::Pop => { + self.block_state.depth.free(1); + match dst { + ValueLocation::Reg(r) => dynasm!(self.asm + ; pop Rq(r) + ), + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + dynasm!(self.asm + ; pop QWORD [rsp + offset] + ) + } + ValueLocation::Immediate(_) => panic!("Tried to write to literal!"), + } + + // DO NOT DO A `copy_val` + return; + } + }; + + let src = to_move.location(&self.block_state.locals); + self.copy_value(src, dst); + if src != dst { + self.free_value(to_move); + } + } + + pub fn drop(&mut self) { + match self.block_state.stack.pop().expect("Stack is empty") { + StackValue::Pop => { + self.block_state.depth.free(1); + dynasm!(self.asm + ; add rsp, WORD_SIZE as i32 + ); + } + StackValue::Temp(gpr) => self.free_value(Value::Temp(gpr)), + StackValue::Local(loc) => self.free_value(Value::Local(loc)), + StackValue::Immediate(imm) => self.free_value(Value::Immediate(imm)), + } + } + + fn pop_into(&mut self, dst: ValueLocation) { + let val = self.block_state.stack.pop().expect("Stack is empty"); + self.put_stack_val_into(val, dst); + } + + fn free_value(&mut self, val: Value) { + match val { + Value::Temp(reg) => self.block_state.regs.release_scratch_gpr(reg), + Value::Local(_) | Value::Immediate(_) => {} + } + } + + /// Puts this value into a register so that it can be efficiently read + fn into_reg(&mut self, val: Value) -> GPR { + match val.location(&self.block_state.locals) { + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + let scratch = self.block_state.regs.take_scratch_gpr(); + dynasm!(self.asm + ; mov Rq(scratch), [rsp + offset] + ); + scratch + } + ValueLocation::Immediate(i) => { + let scratch = self.block_state.regs.take_scratch_gpr(); + self.immediate_to_reg(scratch, i); + scratch + } + ValueLocation::Reg(reg) => reg, + } + } + + /// Puts this value into a temporary register so that operations + /// on that register don't write to a local. + fn into_temp_reg(&mut self, val: Value) -> GPR { + match val { + Value::Local(loc) => { + let scratch = self.block_state.regs.take_scratch_gpr(); + + match self.block_state.locals.get(loc) { + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + dynasm!(self.asm + ; mov Rq(scratch), [rsp + offset] + ); + } + ValueLocation::Reg(reg) => { + dynasm!(self.asm + ; mov Rq(scratch), Rq(reg) + ); + } + ValueLocation::Immediate(_) => { + panic!("We shouldn't be storing immediates in locals for now") + } + } + + scratch + } + Value::Immediate(i) => { + let scratch = self.block_state.regs.take_scratch_gpr(); + + self.immediate_to_reg(scratch, i); + + scratch + } + Value::Temp(reg) => reg, + } + } + + // TODO: Use `lea` when the LHS operand isn't a temporary but both of the operands + // are in registers. 
+ // TODO: Use `lea` when the LHS operand isn't a temporary but both of the operands
+ // are in registers.
+ commutative_binop_i32!(i32_add, add, |a, b| (a as i32).wrapping_add(b as i32));
+ commutative_binop_i32!(i32_and, and, |a, b| a & b);
+ commutative_binop_i32!(i32_or, or, |a, b| a | b);
+ commutative_binop_i32!(i32_xor, xor, |a, b| a ^ b);
+
+ commutative_binop_i64!(i64_add, add, i64::wrapping_add);
+ commutative_binop_i64!(i64_and, and, |a, b| a & b);
+ commutative_binop_i64!(i64_or, or, |a, b| a | b);
+ commutative_binop_i64!(i64_xor, xor, |a, b| a ^ b);
+
+ // `sub` is not commutative, so we have to handle it differently (we _must_ use the `op1`
+ // temp register as the output)
+ pub fn i64_sub(&mut self) {
+ let op0 = self.pop();
+ let op1 = self.pop();
+
+ if let Some(i1) = op1.immediate() {
+ if let Some(i0) = op0.immediate() {
+ self.block_state.stack.push(StackValue::Immediate(i1.wrapping_sub(i0)));
+ return;
+ }
+ }
+
+ let op1 = self.into_temp_reg(op1);
+ match op0.location(&self.block_state.locals) {
+ ValueLocation::Reg(reg) => {
+ dynasm!(self.asm
+ ; sub Rq(op1), Rq(reg)
+ );
+ }
+ ValueLocation::Stack(offset) => {
+ let offset = self.adjusted_offset(offset);
+ dynasm!(self.asm
+ ; sub Rq(op1), [rsp + offset]
+ );
+ }
+ ValueLocation::Immediate(i) => {
+ if let Some(i) = i.try_into() {
+ dynasm!(self.asm
+ ; sub Rq(op1), i
+ );
+ } else {
+ unimplemented!("Unsupported `sub` with large 64-bit immediate operand");
+ }
+ }
+ }
+
+ self.push(Value::Temp(op1));
+ self.free_value(op0);
+ }
+
+ // `i64_mul` needs to be separate because the immediate form of the instruction
+ // has a different syntax to the immediate form of the other instructions.
+ pub fn i64_mul(&mut self) {
+ let op0 = self.pop();
+ let op1 = self.pop();
+
+ if let Some(i1) = op1.immediate() {
+ if let Some(i0) = op0.immediate() {
+ self.block_state
+ .stack
+ .push(StackValue::Immediate(i64::wrapping_mul(i1, i0)));
+ return;
+ }
+ }
+
+ let (op1, op0) = match op1 {
+ Value::Temp(reg) => (reg, op0),
+ _ => {
+ if op0.immediate().is_some() {
+ (self.into_temp_reg(op1), op0)
+ } else {
+ (self.into_temp_reg(op0), op1)
+ }
+ }
+ };
+
+ match op0.location(&self.block_state.locals) {
+ ValueLocation::Reg(reg) => {
+ dynasm!(self.asm
+ ; imul Rq(op1), Rq(reg)
+ );
+ }
+ ValueLocation::Stack(offset) => {
+ let offset = self.adjusted_offset(offset);
+ dynasm!(self.asm
+ ; imul Rq(op1), [rsp + offset]
+ );
+ }
+ ValueLocation::Immediate(i) => {
+ if let Some(i) = i.try_into() {
+ dynasm!(self.asm
+ ; imul Rq(op1), Rq(op1), i
+ );
+ } else {
+ unimplemented!("Unsupported `imul` with large 64-bit immediate operand");
+ }
+ }
+ }
+
+ self.push(Value::Temp(op1));
+ self.free_value(op0);
+ }
+
+ // `sub` is not commutative, so we have to handle it differently (we _must_ use the `op1`
+ // temp register as the output)
+ pub fn i32_sub(&mut self) {
+ let op0 = self.pop();
+ let op1 = self.pop();
+
+ if let Some(i1) = op1.immediate() {
+ if let Some(i0) = op0.immediate() {
+ self.block_state
+ .stack
+ .push(StackValue::Immediate((i1 as i32).wrapping_sub(i0 as i32) as _));
+ return;
+ }
+ }
+
+ let op1 = self.into_temp_reg(op1);
+ match op0.location(&self.block_state.locals) {
+ ValueLocation::Reg(reg) => {
+ dynasm!(self.asm
+ ; sub Rd(op1), Rd(reg)
+ );
+ }
+ ValueLocation::Stack(offset) => {
+ let offset = self.adjusted_offset(offset);
+ dynasm!(self.asm
+ ; sub Rd(op1), [rsp + offset]
+ );
+ }
+ ValueLocation::Immediate(i) => {
+ if i == 1 {
+ dynasm!(self.asm
+ ; dec Rd(op1)
+ );
+ } else {
+ dynasm!(self.asm
+ ; sub Rd(op1), i as i32
+ );
+ }
+ }
+ }
+
+ self.push(Value::Temp(op1));
+ self.free_value(op0);
+ }
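
The constant-folding fast paths above have to match the wrapping two's-complement semantics that Wasm mandates (and that the emitted `add`/`sub`/`imul` instructions implement), which is why they use `wrapping_*` rather than the plain operators, which panic on overflow in debug builds. A standalone check of a few edge cases:

// Illustration only: Rust's `wrapping_*` on fixed-width integers is
// exactly the behaviour Wasm's i32/i64 arithmetic requires.
fn main() {
    assert_eq!(i32::max_value().wrapping_add(1), i32::min_value());
    assert_eq!(0i32.wrapping_sub(i32::min_value()), i32::min_value());
    assert_eq!(0x1234_5678i32.wrapping_mul(0x1000), 0x4567_8000); // high bits discarded
}
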
+ // `i32_mul` needs to be separate because the immediate form of the instruction
+ // has a different syntax to the immediate form of the other instructions.
+ pub fn i32_mul(&mut self) {
+ let op0 = self.pop();
+ let op1 = self.pop();
+
+ if let Some(i1) = op1.immediate() {
+ if let Some(i0) = op0.immediate() {
+ self.block_state
+ .stack
+ .push(StackValue::Immediate(
+ i32::wrapping_mul(i1 as i32, i0 as i32) as _,
+ ));
+ return;
+ }
+ }
+
+ let (op1, op0) = match op1 {
+ Value::Temp(reg) => (reg, op0),
+ _ => {
+ if op0.immediate().is_some() {
+ (self.into_temp_reg(op1), op0)
+ } else {
+ (self.into_temp_reg(op0), op1)
+ }
+ }
+ };
+
+ match op0.location(&self.block_state.locals) {
+ ValueLocation::Reg(reg) => {
+ dynasm!(self.asm
+ ; imul Rd(op1), Rd(reg)
+ );
+ }
+ ValueLocation::Stack(offset) => {
+ let offset = self.adjusted_offset(offset);
+ dynasm!(self.asm
+ ; imul Rd(op1), [rsp + offset]
+ );
+ }
+ ValueLocation::Immediate(i) => {
+ dynasm!(self.asm
+ ; imul Rd(op1), Rd(op1), i as i32
+ );
+ }
+ }
+
+ self.push(Value::Temp(op1));
+ self.free_value(op0);
+ }
+
+ pub fn get_local(&mut self, local_idx: u32) {
+ self.push(Value::Local(local_idx));
+ }
+
+ // TODO: We can put locals that were spilled to the stack
+ // back into registers here.
+ pub fn set_local(&mut self, local_idx: u32) {
+ let val = self.pop();
+ let val_loc = val.location(&self.block_state.locals);
+ let dst_loc = self.block_state.parent_locals.get(local_idx);
+
+ self.materialize_local(local_idx);
+
+ if let Some(cur) = self
+ .block_state
+ .locals
+ .register_arguments
+ .get_mut(local_idx as usize)
+ {
+ *cur = dst_loc;
+ }
+
+ self.copy_value(val_loc, dst_loc);
+ self.free_value(val);
+ }
+
+ pub fn tee_local(&mut self, local_idx: u32) {
+ let val = self.pop();
+ let val_loc = val.location(&self.block_state.locals);
+ let dst_loc = self.block_state.parent_locals.get(local_idx);
+
+ self.materialize_local(local_idx);
+
+ if let Some(cur) = self
+ .block_state
+ .locals
+ .register_arguments
+ .get_mut(local_idx as usize)
+ {
+ *cur = dst_loc;
+ }
+
+ self.copy_value(val_loc, dst_loc);
+
+ match (val_loc, dst_loc) {
+ (ValueLocation::Stack(_), ValueLocation::Reg(_)) => {
+ self.free_value(val);
+ self.block_state.stack.push(StackValue::Local(local_idx))
+ },
+ _ => self.push(val),
+ }
+ }
+
+ fn materialize_local(&mut self, local_idx: u32) {
+ // TODO: With real stack allocation we can make this constant-time. We can have a kind of
+ // on-the-fly SSA transformation where we mark each `StackValue::Local` with an ID
+ // that increases with each assignment (this can be stored in block state and so
+ // is reset when the block ends). We then refcount the storage associated with each
+ // "value ID" and in `pop` we free up slots whose refcount hits 0. This means we
+ // can have even cleaner assembly than we currently do while giving us back
+ // linear runtime.
+ let mut highest_stack_index = None;
+ let mut highest_pop_index = None;
+
+ for index in (0..self.block_state.stack.len()).rev() {
+ match self.block_state.stack[index] {
+ // For now it's impossible for a local to be in RAX but that might be
+ // possible in the future, so we check both cases.
+ StackValue::Local(i) if i == local_idx => {
+ if highest_stack_index.is_none() {
+ highest_stack_index = Some(index);
+ }
+
+ self.block_state.depth.reserve(1);
+ self.block_state.stack[index] = StackValue::Pop;
+ match self.block_state.locals.get(local_idx) {
+ ValueLocation::Reg(r) => dynasm!(self.asm
+ ; push Rq(r)
+ ),
+ ValueLocation::Stack(offset) => {
+ let offset = self.adjusted_offset(offset);
+ dynasm!(self.asm
+ ; push QWORD [rsp + offset]
+ )
+ }
+ _ => unreachable!(),
+ }
+ }
+ StackValue::Pop => {
+ if highest_pop_index.is_none() {
+ highest_pop_index = Some(index);
+ }
+ }
+ _ => {}
+ }
+ }
+
+ if let (Some(stack), Some(pop)) = (highest_stack_index, highest_pop_index) {
+ if stack < pop {
+ panic!("Tried to materialize local but the stack already contains elements");
+ }
+ }
+ }
+
+ pub fn i32_literal(&mut self, imm: i32) {
+ self.push(Value::Immediate(imm as _));
+ }
+
+ pub fn i64_literal(&mut self, imm: i64) {
+ self.push(Value::Immediate(imm));
+ }
+
+ /// Make sure that any argument registers that will be used by the call are free
+ /// by storing them to the stack.
+ ///
+ /// Unfortunately, we can't elide this store if we're just passing arguments on
+ /// because these registers are caller-saved and so the callee can use them as
+ /// scratch space.
+ fn free_arg_registers(&mut self, count: u32) {
+ if count == 0 {
+ return;
+ }
+
+ // This is bound to the maximum size of the `ArrayVec` and so can be considered to have constant
+ // runtime
+ for i in 0..self.block_state.locals.register_arguments.len() {
+ match self.block_state.locals.register_arguments[i] {
+ ValueLocation::Reg(reg) => {
+ if ARGS_IN_GPRS.contains(&reg) {
+ let dst = ValueLocation::Stack(
+ ((self.block_state.locals.num_local_stack_slots - 1 - i as u32)
+ * WORD_SIZE) as _,
+ );
+ self.copy_value(ValueLocation::Reg(reg), dst);
+ self.block_state.locals.register_arguments[i] = dst;
+ }
+ }
+ _ => {}
+ }
+ }
+ }
+
+ fn free_return_register(&mut self, count: u32) {
+ if count == 0 {
+ return;
+ }
+
+ self.free_register(RAX);
+ }
+
+ fn free_register(&mut self, reg: GPR) {
+ let mut to_repush = 0;
+ let mut out = None;
+
+ if self.block_state.regs.is_free(reg) {
+ return;
+ }
+
+ // TODO: With real stack allocation we can make this constant-time
+ for stack_val in self.block_state.stack.iter_mut().rev() {
+ match stack_val.location(&self.block_state.locals) {
+ // For now it's impossible for a local to be in RAX but that might be
+ // possible in the future, so we check both cases.
+ Some(ValueLocation::Reg(r)) if r == reg => {
+ *stack_val = StackValue::Pop;
+
+ out = Some(*stack_val);
+
+ break;
+ }
+ Some(_) => {}
+ None => {
+ to_repush += 1;
+ }
+ }
+ }
+
+ if let Some(out) = out {
+ match out {
+ StackValue::Temp(gpr) => {
+ dynasm!(self.asm
+ ; mov Rq(gpr), Rq(reg)
+ );
+ }
+ StackValue::Pop => {
+ self.block_state.depth.reserve(1);
+ // TODO: Ideally we should do proper stack allocation so we
+ // don't have to check this at all (i.e. order on the
+ // physical stack and order on the logical stack should
+ // be independent).
+ debug_assert_eq!(to_repush, 0);
+ dynasm!(self.asm
+ ; push Rq(reg)
+ );
+ }
+ _ => unreachable!(),
+ }
+ self.block_state.regs.release_scratch_gpr(reg);
+ }
+ }
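
A quick standalone check of the spill-slot arithmetic used by `free_arg_registers` above: register argument `i` is saved to the `i`-th slot counted down from the top of the local area, so distinct arguments land in distinct 8-byte slots. The concrete slot count here is invented for the example:

// Illustration only: with a 4-slot frame, the first two register
// arguments spill to offsets 24 and 16 and never collide.
fn main() {
    const WORD_SIZE: u32 = 8;
    let num_local_stack_slots = 4u32; // e.g. 2 locals + 2 register args
    let offsets: Vec<u32> = (0..2u32)
        .map(|i| (num_local_stack_slots - 1 - i) * WORD_SIZE)
        .collect();
    assert_eq!(offsets, vec![24, 16]); // one slot each for RDI, RSI
}
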
+
+ // TODO: Use `ArrayVec`?
+ /// Saves volatile (i.e. caller-saved) registers before a function call, if they are used.
+ fn save_volatile(&mut self) -> ArrayVec<[GPR; SCRATCH_REGS.len()]> {
+ let mut out = ArrayVec::new();
+
+ // TODO: If there are no `StackValue::Pop`s that need to be popped
+ // before we reach our `Temp` value, we can set the `StackValue`
+ // for the register to be restored to `StackValue::Pop` (and
+ // release the register!) instead of restoring it.
+ for &reg in SCRATCH_REGS.iter() {
+ if !self.block_state.regs.is_free(reg) {
+ dynasm!(self.asm
+ ; push Rq(reg)
+ );
+ out.push(reg);
+ }
+ }
- _ => unreachable!(),
 }
- ctx.block_state.regs.release_scratch_gpr(reg);
+
+ out
 }
-}
-// TODO: Use `ArrayVec`?
-/// Saves volatile (i.e. caller-saved) registers before a function call, if they are used.
-fn save_volatile(ctx: &mut Context) -> ArrayVec<[GPR; SCRATCH_REGS.len()]> {
- let mut out = ArrayVec::new();
+ /// Write the arguments to the callee to the registers and the stack using the SystemV
+ /// calling convention.
+ fn pass_outgoing_args(&mut self, arity: u32, return_arity: u32) -> CallCleanup {
+ let num_stack_args = (arity as usize).saturating_sub(ARGS_IN_GPRS.len()) as i32;
- // TODO: If there are no `StackValue::Pop`s that need to be popped
- // before we reach our `Temp` value, we can set the `StackValue`
- // for the register to be restored to `StackValue::Pop` (and
- // release the register!) instead of restoring it.
- for &reg in SCRATCH_REGS.iter() {
- if !ctx.block_state.regs.is_free(reg) {
- dynasm!(ctx.asm
+ self.free_arg_registers(arity);
+
+ // We pop stack arguments first - arguments are RTL
+ if num_stack_args > 0 {
+ let size = num_stack_args * WORD_SIZE as i32;
+
+ // Reserve space for the outgoing stack arguments (so we don't
+ // stomp on any locals or the value stack).
+ dynasm!(self.asm
+ ; sub rsp, size
 );
- out.push(reg);
+ self.block_state.depth.reserve(num_stack_args as u32);
+
+ for stack_slot in (0..num_stack_args).rev() {
+ // Since the stack offset is from the bottom of the locals
+ // and we want to start from the actual RSP (so `offset = 0`
+ // writes to `[rsp]`), we subtract our current depth.
+ //
+ // We might want to do this in the future by having a separate
+ // `AbsoluteValueLocation` and `RelativeValueLocation`.
+ let offset = stack_slot * WORD_SIZE as i32
+ - self.block_state.depth.0 as i32 * WORD_SIZE as i32;
+ self.pop_into(ValueLocation::Stack(offset));
+ }
+ }
+
+ for reg in ARGS_IN_GPRS[..(arity as usize).min(ARGS_IN_GPRS.len())]
+ .iter()
+ .rev()
+ {
+ self.pop_into(ValueLocation::Reg(*reg));
+ }
+
+ // We do this before doing `save_volatile`, since otherwise we'll trample the return value
+ // of the call when we pop back.
+ self.free_return_register(return_arity);
+
+ CallCleanup {
+ stack_depth: num_stack_args,
+ restore_registers: self.save_volatile(),
+ }
+ }
- out
-}
+ /// Frees up the stack space used for stack-passed arguments and restores the value
+ /// of volatile (i.e. caller-saved) registers to the state that they were in before
+ /// the call.
+ fn post_call_cleanup(&mut self, mut cleanup: CallCleanup) {
+ if cleanup.stack_depth > 0 {
+ let size = cleanup.stack_depth * WORD_SIZE as i32;
+ self.block_state.depth.free(cleanup.stack_depth as _);
+ dynasm!(self.asm
+ ; add rsp, size
+ );
+ }
-/// Write the arguments to the callee to the registers and the stack using the SystemV
-/// calling convention.
-fn pass_outgoing_args(ctx: &mut Context, arity: u32, return_arity: u32) -> CallCleanup {
- let num_stack_args = (arity as usize).saturating_sub(ARGS_IN_GPRS.len()) as i32;
-
- free_arg_registers(ctx, arity);
-
- // We pop stack arguments first - arguments are RTL
- if num_stack_args > 0 {
- let size = num_stack_args * WORD_SIZE as i32;
-
- // Reserve space for the outgoing stack arguments (so we don't
- // stomp on any locals or the value stack).
- dynasm!(ctx.asm
- ; sub rsp, size
- );
- ctx.block_state.depth.reserve(num_stack_args as u32);
-
- for stack_slot in (0..num_stack_args).rev() {
- // Since the stack offset is from the bottom of the locals
- // and we want to start from the actual RSP (so `offset = 0`
- // writes to `[rsp]`), we subtract our current depth.
- //
- // We might want to do this in the future by having a separate
- // `AbsoluteValueLocation` and `RelativeValueLocation`.
- let offset =
- stack_slot * WORD_SIZE as i32 - ctx.block_state.depth.0 as i32 * WORD_SIZE as i32;
- pop_into(ctx, ValueLocation::Stack(offset));
+ for reg in cleanup.restore_registers.drain(..).rev() {
+ dynasm!(self.asm
+ ; pop Rq(reg)
+ );
 }
 }

- for reg in ARGS_IN_GPRS[..(arity as usize).min(ARGS_IN_GPRS.len())]
- .iter()
- .rev()
- {
- pop_into(ctx, ValueLocation::Reg(*reg));
+ fn push_function_return(&mut self, arity: u32) {
+ if arity == 0 {
+ return;
+ }
+ debug_assert_eq!(arity, 1);
+ self.block_state.regs.mark_used(RAX);
+ self.push(Value::Temp(RAX));
 }

- // We do this before doing `save_volatile`, since otherwise we'll trample the return value
- // of the call when we pop back.
- free_return_register(ctx, return_arity);
+ /// Call a function with the given index
+ pub fn call_direct(&mut self, index: u32, arg_arity: u32, return_arity: u32) {
+ debug_assert!(
+ return_arity == 0 || return_arity == 1,
+ "We don't support multiple return yet"
+ );

- CallCleanup {
- stack_depth: num_stack_args,
- restore_registers: save_volatile(ctx),
+ let cleanup = self.pass_outgoing_args(arg_arity, return_arity);
+
+ let label = &self.func_starts[index as usize].1;
+ dynasm!(self.asm
+ ; call =>*label
+ );
+
+ self.post_call_cleanup(cleanup);
+ self.push_function_return(return_arity);
 }
-}

-/// Frees up the stack space used for stack-passed arguments and restores the value
-/// of volatile (i.e. caller-saved) registers to the state that they were in before
-/// the call.
-fn post_call_cleanup(ctx: &mut Context, mut cleanup: CallCleanup) {
- if cleanup.stack_depth > 0 {
- let size = cleanup.stack_depth * WORD_SIZE as i32;
- ctx.block_state.depth.free(cleanup.stack_depth as _);
- dynasm!(ctx.asm
- ; add rsp, size
+ // TODO: Reserve space to store RBX, RBP, and R12..R15 so we can use them
+ // as scratch registers
+ // TODO: Allow use of unused argument registers as scratch registers.
+ /// Writes the function prologue and stores the arguments as locals
+ pub fn start_function(&mut self, arguments: u32, locals: u32) -> Function {
+ let reg_args = &ARGS_IN_GPRS[..(arguments as usize).min(ARGS_IN_GPRS.len())];
+
+ // We need space to store the register arguments if we need to call a function
+ // and overwrite these registers so we add `reg_args.len()`
+ let stack_slots = locals + reg_args.len() as u32;
+ // Align stack slots to the nearest even number. This is required
+ // by the x86-64 ABI.
+ let aligned_stack_slots = (stack_slots + 1) & !1; + let frame_size: i32 = aligned_stack_slots as i32 * WORD_SIZE as i32; + + self.block_state.locals.register_arguments = + reg_args.iter().cloned().map(ValueLocation::Reg).collect(); + self.block_state.locals.num_stack_args = arguments.saturating_sub(ARGS_IN_GPRS.len() as _); + self.block_state.locals.num_local_stack_slots = stack_slots; + self.block_state.return_register = Some(RAX); + + self.block_state.parent_locals = self.block_state.locals.clone(); + + // self.block_state.depth.reserve(aligned_stack_slots - locals); + let should_generate_epilogue = frame_size > 0; + if should_generate_epilogue { + dynasm!(self.asm + ; push rbp + ; mov rbp, rsp + ; sub rsp, frame_size + ); + } + + Function { + should_generate_epilogue, + } + } + + /// Writes the function epilogue, restoring the stack pointer and returning to the + /// caller. + pub fn epilogue(&mut self, func: Function) { + // We don't need to clean up the stack - RSP is restored and + // the calling function has its own register stack and will + // stomp on the registers from our stack if necessary. + if func.should_generate_epilogue { + dynasm!(self.asm + ; mov rsp, rbp + ; pop rbp + ); + } + + dynasm!(self.asm + ; ret ); } - for reg in cleanup.restore_registers.drain(..).rev() { - dynasm!(ctx.asm - ; pop Rq(reg) + pub fn trap(&mut self) { + dynasm!(self.asm + ; ud2 ); } } -fn push_function_return(ctx: &mut Context, arity: u32) { - if arity == 0 { - return; - } - debug_assert_eq!(arity, 1); - ctx.block_state.regs.mark_used(RAX); - ctx.block_state.stack.push(StackValue::Temp(RAX)); -} - -/// Call a function with the given index -pub fn call_direct(ctx: &mut Context, index: u32, arg_arity: u32, return_arity: u32) { - debug_assert!( - return_arity == 0 || return_arity == 1, - "We don't support multiple return yet" - ); - - let cleanup = pass_outgoing_args(ctx, arg_arity, return_arity); - - let label = &ctx.func_starts[index as usize].1; - dynasm!(ctx.asm - ; call =>*label - ); - - post_call_cleanup(ctx, cleanup); - push_function_return(ctx, return_arity); -} - -#[must_use] -pub struct Function { - should_generate_epilogue: bool, -} - -// TODO: Reserve space to store RBX, RBP, and R12..R15 so we can use them -// as scratch registers -// TODO: Allow use of unused argument registers as scratch registers. -/// Writes the function prologue and stores the arguments as locals -pub fn start_function(ctx: &mut Context, arguments: u32, locals: u32) -> Function { - let reg_args = &ARGS_IN_GPRS[..(arguments as usize).min(ARGS_IN_GPRS.len())]; - - // We need space to store the register arguments if we need to call a function - // and overwrite these registers so we add `reg_args.len()` - let stack_slots = locals + reg_args.len() as u32; - // Align stack slots to the nearest even number. This is required - // by x86-64 ABI. 
- let aligned_stack_slots = (stack_slots + 1) & !1;
- let frame_size: i32 = aligned_stack_slots as i32 * WORD_SIZE as i32;
-
- ctx.block_state.locals.register_arguments =
- reg_args.iter().cloned().map(ValueLocation::Reg).collect();
- ctx.block_state.locals.num_stack_args = arguments.saturating_sub(ARGS_IN_GPRS.len() as _);
- ctx.block_state.locals.num_local_stack_slots = stack_slots;
- ctx.block_state.return_register = Some(RAX);
-
- ctx.block_state.parent_locals = ctx.block_state.locals.clone();
-
- // ctx.block_state.depth.reserve(aligned_stack_slots - locals);
- let should_generate_epilogue = frame_size > 0;
- if should_generate_epilogue {
- dynasm!(ctx.asm
- ; push rbp
- ; mov rbp, rsp
- ; sub rsp, frame_size
- );
- }
-
- Function {
- should_generate_epilogue,
- }
-}
-
-/// Writes the function epilogue, restoring the stack pointer and returning to the
-/// caller.
-pub fn epilogue(ctx: &mut Context, func: Function) {
- // We don't need to clean up the stack - RSP is restored and
- // the calling function has its own register stack and will
- // stomp on the registers from our stack if necessary.
- if func.should_generate_epilogue {
- dynasm!(ctx.asm
- ; mov rsp, rbp
- ; pop rbp
- );
- }
-
- dynasm!(ctx.asm
- ; ret
- );
-}
-
-pub fn trap(ctx: &mut Context) {
- dynasm!(ctx.asm
- ; ud2
- );
-}
-
diff --git a/src/error.rs b/src/error.rs
index 5f2ac6492e..c7b1fe46e8 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -20,6 +20,12 @@ impl From for Error {
 }
 }
 
+impl From<!> for Error {
+ fn from(other: !) -> Self {
+ other
+ }
+}
+
 impl From<capstone::Error> for Error {
 fn from(e: capstone::Error) -> Self {
 Error::Disassembler(e.to_string())
diff --git a/src/function_body.rs b/src/function_body.rs
index c40db65fe3..b5022506f3 100644
--- a/src/function_body.rs
+++ b/src/function_body.rs
@@ -90,12 +90,31 @@ impl ControlFrame {
 }
 }
 
-pub fn translate(
- session: &mut CodeGenSession,
+pub fn translate<M: Memory>(
+ session: &mut CodeGenSession<M>,
 translation_ctx: &TranslationContext,
 func_idx: u32,
 body: &FunctionBody,
-) -> Result<(), Error> {
+) -> Result<(), Error>
+where
+ Error: From<M::Error>,
+{
+ fn break_from_control_frame_with_id<M: Memory>(
+ ctx: &mut Context<M>,
+ control_frames: &mut Vec<ControlFrame>,
+ idx: usize,
+ ) {
+ control_frames
+ .last_mut()
+ .expect("Control stack is empty!")
+ .mark_unreachable();
+
+ let control_frame = control_frames.get(idx).expect("wrong depth");
+ ctx.return_from_block(control_frame.arity(), idx == 0);
+
+ ctx.br(control_frame.kind.branch_target());
+ }
+
 let locals = body.get_locals_reader()?;
 
 let func_type = translation_ctx.func_type(func_idx);
@@ -117,14 +136,14 @@ pub fn translate(
 let ctx = &mut session.new_context(func_idx);
 let operators = body.get_operators_reader()?;
 
- let func = start_function(ctx, arg_count, num_locals);
+ let func = ctx.start_function(arg_count, num_locals);
 
 let mut control_frames = Vec::new();
 
 // Upon entering the function implicit frame for function body is pushed. It has the same
 // result type as the function itself. Branching to it is equivalent to returning from the function.
- let epilogue_label = create_label(ctx);
- let function_block_state = start_block(ctx);
+ let epilogue_label = ctx.create_label();
+ let function_block_state = ctx.start_block();
 control_frames.push(ControlFrame::new(
 ControlFrameKind::Block {
 end_label: epilogue_label,
@@ -135,6 +154,8 @@
 
 // TODO: We want to make this a state machine (maybe requires 1-element lookahead? Not sure) so that we
 // can coelesce multiple `end`s and optimise break-at-end-of-block into noop.
+ // TODO: Does coalescing multiple `end`s even matter? At worst this only elides a single move at
+ // the end of a function, and that move is probably a no-op anyway due to register renaming.
 for op in operators {
 let op = op?;
 
@@ -157,11 +178,11 @@
 .last_mut()
 .expect("control stack is never empty")
 .mark_unreachable();
- trap(ctx);
+ ctx.trap();
 }
 Operator::Block { ty } => {
- let label = create_label(ctx);
- let state = start_block(ctx);
+ let label = ctx.create_label();
+ let state = ctx.start_block();
 control_frames.push(ControlFrame::new(
 ControlFrameKind::Block { end_label: label },
 state,
@@ -169,49 +190,32 @@
 ));
 }
 Operator::Return => {
- control_frames
- .last_mut()
- .expect("control stack is never empty")
- .mark_unreachable();
-
- let control_frame = control_frames.get(0).expect("control stack is never empty");
-
- return_from_block(ctx, control_frame.arity(), true);
-
- br(ctx, control_frame.kind.branch_target());
+ break_from_control_frame_with_id(ctx, &mut control_frames, 0);
 }
 Operator::Br { relative_depth } => {
- control_frames
- .last_mut()
- .expect("control stack is never empty")
- .mark_unreachable();
-
 let idx = control_frames.len() - 1 - relative_depth as usize;
- let control_frame = control_frames.get(idx).expect("wrong depth");
 
- return_from_block(ctx, control_frame.arity(), idx == 0);
-
- br(ctx, control_frame.kind.branch_target());
+ break_from_control_frame_with_id(ctx, &mut control_frames, idx);
 }
 Operator::BrIf { relative_depth } => {
 let idx = control_frames.len() - 1 - relative_depth as usize;
 let control_frame = control_frames.get(idx).expect("wrong depth");
 
- let if_not = create_label(ctx);
+ let if_not = ctx.create_label();
 
- jump_if_false(ctx, if_not);
+ ctx.jump_if_false(if_not);
 
- return_from_block(ctx, control_frame.arity(), idx == 0);
- br(ctx, control_frame.kind.branch_target());
+ ctx.return_from_block(control_frame.arity(), idx == 0);
+ ctx.br(control_frame.kind.branch_target());
 
- define_label(ctx, if_not);
+ ctx.define_label(if_not);
 }
 Operator::If { ty } => {
- let end_label = create_label(ctx);
- let if_not = create_label(ctx);
+ let end_label = ctx.create_label();
+ let if_not = ctx.create_label();
 
- jump_if_false(ctx, if_not);
- let state = start_block(ctx);
+ ctx.jump_if_false(if_not);
+ let state = ctx.start_block();
 
 control_frames.push(ControlFrame::new(
 ControlFrameKind::IfTrue { end_label, if_not },
@@ -220,10 +224,10 @@
 ));
 }
 Operator::Loop { ty } => {
- let header = create_label(ctx);
+ let header = ctx.create_label();
 
- define_label(ctx, header);
- let state = start_block(ctx);
+ ctx.define_label(header);
+ let state = ctx.start_block();
 
 control_frames.push(ControlFrame::new(
 ControlFrameKind::Loop { header },
@@ -239,16 +243,16 @@
 block_state,
 ..
 }) => {
- return_from_block(ctx, arity(ty), false);
- reset_block(ctx, block_state.clone());
+ ctx.return_from_block(arity(ty), false);
+ ctx.reset_block(block_state.clone());
 
 // Finalize `then` block by jumping to the `end_label`.
- br(ctx, end_label);
+ ctx.br(end_label);
 
 // Define `if_not` label here, so if the corresponding `if` block receives
 // 0 it will branch here.
 // After that reset stack depth to the value before entering `if` block.
- define_label(ctx, if_not);
+ ctx.define_label(if_not);
 
 // Carry over the `end_label`, so it will be resolved when the corresponding `end`
 // is encountered.
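
For the `Return`/`Br`/`BrIf` arms above, the only index arithmetic is mapping Wasm's relative branch depth onto the `control_frames` vector. A standalone sketch of that mapping (the frame names are invented for the example):

// Illustration only: depth 0 targets the innermost frame (the end of
// the Vec), and the function body itself is frame 0, which is why
// `return` is just a branch with `idx == 0`.
fn main() {
    let control_frames = vec!["function body", "block", "loop"];
    let relative_depth = 1; // branch out of the loop, targeting the block
    let idx = control_frames.len() - 1 - relative_depth;
    assert_eq!(control_frames[idx], "block");
    assert_eq!(control_frames.len() - 1 - 2, 0); // depth 2 here == return
}
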
@@ -278,66 +282,71 @@ pub fn translate( // Don't bother generating this code if we're in unreachable code if !control_frame.unreachable { - return_from_block(ctx, arity, control_frames.is_empty()); + ctx.return_from_block(arity, control_frames.is_empty()); } + let block_end = control_frame.kind.block_end(); // TODO: What is the correct order of this and the `define_label`? It's clear for `block`s // but I'm not certain for `if..then..else..end`. - end_block(ctx, control_frame.block_state); - - if let Some(block_end) = control_frame.kind.block_end() { - define_label(ctx, block_end); - } + ctx.end_block(control_frame.block_state, |ctx| { + if let Some(block_end) = block_end { + ctx.define_label(block_end); + } + }); if let ControlFrameKind::IfTrue { if_not, .. } = control_frame.kind { // this is `if .. end` construction. Define the `if_not` label here. - define_label(ctx, if_not); + ctx.define_label(if_not); } } - Operator::I32Eq => i32_eq(ctx), - Operator::I32Ne => i32_neq(ctx), - Operator::I32LtS => i32_lt_s(ctx), - Operator::I32LeS => i32_le_s(ctx), - Operator::I32GtS => i32_gt_s(ctx), - Operator::I32GeS => i32_ge_s(ctx), - Operator::I32LtU => i32_lt_u(ctx), - Operator::I32LeU => i32_le_u(ctx), - Operator::I32GtU => i32_gt_u(ctx), - Operator::I32GeU => i32_ge_u(ctx), - Operator::I32Add => i32_add(ctx), - Operator::I32Sub => i32_sub(ctx), - Operator::I32And => i32_and(ctx), - Operator::I32Or => i32_or(ctx), - Operator::I32Xor => i32_xor(ctx), - Operator::I32Mul => i32_mul(ctx), - Operator::I64Eq => i64_eq(ctx), - Operator::I64Ne => i64_neq(ctx), - Operator::I64LtS => i64_lt_s(ctx), - Operator::I64LeS => i64_le_s(ctx), - Operator::I64GtS => i64_gt_s(ctx), - Operator::I64GeS => i64_ge_s(ctx), - Operator::I64LtU => i64_lt_u(ctx), - Operator::I64LeU => i64_le_u(ctx), - Operator::I64GtU => i64_gt_u(ctx), - Operator::I64GeU => i64_ge_u(ctx), - Operator::I64Add => i64_add(ctx), - Operator::I64Sub => i64_sub(ctx), - Operator::I64And => i64_and(ctx), - Operator::I64Or => i64_or(ctx), - Operator::I64Xor => i64_xor(ctx), - Operator::I64Mul => i64_mul(ctx), - Operator::Drop => drop(ctx), - Operator::SetLocal { local_index } => set_local_i32(ctx, local_index), - Operator::GetLocal { local_index } => get_local_i32(ctx, local_index), - Operator::I32Const { value } => literal_i32(ctx, value), - Operator::I64Const { value } => literal_i64(ctx, value), + Operator::I32Eq => ctx.i32_eq(), + Operator::I32Ne => ctx.i32_neq(), + Operator::I32LtS => ctx.i32_lt_s(), + Operator::I32LeS => ctx.i32_le_s(), + Operator::I32GtS => ctx.i32_gt_s(), + Operator::I32GeS => ctx.i32_ge_s(), + Operator::I32LtU => ctx.i32_lt_u(), + Operator::I32LeU => ctx.i32_le_u(), + Operator::I32GtU => ctx.i32_gt_u(), + Operator::I32GeU => ctx.i32_ge_u(), + Operator::I32Add => ctx.i32_add(), + Operator::I32Sub => ctx.i32_sub(), + Operator::I32And => ctx.i32_and(), + Operator::I32Or => ctx.i32_or(), + Operator::I32Xor => ctx.i32_xor(), + Operator::I32Mul => ctx.i32_mul(), + Operator::I64Eq => ctx.i64_eq(), + Operator::I64Ne => ctx.i64_neq(), + Operator::I64LtS => ctx.i64_lt_s(), + Operator::I64LeS => ctx.i64_le_s(), + Operator::I64GtS => ctx.i64_gt_s(), + Operator::I64GeS => ctx.i64_ge_s(), + Operator::I64LtU => ctx.i64_lt_u(), + Operator::I64LeU => ctx.i64_le_u(), + Operator::I64GtU => ctx.i64_gt_u(), + Operator::I64GeU => ctx.i64_ge_u(), + Operator::I64Add => ctx.i64_add(), + Operator::I64Sub => ctx.i64_sub(), + Operator::I64And => ctx.i64_and(), + Operator::I64Or => ctx.i64_or(), + Operator::I64Xor => ctx.i64_xor(), + Operator::I64Mul => 
ctx.i64_mul(),
+ Operator::Drop => ctx.drop(),
+ Operator::SetLocal { local_index } => ctx.set_local(local_index),
+ Operator::GetLocal { local_index } => ctx.get_local(local_index),
+ Operator::TeeLocal { local_index } => ctx.tee_local(local_index),
+ Operator::I32Const { value } => ctx.i32_literal(value),
+ Operator::I64Const { value } => ctx.i64_literal(value),
+ Operator::I32Load { memarg } => ctx.i32_load(memarg.offset)?,
+ Operator::I64Load { memarg } => ctx.i64_load(memarg.offset)?,
+ Operator::I32Store { memarg } => ctx.i32_store(memarg.offset)?,
+ Operator::I64Store { memarg } => ctx.i64_store(memarg.offset)?,
 Operator::Call { function_index } => {
 let callee_ty = translation_ctx.func_type(function_index);
 
 // TODO: this implementation assumes that this function is locally defined.
 
- call_direct(
- ctx,
+ ctx.call_direct(
 function_index,
 callee_ty.params.len() as u32,
 callee_ty.returns.len() as u32,
@@ -349,7 +358,7 @@
 }
 }
 }
- epilogue(ctx, func);
+ ctx.epilogue(func);
 
 Ok(())
 }
diff --git a/src/lib.rs b/src/lib.rs
index b725112611..c96301861a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,4 @@
-#![feature(plugin, test, const_slice_len)]
+#![feature(plugin, test, const_slice_len, never_type)]
 #![plugin(dynasm)]
 
 extern crate test;
diff --git a/src/module.rs b/src/module.rs
index c3b7f4ddaf..888893bfef 100644
--- a/src/module.rs
+++ b/src/module.rs
@@ -38,6 +38,7 @@ impl_function_args!(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S);
 #[derive(Default)]
 pub struct TranslatedModule {
 translated_code_section: Option<TranslatedCodeSection>,
+ memory: Option<Vec<u8>>,
 }
 
 impl TranslatedModule {
@@ -136,7 +137,18 @@ pub fn translate(data: &[u8]) -> Result<TranslatedModule, Error> {
 
 if let SectionCode::Memory = section.code {
 let memories = section.get_memory_section_reader()?;
- translate_sections::memory(memories)?;
+ let mem = translate_sections::memory(memories)?;
+
+ assert!(
+ mem.len() <= 1,
+ "Multiple memory sections not yet implemented"
+ );
+
+ if !mem.is_empty() {
+ let mem = mem[0];
+ assert_eq!(Some(mem.limits.initial), mem.limits.maximum);
+ output.memory = Some(vec![0; mem.limits.initial as usize * 65_536]);
+ }
 
 reader.skip_custom_sections()?;
 if reader.eof() {
@@ -191,7 +203,11 @@ pub fn translate(data: &[u8]) -> Result<TranslatedModule, Error> {
 
 if let SectionCode::Code = section.code {
 let code = section.get_code_section_reader()?;
- output.translated_code_section = Some(translate_sections::code(code, &ctx)?);
+ output.translated_code_section = Some(translate_sections::code(
+ code,
+ &ctx,
+ output.memory.as_mut().map(|m| &mut m[..]),
+ )?);
 
 reader.skip_custom_sections()?;
 if reader.eof() {
diff --git a/src/tests.rs b/src/tests.rs
index bfd525c7ee..03a460e8f6 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -25,6 +25,7 @@ mod op32 {
 ($op:ident, $func:expr) => {
 mod $op {
 use super::{translate_wat, TranslatedModule};
+ use std::sync::Once;
 
 const OP: &str = stringify!($op);
 
@@ -41,17 +42,18 @@ mod op32 {
 }
 
 fn lit_lit(a: i32, b: i32) -> bool {
+ let translated = translate_wat(&format!("
+ (module (func (result i32)
+ (i32.{op} (i32.const {left}) (i32.const {right}))))
+ ", op = OP, left = a, right = b));
+ static ONCE: Once = Once::new();
+ ONCE.call_once(|| translated.disassemble());
 unsafe {
- translate_wat(&format!("
- (module (func (result i32)
- (i32.{op} (i32.const {left}) (i32.const {right}))))
- ", op = OP, left = a, right = b)).execute_func::<(), i32>(0, ()) == $func(a, b)
+ translated.execute_func::<(), i32>(0, ()) == $func(a, b)
 }
 }
 
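
These quickcheck-style properties run the same body many times with random inputs; the `Once` in `lit_lit` (and in `lit_reg`/`reg_lit` below) makes sure the noisy disassembly is only dumped on the first run. A standalone sketch of the pattern, where `noisy_setup` stands in for `translated.disassemble()`:

// Illustration only: a function-local `static Once` fires exactly once
// across all invocations of the enclosing function.
use std::sync::Once;

fn noisy_setup() {
    println!("disassembly would be printed here");
}

fn property(_a: i32) -> bool {
    static ONCE: Once = Once::new();
    ONCE.call_once(noisy_setup); // runs exactly once across all calls
    true
}

fn main() {
    assert!((0..100).all(property)); // prints the banner a single time
}
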
 fn lit_reg(a: i32, b: i32) -> bool {
- use std::sync::Once;
-
 let translated = translate_wat(&format!("
 (module (func (param i32) (result i32)
 (i32.{op} (i32.const {left}) (get_local 0))))
 ", op = OP, left = a));
@@ -64,11 +66,14 @@ mod op32 {
 }
 
 fn reg_lit(a: i32, b: i32) -> bool {
+ let translated = translate_wat(&format!("
+ (module (func (param i32) (result i32)
+ (i32.{op} (get_local 0) (i32.const {right}))))
+ ", op = OP, right = b));
+ static ONCE: Once = Once::new();
+ ONCE.call_once(|| translated.disassemble());
 unsafe {
- translate_wat(&format!("
- (module (func (param i32) (result i32)
- (i32.{op} (get_local 0) (i32.const {right}))))
- ", op = OP, right = b)).execute_func::<(i32,), i32>(0, (a,)) == $func(a, b)
+ translated.execute_func::<(i32,), i32>(0, (a,)) == $func(a, b)
 }
 }
 }
@@ -452,7 +457,11 @@ fn br_block() {
 )
 )
 "#;
- assert_eq!(execute_wat(code, 5, 7), 12);
+
+ let translated = translate_wat(code);
+ translated.disassemble();
+
+ assert_eq!(unsafe { translated.execute_func::<(i32, i32), i32>(0, (5, 7)) }, 12);
 }
 
 // Tests discarding values on the value stack, while
@@ -724,6 +733,42 @@ fn fib() {
 }
 }
 
+#[test]
+fn storage() {
+ const CODE: &str = r#"
+(module
+ (memory 1 1)
+
+ (func (result i32)
+ (local i32 i32 i32)
+ (set_local 0 (i32.const 10))
+ (block
+ (loop
+ (if
+ (i32.eq (get_local 0) (i32.const 0))
+ (then (br 2))
+ )
+ (set_local 2 (i32.mul (get_local 0) (i32.const 4)))
+ (i32.store (get_local 2) (get_local 0))
+ (set_local 1 (i32.load (get_local 2)))
+ (if
+ (i32.ne (get_local 0) (get_local 1))
+ (then (return (i32.const 0)))
+ )
+ (set_local 0 (i32.sub (get_local 0) (i32.const 1)))
+ (br 0)
+ )
+ )
+ (i32.const 1)
+ )
+)"#;
+
+ let translated = translate_wat(CODE);
+ translated.disassemble();
+
+ assert_eq!(unsafe { translated.execute_func::<(), i32>(0, ()) }, 1);
+}
+
 #[bench]
 fn bench_fibonacci_compile(b: &mut test::Bencher) {
 let wasm = wabt::wat2wasm(FIBONACCI).unwrap();
diff --git a/src/translate_sections.rs b/src/translate_sections.rs
index 94d248d517..3ff236fbd4 100644
--- a/src/translate_sections.rs
+++ b/src/translate_sections.rs
@@ -45,11 +45,11 @@ pub fn table(tables: TableSectionReader) -> Result<(), Error> {
 }
 
 /// Parses the Memory section of the wasm module.
-pub fn memory(memories: MemorySectionReader) -> Result<(), Error> {
- for entry in memories {
- entry?; // TODO
- }
- Ok(())
+pub fn memory(memories: MemorySectionReader) -> Result<Vec<MemoryType>, Error> {
+ memories
+ .into_iter()
+ .map(|r| r.map_err(Into::into))
+ .collect()
 }
 
 /// Parses the Global section of the wasm module.
@@ -86,13 +86,22 @@ pub fn element(elements: ElementSectionReader) -> Result<(), Error> {
 pub fn code(
 code: CodeSectionReader,
 translation_ctx: &TranslationContext,
+ memory: Option<&mut [u8]>,
 ) -> Result<TranslatedCodeSection, Error> {
 let func_count = code.get_count();
 
- let mut session = CodeGenSession::new(func_count);
- for (idx, body) in code.into_iter().enumerate() {
- function_body::translate(&mut session, translation_ctx, idx as u32, &body?)?;
+ if let Some(memory) = memory {
+ let mut session = CodeGenSession::<::backend::HasMemory>::with_memory(func_count, memory.as_mut_ptr());
+ for (idx, body) in code.into_iter().enumerate() {
+ function_body::translate(&mut session, translation_ctx, idx as u32, &body?)?;
+ }
+ Ok(session.into_translated_code_section()?)
+ } else {
+ let mut session = CodeGenSession::<::backend::NoMemory>::new(func_count);
+ for (idx, body) in code.into_iter().enumerate() {
+ function_body::translate(&mut session, translation_ctx, idx as u32, &body?)?;
+ }
+ Ok(session.into_translated_code_section()?)
 }
- Ok(session.into_translated_code_section()?)
 }
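
The dispatch above picks the memory strategy at the type level, so each branch gets its own monomorphised backend, and a module with no memory section can reject loads and stores at translation time instead of reading through a dangling base pointer. A simplified standalone sketch of the shape of that dispatch; every name here is a stand-in for the real `backend` types, not their actual definitions:

// Illustration only: type-level selection of the memory strategy.
trait Memory {
    type Ref;
}
struct HasMemory;
struct NoMemory;
impl Memory for HasMemory {
    type Ref = *mut u8; // base pointer of the linear memory buffer
}
impl Memory for NoMemory {
    type Ref = (); // no memory: loads/stores become translation errors
}

fn compile<M: Memory>(_base: M::Ref) {
    // one monomorphised code generator per memory strategy
}

fn main() {
    let mut linear_memory = vec![0u8; 65_536]; // one Wasm page
    compile::<HasMemory>(linear_memory.as_mut_ptr());
    compile::<NoMemory>(());
}
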
 
 /// Parses the Data section of the wasm module.