diff --git a/winch/codegen/src/abi/local.rs b/winch/codegen/src/abi/local.rs index 6f4f727481..a951dac446 100644 --- a/winch/codegen/src/abi/local.rs +++ b/winch/codegen/src/abi/local.rs @@ -1,7 +1,7 @@ use wasmparser::ValType; /// Base register used to address the local slot. /// -/// Slots for stack arguments are addressed from the frame pointer +/// Slots for stack arguments are addressed from the frame pointer. /// Slots for function-defined locals and for registers are addressed /// from the stack pointer. #[derive(Eq, PartialEq)] diff --git a/winch/codegen/src/abi/mod.rs b/winch/codegen/src/abi/mod.rs index 0877703916..83a39cf6db 100644 --- a/winch/codegen/src/abi/mod.rs +++ b/winch/codegen/src/abi/mod.rs @@ -1,3 +1,47 @@ +//! This module provides all the necessary building blocks for +//! implementing ISA specific ABIs. +//! +//! # Default ABI +//! +//! Winch uses a default internal ABI, for all internal functions. +//! This allows us to push the complexity of system ABI compliance to +//! the trampolines (not yet implemented). The default ABI treats all +//! allocatable registers as caller saved, which means that (i) all +//! register values in the Wasm value stack (which are normally +//! referred to as "live"), must be saved onto the machine stack (ii) +//! function prologues and epilogues don't store/restore other +//! registers more than the non-allocatable ones (e.g. rsp/rbp in +//! x86_64). +//! +//! The calling convention in the default ABI, uses registers to a +//! certain fixed count for arguments and return values, and then the +//! stack is used for all additional arguments. +//! +//! Generally the stack layout looks like: +//! +-------------------------------+ +//! | | +//! | | +//! | Stack Args | +//! | | +//! | | +//! +-------------------------------+----> SP @ function entry +//! | Ret addr | +//! +-------------------------------+ +//! | SP | +//! +-------------------------------+----> SP @ Function prologue +//! | | +//! | | +//! | | +//! 
| Stack slots | +//! | + dynamic space | +//! | | +//! | | +//! | | +//! +-------------------------------+----> SP @ callsite (after) +//! | alignment | +//! | + arguments | +//! | | ----> Space allocated for calls +//! | | use crate::isa::reg::Reg; use smallvec::SmallVec; use std::ops::{Add, BitAnd, Not, Sub}; @@ -13,6 +57,9 @@ pub(crate) trait ABI { /// The required stack alignment. fn stack_align(&self) -> u8; + /// The required stack alignment for calls. + fn call_stack_align(&self) -> u8; + /// The offset to the argument base, relative to the frame pointer. fn arg_base_offset(&self) -> u8; @@ -117,11 +164,27 @@ impl ABIResult { } } +pub(crate) type ABIParams = SmallVec<[ABIArg; 6]>; + /// An ABI-specific representation of a function signature. pub(crate) struct ABISig { /// Function parameters. - pub params: SmallVec<[ABIArg; 6]>, + pub params: ABIParams, + /// Function result. pub result: ABIResult, + /// Stack space needed for stack arguments. + pub stack_bytes: u32, +} + +impl ABISig { + /// Create a new ABI signature. + pub fn new(params: ABIParams, result: ABIResult, stack_bytes: u32) -> Self { + Self { + params, + result, + stack_bytes, + } + } } /// Returns the size in bytes of a given WebAssembly type. diff --git a/winch/codegen/src/codegen/call.rs b/winch/codegen/src/codegen/call.rs new file mode 100644 index 0000000000..c2907ab24b --- /dev/null +++ b/winch/codegen/src/codegen/call.rs @@ -0,0 +1,222 @@ +//! Function call emission. For more details around the ABI and +//! calling convention, see [ABI]. +use super::CodeGenContext; +use crate::{ + abi::{align_to, ABIArg, ABIResult, ABISig, ABI}, + masm::{MacroAssembler, OperandSize}, + reg::Reg, + stack::Val, +}; + +/// All the information needed to emit a function call. +pub(crate) struct FnCall<'a> { + /// The total stack space in bytes used by the function call. + /// This amount includes the sum of: + /// + /// 1. 
The amount of stack space that needs to be explicitly + /// allocated at the callsite for callee arguments that + /// go in the stack, plus any alignment. + /// 2. The amount of stack space created by saving any live + /// registers at the callsite. + /// 3. The amount of space used by any memory entries in the value + /// stack present at the callsite, that will be used as + /// arguments for the function call. Any memory values in the + /// value stack that are needed as part of the function + /// arguments, will be consumed by the function call (either by + /// assigning those values to a register or by storing those + /// values to a memory location if the callee argument is on + /// the stack), so we track that stack space to reclaim it once + /// the function call has ended. This could also be done in + /// `assign_args` everytime a memory entry needs to be assigned + /// to a particular location, but doing so, will incur in more + /// instructions (e.g. a pop per argument that needs to be + /// assigned); it's more efficient to track the space needed by + /// those memory values and reclaim it at once. + /// + /// The machine stack state that this amount is capturing, is the following: + /// ┌──────────────────────────────────────────────────┐ + /// │ │ + /// │ │ + /// │ Stack space created by any previous spills │ + /// │ from the value stack; and which memory values │ + /// │ are used as function arguments. │ + /// │ │ + /// ├──────────────────────────────────────────────────┤ ---> The Wasm value stack at this point in time would look like: + /// │ │ [ Reg | Reg | Mem(offset) | Mem(offset) ] + /// │ │ + /// │ Stack space created by saving │ + /// │ any live registers at the callsite. 
│ + /// │ │ + /// │ │ + /// ├─────────────────────────────────────────────────┬┤ ---> The Wasm value stack at this point in time would look like: + /// │ │ [ Mem(offset) | Mem(offset) | Mem(offset) | Mem(offset) ] + /// │ │ Assuming that the callee takes 4 arguments, we calculate + /// │ │ 2 spilled registers + 2 memory values; all of which will be used + /// │ Stack space allocated for │ as arguments to the call via `assign_args`, thus the memory they represent is + /// │ the callee function arguments in the stack; │ is considered to be consumed by the call. + /// │ represented by `arg_stack_space` │ + /// │ │ + /// │ │ + /// │ │ + /// └──────────────────────────────────────────────────┘ ------> Stack pointer when emitting the call + /// + total_stack_space: u32, + /// The total stack space needed for the callee arguments on the + /// stack, including any adjustments to the function's frame and + /// aligned to to the required ABI alignment. + arg_stack_space: u32, + /// The ABI-specific signature of the callee. + abi_sig: &'a ABISig, + /// The stack pointer offset prior to preparing and emitting the + /// call. This is tracked to assert the position of the stack + /// pointer after the call has finished. + sp_offset_at_callsite: u32, +} + +impl<'a> FnCall<'a> { + /// Allocate and setup a new function call. + /// + /// The setup process, will first save all the live registers in + /// the value stack, tracking down those spilled for the function + /// arguments(see comment below for more details) it will also + /// track all the memory entries consumed by the function + /// call. Then, it will calculate any adjustments needed to ensure + /// the alignment of the caller's frame. It's important to note + /// that the order of operations in the setup is important, as we + /// want to calculate any adjustments to the caller's frame, after + /// having saved any live registers, so that we can account for + /// any pushes generated by register spilling. 
+ pub fn new( + abi: &A, + callee_sig: &'a ABISig, + context: &mut CodeGenContext, + masm: &mut M, + ) -> Self { + let stack = &context.stack; + let arg_stack_space = callee_sig.stack_bytes; + let callee_params = &callee_sig.params; + let sp_offset_at_callsite = masm.sp_offset(); + + let (spilled_regs, memory_values) = match callee_params.len() { + 0 => { + let _ = context.spill_regs_and_count_memory_in(masm, ..); + (0, 0) + } + _ => { + // Here we perform a "spill" of the register entries + // in the Wasm value stack, we also count any memory + // values that will be used used as part of the callee + // arguments. Saving the live registers is done by + // emitting push operations for every `Reg` entry in + // the Wasm value stack. We do this to be compliant + // with Winch's internal ABI, in which all registers + // are treated as caller-saved. For more details, see + // [ABI]. + // + // The next few lines, partition the value stack into + // two sections: + // +------------------+--+--- (Stack top) + // | | | + // | | | 1. The top `n` elements, which are used for + // | | | function arguments; for which we save any + // | | | live registers, keeping track of the amount of registers + // +------------------+ | saved plus the amount of memory values consumed by the function call; + // | | | with this information we can later reclaim the space used by the function call. + // | | | + // +------------------+--+--- + // | | | 2. The rest of the items in the stack, for which + // | | | we only save any live registers. + // | | | + // +------------------+ | + assert!(stack.len() >= callee_params.len()); + let partition = stack.len() - callee_params.len(); + let _ = context.spill_regs_and_count_memory_in(masm, 0..partition); + context.spill_regs_and_count_memory_in(masm, partition..) 
+ } + }; + + let delta = calculate_frame_adjustment( + masm.sp_offset(), + abi.arg_base_offset() as u32, + abi.call_stack_align() as u32, + ); + + let arg_stack_space = align_to(arg_stack_space + delta, abi.call_stack_align() as u32); + Self { + abi_sig: &callee_sig, + arg_stack_space, + total_stack_space: (spilled_regs * ::word_bytes()) + + (memory_values * ::word_bytes()) + + arg_stack_space, + sp_offset_at_callsite, + } + } + + /// Emit the function call. + pub fn emit( + &self, + masm: &mut M, + context: &mut CodeGenContext, + callee: u32, + ) { + masm.reserve_stack(self.arg_stack_space); + self.assign_args(context, masm, ::scratch_reg()); + masm.call(callee); + masm.free_stack(self.total_stack_space); + context.drop_last(self.abi_sig.params.len()); + // The stack pointer at the end of the function call + // cannot be less than what it was when starting the + // function call. + assert!(self.sp_offset_at_callsite >= masm.sp_offset()); + self.handle_result(context, masm); + } + + fn assign_args( + &self, + context: &mut CodeGenContext, + masm: &mut M, + scratch: Reg, + ) { + let arg_count = self.abi_sig.params.len(); + let stack = &context.stack; + let mut stack_values = stack.peekn(arg_count); + for arg in &self.abi_sig.params { + let val = stack_values + .next() + .unwrap_or_else(|| panic!("expected stack value for function argument")); + match &arg { + &ABIArg::Reg { ty, reg } => { + context.move_val_to_reg(&val, *reg, masm, (*ty).into()); + } + &ABIArg::Stack { ty, offset } => { + let addr = masm.address_at_sp(*offset); + let size: OperandSize = (*ty).into(); + context.move_val_to_reg(val, scratch, masm, size); + masm.store(scratch.into(), addr, size); + } + } + } + } + + fn handle_result(&self, context: &mut CodeGenContext, masm: &mut M) { + let result = &self.abi_sig.result; + if result.is_void() { + return; + } + + match result { + &ABIResult::Reg { ty: _, reg } => { + assert!(context.regalloc.gpr_available(reg)); + let result_reg = 
Val::reg(context.gpr(reg, masm)); + context.stack.push(result_reg); + } + } + } +} + +/// Calculates the delta needed to adjust a function's frame plus some +/// addend to a given alignment. +fn calculate_frame_adjustment(frame_size: u32, addend: u32, alignment: u32) -> u32 { + let total = frame_size + addend; + (alignment - (total % alignment)) % alignment +} diff --git a/winch/codegen/src/codegen/context.rs b/winch/codegen/src/codegen/context.rs index 94b3da496f..12d3f8de41 100644 --- a/winch/codegen/src/codegen/context.rs +++ b/winch/codegen/src/codegen/context.rs @@ -5,6 +5,7 @@ use crate::{ regalloc::RegAlloc, stack::{Stack, Val}, }; +use std::ops::RangeBounds; /// The code generation context. /// The code generation context is made up of three @@ -60,58 +61,66 @@ impl<'a> CodeGenContext<'a> { self.regalloc.free_gpr(reg); } - /// Loads the stack top value into a register, if it isn't already one; - /// spilling if there are no registers available. - pub fn pop_to_reg(&mut self, masm: &mut M, size: OperandSize) -> Reg { - if let Some(reg) = self.stack.pop_reg() { - return reg; - } - - let dst = self.any_gpr(masm); - let val = self.stack.pop().expect("a value at stack top"); - Self::move_val_to_reg(val, dst, masm, self.frame, size); - dst - } - - /// Checks if the stack top contains the given register. The register - /// gets allocated otherwise, potentially causing a spill. - /// Once the requested register is allocated, the value at the top of the stack - /// gets loaded into the register. - pub fn pop_to_named_reg( + /// Loads the stack top value into the next available register, if + /// it isn't already one; spilling if there are no registers + /// available. Optionally the caller may specify a specific + /// destination register. 
+ pub fn pop_to_reg( &mut self, masm: &mut M, - named: Reg, + named: Option, size: OperandSize, ) -> Reg { - if let Some(reg) = self.stack.pop_named_reg(named) { - return reg; + let (in_stack, dst) = if let Some(dst) = named { + self.stack + .pop_named_reg(dst) + .map(|reg| (true, reg)) + .unwrap_or_else(|| (false, self.gpr(dst, masm))) + } else { + self.stack + .pop_reg() + .map(|reg| (true, reg)) + .unwrap_or_else(|| (false, self.any_gpr(masm))) + }; + + if in_stack { + return dst; } - let dst = self.gpr(named, masm); let val = self.stack.pop().expect("a value at stack top"); - Self::move_val_to_reg(val, dst, masm, self.frame, size); + if val.is_mem() { + masm.pop(dst); + } else { + self.move_val_to_reg(&val, dst, masm, size); + } + dst } - fn move_val_to_reg( - src: Val, + /// Move a stack value to the given register. + pub fn move_val_to_reg( + &self, + src: &Val, dst: Reg, masm: &mut M, - frame: &Frame, size: OperandSize, ) { match src { - Val::Reg(src) => masm.mov(RegImm::reg(src), RegImm::reg(dst), size), - Val::I32(imm) => masm.mov(RegImm::imm(imm.into()), RegImm::reg(dst), size), - Val::I64(imm) => masm.mov(RegImm::imm(imm), RegImm::reg(dst), size), + Val::Reg(src) => masm.mov(RegImm::reg(*src), RegImm::reg(dst), size), + Val::I32(imm) => masm.mov(RegImm::imm((*imm).into()), RegImm::reg(dst), size), + Val::I64(imm) => masm.mov(RegImm::imm(*imm), RegImm::reg(dst), size), Val::Local(index) => { - let slot = frame - .get_local(index) - .expect(&format!("valid locat at index = {}", index)); + let slot = self + .frame + .get_local(*index) + .unwrap_or_else(|| panic!("valid local at index = {}", index)); let addr = masm.local_address(&slot); masm.load(addr, dst, slot.ty.into()); } - v => panic!("Unsupported value {:?}", v), + Val::Memory(offset) => { + let addr = masm.address_from_sp(*offset); + masm.load(addr, dst, size); + } }; } @@ -128,7 +137,7 @@ impl<'a> CodeGenContext<'a> { .stack .pop_i32_const() .expect("i32 const value at stack top"); - let reg = 
self.pop_to_reg(masm, OperandSize::S32); + let reg = self.pop_to_reg(masm, None, OperandSize::S32); emit( masm, RegImm::reg(reg), @@ -137,8 +146,8 @@ impl<'a> CodeGenContext<'a> { ); self.stack.push(Val::reg(reg)); } else { - let src = self.pop_to_reg(masm, OperandSize::S32); - let dst = self.pop_to_reg(masm, OperandSize::S32); + let src = self.pop_to_reg(masm, None, OperandSize::S32); + let dst = self.pop_to_reg(masm, None, OperandSize::S32); emit(masm, dst.into(), src.into(), OperandSize::S32); self.regalloc.free_gpr(src); self.stack.push(Val::reg(dst)); @@ -157,18 +166,64 @@ impl<'a> CodeGenContext<'a> { .stack .pop_i64_const() .expect("i64 const value at stack top"); - let reg = self.pop_to_reg(masm, OperandSize::S64); + let reg = self.pop_to_reg(masm, None, OperandSize::S64); emit(masm, RegImm::reg(reg), RegImm::imm(val), OperandSize::S64); self.stack.push(Val::reg(reg)); } else { - let src = self.pop_to_reg(masm, OperandSize::S64); - let dst = self.pop_to_reg(masm, OperandSize::S64); + let src = self.pop_to_reg(masm, None, OperandSize::S64); + let dst = self.pop_to_reg(masm, None, OperandSize::S64); emit(masm, dst.into(), src.into(), OperandSize::S64); self.regalloc.free_gpr(src); self.stack.push(Val::reg(dst)); } } + /// Saves any live registers in the value stack in a particular + /// range defined by the caller. This is a specialization of the + /// spill function; made available for cases in which spilling + /// locals is not required, like for example for function calls in + /// which locals are not reachable by the callee. It also tracks + /// down the number of memory values in the given range. + /// + /// Returns the number of spilled registers and the number of + /// memory values in the given range of the value stack. 
+ pub fn spill_regs_and_count_memory_in(&mut self, masm: &mut M, range: R) -> (u32, u32) + where + R: RangeBounds, + M: MacroAssembler, + { + let mut spilled: u32 = 0; + let mut memory_values = 0; + for i in self.stack.inner_mut().range_mut(range) { + if i.is_reg() { + let reg = i.get_reg(); + let offset = masm.push(reg); + self.regalloc.free_gpr(reg); + *i = Val::Memory(offset); + spilled += 1; + } else if i.is_mem() { + memory_values += 1; + } + } + + (spilled, memory_values) + } + + /// Drops the last `n` elements of the stack, freeing any + /// registers located in that region. + pub fn drop_last(&mut self, last: usize) { + let len = self.stack.len(); + assert!(last <= len); + let truncate = self.stack.len() - last; + + self.stack.inner_mut().range(truncate..).for_each(|v| { + if v.is_reg() { + self.regalloc.free_gpr(v.get_reg()); + } + }); + self.stack.inner_mut().truncate(truncate); + } + /// Spill locals and registers to memory. // TODO optimize the spill range; // diff --git a/winch/codegen/src/codegen/mod.rs b/winch/codegen/src/codegen/mod.rs index 8079122188..7ffbaf5e9c 100644 --- a/winch/codegen/src/codegen/mod.rs +++ b/winch/codegen/src/codegen/mod.rs @@ -3,21 +3,21 @@ use crate::{ masm::{MacroAssembler, OperandSize}, }; use anyhow::Result; +use call::FnCall; use wasmparser::{BinaryReader, FuncValidator, ValType, ValidatorResources, VisitOperator}; mod context; pub(crate) use context::*; mod env; pub use env::*; +mod call; /// The code generation abstraction. -pub(crate) struct CodeGen<'a, M> +pub(crate) struct CodeGen<'a, A, M> where M: MacroAssembler, + A: ABI, { - /// The word size in bytes, extracted from the current ABI. - word_size: u32, - /// The ABI-specific representation of the function signature, excluding results. sig: ABISig, @@ -26,18 +26,32 @@ where /// The MacroAssembler. pub masm: &'a mut M, + + /// A reference to the function compilation environment. + pub env: &'a dyn env::FuncEnv, + + /// A reference to the current ABI. 
+ pub abi: &'a A, } -impl<'a, M> CodeGen<'a, M> +impl<'a, A, M> CodeGen<'a, A, M> where M: MacroAssembler, + A: ABI, { - pub fn new(masm: &'a mut M, context: CodeGenContext<'a>, sig: ABISig) -> Self { + pub fn new( + masm: &'a mut M, + abi: &'a A, + context: CodeGenContext<'a>, + env: &'a dyn FuncEnv, + sig: ABISig, + ) -> Self { Self { - word_size: ::word_bytes(), sig, context, masm, + abi, + env, } } @@ -70,7 +84,7 @@ where let defined_locals_range = &self.context.frame.defined_locals_range; self.masm.zero_mem_range( defined_locals_range.as_range(), - self.word_size, + ::word_bytes(), &mut self.context.regalloc, ); @@ -105,8 +119,21 @@ where } } - // Emit the usual function end instruction sequence. - pub fn emit_end(&mut self) -> Result<()> { + /// Emit a direct function call. + pub fn emit_call(&mut self, index: u32) { + let callee = self.env.callee_from_index(index); + if callee.import { + // TODO: Only locally defined functions for now. + unreachable!() + } + + let sig = self.abi.sig(&callee.ty); + let fncall = FnCall::new(self.abi, &sig, &mut self.context, self.masm); + fncall.emit::(self.masm, &mut self.context, index); + } + + /// Emit the usual function end instruction sequence. 
+ fn emit_end(&mut self) -> Result<()> { self.handle_abi_result(); self.masm.epilogue(self.context.frame.locals_size); Ok(()) @@ -149,7 +176,7 @@ where let named_reg = self.sig.result.result_reg(); let reg = self .context - .pop_to_named_reg(self.masm, named_reg, OperandSize::S64); + .pop_to_reg(self.masm, Some(named_reg), OperandSize::S64); self.context.regalloc.free_gpr(reg); } } diff --git a/winch/codegen/src/isa/aarch64/abi.rs b/winch/codegen/src/isa/aarch64/abi.rs index 14afcc2356..f348412e42 100644 --- a/winch/codegen/src/isa/aarch64/abi.rs +++ b/winch/codegen/src/isa/aarch64/abi.rs @@ -47,6 +47,10 @@ impl ABI for Aarch64ABI { 8 } + fn call_stack_align(&self) -> u8 { + 16 + } + fn arg_base_offset(&self) -> u8 { 16 } @@ -74,7 +78,7 @@ impl ABI for Aarch64ABI { let reg = regs::xreg(0); let result = ABIResult::reg(ty, reg); - ABISig { params, result } + ABISig::new(params, result, stack_offset) } fn scratch_reg() -> Reg { diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index e44c6c471c..92b30218f8 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -96,6 +96,10 @@ impl Masm for MacroAssembler { self.increment_sp(bytes); } + fn free_stack(&mut self, _bytes: u32) { + todo!() + } + fn local_address(&mut self, local: &LocalSlot) -> Address { let (reg, offset) = local .addressed_from_sp() @@ -111,6 +115,14 @@ impl Masm for MacroAssembler { Address::offset(reg, offset as i64) } + fn address_from_sp(&self, _offset: u32) -> Self::Address { + todo!() + } + + fn address_at_sp(&self, _offset: u32) -> Self::Address { + todo!() + } + fn store(&mut self, src: RegImm, dst: Address, size: OperandSize) { let src = match src { RegImm::Imm(imm) => { @@ -124,10 +136,18 @@ impl Masm for MacroAssembler { self.asm.str(src, dst, size); } + fn call(&mut self, _callee: u32) { + todo!() + } + fn load(&mut self, src: Address, dst: Reg, size: OperandSize) { self.asm.ldr(src, dst, size); } + fn pop(&mut 
self, _dst: Reg) { + todo!() + } + fn sp_offset(&mut self) -> u32 { self.sp_offset } diff --git a/winch/codegen/src/isa/aarch64/mod.rs b/winch/codegen/src/isa/aarch64/mod.rs index ff56cdb999..6934d04924 100644 --- a/winch/codegen/src/isa/aarch64/mod.rs +++ b/winch/codegen/src/isa/aarch64/mod.rs @@ -8,6 +8,7 @@ use crate::{ regalloc::RegAlloc, regset::RegSet, stack::Stack, + FuncEnv, }; use anyhow::Result; use cranelift_codegen::settings::{self, Flags}; @@ -84,6 +85,7 @@ impl TargetIsa for Aarch64 { &self, sig: &FuncType, body: &FunctionBody, + env: &dyn FuncEnv, mut validator: FuncValidator, ) -> Result> { let mut body = body.get_binary_reader(); @@ -95,7 +97,7 @@ impl TargetIsa for Aarch64 { // TODO: Add floating point bitmask let regalloc = RegAlloc::new(RegSet::new(ALL_GPR, 0), scratch()); let codegen_context = CodeGenContext::new(regalloc, stack, &frame); - let mut codegen = CodeGen::new::(&mut masm, codegen_context, abi_sig); + let mut codegen = CodeGen::new(&mut masm, &abi, codegen_context, env, abi_sig); codegen.emit(&mut body, validator)?; Ok(masm.finalize()) diff --git a/winch/codegen/src/isa/mod.rs b/winch/codegen/src/isa/mod.rs index 37fcb76f0d..058c9c70e4 100644 --- a/winch/codegen/src/isa/mod.rs +++ b/winch/codegen/src/isa/mod.rs @@ -10,6 +10,8 @@ use std::{ use target_lexicon::{Architecture, Triple}; use wasmparser::{FuncType, FuncValidator, FunctionBody, ValidatorResources}; +use crate::FuncEnv; + #[cfg(feature = "x64")] pub(crate) mod x64; @@ -92,6 +94,7 @@ pub trait TargetIsa: Send + Sync { &self, sig: &FuncType, body: &FunctionBody, + env: &dyn FuncEnv, validator: FuncValidator, ) -> Result>; diff --git a/winch/codegen/src/isa/x64/abi.rs b/winch/codegen/src/isa/x64/abi.rs index cf7dc2eb57..0bd30e7133 100644 --- a/winch/codegen/src/isa/x64/abi.rs +++ b/winch/codegen/src/isa/x64/abi.rs @@ -39,6 +39,10 @@ impl ABI for X64ABI { 8 } + fn call_stack_align(&self) -> u8 { + 16 + } + fn arg_base_offset(&self) -> u8 { // Two 8-byte slots, one for the return 
address and another // one for the frame pointer. @@ -75,7 +79,7 @@ impl ABI for X64ABI { let reg = regs::rax(); let result = ABIResult::reg(ty, reg); - ABISig { params, result } + ABISig::new(params, result, stack_offset) } fn scratch_reg() -> Reg { diff --git a/winch/codegen/src/isa/x64/asm.rs b/winch/codegen/src/isa/x64/asm.rs index c6a2548c86..8f03489e20 100644 --- a/winch/codegen/src/isa/x64/asm.rs +++ b/winch/codegen/src/isa/x64/asm.rs @@ -5,18 +5,21 @@ use crate::{ masm::{DivKind, OperandSize, RemKind}, }; use cranelift_codegen::{ + entity::EntityRef, ir::TrapCode, + ir::{ExternalName, Opcode, UserExternalNameRef}, isa::x64::{ args::{ self, AluRmiROpcode, Amode, CmpOpcode, DivSignedness, ExtMode, FromWritableReg, Gpr, GprMem, GprMemImm, RegMem, RegMemImm, SyntheticAmode, WritableGpr, CC, }, - settings as x64_settings, EmitInfo, EmitState, Inst, + settings as x64_settings, CallInfo, EmitInfo, EmitState, Inst, }, settings, Final, MachBuffer, MachBufferFinalized, MachInstEmit, Writable, }; use super::{address::Address, regs}; +use smallvec::smallvec; /// A x64 instruction operand. #[derive(Debug, Copy, Clone)] @@ -465,4 +468,18 @@ impl Assembler { dst: dst.into(), }); } + + /// Direct function call to a user defined function. 
+ pub fn call(&mut self, callee: u32) { + let dest = ExternalName::user(UserExternalNameRef::new(callee as usize)); + self.emit(Inst::CallKnown { + dest, + info: Box::new(CallInfo { + uses: smallvec![], + defs: smallvec![], + clobbers: Default::default(), + opcode: Opcode::Call, + }), + }); + } } diff --git a/winch/codegen/src/isa/x64/masm.rs b/winch/codegen/src/isa/x64/masm.rs index e4bc0db62d..cc973b24af 100644 --- a/winch/codegen/src/isa/x64/masm.rs +++ b/winch/codegen/src/isa/x64/masm.rs @@ -70,6 +70,14 @@ impl Masm for MacroAssembler { self.increment_sp(bytes); } + fn free_stack(&mut self, bytes: u32) { + if bytes == 0 { + return; + } + self.asm.add_ir(bytes as i32, rsp(), OperandSize::S64); + self.decrement_sp(bytes); + } + fn local_address(&mut self, local: &LocalSlot) -> Address { let (reg, offset) = local .addressed_from_sp() @@ -85,6 +93,14 @@ impl Masm for MacroAssembler { Address::offset(reg, offset) } + fn address_from_sp(&self, offset: u32) -> Self::Address { + Address::offset(regs::rsp(), self.sp_offset - offset) + } + + fn address_at_sp(&self, offset: u32) -> Self::Address { + Address::offset(regs::rsp(), offset) + } + fn store(&mut self, src: RegImm, dst: Address, size: OperandSize) { let src: Operand = src.into(); let dst: Operand = dst.into(); @@ -92,6 +108,16 @@ impl Masm for MacroAssembler { self.asm.mov(src, dst, size); } + fn pop(&mut self, dst: Reg) { + self.asm.pop_r(dst); + // Similar to the comment in `push`, we assume 8 bytes per pop. + self.decrement_sp(8); + } + + fn call(&mut self, callee: u32) { + self.asm.call(callee); + } + fn load(&mut self, src: Address, dst: Reg, size: OperandSize) { let src = src.into(); let dst = dst.into(); @@ -158,12 +184,12 @@ impl Masm for MacroAssembler { let rax = context.gpr(regs::rax(), self); // Allocate the divisor, which can be any gpr. - let divisor = context.pop_to_reg(self, size); + let divisor = context.pop_to_reg(self, None, size); // Mark rax as allocatable. 
context.regalloc.free_gpr(rax); // Move the top value to rax. - let rax = context.pop_to_named_reg(self, rax, size); + let rax = context.pop_to_reg(self, Some(rax), size); self.asm.div(divisor, (rax, rdx), kind, size); // Free the divisor and rdx. @@ -180,12 +206,12 @@ impl Masm for MacroAssembler { let rax = context.gpr(regs::rax(), self); // Allocate the divisor, which can be any gpr. - let divisor = context.pop_to_reg(self, size); + let divisor = context.pop_to_reg(self, None, size); // Mark rax as allocatable. context.regalloc.free_gpr(rax); // Move the top value to rax. - let rax = context.pop_to_named_reg(self, rax, size); + let rax = context.pop_to_reg(self, Some(rax), size); self.asm.rem(divisor, (rax, rdx), kind, size); // Free the divisor and rax. @@ -225,7 +251,6 @@ impl MacroAssembler { self.sp_offset += bytes; } - #[allow(dead_code)] fn decrement_sp(&mut self, bytes: u32) { assert!( self.sp_offset >= bytes, diff --git a/winch/codegen/src/isa/x64/mod.rs b/winch/codegen/src/isa/x64/mod.rs index dc31055409..78d85a38c7 100644 --- a/winch/codegen/src/isa/x64/mod.rs +++ b/winch/codegen/src/isa/x64/mod.rs @@ -5,6 +5,7 @@ use crate::isa::x64::masm::MacroAssembler as X64Masm; use crate::masm::MacroAssembler; use crate::regalloc::RegAlloc; use crate::stack::Stack; +use crate::FuncEnv; use crate::{ isa::{Builder, TargetIsa}, regset::RegSet, @@ -85,6 +86,7 @@ impl TargetIsa for X64 { &self, sig: &FuncType, body: &FunctionBody, + env: &dyn FuncEnv, mut validator: FuncValidator, ) -> Result> { let mut body = body.get_binary_reader(); @@ -96,7 +98,7 @@ impl TargetIsa for X64 { // TODO Add in floating point bitmask let regalloc = RegAlloc::new(RegSet::new(ALL_GPR, 0), regs::scratch()); let codegen_context = CodeGenContext::new(regalloc, stack, &frame); - let mut codegen = CodeGen::new::(&mut masm, codegen_context, abi_sig); + let mut codegen = CodeGen::new(&mut masm, &abi, codegen_context, env, abi_sig); codegen.emit(&mut body, validator)?; diff --git 
a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs index f23e31917a..38ce406557 100644 --- a/winch/codegen/src/masm.rs +++ b/winch/codegen/src/masm.rs @@ -22,7 +22,7 @@ pub(crate) enum RemKind { } /// Operand size, in bits. -#[derive(Copy, Clone, Eq, PartialEq)] +#[derive(Copy, Debug, Clone, Eq, PartialEq)] pub(crate) enum OperandSize { /// 32 bits. S32, @@ -87,9 +87,24 @@ pub(crate) trait MacroAssembler { /// Reserve stack space. fn reserve_stack(&mut self, bytes: u32); + /// Free stack space. + fn free_stack(&mut self, bytes: u32); + /// Get the address of a local slot. fn local_address(&mut self, local: &LocalSlot) -> Self::Address; + /// Constructs an address with an offset that is relative to the + /// current position of the stack pointer (e.g. [sp + (sp_offset - + /// offset)]). + fn address_from_sp(&self, offset: u32) -> Self::Address; + + /// Constructs an address with an offset that is absolute to the + /// current position of the stack pointer (e.g. [sp + offset]). + fn address_at_sp(&self, offset: u32) -> Self::Address; + + /// Emit a function call to a locally defined function. + fn call(&mut self, callee: u32); + /// Get stack pointer offset. fn sp_offset(&mut self) -> u32; @@ -99,6 +114,9 @@ /// Perform a stack load. fn load(&mut self, src: Self::Address, dst: Reg, size: OperandSize); + /// Pop a value from the machine stack into the given register. + fn pop(&mut self, dst: Reg); + /// Perform a move. fn mov(&mut self, src: RegImm, dst: RegImm, size: OperandSize); diff --git a/winch/codegen/src/regalloc.rs b/winch/codegen/src/regalloc.rs index 9cd95213a6..7640561d71 100644 --- a/winch/codegen/src/regalloc.rs +++ b/winch/codegen/src/regalloc.rs @@ -35,6 +35,11 @@ impl RegAlloc { }) } + /// Checks if a general purpose register is available. 
+ pub fn gpr_available(&self, reg: Reg) -> bool { + self.regset.named_gpr_available(reg.hw_enc() as u32) + } + /// Request a specific general purpose register, /// spilling if not available. pub fn gpr(&mut self, named: Reg, spill: &mut F) -> Reg diff --git a/winch/codegen/src/regset.rs b/winch/codegen/src/regset.rs index 676a259924..8b0ab0c735 100644 --- a/winch/codegen/src/regset.rs +++ b/winch/codegen/src/regset.rs @@ -38,7 +38,9 @@ impl RegSet { self.gpr |= 1 << index; } - fn named_gpr_available(&self, index: u32) -> bool { + /// Returns true if the given general purpose register + /// is available. + pub fn named_gpr_available(&self, index: u32) -> bool { let index = 1 << index; (!self.gpr & index) == 0 } diff --git a/winch/codegen/src/stack.rs b/winch/codegen/src/stack.rs index 72308006fc..d563d91e84 100644 --- a/winch/codegen/src/stack.rs +++ b/winch/codegen/src/stack.rs @@ -45,6 +45,14 @@ impl Val { } } + /// Check whether the value is a memory offset. + pub fn is_mem(&self) -> bool { + match *self { + Self::Memory(_) => true, + _ => false, + } + } + /// Get the register representation of the value. /// /// # Panics @@ -109,6 +117,11 @@ impl Stack { } } + /// Get the length of the stack. + pub fn len(&self) -> usize { + self.inner.len() + } + /// Push a value to the stack. pub fn push(&mut self, val: Val) { self.inner.push_back(val); } + /// Peek into the top in the stack. pub fn peek(&mut self) -> Option<&Val> { self.inner.back() } + /// Returns an iterator referencing the last n items of the stack, + /// in bottom-most to top-most order. + pub fn peekn(&self, n: usize) -> impl Iterator + '_ { + let len = self.len(); + assert!(n <= len); + + let partition = len - n; + self.inner.range(partition..) + } + /// Pops the top element of the stack, if any. pub fn pop(&mut self) -> Option { self.inner.pop_back() } diff --git a/winch/codegen/src/visitor.rs b/winch/codegen/src/visitor.rs index 331d0b76d5..ac872e4605 100644 --- a/winch/codegen/src/visitor.rs +++ b/winch/codegen/src/visitor.rs @@ -4,6 +4,7 @@ //! 
which validates and dispatches to the corresponding //! machine code emitter. +use crate::abi::ABI; use crate::codegen::CodeGen; use crate::masm::{DivKind, MacroAssembler, OperandSize, RegImm, RemKind}; use crate::stack::Val; @@ -49,14 +50,16 @@ macro_rules! def_unsupported { (emit I64Sub $($rest:tt)*) => {}; (emit LocalGet $($rest:tt)*) => {}; (emit LocalSet $($rest:tt)*) => {}; + (emit Call $($rest:tt)*) => {}; (emit End $($rest:tt)*) => {}; (emit $unsupported:tt $($rest:tt)*) => {$($rest)*}; } -impl<'a, M> VisitOperator<'a> for CodeGen<'a, M> +impl<'a, A, M> VisitOperator<'a> for CodeGen<'a, A, M> where M: MacroAssembler, + A: ABI, { type Output = (); @@ -188,12 +191,16 @@ where .get_local(index) .expect(&format!("vald local at slot = {}", index)); let size: OperandSize = slot.ty.into(); - let src = self.context.pop_to_reg(self.masm, size); + let src = self.context.pop_to_reg(self.masm, None, size); let addr = self.masm.local_address(&slot); self.masm.store(RegImm::reg(src), addr, size); self.context.regalloc.free_gpr(src); } + fn visit_call(&mut self, index: u32) { + self.emit_call(index); + } + wasmparser::for_each_operator!(def_unsupported); } diff --git a/winch/filetests/filetests/x64/call/params.wat b/winch/filetests/filetests/x64/call/params.wat new file mode 100644 index 0000000000..56ebb9dd4a --- /dev/null +++ b/winch/filetests/filetests/x64/call/params.wat @@ -0,0 +1,121 @@ +;;! 
target = "x86_64" + +(module + (func (export "main") (param i32) (param i32) (result i32) + (local.get 1) + (local.get 0) + (i32.div_u) + + (call $add (i32.const 1) (i32.const 2) (i32.const 3) (i32.const 4) (i32.const 5) (i32.const 6) (i32.const 7) (i32.const 8)) + + (local.get 1) + (local.get 0) + (i32.div_u) + + (call $add (i32.const 2) (i32.const 3) (i32.const 4) (i32.const 5) (i32.const 6) (i32.const 7) (i32.const 8)) + ) + + (func $add (param i32 i32 i32 i32 i32 i32 i32 i32 i32) (result i32) + (local.get 0) + (local.get 1) + (i32.add) + (local.get 2) + (i32.add) + (local.get 3) + (i32.add) + (local.get 4) + (i32.add) + (local.get 5) + (i32.add) + (local.get 6) + (i32.add) + (local.get 7) + (i32.add) + (local.get 8) + (i32.add) + ) +) +;; 0: 55 push rbp +;; 1: 4889e5 mov rbp, rsp +;; 4: 4883ec08 sub rsp, 8 +;; 8: 897c2404 mov dword ptr [rsp + 4], edi +;; c: 893424 mov dword ptr [rsp], esi +;; f: 8b4c2404 mov ecx, dword ptr [rsp + 4] +;; 13: 8b0424 mov eax, dword ptr [rsp] +;; 16: 31d2 xor edx, edx +;; 18: f7f1 div ecx +;; 1a: 50 push rax +;; 1b: 4883ec20 sub rsp, 0x20 +;; 1f: 8b7c2420 mov edi, dword ptr [rsp + 0x20] +;; 23: be01000000 mov esi, 1 +;; 28: ba02000000 mov edx, 2 +;; 2d: b903000000 mov ecx, 3 +;; 32: 41b804000000 mov r8d, 4 +;; 38: 41b905000000 mov r9d, 5 +;; 3e: 41bb06000000 mov r11d, 6 +;; 44: 44891c24 mov dword ptr [rsp], r11d +;; 48: 41bb07000000 mov r11d, 7 +;; 4e: 44895c2408 mov dword ptr [rsp + 8], r11d +;; 53: 41bb08000000 mov r11d, 8 +;; 59: 44895c2410 mov dword ptr [rsp + 0x10], r11d +;; 5e: e800000000 call 0x63 +;; 63: 4883c428 add rsp, 0x28 +;; 67: 50 push rax +;; 68: 448b5c2408 mov r11d, dword ptr [rsp + 8] +;; 6d: 4153 push r11 +;; 6f: 448b5c2414 mov r11d, dword ptr [rsp + 0x14] +;; 74: 4153 push r11 +;; 76: 59 pop rcx +;; 77: 58 pop rax +;; 78: 31d2 xor edx, edx +;; 7a: f7f1 div ecx +;; 7c: 50 push rax +;; 7d: 4883ec20 sub rsp, 0x20 +;; 81: 8b7c2428 mov edi, dword ptr [rsp + 0x28] +;; 85: 8b742420 mov esi, dword ptr [rsp + 0x20] +;; 
89: ba02000000 mov edx, 2 +;; 8e: b903000000 mov ecx, 3 +;; 93: 41b804000000 mov r8d, 4 +;; 99: 41b905000000 mov r9d, 5 +;; 9f: 41bb06000000 mov r11d, 6 +;; a5: 44891c24 mov dword ptr [rsp], r11d +;; a9: 41bb07000000 mov r11d, 7 +;; af: 44895c2408 mov dword ptr [rsp + 8], r11d +;; b4: 41bb08000000 mov r11d, 8 +;; ba: 44895c2410 mov dword ptr [rsp + 0x10], r11d +;; bf: e800000000 call 0xc4 +;; c4: 4883c430 add rsp, 0x30 +;; c8: 4883c408 add rsp, 8 +;; cc: 5d pop rbp +;; cd: c3 ret +;; +;; 0: 55 push rbp +;; 1: 4889e5 mov rbp, rsp +;; 4: 4883ec18 sub rsp, 0x18 +;; 8: 897c2414 mov dword ptr [rsp + 0x14], edi +;; c: 89742410 mov dword ptr [rsp + 0x10], esi +;; 10: 8954240c mov dword ptr [rsp + 0xc], edx +;; 14: 894c2408 mov dword ptr [rsp + 8], ecx +;; 18: 4489442404 mov dword ptr [rsp + 4], r8d +;; 1d: 44890c24 mov dword ptr [rsp], r9d +;; 21: 8b442410 mov eax, dword ptr [rsp + 0x10] +;; 25: 8b4c2414 mov ecx, dword ptr [rsp + 0x14] +;; 29: 01c1 add ecx, eax +;; 2b: 8b44240c mov eax, dword ptr [rsp + 0xc] +;; 2f: 01c1 add ecx, eax +;; 31: 8b442408 mov eax, dword ptr [rsp + 8] +;; 35: 01c1 add ecx, eax +;; 37: 8b442404 mov eax, dword ptr [rsp + 4] +;; 3b: 01c1 add ecx, eax +;; 3d: 8b0424 mov eax, dword ptr [rsp] +;; 40: 01c1 add ecx, eax +;; 42: 8b4510 mov eax, dword ptr [rbp + 0x10] +;; 45: 01c1 add ecx, eax +;; 47: 8b4518 mov eax, dword ptr [rbp + 0x18] +;; 4a: 01c1 add ecx, eax +;; 4c: 8b4520 mov eax, dword ptr [rbp + 0x20] +;; 4f: 01c1 add ecx, eax +;; 51: 4889c8 mov rax, rcx +;; 54: 4883c418 add rsp, 0x18 +;; 58: 5d pop rbp +;; 59: c3 ret diff --git a/winch/filetests/filetests/x64/call/simple.wat b/winch/filetests/filetests/x64/call/simple.wat new file mode 100644 index 0000000000..a2cb8d32a7 --- /dev/null +++ b/winch/filetests/filetests/x64/call/simple.wat @@ -0,0 +1,49 @@ +;;! 
target = "x86_64" + +(module + (func $main (result i32) + (local $var i32) + (call $product (i32.const 20) (i32.const 80)) + (local.set $var (i32.const 2)) + (local.get $var) + (i32.div_u)) + + (func $product (param i32 i32) (result i32) + (local.get 0) + (local.get 1) + (i32.mul)) +) +;; 0: 55 push rbp +;; 1: 4889e5 mov rbp, rsp +;; 4: 4883ec08 sub rsp, 8 +;; 8: 48c7042400000000 mov qword ptr [rsp], 0 +;; 10: 4883ec10 sub rsp, 0x10 +;; 14: bf14000000 mov edi, 0x14 +;; 19: be50000000 mov esi, 0x50 +;; 1e: e800000000 call 0x23 +;; 23: 4883c410 add rsp, 0x10 +;; 27: b902000000 mov ecx, 2 +;; 2c: 894c2404 mov dword ptr [rsp + 4], ecx +;; 30: 50 push rax +;; 31: 448b5c240c mov r11d, dword ptr [rsp + 0xc] +;; 36: 4153 push r11 +;; 38: 59 pop rcx +;; 39: 58 pop rax +;; 3a: 31d2 xor edx, edx +;; 3c: f7f1 div ecx +;; 3e: 4883c408 add rsp, 8 +;; 42: 5d pop rbp +;; 43: c3 ret +;; +;; 0: 55 push rbp +;; 1: 4889e5 mov rbp, rsp +;; 4: 4883ec08 sub rsp, 8 +;; 8: 897c2404 mov dword ptr [rsp + 4], edi +;; c: 893424 mov dword ptr [rsp], esi +;; f: 8b0424 mov eax, dword ptr [rsp] +;; 12: 8b4c2404 mov ecx, dword ptr [rsp + 4] +;; 16: 0fafc8 imul ecx, eax +;; 19: 4889c8 mov rax, rcx +;; 1c: 4883c408 add rsp, 8 +;; 20: 5d pop rbp +;; 21: c3 ret diff --git a/winch/filetests/src/lib.rs b/winch/filetests/src/lib.rs index 0d4f8832db..a6d744717c 100644 --- a/winch/filetests/src/lib.rs +++ b/winch/filetests/src/lib.rs @@ -155,7 +155,7 @@ mod test { let buffer = env .isa - .compile_function(&sig, &body, validator) + .compile_function(&sig, &body, env, validator) .expect("Couldn't compile function"); disasm(buffer.data(), env.isa).unwrap() diff --git a/winch/src/compile.rs b/winch/src/compile.rs index 1e05a6aaff..c08fa800b8 100644 --- a/winch/src/compile.rs +++ b/winch/src/compile.rs @@ -60,7 +60,7 @@ fn compile(env: &FuncEnv, f: (DefinedFuncIndex, FunctionBodyData<'_>)) -> Result let validator = validator.into_validator(Default::default()); let buffer = env .isa - .compile_function(&sig, 
&body, validator) + .compile_function(&sig, &body, env, validator) .expect("Couldn't compile function"); println!("Disassembly for function: {}", index.as_u32());