winch(x64): Initial implementation for function calls (#6067)
* winch(x64): Initial implementation for function calls This change adds the main building blocks for calling locally defined functions. Support for function imports will be added iteratively after this change lands and once trampolines are supported. To support function calls, this change introduces the following functionality to the MacroAssembler: * `pop` to pop the machine stack into a given register, which in the case of this change, translates to the x64 pop instruction. * `call` to emit a call to locally defined functions. * `address_from_sp` to construct memory addresses with the SP as a base. * `free_stack` to emit the necessary instructions to reclaim stack space. The heavy lifting of setting up and emitting the function call is done through the implementation of `FnCall`. * Fix spill behaviour in function calls and add more documentation This commit adds more detailed documentation to the `call.rs` module. It also fixes a couple of bugs, mainly: * The previous commit didn't account for memory addresses used as arguments for the function call; any memory entry in the value stack used as a function argument should be tracked and then used to reclaim that memory when the function call ends. We could `pop` and do this implicitly, but we can also track this down and emit a single instruction to decrement the stack pointer, which will result in better code. * Introduce a differentiator between addresses relative or absolute to the stack pointer. When passing arguments in the stack -- assuming that SP at that point is aligned for the function call -- we should store the arguments relative to the absolute position of the stack pointer and when addressing a memory entry in the Wasm value stack, we should use an address relative to the offset and the position of the stack pointer. * Simplify tracking of the stack space needed for emitting a function call
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
use wasmparser::ValType;
|
||||
/// Base register used to address the local slot.
|
||||
///
|
||||
/// Slots for stack arguments are addressed from the frame pointer
|
||||
/// Slots for stack arguments are addressed from the frame pointer.
|
||||
/// Slots for function-defined locals and for registers are addressed
|
||||
/// from the stack pointer.
|
||||
#[derive(Eq, PartialEq)]
|
||||
|
||||
@@ -1,3 +1,47 @@
|
||||
//! This module provides all the necessary building blocks for
|
||||
//! implementing ISA specific ABIs.
|
||||
//!
|
||||
//! # Default ABI
|
||||
//!
|
||||
//! Winch uses a default internal ABI, for all internal functions.
|
||||
//! This allows us to push the complexity of system ABI compliance to
|
||||
//! the trampolines (not yet implemented). The default ABI treats all
|
||||
//! allocatable registers as caller saved, which means that (i) all
|
||||
//! register values in the Wasm value stack (which are normally
|
||||
//! referred to as "live"), must be saved onto the machine stack (ii)
|
||||
//! function prologues and epilogues don't store/restore other
|
||||
//! registers more than the non-allocatable ones (e.g. rsp/rbp in
|
||||
//! x86_64).
|
||||
//!
|
||||
//! The calling convention in the default ABI, uses registers to a
|
||||
//! certain fixed count for arguments and return values, and then the
|
||||
//! stack is used for all additional arguments.
|
||||
//!
|
||||
//! Generally the stack layout looks like:
|
||||
//! +-------------------------------+
|
||||
//! | |
|
||||
//! | |
|
||||
//! | Stack Args |
|
||||
//! | |
|
||||
//! | |
|
||||
//! +-------------------------------+----> SP @ function entry
|
||||
//! | Ret addr |
|
||||
//! +-------------------------------+
|
||||
//! | SP |
|
||||
//! +-------------------------------+----> SP @ Function prologue
|
||||
//! | |
|
||||
//! | |
|
||||
//! | |
|
||||
//! | Stack slots |
|
||||
//! | + dynamic space |
|
||||
//! | |
|
||||
//! | |
|
||||
//! | |
|
||||
//! +-------------------------------+----> SP @ callsite (after)
|
||||
//! | alignment |
|
||||
//! | + arguments |
|
||||
//! | | ----> Space allocated for calls
|
||||
//! | |
|
||||
use crate::isa::reg::Reg;
|
||||
use smallvec::SmallVec;
|
||||
use std::ops::{Add, BitAnd, Not, Sub};
|
||||
@@ -13,6 +57,9 @@ pub(crate) trait ABI {
|
||||
/// The required stack alignment.
|
||||
fn stack_align(&self) -> u8;
|
||||
|
||||
/// The required stack alignment for calls.
|
||||
fn call_stack_align(&self) -> u8;
|
||||
|
||||
/// The offset to the argument base, relative to the frame pointer.
|
||||
fn arg_base_offset(&self) -> u8;
|
||||
|
||||
@@ -117,11 +164,27 @@ impl ABIResult {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) type ABIParams = SmallVec<[ABIArg; 6]>;
|
||||
|
||||
/// An ABI-specific representation of a function signature.
|
||||
pub(crate) struct ABISig {
|
||||
/// Function parameters.
|
||||
pub params: SmallVec<[ABIArg; 6]>,
|
||||
pub params: ABIParams,
|
||||
/// Function result.
|
||||
pub result: ABIResult,
|
||||
/// Stack space needed for stack arguments.
|
||||
pub stack_bytes: u32,
|
||||
}
|
||||
|
||||
impl ABISig {
|
||||
/// Create a new ABI signature.
|
||||
pub fn new(params: ABIParams, result: ABIResult, stack_bytes: u32) -> Self {
|
||||
Self {
|
||||
params,
|
||||
result,
|
||||
stack_bytes,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the size in bytes of a given WebAssembly type.
|
||||
|
||||
222
winch/codegen/src/codegen/call.rs
Normal file
222
winch/codegen/src/codegen/call.rs
Normal file
@@ -0,0 +1,222 @@
|
||||
//! Function call emission. For more details around the ABI and
|
||||
//! calling convention, see [ABI].
|
||||
use super::CodeGenContext;
|
||||
use crate::{
|
||||
abi::{align_to, ABIArg, ABIResult, ABISig, ABI},
|
||||
masm::{MacroAssembler, OperandSize},
|
||||
reg::Reg,
|
||||
stack::Val,
|
||||
};
|
||||
|
||||
/// All the information needed to emit a function call.
|
||||
pub(crate) struct FnCall<'a> {
|
||||
/// The total stack space in bytes used by the function call.
|
||||
/// This amount includes the sum of:
|
||||
///
|
||||
/// 1. The amount of stack space that needs to be explicitly
|
||||
/// allocated at the callsite for callee arguments that
|
||||
/// go in the stack, plus any alignment.
|
||||
/// 2. The amount of stack space created by saving any live
|
||||
/// registers at the callsite.
|
||||
/// 3. The amount of space used by any memory entries in the value
|
||||
/// stack present at the callsite, that will be used as
|
||||
/// arguments for the function call. Any memory values in the
|
||||
/// value stack that are needed as part of the function
|
||||
/// arguments, will be consumed by the function call (either by
|
||||
/// assigning those values to a register or by storing those
|
||||
/// values to a memory location if the callee argument is on
|
||||
/// the stack), so we track that stack space to reclaim it once
|
||||
/// the function call has ended. This could also be done in
|
||||
/// `assign_args` every time a memory entry needs to be assigned
|
||||
/// to a particular location, but doing so will incur more
|
||||
/// instructions (e.g. a pop per argument that needs to be
|
||||
/// assigned); it's more efficient to track the space needed by
|
||||
/// those memory values and reclaim it at once.
|
||||
///
|
||||
/// The machine stack state that this amount is capturing, is the following:
|
||||
/// ┌──────────────────────────────────────────────────┐
|
||||
/// │ │
|
||||
/// │ │
|
||||
/// │ Stack space created by any previous spills │
|
||||
/// │ from the value stack; and which memory values │
|
||||
/// │ are used as function arguments. │
|
||||
/// │ │
|
||||
/// ├──────────────────────────────────────────────────┤ ---> The Wasm value stack at this point in time would look like:
|
||||
/// │ │ [ Reg | Reg | Mem(offset) | Mem(offset) ]
|
||||
/// │ │
|
||||
/// │ Stack space created by saving │
|
||||
/// │ any live registers at the callsite. │
|
||||
/// │ │
|
||||
/// │ │
|
||||
/// ├─────────────────────────────────────────────────┬┤ ---> The Wasm value stack at this point in time would look like:
|
||||
/// │ │ [ Mem(offset) | Mem(offset) | Mem(offset) | Mem(offset) ]
|
||||
/// │ │ Assuming that the callee takes 4 arguments, we calculate
|
||||
/// │ │ 2 spilled registers + 2 memory values; all of which will be used
|
||||
/// │ Stack space allocated for │ as arguments to the call via `assign_args`, thus the memory they represent is
|
||||
/// │ the callee function arguments in the stack; │ considered to be consumed by the call.
|
||||
/// │ represented by `arg_stack_space` │
|
||||
/// │ │
|
||||
/// │ │
|
||||
/// │ │
|
||||
/// └──────────────────────────────────────────────────┘ ------> Stack pointer when emitting the call
|
||||
///
|
||||
total_stack_space: u32,
|
||||
/// The total stack space needed for the callee arguments on the
|
||||
/// stack, including any adjustments to the function's frame and
|
||||
/// aligned to the required ABI alignment.
|
||||
arg_stack_space: u32,
|
||||
/// The ABI-specific signature of the callee.
|
||||
abi_sig: &'a ABISig,
|
||||
/// The stack pointer offset prior to preparing and emitting the
|
||||
/// call. This is tracked to assert the position of the stack
|
||||
/// pointer after the call has finished.
|
||||
sp_offset_at_callsite: u32,
|
||||
}
|
||||
|
||||
impl<'a> FnCall<'a> {
|
||||
/// Allocate and setup a new function call.
|
||||
///
|
||||
/// The setup process, will first save all the live registers in
|
||||
/// the value stack, tracking down those spilled for the function
|
||||
/// arguments (see comment below for more details); it will also
|
||||
/// track all the memory entries consumed by the function
|
||||
/// call. Then, it will calculate any adjustments needed to ensure
|
||||
/// the alignment of the caller's frame. It's important to note
|
||||
/// that the order of operations in the setup is important, as we
|
||||
/// want to calculate any adjustments to the caller's frame, after
|
||||
/// having saved any live registers, so that we can account for
|
||||
/// any pushes generated by register spilling.
|
||||
pub fn new<A: ABI, M: MacroAssembler>(
|
||||
abi: &A,
|
||||
callee_sig: &'a ABISig,
|
||||
context: &mut CodeGenContext,
|
||||
masm: &mut M,
|
||||
) -> Self {
|
||||
let stack = &context.stack;
|
||||
let arg_stack_space = callee_sig.stack_bytes;
|
||||
let callee_params = &callee_sig.params;
|
||||
let sp_offset_at_callsite = masm.sp_offset();
|
||||
|
||||
let (spilled_regs, memory_values) = match callee_params.len() {
|
||||
0 => {
|
||||
let _ = context.spill_regs_and_count_memory_in(masm, ..);
|
||||
(0, 0)
|
||||
}
|
||||
_ => {
|
||||
// Here we perform a "spill" of the register entries
|
||||
// in the Wasm value stack, we also count any memory
|
||||
// values that will be used as part of the callee
|
||||
// arguments. Saving the live registers is done by
|
||||
// emitting push operations for every `Reg` entry in
|
||||
// the Wasm value stack. We do this to be compliant
|
||||
// with Winch's internal ABI, in which all registers
|
||||
// are treated as caller-saved. For more details, see
|
||||
// [ABI].
|
||||
//
|
||||
// The next few lines, partition the value stack into
|
||||
// two sections:
|
||||
// +------------------+--+--- (Stack top)
|
||||
// | | |
|
||||
// | | | 1. The top `n` elements, which are used for
|
||||
// | | | function arguments; for which we save any
|
||||
// | | | live registers, keeping track of the amount of registers
|
||||
// +------------------+ | saved plus the amount of memory values consumed by the function call;
|
||||
// | | | with this information we can later reclaim the space used by the function call.
|
||||
// | | |
|
||||
// +------------------+--+---
|
||||
// | | | 2. The rest of the items in the stack, for which
|
||||
// | | | we only save any live registers.
|
||||
// | | |
|
||||
// +------------------+ |
|
||||
assert!(stack.len() >= callee_params.len());
|
||||
let partition = stack.len() - callee_params.len();
|
||||
let _ = context.spill_regs_and_count_memory_in(masm, 0..partition);
|
||||
context.spill_regs_and_count_memory_in(masm, partition..)
|
||||
}
|
||||
};
|
||||
|
||||
let delta = calculate_frame_adjustment(
|
||||
masm.sp_offset(),
|
||||
abi.arg_base_offset() as u32,
|
||||
abi.call_stack_align() as u32,
|
||||
);
|
||||
|
||||
let arg_stack_space = align_to(arg_stack_space + delta, abi.call_stack_align() as u32);
|
||||
Self {
|
||||
abi_sig: &callee_sig,
|
||||
arg_stack_space,
|
||||
total_stack_space: (spilled_regs * <A as ABI>::word_bytes())
|
||||
+ (memory_values * <A as ABI>::word_bytes())
|
||||
+ arg_stack_space,
|
||||
sp_offset_at_callsite,
|
||||
}
|
||||
}
|
||||
|
||||
/// Emit the function call.
|
||||
pub fn emit<M: MacroAssembler, A: ABI>(
|
||||
&self,
|
||||
masm: &mut M,
|
||||
context: &mut CodeGenContext,
|
||||
callee: u32,
|
||||
) {
|
||||
masm.reserve_stack(self.arg_stack_space);
|
||||
self.assign_args(context, masm, <A as ABI>::scratch_reg());
|
||||
masm.call(callee);
|
||||
masm.free_stack(self.total_stack_space);
|
||||
context.drop_last(self.abi_sig.params.len());
|
||||
// The stack pointer at the end of the function call
|
||||
// cannot be less than what it was when starting the
|
||||
// function call.
|
||||
assert!(self.sp_offset_at_callsite >= masm.sp_offset());
|
||||
self.handle_result(context, masm);
|
||||
}
|
||||
|
||||
fn assign_args<M: MacroAssembler>(
|
||||
&self,
|
||||
context: &mut CodeGenContext,
|
||||
masm: &mut M,
|
||||
scratch: Reg,
|
||||
) {
|
||||
let arg_count = self.abi_sig.params.len();
|
||||
let stack = &context.stack;
|
||||
let mut stack_values = stack.peekn(arg_count);
|
||||
for arg in &self.abi_sig.params {
|
||||
let val = stack_values
|
||||
.next()
|
||||
.unwrap_or_else(|| panic!("expected stack value for function argument"));
|
||||
match &arg {
|
||||
&ABIArg::Reg { ty, reg } => {
|
||||
context.move_val_to_reg(&val, *reg, masm, (*ty).into());
|
||||
}
|
||||
&ABIArg::Stack { ty, offset } => {
|
||||
let addr = masm.address_at_sp(*offset);
|
||||
let size: OperandSize = (*ty).into();
|
||||
context.move_val_to_reg(val, scratch, masm, size);
|
||||
masm.store(scratch.into(), addr, size);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_result<M: MacroAssembler>(&self, context: &mut CodeGenContext, masm: &mut M) {
|
||||
let result = &self.abi_sig.result;
|
||||
if result.is_void() {
|
||||
return;
|
||||
}
|
||||
|
||||
match result {
|
||||
&ABIResult::Reg { ty: _, reg } => {
|
||||
assert!(context.regalloc.gpr_available(reg));
|
||||
let result_reg = Val::reg(context.gpr(reg, masm));
|
||||
context.stack.push(result_reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculates the delta needed to adjust a function's frame plus some
/// addend to a given alignment.
///
/// Returns the number of bytes, always in the range `0..alignment`,
/// that must be added to `frame_size + addend` so that the sum becomes
/// a multiple of `alignment`. A result of `0` means the sum is already
/// aligned.
///
/// # Panics
///
/// Panics if `alignment` is zero.
fn calculate_frame_adjustment(frame_size: u32, addend: u32, alignment: u32) -> u32 {
    assert!(alignment != 0, "alignment must be non-zero");

    // Widen to 64 bits before adding: a plain `u32` addition would panic
    // in debug builds and silently wrap in release builds, which would
    // yield a wrong adjustment for very large frames.
    let total = u64::from(frame_size) + u64::from(addend);

    // The remainder is strictly smaller than `alignment`, so narrowing it
    // back to `u32` cannot truncate.
    let misalignment = (total % u64::from(alignment)) as u32;

    // The outer modulo maps the already-aligned case (`misalignment == 0`)
    // to `0` instead of `alignment`.
    (alignment - misalignment) % alignment
}
|
||||
@@ -5,6 +5,7 @@ use crate::{
|
||||
regalloc::RegAlloc,
|
||||
stack::{Stack, Val},
|
||||
};
|
||||
use std::ops::RangeBounds;
|
||||
|
||||
/// The code generation context.
|
||||
/// The code generation context is made up of three
|
||||
@@ -60,58 +61,66 @@ impl<'a> CodeGenContext<'a> {
|
||||
self.regalloc.free_gpr(reg);
|
||||
}
|
||||
|
||||
/// Loads the stack top value into a register, if it isn't already one;
|
||||
/// spilling if there are no registers available.
|
||||
pub fn pop_to_reg<M: MacroAssembler>(&mut self, masm: &mut M, size: OperandSize) -> Reg {
|
||||
if let Some(reg) = self.stack.pop_reg() {
|
||||
return reg;
|
||||
}
|
||||
|
||||
let dst = self.any_gpr(masm);
|
||||
let val = self.stack.pop().expect("a value at stack top");
|
||||
Self::move_val_to_reg(val, dst, masm, self.frame, size);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Checks if the stack top contains the given register. The register
|
||||
/// gets allocated otherwise, potentially causing a spill.
|
||||
/// Once the requested register is allocated, the value at the top of the stack
|
||||
/// gets loaded into the register.
|
||||
pub fn pop_to_named_reg<M: MacroAssembler>(
|
||||
/// Loads the stack top value into the next available register, if
|
||||
/// it isn't already one; spilling if there are no registers
|
||||
/// available. Optionally the caller may specify a specific
|
||||
/// destination register.
|
||||
pub fn pop_to_reg<M: MacroAssembler>(
|
||||
&mut self,
|
||||
masm: &mut M,
|
||||
named: Reg,
|
||||
named: Option<Reg>,
|
||||
size: OperandSize,
|
||||
) -> Reg {
|
||||
if let Some(reg) = self.stack.pop_named_reg(named) {
|
||||
return reg;
|
||||
let (in_stack, dst) = if let Some(dst) = named {
|
||||
self.stack
|
||||
.pop_named_reg(dst)
|
||||
.map(|reg| (true, reg))
|
||||
.unwrap_or_else(|| (false, self.gpr(dst, masm)))
|
||||
} else {
|
||||
self.stack
|
||||
.pop_reg()
|
||||
.map(|reg| (true, reg))
|
||||
.unwrap_or_else(|| (false, self.any_gpr(masm)))
|
||||
};
|
||||
|
||||
if in_stack {
|
||||
return dst;
|
||||
}
|
||||
|
||||
let dst = self.gpr(named, masm);
|
||||
let val = self.stack.pop().expect("a value at stack top");
|
||||
Self::move_val_to_reg(val, dst, masm, self.frame, size);
|
||||
if val.is_mem() {
|
||||
masm.pop(dst);
|
||||
} else {
|
||||
self.move_val_to_reg(&val, dst, masm, size);
|
||||
}
|
||||
|
||||
dst
|
||||
}
|
||||
|
||||
fn move_val_to_reg<M: MacroAssembler>(
|
||||
src: Val,
|
||||
/// Move a stack value to the given register.
|
||||
pub fn move_val_to_reg<M: MacroAssembler>(
|
||||
&self,
|
||||
src: &Val,
|
||||
dst: Reg,
|
||||
masm: &mut M,
|
||||
frame: &Frame,
|
||||
size: OperandSize,
|
||||
) {
|
||||
match src {
|
||||
Val::Reg(src) => masm.mov(RegImm::reg(src), RegImm::reg(dst), size),
|
||||
Val::I32(imm) => masm.mov(RegImm::imm(imm.into()), RegImm::reg(dst), size),
|
||||
Val::I64(imm) => masm.mov(RegImm::imm(imm), RegImm::reg(dst), size),
|
||||
Val::Reg(src) => masm.mov(RegImm::reg(*src), RegImm::reg(dst), size),
|
||||
Val::I32(imm) => masm.mov(RegImm::imm((*imm).into()), RegImm::reg(dst), size),
|
||||
Val::I64(imm) => masm.mov(RegImm::imm(*imm), RegImm::reg(dst), size),
|
||||
Val::Local(index) => {
|
||||
let slot = frame
|
||||
.get_local(index)
|
||||
.expect(&format!("valid locat at index = {}", index));
|
||||
let slot = self
|
||||
.frame
|
||||
.get_local(*index)
|
||||
.unwrap_or_else(|| panic!("valid local at index = {}", index));
|
||||
let addr = masm.local_address(&slot);
|
||||
masm.load(addr, dst, slot.ty.into());
|
||||
}
|
||||
v => panic!("Unsupported value {:?}", v),
|
||||
Val::Memory(offset) => {
|
||||
let addr = masm.address_from_sp(*offset);
|
||||
masm.load(addr, dst, size);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@@ -128,7 +137,7 @@ impl<'a> CodeGenContext<'a> {
|
||||
.stack
|
||||
.pop_i32_const()
|
||||
.expect("i32 const value at stack top");
|
||||
let reg = self.pop_to_reg(masm, OperandSize::S32);
|
||||
let reg = self.pop_to_reg(masm, None, OperandSize::S32);
|
||||
emit(
|
||||
masm,
|
||||
RegImm::reg(reg),
|
||||
@@ -137,8 +146,8 @@ impl<'a> CodeGenContext<'a> {
|
||||
);
|
||||
self.stack.push(Val::reg(reg));
|
||||
} else {
|
||||
let src = self.pop_to_reg(masm, OperandSize::S32);
|
||||
let dst = self.pop_to_reg(masm, OperandSize::S32);
|
||||
let src = self.pop_to_reg(masm, None, OperandSize::S32);
|
||||
let dst = self.pop_to_reg(masm, None, OperandSize::S32);
|
||||
emit(masm, dst.into(), src.into(), OperandSize::S32);
|
||||
self.regalloc.free_gpr(src);
|
||||
self.stack.push(Val::reg(dst));
|
||||
@@ -157,18 +166,64 @@ impl<'a> CodeGenContext<'a> {
|
||||
.stack
|
||||
.pop_i64_const()
|
||||
.expect("i64 const value at stack top");
|
||||
let reg = self.pop_to_reg(masm, OperandSize::S64);
|
||||
let reg = self.pop_to_reg(masm, None, OperandSize::S64);
|
||||
emit(masm, RegImm::reg(reg), RegImm::imm(val), OperandSize::S64);
|
||||
self.stack.push(Val::reg(reg));
|
||||
} else {
|
||||
let src = self.pop_to_reg(masm, OperandSize::S64);
|
||||
let dst = self.pop_to_reg(masm, OperandSize::S64);
|
||||
let src = self.pop_to_reg(masm, None, OperandSize::S64);
|
||||
let dst = self.pop_to_reg(masm, None, OperandSize::S64);
|
||||
emit(masm, dst.into(), src.into(), OperandSize::S64);
|
||||
self.regalloc.free_gpr(src);
|
||||
self.stack.push(Val::reg(dst));
|
||||
}
|
||||
}
|
||||
|
||||
/// Saves any live registers in the value stack in a particular
|
||||
/// range defined by the caller. This is a specialization of the
|
||||
/// spill function; made available for cases in which spilling
|
||||
/// locals is not required, like for example for function calls in
|
||||
/// which locals are not reachable by the callee. It also tracks
|
||||
/// down the number of memory values in the given range.
|
||||
///
|
||||
/// Returns the number of spilled registers and the number of
|
||||
/// memory values in the given range of the value stack.
|
||||
pub fn spill_regs_and_count_memory_in<M, R>(&mut self, masm: &mut M, range: R) -> (u32, u32)
|
||||
where
|
||||
R: RangeBounds<usize>,
|
||||
M: MacroAssembler,
|
||||
{
|
||||
let mut spilled: u32 = 0;
|
||||
let mut memory_values = 0;
|
||||
for i in self.stack.inner_mut().range_mut(range) {
|
||||
if i.is_reg() {
|
||||
let reg = i.get_reg();
|
||||
let offset = masm.push(reg);
|
||||
self.regalloc.free_gpr(reg);
|
||||
*i = Val::Memory(offset);
|
||||
spilled += 1;
|
||||
} else if i.is_mem() {
|
||||
memory_values += 1;
|
||||
}
|
||||
}
|
||||
|
||||
(spilled, memory_values)
|
||||
}
|
||||
|
||||
/// Drops the last `n` elements of the stack, freeing any
|
||||
/// registers located in that region.
|
||||
pub fn drop_last(&mut self, last: usize) {
|
||||
let len = self.stack.len();
|
||||
assert!(last <= len);
|
||||
let truncate = self.stack.len() - last;
|
||||
|
||||
self.stack.inner_mut().range(truncate..).for_each(|v| {
|
||||
if v.is_reg() {
|
||||
self.regalloc.free_gpr(v.get_reg());
|
||||
}
|
||||
});
|
||||
self.stack.inner_mut().truncate(truncate);
|
||||
}
|
||||
|
||||
/// Spill locals and registers to memory.
|
||||
// TODO optimize the spill range;
|
||||
//
|
||||
|
||||
@@ -3,21 +3,21 @@ use crate::{
|
||||
masm::{MacroAssembler, OperandSize},
|
||||
};
|
||||
use anyhow::Result;
|
||||
use call::FnCall;
|
||||
use wasmparser::{BinaryReader, FuncValidator, ValType, ValidatorResources, VisitOperator};
|
||||
|
||||
mod context;
|
||||
pub(crate) use context::*;
|
||||
mod env;
|
||||
pub use env::*;
|
||||
mod call;
|
||||
|
||||
/// The code generation abstraction.
|
||||
pub(crate) struct CodeGen<'a, M>
|
||||
pub(crate) struct CodeGen<'a, A, M>
|
||||
where
|
||||
M: MacroAssembler,
|
||||
A: ABI,
|
||||
{
|
||||
/// The word size in bytes, extracted from the current ABI.
|
||||
word_size: u32,
|
||||
|
||||
/// The ABI-specific representation of the function signature, excluding results.
|
||||
sig: ABISig,
|
||||
|
||||
@@ -26,18 +26,32 @@ where
|
||||
|
||||
/// The MacroAssembler.
|
||||
pub masm: &'a mut M,
|
||||
|
||||
/// A reference to the function compilation environment.
|
||||
pub env: &'a dyn env::FuncEnv,
|
||||
|
||||
/// A reference to the current ABI.
|
||||
pub abi: &'a A,
|
||||
}
|
||||
|
||||
impl<'a, M> CodeGen<'a, M>
|
||||
impl<'a, A, M> CodeGen<'a, A, M>
|
||||
where
|
||||
M: MacroAssembler,
|
||||
A: ABI,
|
||||
{
|
||||
pub fn new<A: ABI>(masm: &'a mut M, context: CodeGenContext<'a>, sig: ABISig) -> Self {
|
||||
pub fn new(
|
||||
masm: &'a mut M,
|
||||
abi: &'a A,
|
||||
context: CodeGenContext<'a>,
|
||||
env: &'a dyn FuncEnv,
|
||||
sig: ABISig,
|
||||
) -> Self {
|
||||
Self {
|
||||
word_size: <A as ABI>::word_bytes(),
|
||||
sig,
|
||||
context,
|
||||
masm,
|
||||
abi,
|
||||
env,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -70,7 +84,7 @@ where
|
||||
let defined_locals_range = &self.context.frame.defined_locals_range;
|
||||
self.masm.zero_mem_range(
|
||||
defined_locals_range.as_range(),
|
||||
self.word_size,
|
||||
<A as ABI>::word_bytes(),
|
||||
&mut self.context.regalloc,
|
||||
);
|
||||
|
||||
@@ -105,8 +119,21 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
// Emit the usual function end instruction sequence.
|
||||
pub fn emit_end(&mut self) -> Result<()> {
|
||||
/// Emit a direct function call.
|
||||
pub fn emit_call(&mut self, index: u32) {
|
||||
let callee = self.env.callee_from_index(index);
|
||||
if callee.import {
|
||||
// TODO: Only locally defined functions for now.
|
||||
unreachable!()
|
||||
}
|
||||
|
||||
let sig = self.abi.sig(&callee.ty);
|
||||
let fncall = FnCall::new(self.abi, &sig, &mut self.context, self.masm);
|
||||
fncall.emit::<M, A>(self.masm, &mut self.context, index);
|
||||
}
|
||||
|
||||
/// Emit the usual function end instruction sequence.
|
||||
fn emit_end(&mut self) -> Result<()> {
|
||||
self.handle_abi_result();
|
||||
self.masm.epilogue(self.context.frame.locals_size);
|
||||
Ok(())
|
||||
@@ -149,7 +176,7 @@ where
|
||||
let named_reg = self.sig.result.result_reg();
|
||||
let reg = self
|
||||
.context
|
||||
.pop_to_named_reg(self.masm, named_reg, OperandSize::S64);
|
||||
.pop_to_reg(self.masm, Some(named_reg), OperandSize::S64);
|
||||
self.context.regalloc.free_gpr(reg);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -47,6 +47,10 @@ impl ABI for Aarch64ABI {
|
||||
8
|
||||
}
|
||||
|
||||
fn call_stack_align(&self) -> u8 {
|
||||
16
|
||||
}
|
||||
|
||||
fn arg_base_offset(&self) -> u8 {
|
||||
16
|
||||
}
|
||||
@@ -74,7 +78,7 @@ impl ABI for Aarch64ABI {
|
||||
let reg = regs::xreg(0);
|
||||
let result = ABIResult::reg(ty, reg);
|
||||
|
||||
ABISig { params, result }
|
||||
ABISig::new(params, result, stack_offset)
|
||||
}
|
||||
|
||||
fn scratch_reg() -> Reg {
|
||||
|
||||
@@ -96,6 +96,10 @@ impl Masm for MacroAssembler {
|
||||
self.increment_sp(bytes);
|
||||
}
|
||||
|
||||
fn free_stack(&mut self, _bytes: u32) {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn local_address(&mut self, local: &LocalSlot) -> Address {
|
||||
let (reg, offset) = local
|
||||
.addressed_from_sp()
|
||||
@@ -111,6 +115,14 @@ impl Masm for MacroAssembler {
|
||||
Address::offset(reg, offset as i64)
|
||||
}
|
||||
|
||||
fn address_from_sp(&self, _offset: u32) -> Self::Address {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn address_at_sp(&self, _offset: u32) -> Self::Address {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn store(&mut self, src: RegImm, dst: Address, size: OperandSize) {
|
||||
let src = match src {
|
||||
RegImm::Imm(imm) => {
|
||||
@@ -124,10 +136,18 @@ impl Masm for MacroAssembler {
|
||||
self.asm.str(src, dst, size);
|
||||
}
|
||||
|
||||
fn call(&mut self, _callee: u32) {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn load(&mut self, src: Address, dst: Reg, size: OperandSize) {
|
||||
self.asm.ldr(src, dst, size);
|
||||
}
|
||||
|
||||
fn pop(&mut self, _dst: Reg) {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn sp_offset(&mut self) -> u32 {
|
||||
self.sp_offset
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ use crate::{
|
||||
regalloc::RegAlloc,
|
||||
regset::RegSet,
|
||||
stack::Stack,
|
||||
FuncEnv,
|
||||
};
|
||||
use anyhow::Result;
|
||||
use cranelift_codegen::settings::{self, Flags};
|
||||
@@ -84,6 +85,7 @@ impl TargetIsa for Aarch64 {
|
||||
&self,
|
||||
sig: &FuncType,
|
||||
body: &FunctionBody,
|
||||
env: &dyn FuncEnv,
|
||||
mut validator: FuncValidator<ValidatorResources>,
|
||||
) -> Result<MachBufferFinalized<Final>> {
|
||||
let mut body = body.get_binary_reader();
|
||||
@@ -95,7 +97,7 @@ impl TargetIsa for Aarch64 {
|
||||
// TODO: Add floating point bitmask
|
||||
let regalloc = RegAlloc::new(RegSet::new(ALL_GPR, 0), scratch());
|
||||
let codegen_context = CodeGenContext::new(regalloc, stack, &frame);
|
||||
let mut codegen = CodeGen::new::<abi::Aarch64ABI>(&mut masm, codegen_context, abi_sig);
|
||||
let mut codegen = CodeGen::new(&mut masm, &abi, codegen_context, env, abi_sig);
|
||||
|
||||
codegen.emit(&mut body, validator)?;
|
||||
Ok(masm.finalize())
|
||||
|
||||
@@ -10,6 +10,8 @@ use std::{
|
||||
use target_lexicon::{Architecture, Triple};
|
||||
use wasmparser::{FuncType, FuncValidator, FunctionBody, ValidatorResources};
|
||||
|
||||
use crate::FuncEnv;
|
||||
|
||||
#[cfg(feature = "x64")]
|
||||
pub(crate) mod x64;
|
||||
|
||||
@@ -92,6 +94,7 @@ pub trait TargetIsa: Send + Sync {
|
||||
&self,
|
||||
sig: &FuncType,
|
||||
body: &FunctionBody,
|
||||
env: &dyn FuncEnv,
|
||||
validator: FuncValidator<ValidatorResources>,
|
||||
) -> Result<MachBufferFinalized<Final>>;
|
||||
|
||||
|
||||
@@ -39,6 +39,10 @@ impl ABI for X64ABI {
|
||||
8
|
||||
}
|
||||
|
||||
fn call_stack_align(&self) -> u8 {
|
||||
16
|
||||
}
|
||||
|
||||
fn arg_base_offset(&self) -> u8 {
|
||||
// Two 8-byte slots, one for the return address and another
|
||||
// one for the frame pointer.
|
||||
@@ -75,7 +79,7 @@ impl ABI for X64ABI {
|
||||
let reg = regs::rax();
|
||||
let result = ABIResult::reg(ty, reg);
|
||||
|
||||
ABISig { params, result }
|
||||
ABISig::new(params, result, stack_offset)
|
||||
}
|
||||
|
||||
fn scratch_reg() -> Reg {
|
||||
|
||||
@@ -5,18 +5,21 @@ use crate::{
|
||||
masm::{DivKind, OperandSize, RemKind},
|
||||
};
|
||||
use cranelift_codegen::{
|
||||
entity::EntityRef,
|
||||
ir::TrapCode,
|
||||
ir::{ExternalName, Opcode, UserExternalNameRef},
|
||||
isa::x64::{
|
||||
args::{
|
||||
self, AluRmiROpcode, Amode, CmpOpcode, DivSignedness, ExtMode, FromWritableReg, Gpr,
|
||||
GprMem, GprMemImm, RegMem, RegMemImm, SyntheticAmode, WritableGpr, CC,
|
||||
},
|
||||
settings as x64_settings, EmitInfo, EmitState, Inst,
|
||||
settings as x64_settings, CallInfo, EmitInfo, EmitState, Inst,
|
||||
},
|
||||
settings, Final, MachBuffer, MachBufferFinalized, MachInstEmit, Writable,
|
||||
};
|
||||
|
||||
use super::{address::Address, regs};
|
||||
use smallvec::smallvec;
|
||||
|
||||
/// A x64 instruction operand.
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
@@ -465,4 +468,18 @@ impl Assembler {
|
||||
dst: dst.into(),
|
||||
});
|
||||
}
|
||||
|
||||
/// Direct function call to a user defined function.
|
||||
pub fn call(&mut self, callee: u32) {
|
||||
let dest = ExternalName::user(UserExternalNameRef::new(callee as usize));
|
||||
self.emit(Inst::CallKnown {
|
||||
dest,
|
||||
info: Box::new(CallInfo {
|
||||
uses: smallvec![],
|
||||
defs: smallvec![],
|
||||
clobbers: Default::default(),
|
||||
opcode: Opcode::Call,
|
||||
}),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -70,6 +70,14 @@ impl Masm for MacroAssembler {
|
||||
self.increment_sp(bytes);
|
||||
}
|
||||
|
||||
fn free_stack(&mut self, bytes: u32) {
|
||||
if bytes == 0 {
|
||||
return;
|
||||
}
|
||||
self.asm.add_ir(bytes as i32, rsp(), OperandSize::S64);
|
||||
self.decrement_sp(bytes);
|
||||
}
|
||||
|
||||
fn local_address(&mut self, local: &LocalSlot) -> Address {
|
||||
let (reg, offset) = local
|
||||
.addressed_from_sp()
|
||||
@@ -85,6 +93,14 @@ impl Masm for MacroAssembler {
|
||||
Address::offset(reg, offset)
|
||||
}
|
||||
|
||||
fn address_from_sp(&self, offset: u32) -> Self::Address {
|
||||
Address::offset(regs::rsp(), self.sp_offset - offset)
|
||||
}
|
||||
|
||||
fn address_at_sp(&self, offset: u32) -> Self::Address {
|
||||
Address::offset(regs::rsp(), offset)
|
||||
}
|
||||
|
||||
fn store(&mut self, src: RegImm, dst: Address, size: OperandSize) {
|
||||
let src: Operand = src.into();
|
||||
let dst: Operand = dst.into();
|
||||
@@ -92,6 +108,16 @@ impl Masm for MacroAssembler {
|
||||
self.asm.mov(src, dst, size);
|
||||
}
|
||||
|
||||
fn pop(&mut self, dst: Reg) {
|
||||
self.asm.pop_r(dst);
|
||||
// Similar to the comment in `push`, we assume 8 bytes per pop.
|
||||
self.decrement_sp(8);
|
||||
}
|
||||
|
||||
fn call(&mut self, callee: u32) {
|
||||
self.asm.call(callee);
|
||||
}
|
||||
|
||||
fn load(&mut self, src: Address, dst: Reg, size: OperandSize) {
|
||||
let src = src.into();
|
||||
let dst = dst.into();
|
||||
@@ -158,12 +184,12 @@ impl Masm for MacroAssembler {
|
||||
let rax = context.gpr(regs::rax(), self);
|
||||
|
||||
// Allocate the divisor, which can be any gpr.
|
||||
let divisor = context.pop_to_reg(self, size);
|
||||
let divisor = context.pop_to_reg(self, None, size);
|
||||
|
||||
// Mark rax as allocatable.
|
||||
context.regalloc.free_gpr(rax);
|
||||
// Move the top value to rax.
|
||||
let rax = context.pop_to_named_reg(self, rax, size);
|
||||
let rax = context.pop_to_reg(self, Some(rax), size);
|
||||
self.asm.div(divisor, (rax, rdx), kind, size);
|
||||
|
||||
// Free the divisor and rdx.
|
||||
@@ -180,12 +206,12 @@ impl Masm for MacroAssembler {
|
||||
let rax = context.gpr(regs::rax(), self);
|
||||
|
||||
// Allocate the divisor, which can be any gpr.
|
||||
let divisor = context.pop_to_reg(self, size);
|
||||
let divisor = context.pop_to_reg(self, None, size);
|
||||
|
||||
// Mark rax as allocatable.
|
||||
context.regalloc.free_gpr(rax);
|
||||
// Move the top value to rax.
|
||||
let rax = context.pop_to_named_reg(self, rax, size);
|
||||
let rax = context.pop_to_reg(self, Some(rax), size);
|
||||
self.asm.rem(divisor, (rax, rdx), kind, size);
|
||||
|
||||
// Free the divisor and rax.
|
||||
@@ -225,7 +251,6 @@ impl MacroAssembler {
|
||||
self.sp_offset += bytes;
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn decrement_sp(&mut self, bytes: u32) {
|
||||
assert!(
|
||||
self.sp_offset >= bytes,
|
||||
|
||||
@@ -5,6 +5,7 @@ use crate::isa::x64::masm::MacroAssembler as X64Masm;
|
||||
use crate::masm::MacroAssembler;
|
||||
use crate::regalloc::RegAlloc;
|
||||
use crate::stack::Stack;
|
||||
use crate::FuncEnv;
|
||||
use crate::{
|
||||
isa::{Builder, TargetIsa},
|
||||
regset::RegSet,
|
||||
@@ -85,6 +86,7 @@ impl TargetIsa for X64 {
|
||||
&self,
|
||||
sig: &FuncType,
|
||||
body: &FunctionBody,
|
||||
env: &dyn FuncEnv,
|
||||
mut validator: FuncValidator<ValidatorResources>,
|
||||
) -> Result<MachBufferFinalized<Final>> {
|
||||
let mut body = body.get_binary_reader();
|
||||
@@ -96,7 +98,7 @@ impl TargetIsa for X64 {
|
||||
// TODO Add in floating point bitmask
|
||||
let regalloc = RegAlloc::new(RegSet::new(ALL_GPR, 0), regs::scratch());
|
||||
let codegen_context = CodeGenContext::new(regalloc, stack, &frame);
|
||||
let mut codegen = CodeGen::new::<abi::X64ABI>(&mut masm, codegen_context, abi_sig);
|
||||
let mut codegen = CodeGen::new(&mut masm, &abi, codegen_context, env, abi_sig);
|
||||
|
||||
codegen.emit(&mut body, validator)?;
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ pub(crate) enum RemKind {
|
||||
}
|
||||
|
||||
/// Operand size, in bits.
|
||||
#[derive(Copy, Clone, Eq, PartialEq)]
|
||||
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
|
||||
pub(crate) enum OperandSize {
|
||||
/// 32 bits.
|
||||
S32,
|
||||
@@ -87,9 +87,24 @@ pub(crate) trait MacroAssembler {
|
||||
/// Reserve stack space.
|
||||
fn reserve_stack(&mut self, bytes: u32);
|
||||
|
||||
/// Free stack space.
|
||||
fn free_stack(&mut self, bytes: u32);
|
||||
|
||||
/// Get the address of a local slot.
|
||||
fn local_address(&mut self, local: &LocalSlot) -> Self::Address;
|
||||
|
||||
/// Constructs an address with an offset that is relative to the
|
||||
/// current position of the stack pointer (e.g. [sp + (sp_offset -
|
||||
/// offset)]).
|
||||
fn address_from_sp(&self, offset: u32) -> Self::Address;
|
||||
|
||||
/// Constructs an address with an offset that is absolute to the
|
||||
/// current position of the stack pointer (e.g. [sp + offset]).
|
||||
fn address_at_sp(&self, offset: u32) -> Self::Address;
|
||||
|
||||
/// Emit a function call to a locally defined function.
|
||||
fn call(&mut self, callee: u32);
|
||||
|
||||
/// Get stack pointer offset.
|
||||
fn sp_offset(&mut self) -> u32;
|
||||
|
||||
@@ -99,6 +114,9 @@ pub(crate) trait MacroAssembler {
|
||||
/// Perform a stack load.
|
||||
fn load(&mut self, src: Self::Address, dst: Reg, size: OperandSize);
|
||||
|
||||
/// Pop a value from the machine stack into the given register.
|
||||
fn pop(&mut self, dst: Reg);
|
||||
|
||||
/// Perform a move.
|
||||
fn mov(&mut self, src: RegImm, dst: RegImm, size: OperandSize);
|
||||
|
||||
|
||||
@@ -35,6 +35,11 @@ impl RegAlloc {
|
||||
})
|
||||
}
|
||||
|
||||
/// Checks if a general purpose register is avaiable.
|
||||
pub fn gpr_available(&self, reg: Reg) -> bool {
|
||||
self.regset.named_gpr_available(reg.hw_enc() as u32)
|
||||
}
|
||||
|
||||
/// Request a specific general purpose register,
|
||||
/// spilling if not available.
|
||||
pub fn gpr<F>(&mut self, named: Reg, spill: &mut F) -> Reg
|
||||
|
||||
@@ -38,7 +38,9 @@ impl RegSet {
|
||||
self.gpr |= 1 << index;
|
||||
}
|
||||
|
||||
fn named_gpr_available(&self, index: u32) -> bool {
|
||||
/// Returns true if the given general purpose register
|
||||
/// is available.
|
||||
pub fn named_gpr_available(&self, index: u32) -> bool {
|
||||
let index = 1 << index;
|
||||
(!self.gpr & index) == 0
|
||||
}
|
||||
|
||||
@@ -45,6 +45,14 @@ impl Val {
|
||||
}
|
||||
}
|
||||
|
||||
/// Check wheter the value is a memory offset.
|
||||
pub fn is_mem(&self) -> bool {
|
||||
match *self {
|
||||
Self::Memory(_) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the register representation of the value.
|
||||
///
|
||||
/// # Panics
|
||||
@@ -109,6 +117,11 @@ impl Stack {
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the length of the stack.
|
||||
pub fn len(&self) -> usize {
|
||||
self.inner.len()
|
||||
}
|
||||
|
||||
/// Push a value to the stack.
|
||||
pub fn push(&mut self, val: Val) {
|
||||
self.inner.push_back(val);
|
||||
@@ -119,6 +132,16 @@ impl Stack {
|
||||
self.inner.back()
|
||||
}
|
||||
|
||||
/// Returns an iterator referencing the last n items of the stack,
|
||||
/// in bottom-most to top-most order.
|
||||
pub fn peekn(&self, n: usize) -> impl Iterator<Item = &Val> + '_ {
|
||||
let len = self.len();
|
||||
assert!(n <= len);
|
||||
|
||||
let partition = len - n;
|
||||
self.inner.range(partition..)
|
||||
}
|
||||
|
||||
/// Pops the top element of the stack, if any.
|
||||
pub fn pop(&mut self) -> Option<Val> {
|
||||
self.inner.pop_back()
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
//! which validates and dispatches to the corresponding
|
||||
//! machine code emitter.
|
||||
|
||||
use crate::abi::ABI;
|
||||
use crate::codegen::CodeGen;
|
||||
use crate::masm::{DivKind, MacroAssembler, OperandSize, RegImm, RemKind};
|
||||
use crate::stack::Val;
|
||||
@@ -49,14 +50,16 @@ macro_rules! def_unsupported {
|
||||
(emit I64Sub $($rest:tt)*) => {};
|
||||
(emit LocalGet $($rest:tt)*) => {};
|
||||
(emit LocalSet $($rest:tt)*) => {};
|
||||
(emit Call $($rest:tt)*) => {};
|
||||
(emit End $($rest:tt)*) => {};
|
||||
|
||||
(emit $unsupported:tt $($rest:tt)*) => {$($rest)*};
|
||||
}
|
||||
|
||||
impl<'a, M> VisitOperator<'a> for CodeGen<'a, M>
|
||||
impl<'a, A, M> VisitOperator<'a> for CodeGen<'a, A, M>
|
||||
where
|
||||
M: MacroAssembler,
|
||||
A: ABI,
|
||||
{
|
||||
type Output = ();
|
||||
|
||||
@@ -188,12 +191,16 @@ where
|
||||
.get_local(index)
|
||||
.expect(&format!("vald local at slot = {}", index));
|
||||
let size: OperandSize = slot.ty.into();
|
||||
let src = self.context.pop_to_reg(self.masm, size);
|
||||
let src = self.context.pop_to_reg(self.masm, None, size);
|
||||
let addr = self.masm.local_address(&slot);
|
||||
self.masm.store(RegImm::reg(src), addr, size);
|
||||
self.context.regalloc.free_gpr(src);
|
||||
}
|
||||
|
||||
fn visit_call(&mut self, index: u32) {
|
||||
self.emit_call(index);
|
||||
}
|
||||
|
||||
wasmparser::for_each_operator!(def_unsupported);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user