winch(x64): Initial implementation for function calls (#6067)

* winch(x64): Initial implementation for function calls

This change adds the main building blocks for calling locally defined
functions. Support for function imports will be added iteratively after this
change lands and once trampolines are supported.

To support function calls, this change introduces the following functionality to
the MacroAssembler:

* `pop` to pop the machine stack into a given register, which in the case of
this change, translates to the x64 pop instruction.

* `call` to emit a call to locally defined functions.

* `address_from_sp` to construct memory addresses with the SP as a base.

* `free_stack` to emit the necessary instructions to free stack space.

The heavy lifting of setting up and emitting the function call is done through
the implementation of `FnCall`.

* Fix spill behaviour in function calls and add more documentation

This commit adds more detailed documentation to the `call.rs` module.

It also fixes a couple of bugs, mainly:

* The previous commit didn't account for memory addresses used as arguments for
the function call, any memory entry in the value stack used as a function
argument should be tracked and then used to claim that memory when the function
call ends. We could `pop` and do this implicitly, but we can also track this
down and emit a single instruction to decrement the stack pointer, which will
result in better code.

* Introduce a differentiator between addresses relative or absolute to the stack
pointer. When passing arguments in the stack -- assuming that SP at that point
is aligned for the function call -- we should store the arguments relative to
the absolute position of the stack pointer and when addressing a memory entry in
the Wasm value stack, we should use an address relative to the offset and the
position of the stack pointer.

* Simplify tracking of the stack space needed for emitting a function call
This commit is contained in:
Saúl Cabrera
2023-03-28 14:30:31 -04:00
committed by GitHub
parent d54c00ba4d
commit af4d94c85a
22 changed files with 737 additions and 68 deletions

View File

@@ -1,7 +1,7 @@
use wasmparser::ValType;
/// Base register used to address the local slot.
///
/// Slots for stack arguments are addressed from the frame pointer
/// Slots for stack arguments are addressed from the frame pointer.
/// Slots for function-defined locals and for registers are addressed
/// from the stack pointer.
#[derive(Eq, PartialEq)]

View File

@@ -1,3 +1,47 @@
//! This module provides all the necessary building blocks for
//! implementing ISA specific ABIs.
//!
//! # Default ABI
//!
//! Winch uses a default internal ABI, for all internal functions.
//! This allows us to push the complexity of system ABI compliance to
//! the trampolines (not yet implemented). The default ABI treats all
//! allocatable registers as caller saved, which means that (i) all
//! register values in the Wasm value stack (which are normally
//! referred to as "live"), must be saved onto the machine stack (ii)
//! function prologues and epilogues don't store/restore other
//! registers more than the non-allocatable ones (e.g. rsp/rbp in
//! x86_64).
//!
//! The calling convention in the default ABI, uses registers to a
//! certain fixed count for arguments and return values, and then the
//! stack is used for all additional arguments.
//!
//! Generally the stack layout looks like:
//! +-------------------------------+
//! | |
//! | |
//! | Stack Args |
//! | |
//! | |
//! +-------------------------------+----> SP @ function entry
//! | Ret addr |
//! +-------------------------------+
//! | SP |
//! +-------------------------------+----> SP @ Function prologue
//! | |
//! | |
//! | |
//! | Stack slots |
//! | + dynamic space |
//! | |
//! | |
//! | |
//! +-------------------------------+----> SP @ callsite (after)
//! | alignment |
//! | + arguments |
//! | | ----> Space allocated for calls
//! | |
use crate::isa::reg::Reg;
use smallvec::SmallVec;
use std::ops::{Add, BitAnd, Not, Sub};
@@ -13,6 +57,9 @@ pub(crate) trait ABI {
/// The required stack alignment.
fn stack_align(&self) -> u8;
/// The required stack alignment for calls.
fn call_stack_align(&self) -> u8;
/// The offset to the argument base, relative to the frame pointer.
fn arg_base_offset(&self) -> u8;
@@ -117,11 +164,27 @@ impl ABIResult {
}
}
pub(crate) type ABIParams = SmallVec<[ABIArg; 6]>;
/// An ABI-specific representation of a function signature.
pub(crate) struct ABISig {
/// Function parameters.
pub params: SmallVec<[ABIArg; 6]>,
pub params: ABIParams,
/// Function result.
pub result: ABIResult,
/// Stack space needed for stack arguments.
pub stack_bytes: u32,
}
impl ABISig {
/// Create a new ABI signature.
///
/// `params` and `result` describe the ABI-specific locations of
/// the function's parameters and result; `stack_bytes` is the
/// stack space needed for the stack-allocated arguments.
pub fn new(params: ABIParams, result: ABIResult, stack_bytes: u32) -> Self {
Self {
params,
result,
stack_bytes,
}
}
}
/// Returns the size in bytes of a given WebAssembly type.

View File

@@ -0,0 +1,222 @@
//! Function call emission. For more details around the ABI and
//! calling convention, see [ABI].
use super::CodeGenContext;
use crate::{
abi::{align_to, ABIArg, ABIResult, ABISig, ABI},
masm::{MacroAssembler, OperandSize},
reg::Reg,
stack::Val,
};
/// All the information needed to emit a function call.
pub(crate) struct FnCall<'a> {
/// The total stack space in bytes used by the function call.
/// This amount includes the sum of:
///
/// 1. The amount of stack space that needs to be explicitly
/// allocated at the callsite for callee arguments that
/// go in the stack, plus any alignment.
/// 2. The amount of stack space created by saving any live
/// registers at the callsite.
/// 3. The amount of space used by any memory entries in the value
/// stack present at the callsite, that will be used as
/// arguments for the function call. Any memory values in the
/// value stack that are needed as part of the function
/// arguments, will be consumed by the function call (either by
/// assigning those values to a register or by storing those
/// values to a memory location if the callee argument is on
/// the stack), so we track that stack space to reclaim it once
/// the function call has ended. This could also be done in
/// `assign_args` every time a memory entry needs to be assigned
/// to a particular location, but doing so would incur more
/// instructions (e.g. a pop per argument that needs to be
/// assigned); it's more efficient to track the space needed by
/// those memory values and reclaim it at once.
///
/// The machine stack state that this amount is capturing, is the following:
/// ┌──────────────────────────────────────────────────┐
/// │                                                  │
/// │                                                  │
/// │  Stack space created by any previous spills      │
/// │  from the value stack; and which memory values   │
/// │  are used as function arguments.                 │
/// │                                                  │
/// ├──────────────────────────────────────────────────┤ ---> The Wasm value stack at this point in time would look like:
/// │                                                  │      [ Reg | Reg | Mem(offset) | Mem(offset) ]
/// │                                                  │
/// │   Stack space created by saving                  │
/// │   any live registers at the callsite.            │
/// │                                                  │
/// │                                                  │
/// ├─────────────────────────────────────────────────┬┤ ---> The Wasm value stack at this point in time would look like:
/// │                                                  │      [ Mem(offset) | Mem(offset) | Mem(offset) | Mem(offset) ]
/// │                                                  │      Assuming that the callee takes 4 arguments, we calculate
/// │                                                  │      2 spilled registers + 2 memory values; all of which will be used
/// │   Stack space allocated for                      │      as arguments to the call via `assign_args`, thus the memory they represent
/// │   the callee function arguments in the stack;    │      is considered to be consumed by the call.
/// │   represented by `arg_stack_space`               │
/// │                                                  │
/// │                                                  │
/// │                                                  │
/// └──────────────────────────────────────────────────┘ ------> Stack pointer when emitting the call
///
total_stack_space: u32,
/// The total stack space needed for the callee arguments on the
/// stack, including any adjustments to the function's frame and
/// aligned to the required ABI alignment.
arg_stack_space: u32,
/// The ABI-specific signature of the callee.
abi_sig: &'a ABISig,
/// The stack pointer offset prior to preparing and emitting the
/// call. This is tracked to assert the position of the stack
/// pointer after the call has finished.
sp_offset_at_callsite: u32,
}
impl<'a> FnCall<'a> {
/// Allocate and setup a new function call.
///
/// The setup process will first save all the live registers in
/// the value stack, tracking down those spilled for the function
/// arguments (see comment below for more details); it will also
/// track all the memory entries consumed by the function
/// call. Then, it will calculate any adjustments needed to ensure
/// the alignment of the caller's frame. It's important to note
/// that the order of operations in the setup is important, as we
/// want to calculate any adjustments to the caller's frame, after
/// having saved any live registers, so that we can account for
/// any pushes generated by register spilling.
pub fn new<A: ABI, M: MacroAssembler>(
abi: &A,
callee_sig: &'a ABISig,
context: &mut CodeGenContext,
masm: &mut M,
) -> Self {
let stack = &context.stack;
let arg_stack_space = callee_sig.stack_bytes;
let callee_params = &callee_sig.params;
let sp_offset_at_callsite = masm.sp_offset();
let (spilled_regs, memory_values) = match callee_params.len() {
0 => {
// No arguments: spill the whole value stack; no
// entries are consumed by the call, so both counts
// are zero.
let _ = context.spill_regs_and_count_memory_in(masm, ..);
(0, 0)
}
_ => {
// Here we perform a "spill" of the register entries
// in the Wasm value stack, we also count any memory
// values that will be used as part of the callee
// arguments. Saving the live registers is done by
// emitting push operations for every `Reg` entry in
// the Wasm value stack. We do this to be compliant
// with Winch's internal ABI, in which all registers
// are treated as caller-saved. For more details, see
// [ABI].
//
// The next few lines, partition the value stack into
// two sections:
// +------------------+--+--- (Stack top)
// |                  |  |
// |                  |  | 1. The top `n` elements, which are used for
// |                  |  |    function arguments; for which we save any
// |                  |  |    live registers, keeping track of the amount of registers
// +------------------+  |    saved plus the amount of memory values consumed by the function call;
// |                  |  |    with this information we can later reclaim the space used by the function call.
// |                  |  |
// +------------------+--+---
// |                  |  | 2. The rest of the items in the stack, for which
// |                  |  |    we only save any live registers.
// |                  |  |
// +------------------+  |
assert!(stack.len() >= callee_params.len());
let partition = stack.len() - callee_params.len();
let _ = context.spill_regs_and_count_memory_in(masm, 0..partition);
context.spill_regs_and_count_memory_in(masm, partition..)
}
};
// Compute the padding needed so that the stack pointer, plus
// the argument base offset, meets the ABI-required call
// alignment at the callsite.
let delta = calculate_frame_adjustment(
masm.sp_offset(),
abi.arg_base_offset() as u32,
abi.call_stack_align() as u32,
);
let arg_stack_space = align_to(arg_stack_space + delta, abi.call_stack_align() as u32);
Self {
abi_sig: &callee_sig,
arg_stack_space,
// Each spilled register and each consumed memory value
// occupies one machine word on the stack.
total_stack_space: (spilled_regs * <A as ABI>::word_bytes())
+ (memory_values * <A as ABI>::word_bytes())
+ arg_stack_space,
sp_offset_at_callsite,
}
}
/// Emit the function call.
///
/// Reserves the stack space needed for the callee's stack
/// arguments, assigns each argument to its ABI-designated
/// location, emits the call instruction, and then frees all the
/// stack space consumed by the call before pushing the result
/// (if any) onto the value stack.
pub fn emit<M: MacroAssembler, A: ABI>(
&self,
masm: &mut M,
context: &mut CodeGenContext,
callee: u32,
) {
masm.reserve_stack(self.arg_stack_space);
self.assign_args(context, masm, <A as ABI>::scratch_reg());
masm.call(callee);
// Reclaim the argument area plus the space created by spills
// and consumed memory values in a single SP adjustment.
masm.free_stack(self.total_stack_space);
// The arguments were consumed by the call; drop them from the
// value stack, freeing any registers they may hold.
context.drop_last(self.abi_sig.params.len());
// The stack pointer at the end of the function call
// cannot be less than what it was when starting the
// function call.
assert!(self.sp_offset_at_callsite >= masm.sp_offset());
self.handle_result(context, masm);
}
/// Assign each function argument from the value stack to its
/// ABI-designated location: register arguments are moved
/// directly into their register, while stack arguments are
/// staged through the given scratch register and stored at
/// their offset from the stack pointer.
///
/// Panics if the value stack has fewer entries than the callee
/// has parameters.
fn assign_args<M: MacroAssembler>(
&self,
context: &mut CodeGenContext,
masm: &mut M,
scratch: Reg,
) {
let arg_count = self.abi_sig.params.len();
let stack = &context.stack;
let mut stack_values = stack.peekn(arg_count);
for arg in &self.abi_sig.params {
let val = stack_values
.next()
.unwrap_or_else(|| panic!("expected stack value for function argument"));
match &arg {
&ABIArg::Reg { ty, reg } => {
context.move_val_to_reg(&val, *reg, masm, (*ty).into());
}
&ABIArg::Stack { ty, offset } => {
// Stack arguments are addressed absolutely from the
// stack pointer, which at this point is aligned for
// the call.
let addr = masm.address_at_sp(*offset);
let size: OperandSize = (*ty).into();
context.move_val_to_reg(val, scratch, masm, size);
masm.store(scratch.into(), addr, size);
}
}
}
}
/// Push the call's result, if any, onto the value stack.
///
/// The assert guards the invariant that the ABI-designated
/// result register is allocatable after the call — presumably
/// freed by the preceding `drop_last`; verify against `emit`.
fn handle_result<M: MacroAssembler>(&self, context: &mut CodeGenContext, masm: &mut M) {
let result = &self.abi_sig.result;
if result.is_void() {
return;
}
match result {
&ABIResult::Reg { ty: _, reg } => {
assert!(context.regalloc.gpr_available(reg));
let result_reg = Val::reg(context.gpr(reg, masm));
context.stack.push(result_reg);
}
}
}
}
/// Calculates the delta needed to adjust a function's frame plus some
/// addend to a given alignment.
///
/// Returns the number of bytes that must be added to
/// `frame_size + addend` to round it up to the next multiple of
/// `alignment` (zero if it is already aligned).
fn calculate_frame_adjustment(frame_size: u32, addend: u32, alignment: u32) -> u32 {
    let unaligned = frame_size + addend;
    match unaligned % alignment {
        0 => 0,
        remainder => alignment - remainder,
    }
}

View File

@@ -5,6 +5,7 @@ use crate::{
regalloc::RegAlloc,
stack::{Stack, Val},
};
use std::ops::RangeBounds;
/// The code generation context.
/// The code generation context is made up of three
@@ -60,58 +61,66 @@ impl<'a> CodeGenContext<'a> {
self.regalloc.free_gpr(reg);
}
/// Loads the stack top value into a register, if it isn't already one;
/// spilling if there are no registers available.
pub fn pop_to_reg<M: MacroAssembler>(&mut self, masm: &mut M, size: OperandSize) -> Reg {
if let Some(reg) = self.stack.pop_reg() {
return reg;
}
let dst = self.any_gpr(masm);
let val = self.stack.pop().expect("a value at stack top");
Self::move_val_to_reg(val, dst, masm, self.frame, size);
dst
}
/// Checks if the stack top contains the given register. The register
/// gets allocated otherwise, potentially causing a spill.
/// Once the requested register is allocated, the value at the top of the stack
/// gets loaded into the register.
pub fn pop_to_named_reg<M: MacroAssembler>(
/// Loads the stack top value into the next available register, if
/// it isn't already one; spilling if there are no registers
/// available. Optionally the caller may specify a specific
/// destination register.
pub fn pop_to_reg<M: MacroAssembler>(
&mut self,
masm: &mut M,
named: Reg,
named: Option<Reg>,
size: OperandSize,
) -> Reg {
if let Some(reg) = self.stack.pop_named_reg(named) {
return reg;
let (in_stack, dst) = if let Some(dst) = named {
self.stack
.pop_named_reg(dst)
.map(|reg| (true, reg))
.unwrap_or_else(|| (false, self.gpr(dst, masm)))
} else {
self.stack
.pop_reg()
.map(|reg| (true, reg))
.unwrap_or_else(|| (false, self.any_gpr(masm)))
};
if in_stack {
return dst;
}
let dst = self.gpr(named, masm);
let val = self.stack.pop().expect("a value at stack top");
Self::move_val_to_reg(val, dst, masm, self.frame, size);
if val.is_mem() {
masm.pop(dst);
} else {
self.move_val_to_reg(&val, dst, masm, size);
}
dst
}
fn move_val_to_reg<M: MacroAssembler>(
src: Val,
/// Move a stack value to the given register.
pub fn move_val_to_reg<M: MacroAssembler>(
&self,
src: &Val,
dst: Reg,
masm: &mut M,
frame: &Frame,
size: OperandSize,
) {
match src {
Val::Reg(src) => masm.mov(RegImm::reg(src), RegImm::reg(dst), size),
Val::I32(imm) => masm.mov(RegImm::imm(imm.into()), RegImm::reg(dst), size),
Val::I64(imm) => masm.mov(RegImm::imm(imm), RegImm::reg(dst), size),
Val::Reg(src) => masm.mov(RegImm::reg(*src), RegImm::reg(dst), size),
Val::I32(imm) => masm.mov(RegImm::imm((*imm).into()), RegImm::reg(dst), size),
Val::I64(imm) => masm.mov(RegImm::imm(*imm), RegImm::reg(dst), size),
Val::Local(index) => {
let slot = frame
.get_local(index)
.expect(&format!("valid locat at index = {}", index));
let slot = self
.frame
.get_local(*index)
.unwrap_or_else(|| panic!("valid local at index = {}", index));
let addr = masm.local_address(&slot);
masm.load(addr, dst, slot.ty.into());
}
v => panic!("Unsupported value {:?}", v),
Val::Memory(offset) => {
let addr = masm.address_from_sp(*offset);
masm.load(addr, dst, size);
}
};
}
@@ -128,7 +137,7 @@ impl<'a> CodeGenContext<'a> {
.stack
.pop_i32_const()
.expect("i32 const value at stack top");
let reg = self.pop_to_reg(masm, OperandSize::S32);
let reg = self.pop_to_reg(masm, None, OperandSize::S32);
emit(
masm,
RegImm::reg(reg),
@@ -137,8 +146,8 @@ impl<'a> CodeGenContext<'a> {
);
self.stack.push(Val::reg(reg));
} else {
let src = self.pop_to_reg(masm, OperandSize::S32);
let dst = self.pop_to_reg(masm, OperandSize::S32);
let src = self.pop_to_reg(masm, None, OperandSize::S32);
let dst = self.pop_to_reg(masm, None, OperandSize::S32);
emit(masm, dst.into(), src.into(), OperandSize::S32);
self.regalloc.free_gpr(src);
self.stack.push(Val::reg(dst));
@@ -157,18 +166,64 @@ impl<'a> CodeGenContext<'a> {
.stack
.pop_i64_const()
.expect("i64 const value at stack top");
let reg = self.pop_to_reg(masm, OperandSize::S64);
let reg = self.pop_to_reg(masm, None, OperandSize::S64);
emit(masm, RegImm::reg(reg), RegImm::imm(val), OperandSize::S64);
self.stack.push(Val::reg(reg));
} else {
let src = self.pop_to_reg(masm, OperandSize::S64);
let dst = self.pop_to_reg(masm, OperandSize::S64);
let src = self.pop_to_reg(masm, None, OperandSize::S64);
let dst = self.pop_to_reg(masm, None, OperandSize::S64);
emit(masm, dst.into(), src.into(), OperandSize::S64);
self.regalloc.free_gpr(src);
self.stack.push(Val::reg(dst));
}
}
/// Saves any live registers in the value stack in a particular
/// range defined by the caller. This is a specialization of the
/// spill function; made available for cases in which spilling
/// locals is not required, like for example for function calls in
/// which locals are not reachable by the callee. It also tracks
/// down the number of memory values in the given range.
///
/// Returns the number of spilled registers and the number of
/// memory values in the given range of the value stack.
pub fn spill_regs_and_count_memory_in<M, R>(&mut self, masm: &mut M, range: R) -> (u32, u32)
where
R: RangeBounds<usize>,
M: MacroAssembler,
{
let mut spilled: u32 = 0;
let mut memory_values = 0;
for i in self.stack.inner_mut().range_mut(range) {
if i.is_reg() {
// Save the live register to the machine stack, replace
// the value stack entry with the resulting memory
// offset, and make the register allocatable again.
let reg = i.get_reg();
let offset = masm.push(reg);
self.regalloc.free_gpr(reg);
*i = Val::Memory(offset);
spilled += 1;
} else if i.is_mem() {
memory_values += 1;
}
}
(spilled, memory_values)
}
/// Drops the last `n` elements of the stack, freeing any
/// registers located in that region.
///
/// # Panics
///
/// Panics if `last` is greater than the current length of the
/// value stack.
pub fn drop_last(&mut self, last: usize) {
let len = self.stack.len();
assert!(last <= len);
let truncate = self.stack.len() - last;
// Free any registers held by the dropped entries before
// truncating, so they become allocatable again.
self.stack.inner_mut().range(truncate..).for_each(|v| {
if v.is_reg() {
self.regalloc.free_gpr(v.get_reg());
}
});
self.stack.inner_mut().truncate(truncate);
}
/// Spill locals and registers to memory.
// TODO optimize the spill range;
//

View File

@@ -3,21 +3,21 @@ use crate::{
masm::{MacroAssembler, OperandSize},
};
use anyhow::Result;
use call::FnCall;
use wasmparser::{BinaryReader, FuncValidator, ValType, ValidatorResources, VisitOperator};
mod context;
pub(crate) use context::*;
mod env;
pub use env::*;
mod call;
/// The code generation abstraction.
pub(crate) struct CodeGen<'a, M>
pub(crate) struct CodeGen<'a, A, M>
where
M: MacroAssembler,
A: ABI,
{
/// The word size in bytes, extracted from the current ABI.
word_size: u32,
/// The ABI-specific representation of the function signature, excluding results.
sig: ABISig,
@@ -26,18 +26,32 @@ where
/// The MacroAssembler.
pub masm: &'a mut M,
/// A reference to the function compilation environment.
pub env: &'a dyn env::FuncEnv,
/// A reference to the current ABI.
pub abi: &'a A,
}
impl<'a, M> CodeGen<'a, M>
impl<'a, A, M> CodeGen<'a, A, M>
where
M: MacroAssembler,
A: ABI,
{
pub fn new<A: ABI>(masm: &'a mut M, context: CodeGenContext<'a>, sig: ABISig) -> Self {
pub fn new(
masm: &'a mut M,
abi: &'a A,
context: CodeGenContext<'a>,
env: &'a dyn FuncEnv,
sig: ABISig,
) -> Self {
Self {
word_size: <A as ABI>::word_bytes(),
sig,
context,
masm,
abi,
env,
}
}
@@ -70,7 +84,7 @@ where
let defined_locals_range = &self.context.frame.defined_locals_range;
self.masm.zero_mem_range(
defined_locals_range.as_range(),
self.word_size,
<A as ABI>::word_bytes(),
&mut self.context.regalloc,
);
@@ -105,8 +119,21 @@ where
}
}
// Emit the usual function end instruction sequence.
pub fn emit_end(&mut self) -> Result<()> {
/// Emit a direct function call.
///
/// Resolves the callee from the function environment, computes
/// its ABI signature and delegates setup and emission to
/// [`FnCall`].
///
/// # Panics
///
/// Panics (`unreachable!`) if the callee is an imported
/// function; only locally defined functions are supported for
/// now.
pub fn emit_call(&mut self, index: u32) {
let callee = self.env.callee_from_index(index);
if callee.import {
// TODO: Only locally defined functions for now.
unreachable!()
}
let sig = self.abi.sig(&callee.ty);
let fncall = FnCall::new(self.abi, &sig, &mut self.context, self.masm);
fncall.emit::<M, A>(self.masm, &mut self.context, index);
}
/// Emit the usual function end instruction sequence.
fn emit_end(&mut self) -> Result<()> {
self.handle_abi_result();
self.masm.epilogue(self.context.frame.locals_size);
Ok(())
@@ -149,7 +176,7 @@ where
let named_reg = self.sig.result.result_reg();
let reg = self
.context
.pop_to_named_reg(self.masm, named_reg, OperandSize::S64);
.pop_to_reg(self.masm, Some(named_reg), OperandSize::S64);
self.context.regalloc.free_gpr(reg);
}
}

View File

@@ -47,6 +47,10 @@ impl ABI for Aarch64ABI {
8
}
fn call_stack_align(&self) -> u8 {
16
}
fn arg_base_offset(&self) -> u8 {
16
}
@@ -74,7 +78,7 @@ impl ABI for Aarch64ABI {
let reg = regs::xreg(0);
let result = ABIResult::reg(ty, reg);
ABISig { params, result }
ABISig::new(params, result, stack_offset)
}
fn scratch_reg() -> Reg {

View File

@@ -96,6 +96,10 @@ impl Masm for MacroAssembler {
self.increment_sp(bytes);
}
fn free_stack(&mut self, _bytes: u32) {
todo!()
}
fn local_address(&mut self, local: &LocalSlot) -> Address {
let (reg, offset) = local
.addressed_from_sp()
@@ -111,6 +115,14 @@ impl Masm for MacroAssembler {
Address::offset(reg, offset as i64)
}
fn address_from_sp(&self, _offset: u32) -> Self::Address {
todo!()
}
fn address_at_sp(&self, _offset: u32) -> Self::Address {
todo!()
}
fn store(&mut self, src: RegImm, dst: Address, size: OperandSize) {
let src = match src {
RegImm::Imm(imm) => {
@@ -124,10 +136,18 @@ impl Masm for MacroAssembler {
self.asm.str(src, dst, size);
}
fn call(&mut self, _callee: u32) {
todo!()
}
fn load(&mut self, src: Address, dst: Reg, size: OperandSize) {
self.asm.ldr(src, dst, size);
}
fn pop(&mut self, _dst: Reg) {
todo!()
}
fn sp_offset(&mut self) -> u32 {
self.sp_offset
}

View File

@@ -8,6 +8,7 @@ use crate::{
regalloc::RegAlloc,
regset::RegSet,
stack::Stack,
FuncEnv,
};
use anyhow::Result;
use cranelift_codegen::settings::{self, Flags};
@@ -84,6 +85,7 @@ impl TargetIsa for Aarch64 {
&self,
sig: &FuncType,
body: &FunctionBody,
env: &dyn FuncEnv,
mut validator: FuncValidator<ValidatorResources>,
) -> Result<MachBufferFinalized<Final>> {
let mut body = body.get_binary_reader();
@@ -95,7 +97,7 @@ impl TargetIsa for Aarch64 {
// TODO: Add floating point bitmask
let regalloc = RegAlloc::new(RegSet::new(ALL_GPR, 0), scratch());
let codegen_context = CodeGenContext::new(regalloc, stack, &frame);
let mut codegen = CodeGen::new::<abi::Aarch64ABI>(&mut masm, codegen_context, abi_sig);
let mut codegen = CodeGen::new(&mut masm, &abi, codegen_context, env, abi_sig);
codegen.emit(&mut body, validator)?;
Ok(masm.finalize())

View File

@@ -10,6 +10,8 @@ use std::{
use target_lexicon::{Architecture, Triple};
use wasmparser::{FuncType, FuncValidator, FunctionBody, ValidatorResources};
use crate::FuncEnv;
#[cfg(feature = "x64")]
pub(crate) mod x64;
@@ -92,6 +94,7 @@ pub trait TargetIsa: Send + Sync {
&self,
sig: &FuncType,
body: &FunctionBody,
env: &dyn FuncEnv,
validator: FuncValidator<ValidatorResources>,
) -> Result<MachBufferFinalized<Final>>;

View File

@@ -39,6 +39,10 @@ impl ABI for X64ABI {
8
}
fn call_stack_align(&self) -> u8 {
16
}
fn arg_base_offset(&self) -> u8 {
// Two 8-byte slots, one for the return address and another
// one for the frame pointer.
@@ -75,7 +79,7 @@ impl ABI for X64ABI {
let reg = regs::rax();
let result = ABIResult::reg(ty, reg);
ABISig { params, result }
ABISig::new(params, result, stack_offset)
}
fn scratch_reg() -> Reg {

View File

@@ -5,18 +5,21 @@ use crate::{
masm::{DivKind, OperandSize, RemKind},
};
use cranelift_codegen::{
entity::EntityRef,
ir::TrapCode,
ir::{ExternalName, Opcode, UserExternalNameRef},
isa::x64::{
args::{
self, AluRmiROpcode, Amode, CmpOpcode, DivSignedness, ExtMode, FromWritableReg, Gpr,
GprMem, GprMemImm, RegMem, RegMemImm, SyntheticAmode, WritableGpr, CC,
},
settings as x64_settings, EmitInfo, EmitState, Inst,
settings as x64_settings, CallInfo, EmitInfo, EmitState, Inst,
},
settings, Final, MachBuffer, MachBufferFinalized, MachInstEmit, Writable,
};
use super::{address::Address, regs};
use smallvec::smallvec;
/// A x64 instruction operand.
#[derive(Debug, Copy, Clone)]
@@ -465,4 +468,18 @@ impl Assembler {
dst: dst.into(),
});
}
/// Direct function call to a user defined function.
///
/// The callee's function index is used directly as the user
/// external name reference. The call's register uses, defs and
/// clobbers are left empty, consistent with Winch's internal ABI
/// in which all registers are treated as caller-saved.
pub fn call(&mut self, callee: u32) {
let dest = ExternalName::user(UserExternalNameRef::new(callee as usize));
self.emit(Inst::CallKnown {
dest,
info: Box::new(CallInfo {
uses: smallvec![],
defs: smallvec![],
clobbers: Default::default(),
opcode: Opcode::Call,
}),
});
}
}

View File

@@ -70,6 +70,14 @@ impl Masm for MacroAssembler {
self.increment_sp(bytes);
}
fn free_stack(&mut self, bytes: u32) {
// Avoid emitting a no-op addition when there's nothing to free.
if bytes == 0 {
return;
}
// Releasing stack space on x64 is an addition to the stack
// pointer.
self.asm.add_ir(bytes as i32, rsp(), OperandSize::S64);
self.decrement_sp(bytes);
}
fn local_address(&mut self, local: &LocalSlot) -> Address {
let (reg, offset) = local
.addressed_from_sp()
@@ -85,6 +93,14 @@ impl Masm for MacroAssembler {
Address::offset(reg, offset)
}
fn address_from_sp(&self, offset: u32) -> Self::Address {
// The given offset is relative to the tracked SP offset;
// convert it to an offset from the current position of the
// stack pointer.
Address::offset(regs::rsp(), self.sp_offset - offset)
}
fn address_at_sp(&self, offset: u32) -> Self::Address {
// The given offset is absolute to the current position of the
// stack pointer.
Address::offset(regs::rsp(), offset)
}
fn store(&mut self, src: RegImm, dst: Address, size: OperandSize) {
let src: Operand = src.into();
let dst: Operand = dst.into();
@@ -92,6 +108,16 @@ impl Masm for MacroAssembler {
self.asm.mov(src, dst, size);
}
fn pop(&mut self, dst: Reg) {
self.asm.pop_r(dst);
// Similar to the comment in `push`, we assume 8 bytes per pop.
self.decrement_sp(8);
}
fn call(&mut self, callee: u32) {
self.asm.call(callee);
}
fn load(&mut self, src: Address, dst: Reg, size: OperandSize) {
let src = src.into();
let dst = dst.into();
@@ -158,12 +184,12 @@ impl Masm for MacroAssembler {
let rax = context.gpr(regs::rax(), self);
// Allocate the divisor, which can be any gpr.
let divisor = context.pop_to_reg(self, size);
let divisor = context.pop_to_reg(self, None, size);
// Mark rax as allocatable.
context.regalloc.free_gpr(rax);
// Move the top value to rax.
let rax = context.pop_to_named_reg(self, rax, size);
let rax = context.pop_to_reg(self, Some(rax), size);
self.asm.div(divisor, (rax, rdx), kind, size);
// Free the divisor and rdx.
@@ -180,12 +206,12 @@ impl Masm for MacroAssembler {
let rax = context.gpr(regs::rax(), self);
// Allocate the divisor, which can be any gpr.
let divisor = context.pop_to_reg(self, size);
let divisor = context.pop_to_reg(self, None, size);
// Mark rax as allocatable.
context.regalloc.free_gpr(rax);
// Move the top value to rax.
let rax = context.pop_to_named_reg(self, rax, size);
let rax = context.pop_to_reg(self, Some(rax), size);
self.asm.rem(divisor, (rax, rdx), kind, size);
// Free the divisor and rax.
@@ -225,7 +251,6 @@ impl MacroAssembler {
self.sp_offset += bytes;
}
#[allow(dead_code)]
fn decrement_sp(&mut self, bytes: u32) {
assert!(
self.sp_offset >= bytes,

View File

@@ -5,6 +5,7 @@ use crate::isa::x64::masm::MacroAssembler as X64Masm;
use crate::masm::MacroAssembler;
use crate::regalloc::RegAlloc;
use crate::stack::Stack;
use crate::FuncEnv;
use crate::{
isa::{Builder, TargetIsa},
regset::RegSet,
@@ -85,6 +86,7 @@ impl TargetIsa for X64 {
&self,
sig: &FuncType,
body: &FunctionBody,
env: &dyn FuncEnv,
mut validator: FuncValidator<ValidatorResources>,
) -> Result<MachBufferFinalized<Final>> {
let mut body = body.get_binary_reader();
@@ -96,7 +98,7 @@ impl TargetIsa for X64 {
// TODO Add in floating point bitmask
let regalloc = RegAlloc::new(RegSet::new(ALL_GPR, 0), regs::scratch());
let codegen_context = CodeGenContext::new(regalloc, stack, &frame);
let mut codegen = CodeGen::new::<abi::X64ABI>(&mut masm, codegen_context, abi_sig);
let mut codegen = CodeGen::new(&mut masm, &abi, codegen_context, env, abi_sig);
codegen.emit(&mut body, validator)?;

View File

@@ -22,7 +22,7 @@ pub(crate) enum RemKind {
}
/// Operand size, in bits.
#[derive(Copy, Clone, Eq, PartialEq)]
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum OperandSize {
/// 32 bits.
S32,
@@ -87,9 +87,24 @@ pub(crate) trait MacroAssembler {
/// Reserve stack space.
fn reserve_stack(&mut self, bytes: u32);
/// Free stack space.
fn free_stack(&mut self, bytes: u32);
/// Get the address of a local slot.
fn local_address(&mut self, local: &LocalSlot) -> Self::Address;
/// Constructs an address with an offset that is relative to the
/// current position of the stack pointer (e.g. [sp + (sp_offset -
/// offset)].
fn address_from_sp(&self, offset: u32) -> Self::Address;
/// Constructs an address with an offset that is absolute to the
/// current position of the stack pointer (e.g. [sp + offset].
fn address_at_sp(&self, offset: u32) -> Self::Address;
/// Emit a function call to a locally defined function.
fn call(&mut self, callee: u32);
/// Get stack pointer offset.
fn sp_offset(&mut self) -> u32;
@@ -99,6 +114,9 @@ pub(crate) trait MacroAssembler {
/// Perform a stack load.
fn load(&mut self, src: Self::Address, dst: Reg, size: OperandSize);
/// Pop a value from the machine stack into the given register.
fn pop(&mut self, dst: Reg);
/// Perform a move.
fn mov(&mut self, src: RegImm, dst: RegImm, size: OperandSize);

View File

@@ -35,6 +35,11 @@ impl RegAlloc {
})
}
/// Checks if a general purpose register is available.
pub fn gpr_available(&self, reg: Reg) -> bool {
self.regset.named_gpr_available(reg.hw_enc() as u32)
}
/// Request a specific general purpose register,
/// spilling if not available.
pub fn gpr<F>(&mut self, named: Reg, spill: &mut F) -> Reg

View File

@@ -38,7 +38,9 @@ impl RegSet {
self.gpr |= 1 << index;
}
fn named_gpr_available(&self, index: u32) -> bool {
/// Returns true if the given general purpose register
/// is available.
pub fn named_gpr_available(&self, index: u32) -> bool {
let index = 1 << index;
(!self.gpr & index) == 0
}

View File

@@ -45,6 +45,14 @@ impl Val {
}
}
/// Check whether the value is a memory offset.
pub fn is_mem(&self) -> bool {
match *self {
Self::Memory(_) => true,
_ => false,
}
}
/// Get the register representation of the value.
///
/// # Panics
@@ -109,6 +117,11 @@ impl Stack {
}
}
/// Get the length of the stack.
pub fn len(&self) -> usize {
self.inner.len()
}
/// Push a value to the stack.
pub fn push(&mut self, val: Val) {
self.inner.push_back(val);
@@ -119,6 +132,16 @@ impl Stack {
self.inner.back()
}
/// Returns an iterator referencing the last n items of the stack,
/// in bottom-most to top-most order.
///
/// # Panics
///
/// Panics if `n` is greater than the length of the stack.
pub fn peekn(&self, n: usize) -> impl Iterator<Item = &Val> + '_ {
let len = self.len();
assert!(n <= len);
let partition = len - n;
self.inner.range(partition..)
}
/// Pops the top element of the stack, if any.
pub fn pop(&mut self) -> Option<Val> {
self.inner.pop_back()

View File

@@ -4,6 +4,7 @@
//! which validates and dispatches to the corresponding
//! machine code emitter.
use crate::abi::ABI;
use crate::codegen::CodeGen;
use crate::masm::{DivKind, MacroAssembler, OperandSize, RegImm, RemKind};
use crate::stack::Val;
@@ -49,14 +50,16 @@ macro_rules! def_unsupported {
(emit I64Sub $($rest:tt)*) => {};
(emit LocalGet $($rest:tt)*) => {};
(emit LocalSet $($rest:tt)*) => {};
(emit Call $($rest:tt)*) => {};
(emit End $($rest:tt)*) => {};
(emit $unsupported:tt $($rest:tt)*) => {$($rest)*};
}
impl<'a, M> VisitOperator<'a> for CodeGen<'a, M>
impl<'a, A, M> VisitOperator<'a> for CodeGen<'a, A, M>
where
M: MacroAssembler,
A: ABI,
{
type Output = ();
@@ -188,12 +191,16 @@ where
.get_local(index)
.expect(&format!("vald local at slot = {}", index));
let size: OperandSize = slot.ty.into();
let src = self.context.pop_to_reg(self.masm, size);
let src = self.context.pop_to_reg(self.masm, None, size);
let addr = self.masm.local_address(&slot);
self.masm.store(RegImm::reg(src), addr, size);
self.context.regalloc.free_gpr(src);
}
fn visit_call(&mut self, index: u32) {
    // Delegate to the code generator, which handles the full
    // call sequence (argument setup, spilling, stack cleanup)
    // for the function at `index`.
    self.emit_call(index);
}
wasmparser::for_each_operator!(def_unsupported);
}

View File

@@ -0,0 +1,121 @@
;;! target = "x86_64"
(module
(func (export "main") (param i32) (param i32) (result i32)
(local.get 1)
(local.get 0)
(i32.div_u)
(call $add (i32.const 1) (i32.const 2) (i32.const 3) (i32.const 4) (i32.const 5) (i32.const 6) (i32.const 7) (i32.const 8))
(local.get 1)
(local.get 0)
(i32.div_u)
(call $add (i32.const 2) (i32.const 3) (i32.const 4) (i32.const 5) (i32.const 6) (i32.const 7) (i32.const 8))
)
(func $add (param i32 i32 i32 i32 i32 i32 i32 i32 i32) (result i32)
(local.get 0)
(local.get 1)
(i32.add)
(local.get 2)
(i32.add)
(local.get 3)
(i32.add)
(local.get 4)
(i32.add)
(local.get 5)
(i32.add)
(local.get 6)
(i32.add)
(local.get 7)
(i32.add)
(local.get 8)
(i32.add)
)
)
;; 0: 55 push rbp
;; 1: 4889e5 mov rbp, rsp
;; 4: 4883ec08 sub rsp, 8
;; 8: 897c2404 mov dword ptr [rsp + 4], edi
;; c: 893424 mov dword ptr [rsp], esi
;; f: 8b4c2404 mov ecx, dword ptr [rsp + 4]
;; 13: 8b0424 mov eax, dword ptr [rsp]
;; 16: 31d2 xor edx, edx
;; 18: f7f1 div ecx
;; 1a: 50 push rax
;; 1b: 4883ec20 sub rsp, 0x20
;; 1f: 8b7c2420 mov edi, dword ptr [rsp + 0x20]
;; 23: be01000000 mov esi, 1
;; 28: ba02000000 mov edx, 2
;; 2d: b903000000 mov ecx, 3
;; 32: 41b804000000 mov r8d, 4
;; 38: 41b905000000 mov r9d, 5
;; 3e: 41bb06000000 mov r11d, 6
;; 44: 44891c24 mov dword ptr [rsp], r11d
;; 48: 41bb07000000 mov r11d, 7
;; 4e: 44895c2408 mov dword ptr [rsp + 8], r11d
;; 53: 41bb08000000 mov r11d, 8
;; 59: 44895c2410 mov dword ptr [rsp + 0x10], r11d
;; 5e: e800000000 call 0x63
;; 63: 4883c428 add rsp, 0x28
;; 67: 50 push rax
;; 68: 448b5c2408 mov r11d, dword ptr [rsp + 8]
;; 6d: 4153 push r11
;; 6f: 448b5c2414 mov r11d, dword ptr [rsp + 0x14]
;; 74: 4153 push r11
;; 76: 59 pop rcx
;; 77: 58 pop rax
;; 78: 31d2 xor edx, edx
;; 7a: f7f1 div ecx
;; 7c: 50 push rax
;; 7d: 4883ec20 sub rsp, 0x20
;; 81: 8b7c2428 mov edi, dword ptr [rsp + 0x28]
;; 85: 8b742420 mov esi, dword ptr [rsp + 0x20]
;; 89: ba02000000 mov edx, 2
;; 8e: b903000000 mov ecx, 3
;; 93: 41b804000000 mov r8d, 4
;; 99: 41b905000000 mov r9d, 5
;; 9f: 41bb06000000 mov r11d, 6
;; a5: 44891c24 mov dword ptr [rsp], r11d
;; a9: 41bb07000000 mov r11d, 7
;; af: 44895c2408 mov dword ptr [rsp + 8], r11d
;; b4: 41bb08000000 mov r11d, 8
;; ba: 44895c2410 mov dword ptr [rsp + 0x10], r11d
;; bf: e800000000 call 0xc4
;; c4: 4883c430 add rsp, 0x30
;; c8: 4883c408 add rsp, 8
;; cc: 5d pop rbp
;; cd: c3 ret
;;
;; 0: 55 push rbp
;; 1: 4889e5 mov rbp, rsp
;; 4: 4883ec18 sub rsp, 0x18
;; 8: 897c2414 mov dword ptr [rsp + 0x14], edi
;; c: 89742410 mov dword ptr [rsp + 0x10], esi
;; 10: 8954240c mov dword ptr [rsp + 0xc], edx
;; 14: 894c2408 mov dword ptr [rsp + 8], ecx
;; 18: 4489442404 mov dword ptr [rsp + 4], r8d
;; 1d: 44890c24 mov dword ptr [rsp], r9d
;; 21: 8b442410 mov eax, dword ptr [rsp + 0x10]
;; 25: 8b4c2414 mov ecx, dword ptr [rsp + 0x14]
;; 29: 01c1 add ecx, eax
;; 2b: 8b44240c mov eax, dword ptr [rsp + 0xc]
;; 2f: 01c1 add ecx, eax
;; 31: 8b442408 mov eax, dword ptr [rsp + 8]
;; 35: 01c1 add ecx, eax
;; 37: 8b442404 mov eax, dword ptr [rsp + 4]
;; 3b: 01c1 add ecx, eax
;; 3d: 8b0424 mov eax, dword ptr [rsp]
;; 40: 01c1 add ecx, eax
;; 42: 8b4510 mov eax, dword ptr [rbp + 0x10]
;; 45: 01c1 add ecx, eax
;; 47: 8b4518 mov eax, dword ptr [rbp + 0x18]
;; 4a: 01c1 add ecx, eax
;; 4c: 8b4520 mov eax, dword ptr [rbp + 0x20]
;; 4f: 01c1 add ecx, eax
;; 51: 4889c8 mov rax, rcx
;; 54: 4883c418 add rsp, 0x18
;; 58: 5d pop rbp
;; 59: c3 ret

View File

@@ -0,0 +1,49 @@
;;! target = "x86_64"
(module
(func $main (result i32)
(local $var i32)
(call $product (i32.const 20) (i32.const 80))
(local.set $var (i32.const 2))
(local.get $var)
(i32.div_u))
(func $product (param i32 i32) (result i32)
(local.get 0)
(local.get 1)
(i32.mul))
)
;; 0: 55 push rbp
;; 1: 4889e5 mov rbp, rsp
;; 4: 4883ec08 sub rsp, 8
;; 8: 48c7042400000000 mov qword ptr [rsp], 0
;; 10: 4883ec10 sub rsp, 0x10
;; 14: bf14000000 mov edi, 0x14
;; 19: be50000000 mov esi, 0x50
;; 1e: e800000000 call 0x23
;; 23: 4883c410 add rsp, 0x10
;; 27: b902000000 mov ecx, 2
;; 2c: 894c2404 mov dword ptr [rsp + 4], ecx
;; 30: 50 push rax
;; 31: 448b5c240c mov r11d, dword ptr [rsp + 0xc]
;; 36: 4153 push r11
;; 38: 59 pop rcx
;; 39: 58 pop rax
;; 3a: 31d2 xor edx, edx
;; 3c: f7f1 div ecx
;; 3e: 4883c408 add rsp, 8
;; 42: 5d pop rbp
;; 43: c3 ret
;;
;; 0: 55 push rbp
;; 1: 4889e5 mov rbp, rsp
;; 4: 4883ec08 sub rsp, 8
;; 8: 897c2404 mov dword ptr [rsp + 4], edi
;; c: 893424 mov dword ptr [rsp], esi
;; f: 8b0424 mov eax, dword ptr [rsp]
;; 12: 8b4c2404 mov ecx, dword ptr [rsp + 4]
;; 16: 0fafc8 imul ecx, eax
;; 19: 4889c8 mov rax, rcx
;; 1c: 4883c408 add rsp, 8
;; 20: 5d pop rbp
;; 21: c3 ret

View File

@@ -155,7 +155,7 @@ mod test {
let buffer = env
.isa
.compile_function(&sig, &body, validator)
.compile_function(&sig, &body, env, validator)
.expect("Couldn't compile function");
disasm(buffer.data(), env.isa).unwrap()

View File

@@ -60,7 +60,7 @@ fn compile(env: &FuncEnv, f: (DefinedFuncIndex, FunctionBodyData<'_>)) -> Result
let validator = validator.into_validator(Default::default());
let buffer = env
.isa
.compile_function(&sig, &body, validator)
.compile_function(&sig, &body, env, validator)
.expect("Couldn't compile function");
println!("Disassembly for function: {}", index.as_u32());