winch(x64): Initial implementation for function calls (#6067)

* winch(x64): Initial implementation for function calls

This change adds the main building blocks for calling locally defined
functions. Support for function imports will be added iteratively after this
change lands and once trampolines are supported.

To support function calls, this change introduces the following functionality to
the MacroAssembler:

* `pop` to pop the machine stack into a given register, which in the case of
this change, translates to the x64 pop instruction.

* `call` to emit a call to locally defined functions.

* `address_from_sp` to construct memory addresses with the SP as a base.

* `free_stack` to emit the necessary instructions to reclaim stack space.

The heavy lifting of setting up and emitting the function call is done through
the implementation of `FnCall`.

* Fix spill behaviour in function calls and add more documentation

This commit adds more detailed documentation to the `call.rs` module.

It also fixes a couple of bugs, mainly:

* The previous commit didn't account for memory addresses used as arguments for
the function call. Any memory entry in the value stack used as a function
argument should be tracked and then used to reclaim that memory when the function
call ends. We could `pop` and do this implicitly, but we can also track this
and emit a single instruction to decrement the stack pointer, which will
result in better code.

* Introduce a differentiator between addresses relative or absolute to the stack
pointer. When passing arguments in the stack -- assuming that SP at that point
is aligned for the function call -- we should store the arguments relative to
the absolute position of the stack pointer and when addressing a memory entry in
the Wasm value stack, we should use an address relative to the offset and the
position of the stack pointer.

* Simplify tracking of the stack space needed for emitting a function call
This commit is contained in:
Saúl Cabrera
2023-03-28 14:30:31 -04:00
committed by GitHub
parent d54c00ba4d
commit af4d94c85a
22 changed files with 737 additions and 68 deletions

View File

@@ -47,6 +47,10 @@ impl ABI for Aarch64ABI {
8
}
/// Returns the call-stack alignment reported by this ABI: the constant 16.
// NOTE(review): presumably a byte count — confirm against the `ABI` trait docs.
fn call_stack_align(&self) -> u8 {
16
}
/// Returns the argument base offset reported by this ABI: the constant 16.
// NOTE(review): presumably the bytes between the frame base and the first
// stack argument — confirm against the `ABI` trait docs.
fn arg_base_offset(&self) -> u8 {
16
}
@@ -74,7 +78,7 @@ impl ABI for Aarch64ABI {
let reg = regs::xreg(0);
let result = ABIResult::reg(ty, reg);
ABISig { params, result }
ABISig::new(params, result, stack_offset)
}
fn scratch_reg() -> Reg {

View File

@@ -96,6 +96,10 @@ impl Masm for MacroAssembler {
self.increment_sp(bytes);
}
/// Not yet implemented in this `Masm` implementation: calling it panics
/// through `todo!()`. The `_bytes` argument is currently unused.
fn free_stack(&mut self, _bytes: u32) {
todo!()
}
fn local_address(&mut self, local: &LocalSlot) -> Address {
let (reg, offset) = local
.addressed_from_sp()
@@ -111,6 +115,14 @@ impl Masm for MacroAssembler {
Address::offset(reg, offset as i64)
}
/// Not yet implemented in this `Masm` implementation: calling it panics
/// through `todo!()`. The `_offset` argument is currently unused.
fn address_from_sp(&self, _offset: u32) -> Self::Address {
todo!()
}
/// Not yet implemented in this `Masm` implementation: calling it panics
/// through `todo!()`. The `_offset` argument is currently unused.
fn address_at_sp(&self, _offset: u32) -> Self::Address {
todo!()
}
fn store(&mut self, src: RegImm, dst: Address, size: OperandSize) {
let src = match src {
RegImm::Imm(imm) => {
@@ -124,10 +136,18 @@ impl Masm for MacroAssembler {
self.asm.str(src, dst, size);
}
/// Not yet implemented in this `Masm` implementation: calling it panics
/// through `todo!()`. The `_callee` argument is currently unused.
fn call(&mut self, _callee: u32) {
todo!()
}
/// Forwards to the assembler's `ldr` with the given source address,
/// destination register and operand size.
fn load(&mut self, src: Address, dst: Reg, size: OperandSize) {
self.asm.ldr(src, dst, size);
}
/// Not yet implemented in this `Masm` implementation: calling it panics
/// through `todo!()`. The `_dst` argument is currently unused.
fn pop(&mut self, _dst: Reg) {
todo!()
}
/// Returns the current value of the `sp_offset` field.
fn sp_offset(&mut self) -> u32 {
self.sp_offset
}

View File

@@ -8,6 +8,7 @@ use crate::{
regalloc::RegAlloc,
regset::RegSet,
stack::Stack,
FuncEnv,
};
use anyhow::Result;
use cranelift_codegen::settings::{self, Flags};
@@ -84,6 +85,7 @@ impl TargetIsa for Aarch64 {
&self,
sig: &FuncType,
body: &FunctionBody,
env: &dyn FuncEnv,
mut validator: FuncValidator<ValidatorResources>,
) -> Result<MachBufferFinalized<Final>> {
let mut body = body.get_binary_reader();
@@ -95,7 +97,7 @@ impl TargetIsa for Aarch64 {
// TODO: Add floating point bitmask
let regalloc = RegAlloc::new(RegSet::new(ALL_GPR, 0), scratch());
let codegen_context = CodeGenContext::new(regalloc, stack, &frame);
let mut codegen = CodeGen::new::<abi::Aarch64ABI>(&mut masm, codegen_context, abi_sig);
let mut codegen = CodeGen::new(&mut masm, &abi, codegen_context, env, abi_sig);
codegen.emit(&mut body, validator)?;
Ok(masm.finalize())

View File

@@ -10,6 +10,8 @@ use std::{
use target_lexicon::{Architecture, Triple};
use wasmparser::{FuncType, FuncValidator, FunctionBody, ValidatorResources};
use crate::FuncEnv;
#[cfg(feature = "x64")]
pub(crate) mod x64;
@@ -92,6 +94,7 @@ pub trait TargetIsa: Send + Sync {
&self,
sig: &FuncType,
body: &FunctionBody,
env: &dyn FuncEnv,
validator: FuncValidator<ValidatorResources>,
) -> Result<MachBufferFinalized<Final>>;

View File

@@ -39,6 +39,10 @@ impl ABI for X64ABI {
8
}
/// Returns the call-stack alignment reported by this ABI: the constant 16.
// NOTE(review): presumably a byte count — confirm against the `ABI` trait docs.
fn call_stack_align(&self) -> u8 {
16
}
fn arg_base_offset(&self) -> u8 {
// Two 8-byte slots, one for the return address and another
// one for the frame pointer.
@@ -75,7 +79,7 @@ impl ABI for X64ABI {
let reg = regs::rax();
let result = ABIResult::reg(ty, reg);
ABISig { params, result }
ABISig::new(params, result, stack_offset)
}
fn scratch_reg() -> Reg {

View File

@@ -5,18 +5,21 @@ use crate::{
masm::{DivKind, OperandSize, RemKind},
};
use cranelift_codegen::{
entity::EntityRef,
ir::TrapCode,
ir::{ExternalName, Opcode, UserExternalNameRef},
isa::x64::{
args::{
self, AluRmiROpcode, Amode, CmpOpcode, DivSignedness, ExtMode, FromWritableReg, Gpr,
GprMem, GprMemImm, RegMem, RegMemImm, SyntheticAmode, WritableGpr, CC,
},
settings as x64_settings, EmitInfo, EmitState, Inst,
settings as x64_settings, CallInfo, EmitInfo, EmitState, Inst,
},
settings, Final, MachBuffer, MachBufferFinalized, MachInstEmit, Writable,
};
use super::{address::Address, regs};
use smallvec::smallvec;
/// A x64 instruction operand.
#[derive(Debug, Copy, Clone)]
@@ -465,4 +468,18 @@ impl Assembler {
dst: dst.into(),
});
}
/// Direct function call to a user defined function.
///
/// `callee` is wrapped in a `UserExternalNameRef` and becomes the
/// destination of an `Inst::CallKnown`. The accompanying `CallInfo` lists
/// no uses and no defs, takes the default clobber set, and is tagged with
/// `Opcode::Call`.
pub fn call(&mut self, callee: u32) {
    let name_ref = UserExternalNameRef::new(callee as usize);
    let dest = ExternalName::user(name_ref);
    let info = Box::new(CallInfo {
        uses: smallvec![],
        defs: smallvec![],
        clobbers: Default::default(),
        opcode: Opcode::Call,
    });
    self.emit(Inst::CallKnown { dest, info });
}
}

View File

@@ -70,6 +70,14 @@ impl Masm for MacroAssembler {
self.increment_sp(bytes);
}
/// Releases `bytes` of stack space.
///
/// Adds `bytes` to `rsp` as a 64-bit operation and lowers the tracked
/// stack-pointer offset by the same amount. Does nothing when `bytes` is 0.
fn free_stack(&mut self, bytes: u32) {
    if bytes != 0 {
        self.asm.add_ir(bytes as i32, rsp(), OperandSize::S64);
        self.decrement_sp(bytes);
    }
}
fn local_address(&mut self, local: &LocalSlot) -> Address {
let (reg, offset) = local
.addressed_from_sp()
@@ -85,6 +93,14 @@ impl Masm for MacroAssembler {
Address::offset(reg, offset)
}
/// Builds an `rsp`-based address whose displacement is the tracked
/// `sp_offset` minus `offset`.
fn address_from_sp(&self, offset: u32) -> Self::Address {
    let displacement = self.sp_offset - offset;
    Address::offset(regs::rsp(), displacement)
}
/// Builds an address with `rsp` as the base and `offset` used directly as
/// the displacement; the tracked `sp_offset` is not consulted.
fn address_at_sp(&self, offset: u32) -> Self::Address {
Address::offset(regs::rsp(), offset)
}
fn store(&mut self, src: RegImm, dst: Address, size: OperandSize) {
let src: Operand = src.into();
let dst: Operand = dst.into();
@@ -92,6 +108,16 @@ impl Masm for MacroAssembler {
self.asm.mov(src, dst, size);
}
/// Pops into `dst` through the assembler's `pop_r`, then lowers the tracked
/// stack-pointer offset by 8 to account for the popped slot.
fn pop(&mut self, dst: Reg) {
self.asm.pop_r(dst);
// Similar to the comment in `push`, we assume 8 bytes per pop.
self.decrement_sp(8);
}
/// Forwards to the assembler's `call` with the given `callee` index.
fn call(&mut self, callee: u32) {
self.asm.call(callee);
}
fn load(&mut self, src: Address, dst: Reg, size: OperandSize) {
let src = src.into();
let dst = dst.into();
@@ -158,12 +184,12 @@ impl Masm for MacroAssembler {
let rax = context.gpr(regs::rax(), self);
// Allocate the divisor, which can be any gpr.
let divisor = context.pop_to_reg(self, size);
let divisor = context.pop_to_reg(self, None, size);
// Mark rax as allocatable.
context.regalloc.free_gpr(rax);
// Move the top value to rax.
let rax = context.pop_to_named_reg(self, rax, size);
let rax = context.pop_to_reg(self, Some(rax), size);
self.asm.div(divisor, (rax, rdx), kind, size);
// Free the divisor and rdx.
@@ -180,12 +206,12 @@ impl Masm for MacroAssembler {
let rax = context.gpr(regs::rax(), self);
// Allocate the divisor, which can be any gpr.
let divisor = context.pop_to_reg(self, size);
let divisor = context.pop_to_reg(self, None, size);
// Mark rax as allocatable.
context.regalloc.free_gpr(rax);
// Move the top value to rax.
let rax = context.pop_to_named_reg(self, rax, size);
let rax = context.pop_to_reg(self, Some(rax), size);
self.asm.rem(divisor, (rax, rdx), kind, size);
// Free the divisor and rax.
@@ -225,7 +251,6 @@ impl MacroAssembler {
self.sp_offset += bytes;
}
#[allow(dead_code)]
fn decrement_sp(&mut self, bytes: u32) {
assert!(
self.sp_offset >= bytes,

View File

@@ -5,6 +5,7 @@ use crate::isa::x64::masm::MacroAssembler as X64Masm;
use crate::masm::MacroAssembler;
use crate::regalloc::RegAlloc;
use crate::stack::Stack;
use crate::FuncEnv;
use crate::{
isa::{Builder, TargetIsa},
regset::RegSet,
@@ -85,6 +86,7 @@ impl TargetIsa for X64 {
&self,
sig: &FuncType,
body: &FunctionBody,
env: &dyn FuncEnv,
mut validator: FuncValidator<ValidatorResources>,
) -> Result<MachBufferFinalized<Final>> {
let mut body = body.get_binary_reader();
@@ -96,7 +98,7 @@ impl TargetIsa for X64 {
// TODO Add in floating point bitmask
let regalloc = RegAlloc::new(RegSet::new(ALL_GPR, 0), regs::scratch());
let codegen_context = CodeGenContext::new(regalloc, stack, &frame);
let mut codegen = CodeGen::new::<abi::X64ABI>(&mut masm, codegen_context, abi_sig);
let mut codegen = CodeGen::new(&mut masm, &abi, codegen_context, env, abi_sig);
codegen.emit(&mut body, validator)?;