winch(x64): Improve ABI support in trampolines (#6204)

This commit improves ABI support in Winch's trampolines mainly by:

* Adding support for the `fastcall` calling convention.
* Storing/restoring callee-saved registers.

One of the explicit goals of this change is to make tests available in the x86_64 target
as a whole and remove the need to exclude the Windows target.

This commit also introduces a `CallingConvention` enum, to better
reflect the subset of calling conventions that are supported by Winch.
This commit is contained in:
Saúl Cabrera
2023-04-14 17:13:23 -04:00
committed by GitHub
parent 9e1ff9726c
commit 9dd0b59c2a
11 changed files with 340 additions and 89 deletions

View File

@@ -1,6 +1,6 @@
use super::regs;
use crate::abi::{ABIArg, ABIResult, ABISig, ABI};
use crate::isa::reg::Reg;
use crate::isa::{reg::Reg, CallingConvention};
use smallvec::SmallVec;
use wasmparser::{FuncType, ValType};
@@ -59,7 +59,9 @@ impl ABI for Aarch64ABI {
64
}
fn sig(&self, wasm_sig: &FuncType) -> ABISig {
fn sig(&self, wasm_sig: &FuncType, call_conv: &CallingConvention) -> ABISig {
assert!(call_conv.is_apple_aarch64() || call_conv.is_default());
if wasm_sig.results().len() > 1 {
panic!("multi-value not supported");
}
@@ -84,6 +86,10 @@ impl ABI for Aarch64ABI {
fn scratch_reg() -> Reg {
todo!()
}
/// Returns the callee-saved registers for this ABI.
///
/// The calling convention argument is currently ignored: the result is
/// whatever `regs::callee_saved()` reports.
fn callee_saved_regs(_call_conv: &CallingConvention) -> SmallVec<[Reg; 9]> {
regs::callee_saved()
}
}
impl Aarch64ABI {
@@ -118,6 +124,7 @@ mod tests {
abi::{ABIArg, ABI},
isa::aarch64::regs,
isa::reg::Reg,
isa::CallingConvention,
};
use wasmparser::{
FuncType,
@@ -140,7 +147,7 @@ mod tests {
let wasm_sig = FuncType::new([I32, I64, I32, I64, I32, I32, I64, I32, I64], []);
let abi = Aarch64ABI::default();
let sig = abi.sig(&wasm_sig);
let sig = abi.sig(&wasm_sig, &CallingConvention::Default);
let params = sig.params;
match_reg_arg(params.get(0).unwrap(), I32, regs::xreg(0));
@@ -159,7 +166,7 @@ mod tests {
let wasm_sig = FuncType::new([F32, F64, F32, F64, F32, F32, F64, F32, F64], []);
let abi = Aarch64ABI::default();
let sig = abi.sig(&wasm_sig);
let sig = abi.sig(&wasm_sig, &CallingConvention::Default);
let params = sig.params;
match_reg_arg(params.get(0).unwrap(), F32, regs::vreg(0));
@@ -178,7 +185,7 @@ mod tests {
let wasm_sig = FuncType::new([F32, I32, I64, F64, I32, F32, F64, F32, F64], []);
let abi = Aarch64ABI::default();
let sig = abi.sig(&wasm_sig);
let sig = abi.sig(&wasm_sig, &CallingConvention::Default);
let params = sig.params;
match_reg_arg(params.get(0).unwrap(), F32, regs::vreg(0));

View File

@@ -3,7 +3,7 @@ use crate::{
abi::ABI,
codegen::{CodeGen, CodeGenContext},
frame::{DefinedLocals, Frame},
isa::{Builder, TargetIsa},
isa::{Builder, CallingConvention, TargetIsa},
masm::MacroAssembler,
regalloc::RegAlloc,
regset::RegSet,
@@ -92,7 +92,7 @@ impl TargetIsa for Aarch64 {
let mut masm = Aarch64Masm::new(self.shared_flags.clone());
let stack = Stack::new();
let abi = abi::Aarch64ABI::default();
let abi_sig = abi.sig(sig);
let abi_sig = abi.sig(sig, &CallingConvention::Default);
let defined_locals = DefinedLocals::new(&mut body, validator)?;
let frame = Frame::new(&abi_sig, &defined_locals, &abi)?;

View File

@@ -2,6 +2,7 @@
use crate::isa::reg::Reg;
use regalloc2::{PReg, RegClass};
use smallvec::{smallvec, SmallVec};
/// Construct a X-register from an index.
pub(crate) const fn xreg(num: u8) -> Reg {
@@ -135,3 +136,25 @@ const NON_ALLOCATABLE_GPR: u32 = (1 << ip0().hw_enc())
/// Bitmask to represent the available general purpose registers.
pub(crate) const ALL_GPR: u32 = u32::MAX & !NON_ALLOCATABLE_GPR;
/// Returns the callee-saved registers.
///
/// This function will return the set of registers that need to be saved
/// according to the system ABI and that are known not to be saved during the
/// prologue emission.
///
/// The returned list covers `x19` through `x28`, i.e. ten registers.
// NOTE(review): the return type reserves inline space for 9 elements while ten
// registers are listed, so the vector presumably spills to the heap -- confirm
// whether the inline capacity should be raised.
// TODO: Once float registers are supported,
// account for callee-saved float registers.
pub(crate) fn callee_saved() -> SmallVec<[Reg; 9]> {
smallvec![
xreg(19),
xreg(20),
xreg(21),
xreg(22),
xreg(23),
xreg(24),
xreg(25),
xreg(26),
xreg(27),
xreg(28),
]
}

View File

@@ -70,6 +70,60 @@ pub(crate) enum LookupError {
SupportDisabled,
}
/// Calling conventions supported by Winch. Winch supports the `Wasmtime*`
/// variations of the system's ABI calling conventions and an internal default
/// calling convention.
///
/// This enum is a reduced subset of the calling conventions defined in
/// [cranelift_codegen::isa::CallConv]. Introducing this enum makes it easier
/// to enforce the invariant of all the calling conventions supported by Winch.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CallingConvention {
    /// See [cranelift_codegen::isa::CallConv::WasmtimeSystemV]
    WasmtimeSystemV,
    /// See [cranelift_codegen::isa::CallConv::WasmtimeFastcall]
    WasmtimeFastcall,
    /// See [cranelift_codegen::isa::CallConv::WasmtimeAppleAarch64]
    WasmtimeAppleAarch64,
    /// The default calling convention for Winch. It largely follows SystemV
    /// for parameter and result handling. This calling convention is part of
    /// Winch's default ABI [crate::abi::ABI].
    Default,
}

impl CallingConvention {
    /// Returns true if the current calling convention is `WasmtimeFastcall`.
    fn is_fastcall(&self) -> bool {
        matches!(self, Self::WasmtimeFastcall)
    }

    /// Returns true if the current calling convention is `WasmtimeSystemV`.
    fn is_systemv(&self) -> bool {
        matches!(self, Self::WasmtimeSystemV)
    }

    /// Returns true if the current calling convention is `WasmtimeAppleAarch64`.
    fn is_apple_aarch64(&self) -> bool {
        matches!(self, Self::WasmtimeAppleAarch64)
    }

    /// Returns true if the current calling convention is `Default`.
    fn is_default(&self) -> bool {
        matches!(self, Self::Default)
    }
}
/// A trait representing commonalities between the supported
/// instruction set architectures.
pub trait TargetIsa: Send + Sync {
@@ -100,10 +154,21 @@ pub trait TargetIsa: Send + Sync {
) -> Result<MachBufferFinalized<Final>>;
/// Get the default calling convention of the underlying target triple.
fn call_conv(&self) -> CallConv {
fn default_call_conv(&self) -> CallConv {
CallConv::triple_default(&self.triple())
}
/// Map the triple's default calling convention onto the corresponding
/// `Wasmtime*` calling convention supported by Winch.
///
/// # Panics
///
/// Panics via `unimplemented!` when the triple's default calling convention
/// is anything other than `SystemV`, `WindowsFastcall` or `AppleAarch64`.
fn wasmtime_call_conv(&self) -> CallingConvention {
    let triple_default = self.default_call_conv();
    match triple_default {
        CallConv::SystemV => CallingConvention::WasmtimeSystemV,
        CallConv::WindowsFastcall => CallingConvention::WasmtimeFastcall,
        CallConv::AppleAarch64 => CallingConvention::WasmtimeAppleAarch64,
        unsupported => unimplemented!("calling convention: {:?}", unsupported),
    }
}
/// Get the endianness of the underlying target triple.
fn endianness(&self) -> target_lexicon::Endianness {
self.triple().endianness().unwrap()
@@ -131,7 +196,7 @@ impl Debug for &dyn TargetIsa {
f,
"Target ISA {{ triple: {:?}, calling convention: {:?} }}",
self.triple(),
self.call_conv()
self.default_call_conv()
)
}
}

View File

@@ -1,7 +1,7 @@
use super::regs;
use crate::{
abi::{ABIArg, ABIResult, ABISig, ABI},
isa::reg::Reg,
isa::{reg::Reg, CallingConvention},
};
use smallvec::SmallVec;
use wasmparser::{FuncType, ValType};
@@ -12,15 +12,39 @@ use wasmparser::{FuncType, ValType};
/// The `gpr_or_absolute_count` field tracks the general purpose register
/// index, or the single shared index when counting absolutely.
/// The `fpr` field tracks the floating point register index.
#[derive(Default)]
struct RegIndexEnv(u8, u8);
struct RegIndexEnv {
/// General purpose register index. When `absolute_count` is set, this
/// field is the single counter shared by both register classes.
gpr_or_absolute_count: u8,
/// Floating point register index. Only advanced when `absolute_count` is
/// false.
fpr: u8,
/// Whether the count should be absolute rather than per register class.
/// When this field is true, only the `gpr_or_absolute_count` field is
/// incremented.
absolute_count: bool,
}
impl RegIndexEnv {
/// Creates an environment with both indices at zero and absolute counting
/// enabled, so that general purpose and floating point requests draw from
/// the same counter.
fn with_absolute_count() -> Self {
Self {
gpr_or_absolute_count: 0,
fpr: 0,
absolute_count: true,
}
}
}
impl RegIndexEnv {
fn next_gpr(&mut self) -> u8 {
Self::increment(&mut self.0)
Self::increment(&mut self.gpr_or_absolute_count)
}
fn next_fpr(&mut self) -> u8 {
Self::increment(&mut self.1)
if self.absolute_count {
Self::increment(&mut self.gpr_or_absolute_count)
} else {
Self::increment(&mut self.fpr)
}
}
fn increment(index: &mut u8) -> u8 {
@@ -60,22 +84,33 @@ impl ABI for X64ABI {
64
}
fn sig(&self, wasm_sig: &FuncType) -> ABISig {
fn sig(&self, wasm_sig: &FuncType, call_conv: &CallingConvention) -> ABISig {
assert!(call_conv.is_fastcall() || call_conv.is_systemv() || call_conv.is_default());
if wasm_sig.results().len() > 1 {
panic!("multi-value not supported");
}
let mut stack_offset = 0;
let mut index_env = RegIndexEnv::default();
let is_fastcall = call_conv.is_fastcall();
// In the fastcall calling convention, the callee gets a contiguous
// stack area of 32 bytes (4 register arguments) just before its frame.
// See
// https://learn.microsoft.com/en-us/cpp/build/stack-usage?view=msvc-170#stack-allocation
let (mut stack_offset, mut index_env) = if is_fastcall {
(32, RegIndexEnv::with_absolute_count())
} else {
(0, RegIndexEnv::default())
};
let params: SmallVec<[ABIArg; 6]> = wasm_sig
.params()
.iter()
.map(|arg| Self::to_abi_arg(arg, &mut stack_offset, &mut index_env))
.map(|arg| Self::to_abi_arg(arg, &mut stack_offset, &mut index_env, is_fastcall))
.collect();
let ty = wasm_sig.results().get(0).map(|e| e.clone());
// NOTE temporarily defaulting to rax.
// The `Default`, `WasmtimeFastcall` and `WasmtimeSystemV` calling conventions use `rax`.
// NOTE This should be updated when supporting multi-value.
let reg = regs::rax();
let result = ABIResult::reg(ty, reg);
@@ -85,6 +120,10 @@ impl ABI for X64ABI {
fn scratch_reg() -> Reg {
regs::scratch()
}
/// Returns the callee-saved registers for `call_conv`, as reported by
/// `regs::callee_saved`.
fn callee_saved_regs(call_conv: &CallingConvention) -> SmallVec<[Reg; 9]> {
regs::callee_saved(call_conv)
}
}
impl X64ABI {
@@ -92,11 +131,16 @@ impl X64ABI {
wasm_arg: &ValType,
stack_offset: &mut u32,
index_env: &mut RegIndexEnv,
fastcall: bool,
) -> ABIArg {
let (reg, ty) = match wasm_arg {
ty @ (ValType::I32 | ValType::I64) => (Self::int_reg_for(index_env.next_gpr()), ty),
ty @ (ValType::I32 | ValType::I64) => {
(Self::int_reg_for(index_env.next_gpr(), fastcall), ty)
}
ty @ (ValType::F32 | ValType::F64) => (Self::float_reg_for(index_env.next_fpr()), ty),
ty @ (ValType::F32 | ValType::F64) => {
(Self::float_reg_for(index_env.next_fpr(), fastcall), ty)
}
ty => unreachable!("Unsupported argument type {:?}", ty),
};
@@ -111,28 +155,36 @@ impl X64ABI {
reg.map_or_else(default, |reg| ABIArg::reg(reg, *ty))
}
fn int_reg_for(index: u8) -> Option<Reg> {
match index {
0 => Some(regs::rdi()),
1 => Some(regs::rsi()),
2 => Some(regs::rdx()),
3 => Some(regs::rcx()),
4 => Some(regs::r8()),
5 => Some(regs::r9()),
/// Returns the general purpose argument register assigned to `index`, or
/// `None` once the register arguments of the convention are exhausted.
///
/// With `fastcall` set, four registers are available (`rcx`, `rdx`, `r8`,
/// `r9`); otherwise six are (`rdi`, `rsi`, `rdx`, `rcx`, `r8`, `r9`).
fn int_reg_for(index: u8, fastcall: bool) -> Option<Reg> {
    let reg = if fastcall {
        match index {
            0 => regs::rcx(),
            1 => regs::rdx(),
            2 => regs::r8(),
            3 => regs::r9(),
            _ => return None,
        }
    } else {
        match index {
            0 => regs::rdi(),
            1 => regs::rsi(),
            2 => regs::rdx(),
            3 => regs::rcx(),
            4 => regs::r8(),
            5 => regs::r9(),
            _ => return None,
        }
    };
    Some(reg)
}
fn float_reg_for(index: u8) -> Option<Reg> {
match index {
0 => Some(regs::xmm0()),
1 => Some(regs::xmm1()),
2 => Some(regs::xmm2()),
3 => Some(regs::xmm3()),
4 => Some(regs::xmm4()),
5 => Some(regs::xmm5()),
6 => Some(regs::xmm6()),
7 => Some(regs::xmm7()),
/// Returns the floating point argument register assigned to `index`, or
/// `None` once the register arguments of the convention are exhausted.
///
/// With `fastcall` set, only `xmm0` through `xmm3` are handed out; otherwise
/// `xmm0` through `xmm7` are.
fn float_reg_for(index: u8, fastcall: bool) -> Option<Reg> {
    // Number of floating point argument registers for the convention.
    let available: u8 = if fastcall { 4 } else { 8 };
    match index {
        _ if index >= available => None,
        0 => Some(regs::xmm0()),
        1 => Some(regs::xmm1()),
        2 => Some(regs::xmm2()),
        3 => Some(regs::xmm3()),
        4 => Some(regs::xmm4()),
        5 => Some(regs::xmm5()),
        6 => Some(regs::xmm6()),
        7 => Some(regs::xmm7()),
        _ => None,
    }
}
@@ -145,6 +197,7 @@ mod tests {
abi::{ABIArg, ABI},
isa::reg::Reg,
isa::x64::regs,
isa::CallingConvention,
};
use wasmparser::{
FuncType,
@@ -162,12 +215,21 @@ mod tests {
assert_eq!(index_env.next_gpr(), 2);
}
/// With absolute counting, general purpose and floating point requests share
/// one counter, so alternating requests must yield consecutive indices.
#[test]
fn test_reg_index_env_absolute_count() {
    let mut e = RegIndexEnv::with_absolute_count();
    // `assert_eq!` reports both values on failure, matching the sibling
    // index-environment test.
    assert_eq!(e.next_gpr(), 0);
    assert_eq!(e.next_fpr(), 1);
    assert_eq!(e.next_gpr(), 2);
    assert_eq!(e.next_fpr(), 3);
}
#[test]
fn int_abi_sig() {
let wasm_sig = FuncType::new([I32, I64, I32, I64, I32, I32, I64, I32], []);
let abi = X64ABI::default();
let sig = abi.sig(&wasm_sig);
let sig = abi.sig(&wasm_sig, &CallingConvention::Default);
let params = sig.params;
match_reg_arg(params.get(0).unwrap(), I32, regs::rdi());
@@ -185,7 +247,7 @@ mod tests {
let wasm_sig = FuncType::new([F32, F64, F32, F64, F32, F32, F64, F32, F64], []);
let abi = X64ABI::default();
let sig = abi.sig(&wasm_sig);
let sig = abi.sig(&wasm_sig, &CallingConvention::Default);
let params = sig.params;
match_reg_arg(params.get(0).unwrap(), F32, regs::xmm0());
@@ -204,7 +266,7 @@ mod tests {
let wasm_sig = FuncType::new([F32, I32, I64, F64, I32, F32, F64, F32, F64], []);
let abi = X64ABI::default();
let sig = abi.sig(&wasm_sig);
let sig = abi.sig(&wasm_sig, &CallingConvention::Default);
let params = sig.params;
match_reg_arg(params.get(0).unwrap(), F32, regs::xmm0());
@@ -218,6 +280,41 @@ mod tests {
match_reg_arg(params.get(8).unwrap(), F64, regs::xmm5());
}
/// Under `WasmtimeSystemV`, integer and float arguments are numbered
/// independently, each class consuming its own register sequence.
#[test]
fn system_v_call_conv() {
    let wasm_sig = FuncType::new([F32, I32, I64, F64, I32, F32, F64, F32, F64], []);

    let abi = X64ABI::default();
    let sig = abi.sig(&wasm_sig, &CallingConvention::WasmtimeSystemV);
    let params = sig.params;

    // Expected (type, register) for each parameter, in declaration order.
    let expected = [
        (F32, regs::xmm0()),
        (I32, regs::rdi()),
        (I64, regs::rsi()),
        (F64, regs::xmm1()),
        (I32, regs::rdx()),
        (F32, regs::xmm2()),
        (F64, regs::xmm3()),
        (F32, regs::xmm4()),
        (F64, regs::xmm5()),
    ];
    for (position, &(ty, reg)) in expected.iter().enumerate() {
        match_reg_arg(params.get(position).unwrap(), ty, reg);
    }
}
/// Under `WasmtimeFastcall`, both register classes share one positional
/// counter, and arguments past the fourth position go to the stack.
#[test]
fn fastcall_call_conv() {
    let wasm_sig = FuncType::new([F32, I32, I64, F64, I32, F32, F64, F32, F64], []);

    let abi = X64ABI::default();
    let sig = abi.sig(&wasm_sig, &CallingConvention::WasmtimeFastcall);
    let params = sig.params;

    // The first four positions land in the register matching their position,
    // regardless of register class.
    let in_registers = [
        (F32, regs::xmm0()),
        (I32, regs::rdx()),
        (I64, regs::r8()),
        (F64, regs::xmm3()),
    ];
    for (position, &(ty, reg)) in in_registers.iter().enumerate() {
        match_reg_arg(params.get(position).unwrap(), ty, reg);
    }

    // Stack arguments start at offset 32, after the area reserved for the
    // four register arguments.
    let on_stack = [(I32, 32), (F32, 40)];
    for (position, &(ty, offset)) in on_stack.iter().enumerate() {
        match_stack_arg(params.get(in_registers.len() + position).unwrap(), ty, offset);
    }
}
fn match_reg_arg(abi_arg: &ABIArg, expected_ty: ValType, expected_reg: Reg) {
match abi_arg {
&ABIArg::Reg { reg, ty } => {

View File

@@ -4,7 +4,7 @@ use crate::{
};
use crate::frame::{DefinedLocals, Frame};
use crate::isa::x64::masm::MacroAssembler as X64Masm;
use crate::isa::{x64::masm::MacroAssembler as X64Masm, CallingConvention};
use crate::masm::MacroAssembler;
use crate::regalloc::RegAlloc;
use crate::stack::Stack;
@@ -97,7 +97,7 @@ impl TargetIsa for X64 {
let mut masm = X64Masm::new(self.shared_flags.clone(), self.isa_flags.clone());
let stack = Stack::new();
let abi = abi::X64ABI::default();
let abi_sig = abi.sig(sig);
let abi_sig = abi.sig(sig, &CallingConvention::Default);
let defined_locals = DefinedLocals::new(&mut body, validator)?;
let frame = Frame::new(&abi_sig, &defined_locals, &abi)?;
@@ -123,8 +123,10 @@ impl TargetIsa for X64 {
fn host_to_wasm_trampoline(&self, ty: &FuncType) -> Result<MachBufferFinalized<Final>> {
let abi = abi::X64ABI::default();
let mut masm = X64Masm::new(self.shared_flags.clone(), self.isa_flags.clone());
let call_conv = self.wasmtime_call_conv();
let mut trampoline = Trampoline::new(&mut masm, &abi, regs::scratch(), regs::argv());
let mut trampoline =
Trampoline::new(&mut masm, &abi, regs::scratch(), regs::argv(), &call_conv);
trampoline.emit_host_to_wasm(ty);

View File

@@ -1,7 +1,8 @@
//! X64 register definition.
use crate::isa::reg::Reg;
use crate::isa::{reg::Reg, CallingConvention};
use regalloc2::{PReg, RegClass};
use smallvec::{smallvec, SmallVec};
const ENC_RAX: u8 = 0;
const ENC_RCX: u8 = 1;
@@ -154,3 +155,24 @@ const NON_ALLOCATABLE_GPR: u32 = (1 << ENC_RBP) | (1 << ENC_RSP) | (1 << ENC_R11
/// Bitmask to represent the available general purpose registers.
pub(crate) const ALL_GPR: u32 = ALLOCATABLE_GPR & !NON_ALLOCATABLE_GPR;
/// Returns the callee-saved registers according to a particular calling
/// convention.
///
/// This function will return the set of registers that need to be saved
/// according to the system ABI and that are known not to be saved during the
/// prologue emission.
pub(crate) fn callee_saved(call_conv: &CallingConvention) -> SmallVec<[Reg; 9]> {
use CallingConvention::*;
match call_conv {
WasmtimeSystemV => {
smallvec![rbx(), r12(), r13(), r14(), r15(),]
}
// TODO: Once float registers are supported,
// account for callee-saved float registers.
WasmtimeFastcall => {
smallvec![rbx(), rdi(), rsi(), r12(), r13(), r14(), r15(),]
}
_ => unreachable!(),
}
}