x86-64 Windows fastcall ABI support.
This adds support for the "fastcall" ABI, which is the native C/C++ ABI on Windows platforms on x86-64. It is similar to, but not exactly like, System V; primarily, its argument register assignments are different, and it requires stack shadow space. Note that this also adjusts the handling of multi-register values in the shared ABI implementation and, as a result, changes the handling of `i128`s on *both* Fastcall/x64 *and* SysV/x64 platforms. This was done to align with the actual behavior of the "rustc ABI" on both platforms, as mapped out experimentally (Compiler Explorer link in comments). This behavior is gated under the `enable_llvm_abi_extensions` flag. Note also that this does *not* add x64 unwind info on Windows; that is planned and will come in a future PR.
This commit is contained in:
@@ -216,6 +216,25 @@ pub(crate) fn define() -> SettingGroup {
|
|||||||
0,
|
0,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
settings.add_bool(
|
||||||
|
"enable_llvm_abi_extensions",
|
||||||
|
r#"
|
||||||
|
Enable various ABI extensions defined by LLVM's behavior.
|
||||||
|
|
||||||
|
In some cases, LLVM's implementation of an ABI (calling convention)
|
||||||
|
goes beyond a standard and supports additional argument types or
|
||||||
|
behavior. This option instructs Cranelift codegen to follow LLVM's
|
||||||
|
behavior where applicable.
|
||||||
|
|
||||||
|
Currently, this applies only to Windows Fastcall on x86-64, and
|
||||||
|
allows an `i128` argument to be spread across two 64-bit integer
|
||||||
|
registers. The Fastcall implementation otherwise does not support
|
||||||
|
`i128` arguments, and will panic if they are present and this
|
||||||
|
option is not set.
|
||||||
|
"#,
|
||||||
|
false,
|
||||||
|
);
|
||||||
|
|
||||||
// BaldrMonkey requires that not-yet-relocated function addresses be encoded
|
// BaldrMonkey requires that not-yet-relocated function addresses be encoded
|
||||||
// as all-ones bitpatterns.
|
// as all-ones bitpatterns.
|
||||||
settings.add_bool(
|
settings.add_bool(
|
||||||
|
|||||||
@@ -78,41 +78,41 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt
|
|||||||
match &param.purpose {
|
match &param.purpose {
|
||||||
&ir::ArgumentPurpose::VMContext => {
|
&ir::ArgumentPurpose::VMContext => {
|
||||||
// This is SpiderMonkey's `WasmTlsReg`.
|
// This is SpiderMonkey's `WasmTlsReg`.
|
||||||
Some(ABIArg::Reg {
|
Some(ABIArg::reg(
|
||||||
regs: ValueRegs::one(xreg(BALDRDASH_TLS_REG).to_real_reg()),
|
xreg(BALDRDASH_TLS_REG).to_real_reg(),
|
||||||
ty: ir::types::I64,
|
ir::types::I64,
|
||||||
extension: param.extension,
|
param.extension,
|
||||||
purpose: param.purpose,
|
param.purpose,
|
||||||
})
|
))
|
||||||
}
|
}
|
||||||
&ir::ArgumentPurpose::SignatureId => {
|
&ir::ArgumentPurpose::SignatureId => {
|
||||||
// This is SpiderMonkey's `WasmTableCallSigReg`.
|
// This is SpiderMonkey's `WasmTableCallSigReg`.
|
||||||
Some(ABIArg::Reg {
|
Some(ABIArg::reg(
|
||||||
regs: ValueRegs::one(xreg(BALDRDASH_SIG_REG).to_real_reg()),
|
xreg(BALDRDASH_SIG_REG).to_real_reg(),
|
||||||
ty: ir::types::I64,
|
ir::types::I64,
|
||||||
extension: param.extension,
|
param.extension,
|
||||||
purpose: param.purpose,
|
param.purpose,
|
||||||
})
|
))
|
||||||
}
|
}
|
||||||
&ir::ArgumentPurpose::CalleeTLS => {
|
&ir::ArgumentPurpose::CalleeTLS => {
|
||||||
// This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020.
|
// This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020.
|
||||||
assert!(call_conv == isa::CallConv::Baldrdash2020);
|
assert!(call_conv == isa::CallConv::Baldrdash2020);
|
||||||
Some(ABIArg::Stack {
|
Some(ABIArg::stack(
|
||||||
offset: BALDRDASH_CALLEE_TLS_OFFSET,
|
BALDRDASH_CALLEE_TLS_OFFSET,
|
||||||
ty: ir::types::I64,
|
ir::types::I64,
|
||||||
extension: ir::ArgumentExtension::None,
|
ir::ArgumentExtension::None,
|
||||||
purpose: param.purpose,
|
param.purpose,
|
||||||
})
|
))
|
||||||
}
|
}
|
||||||
&ir::ArgumentPurpose::CallerTLS => {
|
&ir::ArgumentPurpose::CallerTLS => {
|
||||||
// This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020.
|
// This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020.
|
||||||
assert!(call_conv == isa::CallConv::Baldrdash2020);
|
assert!(call_conv == isa::CallConv::Baldrdash2020);
|
||||||
Some(ABIArg::Stack {
|
Some(ABIArg::stack(
|
||||||
offset: BALDRDASH_CALLER_TLS_OFFSET,
|
BALDRDASH_CALLER_TLS_OFFSET,
|
||||||
ty: ir::types::I64,
|
ir::types::I64,
|
||||||
extension: ir::ArgumentExtension::None,
|
ir::ArgumentExtension::None,
|
||||||
purpose: param.purpose,
|
param.purpose,
|
||||||
})
|
))
|
||||||
}
|
}
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
@@ -161,6 +161,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
|
|
||||||
fn compute_arg_locs(
|
fn compute_arg_locs(
|
||||||
call_conv: isa::CallConv,
|
call_conv: isa::CallConv,
|
||||||
|
_flags: &settings::Flags,
|
||||||
params: &[ir::AbiParam],
|
params: &[ir::AbiParam],
|
||||||
args_or_rets: ArgsOrRets,
|
args_or_rets: ArgsOrRets,
|
||||||
add_ret_area_ptr: bool,
|
add_ret_area_ptr: bool,
|
||||||
@@ -253,12 +254,12 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
RegClass::V128 => vreg(*next_reg),
|
RegClass::V128 => vreg(*next_reg),
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
};
|
};
|
||||||
ret.push(ABIArg::Reg {
|
ret.push(ABIArg::reg(
|
||||||
regs: ValueRegs::one(reg.to_real_reg()),
|
reg.to_real_reg(),
|
||||||
ty: param.value_type,
|
param.value_type,
|
||||||
extension: param.extension,
|
param.extension,
|
||||||
purpose: param.purpose,
|
param.purpose,
|
||||||
});
|
));
|
||||||
*next_reg += 1;
|
*next_reg += 1;
|
||||||
remaining_reg_vals -= 1;
|
remaining_reg_vals -= 1;
|
||||||
} else {
|
} else {
|
||||||
@@ -268,13 +269,13 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
let size = std::cmp::max(size, 8);
|
let size = std::cmp::max(size, 8);
|
||||||
// Align.
|
// Align.
|
||||||
debug_assert!(size.is_power_of_two());
|
debug_assert!(size.is_power_of_two());
|
||||||
next_stack = (next_stack + size - 1) & !(size - 1);
|
next_stack = align_to(next_stack, size);
|
||||||
ret.push(ABIArg::Stack {
|
ret.push(ABIArg::stack(
|
||||||
offset: next_stack as i64,
|
next_stack as i64,
|
||||||
ty: param.value_type,
|
param.value_type,
|
||||||
extension: param.extension,
|
param.extension,
|
||||||
purpose: param.purpose,
|
param.purpose,
|
||||||
});
|
));
|
||||||
next_stack += size;
|
next_stack += size;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -286,19 +287,19 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
let extra_arg = if add_ret_area_ptr {
|
let extra_arg = if add_ret_area_ptr {
|
||||||
debug_assert!(args_or_rets == ArgsOrRets::Args);
|
debug_assert!(args_or_rets == ArgsOrRets::Args);
|
||||||
if next_xreg < max_per_class_reg_vals && remaining_reg_vals > 0 {
|
if next_xreg < max_per_class_reg_vals && remaining_reg_vals > 0 {
|
||||||
ret.push(ABIArg::Reg {
|
ret.push(ABIArg::reg(
|
||||||
regs: ValueRegs::one(xreg(next_xreg).to_real_reg()),
|
xreg(next_xreg).to_real_reg(),
|
||||||
ty: I64,
|
I64,
|
||||||
extension: ir::ArgumentExtension::None,
|
ir::ArgumentExtension::None,
|
||||||
purpose: ir::ArgumentPurpose::Normal,
|
ir::ArgumentPurpose::Normal,
|
||||||
});
|
));
|
||||||
} else {
|
} else {
|
||||||
ret.push(ABIArg::Stack {
|
ret.push(ABIArg::stack(
|
||||||
offset: next_stack as i64,
|
next_stack as i64,
|
||||||
ty: I64,
|
I64,
|
||||||
extension: ir::ArgumentExtension::None,
|
ir::ArgumentExtension::None,
|
||||||
purpose: ir::ArgumentPurpose::Normal,
|
ir::ArgumentPurpose::Normal,
|
||||||
});
|
));
|
||||||
next_stack += 8;
|
next_stack += 8;
|
||||||
}
|
}
|
||||||
Some(ret.len() - 1)
|
Some(ret.len() - 1)
|
||||||
@@ -306,7 +307,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
None
|
None
|
||||||
};
|
};
|
||||||
|
|
||||||
next_stack = (next_stack + 15) & !15;
|
next_stack = align_to(next_stack, 16);
|
||||||
|
|
||||||
// To avoid overflow issues, limit the arg/return size to something
|
// To avoid overflow issues, limit the arg/return size to something
|
||||||
// reasonable -- here, 128 MB.
|
// reasonable -- here, 128 MB.
|
||||||
|
|||||||
@@ -51,6 +51,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
|
|||||||
|
|
||||||
fn compute_arg_locs(
|
fn compute_arg_locs(
|
||||||
_call_conv: isa::CallConv,
|
_call_conv: isa::CallConv,
|
||||||
|
_flags: &settings::Flags,
|
||||||
params: &[ir::AbiParam],
|
params: &[ir::AbiParam],
|
||||||
args_or_rets: ArgsOrRets,
|
args_or_rets: ArgsOrRets,
|
||||||
add_ret_area_ptr: bool,
|
add_ret_area_ptr: bool,
|
||||||
@@ -81,12 +82,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
|
|||||||
if next_rreg < max_reg_val {
|
if next_rreg < max_reg_val {
|
||||||
let reg = rreg(next_rreg);
|
let reg = rreg(next_rreg);
|
||||||
|
|
||||||
ret.push(ABIArg::Reg {
|
ret.push(ABIArg::reg(
|
||||||
regs: ValueRegs::one(reg.to_real_reg()),
|
reg.to_real_reg(),
|
||||||
ty: param.value_type,
|
param.value_type,
|
||||||
extension: param.extension,
|
param.extension,
|
||||||
purpose: param.purpose,
|
param.purpose,
|
||||||
});
|
));
|
||||||
next_rreg += 1;
|
next_rreg += 1;
|
||||||
} else {
|
} else {
|
||||||
// Arguments are stored on stack in reversed order.
|
// Arguments are stored on stack in reversed order.
|
||||||
@@ -101,12 +102,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
|
|||||||
let extra_arg = if add_ret_area_ptr {
|
let extra_arg = if add_ret_area_ptr {
|
||||||
debug_assert!(args_or_rets == ArgsOrRets::Args);
|
debug_assert!(args_or_rets == ArgsOrRets::Args);
|
||||||
if next_rreg < max_reg_val {
|
if next_rreg < max_reg_val {
|
||||||
ret.push(ABIArg::Reg {
|
ret.push(ABIArg::reg(
|
||||||
regs: ValueRegs::one(rreg(next_rreg).to_real_reg()),
|
rreg(next_rreg).to_real_reg(),
|
||||||
ty: I32,
|
I32,
|
||||||
extension: ir::ArgumentExtension::None,
|
ir::ArgumentExtension::None,
|
||||||
purpose: ir::ArgumentPurpose::Normal,
|
ir::ArgumentPurpose::Normal,
|
||||||
});
|
));
|
||||||
} else {
|
} else {
|
||||||
stack_args.push((
|
stack_args.push((
|
||||||
I32,
|
I32,
|
||||||
@@ -124,12 +125,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
|
|||||||
let max_stack = next_stack;
|
let max_stack = next_stack;
|
||||||
for (ty, ext, purpose) in stack_args.into_iter().rev() {
|
for (ty, ext, purpose) in stack_args.into_iter().rev() {
|
||||||
next_stack -= 4;
|
next_stack -= 4;
|
||||||
ret.push(ABIArg::Stack {
|
ret.push(ABIArg::stack(
|
||||||
offset: (max_stack - next_stack) as i64,
|
(max_stack - next_stack) as i64,
|
||||||
ty,
|
ty,
|
||||||
extension: ext,
|
ext,
|
||||||
purpose,
|
purpose,
|
||||||
});
|
));
|
||||||
}
|
}
|
||||||
assert_eq!(next_stack, 0);
|
assert_eq!(next_stack, 0);
|
||||||
|
|
||||||
|
|||||||
@@ -31,41 +31,41 @@ fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option<A
|
|||||||
match &param.purpose {
|
match &param.purpose {
|
||||||
&ir::ArgumentPurpose::VMContext => {
|
&ir::ArgumentPurpose::VMContext => {
|
||||||
// This is SpiderMonkey's `WasmTlsReg`.
|
// This is SpiderMonkey's `WasmTlsReg`.
|
||||||
Some(ABIArg::Reg {
|
Some(ABIArg::reg(
|
||||||
regs: ValueRegs::one(regs::r14().to_real_reg()),
|
regs::r14().to_real_reg(),
|
||||||
ty: types::I64,
|
types::I64,
|
||||||
extension: param.extension,
|
param.extension,
|
||||||
purpose: param.purpose,
|
param.purpose,
|
||||||
})
|
))
|
||||||
}
|
}
|
||||||
&ir::ArgumentPurpose::SignatureId => {
|
&ir::ArgumentPurpose::SignatureId => {
|
||||||
// This is SpiderMonkey's `WasmTableCallSigReg`.
|
// This is SpiderMonkey's `WasmTableCallSigReg`.
|
||||||
Some(ABIArg::Reg {
|
Some(ABIArg::reg(
|
||||||
regs: ValueRegs::one(regs::r10().to_real_reg()),
|
regs::r10().to_real_reg(),
|
||||||
ty: types::I64,
|
types::I64,
|
||||||
extension: param.extension,
|
param.extension,
|
||||||
purpose: param.purpose,
|
param.purpose,
|
||||||
})
|
))
|
||||||
}
|
}
|
||||||
&ir::ArgumentPurpose::CalleeTLS => {
|
&ir::ArgumentPurpose::CalleeTLS => {
|
||||||
// This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020.
|
// This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020.
|
||||||
assert!(call_conv == isa::CallConv::Baldrdash2020);
|
assert!(call_conv == isa::CallConv::Baldrdash2020);
|
||||||
Some(ABIArg::Stack {
|
Some(ABIArg::stack(
|
||||||
offset: BALDRDASH_CALLEE_TLS_OFFSET,
|
BALDRDASH_CALLEE_TLS_OFFSET,
|
||||||
ty: ir::types::I64,
|
ir::types::I64,
|
||||||
extension: ir::ArgumentExtension::None,
|
ir::ArgumentExtension::None,
|
||||||
purpose: param.purpose,
|
param.purpose,
|
||||||
})
|
))
|
||||||
}
|
}
|
||||||
&ir::ArgumentPurpose::CallerTLS => {
|
&ir::ArgumentPurpose::CallerTLS => {
|
||||||
// This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020.
|
// This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020.
|
||||||
assert!(call_conv == isa::CallConv::Baldrdash2020);
|
assert!(call_conv == isa::CallConv::Baldrdash2020);
|
||||||
Some(ABIArg::Stack {
|
Some(ABIArg::stack(
|
||||||
offset: BALDRDASH_CALLER_TLS_OFFSET,
|
BALDRDASH_CALLER_TLS_OFFSET,
|
||||||
ty: ir::types::I64,
|
ir::types::I64,
|
||||||
extension: ir::ArgumentExtension::None,
|
ir::ArgumentExtension::None,
|
||||||
purpose: param.purpose,
|
param.purpose,
|
||||||
})
|
))
|
||||||
}
|
}
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
@@ -97,18 +97,30 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
|||||||
|
|
||||||
fn compute_arg_locs(
|
fn compute_arg_locs(
|
||||||
call_conv: isa::CallConv,
|
call_conv: isa::CallConv,
|
||||||
|
flags: &settings::Flags,
|
||||||
params: &[ir::AbiParam],
|
params: &[ir::AbiParam],
|
||||||
args_or_rets: ArgsOrRets,
|
args_or_rets: ArgsOrRets,
|
||||||
add_ret_area_ptr: bool,
|
add_ret_area_ptr: bool,
|
||||||
) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
|
) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
|
||||||
let is_baldrdash = call_conv.extends_baldrdash();
|
let is_baldrdash = call_conv.extends_baldrdash();
|
||||||
|
let is_fastcall = call_conv.extends_windows_fastcall();
|
||||||
let has_baldrdash_tls = call_conv == isa::CallConv::Baldrdash2020;
|
let has_baldrdash_tls = call_conv == isa::CallConv::Baldrdash2020;
|
||||||
|
|
||||||
let mut next_gpr = 0;
|
let mut next_gpr = 0;
|
||||||
let mut next_vreg = 0;
|
let mut next_vreg = 0;
|
||||||
let mut next_stack: u64 = 0;
|
let mut next_stack: u64 = 0;
|
||||||
|
let mut next_param_idx = 0; // Fastcall cares about overall param index
|
||||||
let mut ret = vec![];
|
let mut ret = vec![];
|
||||||
|
|
||||||
|
if args_or_rets == ArgsOrRets::Args && is_fastcall {
|
||||||
|
// Fastcall always reserves 32 bytes of shadow space corresponding to
|
||||||
|
// the four initial in-arg parameters.
|
||||||
|
//
|
||||||
|
// (See:
|
||||||
|
// https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-160)
|
||||||
|
next_stack = 32;
|
||||||
|
}
|
||||||
|
|
||||||
if args_or_rets == ArgsOrRets::Args && has_baldrdash_tls {
|
if args_or_rets == ArgsOrRets::Args && has_baldrdash_tls {
|
||||||
// Baldrdash ABI-2020 always has two stack-arg slots reserved, for the callee and
|
// Baldrdash ABI-2020 always has two stack-arg slots reserved, for the callee and
|
||||||
// caller TLS-register values, respectively.
|
// caller TLS-register values, respectively.
|
||||||
@@ -159,94 +171,114 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Find regclass(es) of the register(s) used to store a value of this type.
|
// Find regclass(es) of the register(s) used to store a value of this type.
|
||||||
let (rcs, _) = Inst::rc_for_type(param.value_type)?;
|
let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;
|
||||||
let intreg = rcs[0] == RegClass::I64;
|
|
||||||
let num_regs = rcs.len();
|
// Now assign ABIArgSlots for each register-sized part.
|
||||||
assert!(num_regs <= 2);
|
//
|
||||||
if num_regs == 2 {
|
// Note that the handling of `i128` values is unique here:
|
||||||
assert_eq!(rcs[0], rcs[1]);
|
//
|
||||||
|
// - If `enable_llvm_abi_extensions` is set in the flags, each
|
||||||
|
// `i128` is split into two `i64`s and assigned exactly as if it
|
||||||
|
// were two consecutive 64-bit args. This is consistent with LLVM's
|
||||||
|
// behavior, and is needed for some uses of Cranelift (e.g., the
|
||||||
|
// rustc backend).
|
||||||
|
//
|
||||||
|
// - Otherwise, both SysV and Fastcall specify behavior (use of
|
||||||
|
// vector register, a register pair, or passing by reference
|
||||||
|
// depending on the case), but for simplicity, we will just panic if
|
||||||
|
// an i128 type appears in a signature and the LLVM extensions flag
|
||||||
|
// is not set.
|
||||||
|
//
|
||||||
|
// For examples of how rustc compiles i128 args and return values on
|
||||||
|
// both SysV and Fastcall platforms, see:
|
||||||
|
// https://godbolt.org/z/PhG3ob
|
||||||
|
|
||||||
|
if param.value_type.bits() > 64
|
||||||
|
&& !param.value_type.is_vector()
|
||||||
|
&& !flags.enable_llvm_abi_extensions()
|
||||||
|
{
|
||||||
|
panic!(
|
||||||
|
"i128 args/return values not supported unless LLVM ABI extensions are enabled"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut regs: SmallVec<[RealReg; 2]> = smallvec![];
|
let mut slots = vec![];
|
||||||
for j in 0..num_regs {
|
for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) {
|
||||||
|
let intreg = *rc == RegClass::I64;
|
||||||
let nextreg = if intreg {
|
let nextreg = if intreg {
|
||||||
match args_or_rets {
|
match args_or_rets {
|
||||||
ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr + j),
|
ArgsOrRets::Args => {
|
||||||
|
get_intreg_for_arg(&call_conv, next_gpr, next_param_idx)
|
||||||
|
}
|
||||||
ArgsOrRets::Rets => {
|
ArgsOrRets::Rets => {
|
||||||
get_intreg_for_retval_systemv(&call_conv, next_gpr + j, i + j)
|
get_intreg_for_retval(&call_conv, next_gpr, next_param_idx)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
match args_or_rets {
|
match args_or_rets {
|
||||||
ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg + j),
|
ArgsOrRets::Args => {
|
||||||
|
get_fltreg_for_arg(&call_conv, next_vreg, next_param_idx)
|
||||||
|
}
|
||||||
ArgsOrRets::Rets => {
|
ArgsOrRets::Rets => {
|
||||||
get_fltreg_for_retval_systemv(&call_conv, next_vreg + j, i + j)
|
get_fltreg_for_retval(&call_conv, next_vreg, next_param_idx)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
next_param_idx += 1;
|
||||||
if let Some(reg) = nextreg {
|
if let Some(reg) = nextreg {
|
||||||
regs.push(reg.to_real_reg());
|
|
||||||
} else {
|
|
||||||
regs.clear();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if regs.len() > 0 {
|
|
||||||
let regs = match num_regs {
|
|
||||||
1 => ValueRegs::one(regs[0]),
|
|
||||||
2 => ValueRegs::two(regs[0], regs[1]),
|
|
||||||
_ => panic!("More than two registers unexpected"),
|
|
||||||
};
|
|
||||||
ret.push(ABIArg::Reg {
|
|
||||||
regs,
|
|
||||||
ty: param.value_type,
|
|
||||||
extension: param.extension,
|
|
||||||
purpose: param.purpose,
|
|
||||||
});
|
|
||||||
if intreg {
|
if intreg {
|
||||||
next_gpr += num_regs;
|
next_gpr += 1;
|
||||||
} else {
|
} else {
|
||||||
next_vreg += num_regs;
|
next_vreg += 1;
|
||||||
}
|
}
|
||||||
|
slots.push(ABIArgSlot::Reg {
|
||||||
|
reg: reg.to_real_reg(),
|
||||||
|
ty: *reg_ty,
|
||||||
|
extension: param.extension,
|
||||||
|
});
|
||||||
} else {
|
} else {
|
||||||
// Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
|
// Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
|
||||||
// stack alignment happens separately after all args.)
|
// stack alignment happens separately after all args.)
|
||||||
let size = (param.value_type.bits() / 8) as u64;
|
let size = (reg_ty.bits() / 8) as u64;
|
||||||
let size = std::cmp::max(size, 8);
|
let size = std::cmp::max(size, 8);
|
||||||
// Align.
|
// Align.
|
||||||
debug_assert!(size.is_power_of_two());
|
debug_assert!(size.is_power_of_two());
|
||||||
next_stack = (next_stack + size - 1) & !(size - 1);
|
next_stack = align_to(next_stack, size);
|
||||||
ret.push(ABIArg::Stack {
|
slots.push(ABIArgSlot::Stack {
|
||||||
offset: next_stack as i64,
|
offset: next_stack as i64,
|
||||||
ty: param.value_type,
|
ty: *reg_ty,
|
||||||
extension: param.extension,
|
extension: param.extension,
|
||||||
purpose: param.purpose,
|
|
||||||
});
|
});
|
||||||
next_stack += size;
|
next_stack += size;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ret.push(ABIArg::Slots {
|
||||||
|
slots,
|
||||||
|
purpose: param.purpose,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
if args_or_rets == ArgsOrRets::Rets && is_baldrdash {
|
if args_or_rets == ArgsOrRets::Rets && is_baldrdash {
|
||||||
ret.reverse();
|
ret.reverse();
|
||||||
}
|
}
|
||||||
|
|
||||||
let extra_arg = if add_ret_area_ptr {
|
let extra_arg = if add_ret_area_ptr {
|
||||||
debug_assert!(args_or_rets == ArgsOrRets::Args);
|
debug_assert!(args_or_rets == ArgsOrRets::Args);
|
||||||
if let Some(reg) = get_intreg_for_arg_systemv(&call_conv, next_gpr) {
|
if let Some(reg) = get_intreg_for_arg(&call_conv, next_gpr, next_param_idx) {
|
||||||
ret.push(ABIArg::Reg {
|
ret.push(ABIArg::reg(
|
||||||
regs: ValueRegs::one(reg.to_real_reg()),
|
reg.to_real_reg(),
|
||||||
ty: types::I64,
|
types::I64,
|
||||||
extension: ir::ArgumentExtension::None,
|
ir::ArgumentExtension::None,
|
||||||
purpose: ir::ArgumentPurpose::Normal,
|
ir::ArgumentPurpose::Normal,
|
||||||
});
|
));
|
||||||
} else {
|
} else {
|
||||||
ret.push(ABIArg::Stack {
|
ret.push(ABIArg::stack(
|
||||||
offset: next_stack as i64,
|
next_stack as i64,
|
||||||
ty: types::I64,
|
types::I64,
|
||||||
extension: ir::ArgumentExtension::None,
|
ir::ArgumentExtension::None,
|
||||||
purpose: ir::ArgumentPurpose::Normal,
|
ir::ArgumentPurpose::Normal,
|
||||||
});
|
));
|
||||||
next_stack += 8;
|
next_stack += 8;
|
||||||
}
|
}
|
||||||
Some(ret.len() - 1)
|
Some(ret.len() - 1)
|
||||||
@@ -254,7 +286,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
|||||||
None
|
None
|
||||||
};
|
};
|
||||||
|
|
||||||
next_stack = (next_stack + 15) & !15;
|
next_stack = align_to(next_stack, 16);
|
||||||
|
|
||||||
// To avoid overflow issues, limit the arg/return size to something reasonable.
|
// To avoid overflow issues, limit the arg/return size to something reasonable.
|
||||||
if next_stack > STACK_ARG_RET_SIZE_LIMIT {
|
if next_stack > STACK_ARG_RET_SIZE_LIMIT {
|
||||||
@@ -452,10 +484,9 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
|||||||
// registers (all XMM registers are caller-save) so we can compute the
|
// registers (all XMM registers are caller-save) so we can compute the
|
||||||
// total size of the needed stack space easily.
|
// total size of the needed stack space easily.
|
||||||
let clobbered = get_callee_saves(&call_conv, clobbers);
|
let clobbered = get_callee_saves(&call_conv, clobbers);
|
||||||
let clobbered_size = 8 * clobbered.len() as u32;
|
let stack_size = compute_clobber_size(&clobbered) + fixed_frame_storage_size;
|
||||||
let stack_size = clobbered_size + fixed_frame_storage_size;
|
|
||||||
// Align to 16 bytes.
|
// Align to 16 bytes.
|
||||||
let stack_size = (stack_size + 15) & !15;
|
let stack_size = align_to(stack_size, 16);
|
||||||
let clobbered_size = stack_size - fixed_frame_storage_size;
|
let clobbered_size = stack_size - fixed_frame_storage_size;
|
||||||
// Adjust the stack pointer downward with one `sub rsp, IMM`
|
// Adjust the stack pointer downward with one `sub rsp, IMM`
|
||||||
// instruction.
|
// instruction.
|
||||||
@@ -473,16 +504,23 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
|||||||
let r_reg = reg.to_reg();
|
let r_reg = reg.to_reg();
|
||||||
match r_reg.get_class() {
|
match r_reg.get_class() {
|
||||||
RegClass::I64 => {
|
RegClass::I64 => {
|
||||||
insts.push(Inst::mov_r_m(
|
insts.push(Inst::store(
|
||||||
OperandSize::Size64,
|
types::I64,
|
||||||
r_reg.to_reg(),
|
r_reg.to_reg(),
|
||||||
Amode::imm_reg(cur_offset, regs::rsp()),
|
Amode::imm_reg(cur_offset, regs::rsp()),
|
||||||
));
|
));
|
||||||
cur_offset += 8;
|
cur_offset += 8;
|
||||||
}
|
}
|
||||||
// No XMM regs are callee-save, so we do not need to implement
|
RegClass::V128 => {
|
||||||
// this.
|
cur_offset = align_to(cur_offset, 16);
|
||||||
_ => unimplemented!(),
|
insts.push(Inst::store(
|
||||||
|
types::I8X16,
|
||||||
|
r_reg.to_reg(),
|
||||||
|
Amode::imm_reg(cur_offset, regs::rsp()),
|
||||||
|
));
|
||||||
|
cur_offset += 16;
|
||||||
|
}
|
||||||
|
_ => unreachable!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -499,8 +537,8 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
|||||||
let mut insts = SmallVec::new();
|
let mut insts = SmallVec::new();
|
||||||
|
|
||||||
let clobbered = get_callee_saves(&call_conv, clobbers);
|
let clobbered = get_callee_saves(&call_conv, clobbers);
|
||||||
let stack_size = 8 * clobbered.len() as u32;
|
let stack_size = compute_clobber_size(&clobbered);
|
||||||
let stack_size = (stack_size + 15) & !15;
|
let stack_size = align_to(stack_size, 16);
|
||||||
|
|
||||||
// Restore regs by loading from offsets of RSP.
|
// Restore regs by loading from offsets of RSP.
|
||||||
let mut cur_offset = 0;
|
let mut cur_offset = 0;
|
||||||
@@ -514,7 +552,17 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
|||||||
));
|
));
|
||||||
cur_offset += 8;
|
cur_offset += 8;
|
||||||
}
|
}
|
||||||
_ => unimplemented!(),
|
RegClass::V128 => {
|
||||||
|
cur_offset = align_to(cur_offset, 16);
|
||||||
|
insts.push(Inst::load(
|
||||||
|
types::I8X16,
|
||||||
|
Amode::imm_reg(cur_offset, regs::rsp()),
|
||||||
|
Writable::from_reg(rreg.to_reg()),
|
||||||
|
ExtKind::None,
|
||||||
|
));
|
||||||
|
cur_offset += 16;
|
||||||
|
}
|
||||||
|
_ => unreachable!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Adjust RSP back upward.
|
// Adjust RSP back upward.
|
||||||
@@ -592,14 +640,14 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
|||||||
// Baldrdash should not use struct args.
|
// Baldrdash should not use struct args.
|
||||||
assert!(!call_conv.extends_baldrdash());
|
assert!(!call_conv.extends_baldrdash());
|
||||||
let mut insts = SmallVec::new();
|
let mut insts = SmallVec::new();
|
||||||
let arg0 = get_intreg_for_arg_systemv(&call_conv, 0).unwrap();
|
let arg0 = get_intreg_for_arg(&call_conv, 0, 0).unwrap();
|
||||||
let arg1 = get_intreg_for_arg_systemv(&call_conv, 1).unwrap();
|
let arg1 = get_intreg_for_arg(&call_conv, 1, 1).unwrap();
|
||||||
let arg2 = get_intreg_for_arg_systemv(&call_conv, 2).unwrap();
|
let arg2 = get_intreg_for_arg(&call_conv, 2, 2).unwrap();
|
||||||
// We need a register to load the address of `memcpy()` below and we
|
// We need a register to load the address of `memcpy()` below and we
|
||||||
// don't have a lowering context to allocate a temp here; so just use a
|
// don't have a lowering context to allocate a temp here; so just use a
|
||||||
// register we know we are free to mutate as part of this sequence
|
// register we know we are free to mutate as part of this sequence
|
||||||
// (because it is clobbered by the call as per the ABI anyway).
|
// (because it is clobbered by the call as per the ABI anyway).
|
||||||
let memcpy_addr = get_intreg_for_arg_systemv(&call_conv, 3).unwrap();
|
let memcpy_addr = get_intreg_for_arg(&call_conv, 3, 3).unwrap();
|
||||||
insts.push(Inst::gen_move(Writable::from_reg(arg0), dst, I64));
|
insts.push(Inst::gen_move(Writable::from_reg(arg0), dst, I64));
|
||||||
insts.push(Inst::gen_move(Writable::from_reg(arg1), src, I64));
|
insts.push(Inst::gen_move(Writable::from_reg(arg1), src, I64));
|
||||||
insts.extend(
|
insts.extend(
|
||||||
@@ -648,10 +696,9 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
|||||||
|
|
||||||
fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec<Writable<Reg>> {
|
fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec<Writable<Reg>> {
|
||||||
let mut caller_saved = vec![
|
let mut caller_saved = vec![
|
||||||
// Systemv calling convention:
|
// intersection of Systemv and FastCall calling conventions:
|
||||||
// - GPR: all except RBX, RBP, R12 to R15 (which are callee-saved).
|
// - GPR: all except RDI, RSI, RBX, RBP, R12 to R15.
|
||||||
Writable::from_reg(regs::rsi()),
|
// SysV adds RDI, RSI (FastCall makes these callee-saved).
|
||||||
Writable::from_reg(regs::rdi()),
|
|
||||||
Writable::from_reg(regs::rax()),
|
Writable::from_reg(regs::rax()),
|
||||||
Writable::from_reg(regs::rcx()),
|
Writable::from_reg(regs::rcx()),
|
||||||
Writable::from_reg(regs::rdx()),
|
Writable::from_reg(regs::rdx()),
|
||||||
@@ -659,25 +706,30 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
|||||||
Writable::from_reg(regs::r9()),
|
Writable::from_reg(regs::r9()),
|
||||||
Writable::from_reg(regs::r10()),
|
Writable::from_reg(regs::r10()),
|
||||||
Writable::from_reg(regs::r11()),
|
Writable::from_reg(regs::r11()),
|
||||||
// - XMM: all the registers!
|
// - XMM: XMM0-5. SysV adds the rest (XMM6-XMM15).
|
||||||
Writable::from_reg(regs::xmm0()),
|
Writable::from_reg(regs::xmm0()),
|
||||||
Writable::from_reg(regs::xmm1()),
|
Writable::from_reg(regs::xmm1()),
|
||||||
Writable::from_reg(regs::xmm2()),
|
Writable::from_reg(regs::xmm2()),
|
||||||
Writable::from_reg(regs::xmm3()),
|
Writable::from_reg(regs::xmm3()),
|
||||||
Writable::from_reg(regs::xmm4()),
|
Writable::from_reg(regs::xmm4()),
|
||||||
Writable::from_reg(regs::xmm5()),
|
Writable::from_reg(regs::xmm5()),
|
||||||
Writable::from_reg(regs::xmm6()),
|
|
||||||
Writable::from_reg(regs::xmm7()),
|
|
||||||
Writable::from_reg(regs::xmm8()),
|
|
||||||
Writable::from_reg(regs::xmm9()),
|
|
||||||
Writable::from_reg(regs::xmm10()),
|
|
||||||
Writable::from_reg(regs::xmm11()),
|
|
||||||
Writable::from_reg(regs::xmm12()),
|
|
||||||
Writable::from_reg(regs::xmm13()),
|
|
||||||
Writable::from_reg(regs::xmm14()),
|
|
||||||
Writable::from_reg(regs::xmm15()),
|
|
||||||
];
|
];
|
||||||
|
|
||||||
|
if !call_conv_of_callee.extends_windows_fastcall() {
|
||||||
|
caller_saved.push(Writable::from_reg(regs::rsi()));
|
||||||
|
caller_saved.push(Writable::from_reg(regs::rdi()));
|
||||||
|
caller_saved.push(Writable::from_reg(regs::xmm6()));
|
||||||
|
caller_saved.push(Writable::from_reg(regs::xmm7()));
|
||||||
|
caller_saved.push(Writable::from_reg(regs::xmm8()));
|
||||||
|
caller_saved.push(Writable::from_reg(regs::xmm9()));
|
||||||
|
caller_saved.push(Writable::from_reg(regs::xmm10()));
|
||||||
|
caller_saved.push(Writable::from_reg(regs::xmm11()));
|
||||||
|
caller_saved.push(Writable::from_reg(regs::xmm12()));
|
||||||
|
caller_saved.push(Writable::from_reg(regs::xmm13()));
|
||||||
|
caller_saved.push(Writable::from_reg(regs::xmm14()));
|
||||||
|
caller_saved.push(Writable::from_reg(regs::xmm15()));
|
||||||
|
}
|
||||||
|
|
||||||
if call_conv_of_callee.extends_baldrdash() {
|
if call_conv_of_callee.extends_baldrdash() {
|
||||||
caller_saved.push(Writable::from_reg(regs::r12()));
|
caller_saved.push(Writable::from_reg(regs::r12()));
|
||||||
caller_saved.push(Writable::from_reg(regs::r13()));
|
caller_saved.push(Writable::from_reg(regs::r13()));
|
||||||
@@ -739,49 +791,67 @@ impl From<StackAMode> for SyntheticAmode {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_intreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
|
fn get_intreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
|
||||||
match call_conv {
|
let is_fastcall = match call_conv {
|
||||||
CallConv::Fast
|
CallConv::Fast
|
||||||
| CallConv::Cold
|
| CallConv::Cold
|
||||||
| CallConv::SystemV
|
| CallConv::SystemV
|
||||||
| CallConv::BaldrdashSystemV
|
| CallConv::BaldrdashSystemV
|
||||||
| CallConv::Baldrdash2020 => {}
|
| CallConv::Baldrdash2020 => false,
|
||||||
_ => panic!("int args only supported for SysV calling convention"),
|
CallConv::WindowsFastcall => true,
|
||||||
|
_ => panic!("int args only supported for SysV or Fastcall calling convention"),
|
||||||
};
|
};
|
||||||
match idx {
|
|
||||||
0 => Some(regs::rdi()),
|
// Fastcall counts by absolute argument number; SysV counts by argument of
|
||||||
1 => Some(regs::rsi()),
|
// this (integer) class.
|
||||||
2 => Some(regs::rdx()),
|
let i = if is_fastcall { arg_idx } else { idx };
|
||||||
3 => Some(regs::rcx()),
|
match (i, is_fastcall) {
|
||||||
4 => Some(regs::r8()),
|
(0, false) => Some(regs::rdi()),
|
||||||
5 => Some(regs::r9()),
|
(1, false) => Some(regs::rsi()),
|
||||||
|
(2, false) => Some(regs::rdx()),
|
||||||
|
(3, false) => Some(regs::rcx()),
|
||||||
|
(4, false) => Some(regs::r8()),
|
||||||
|
(5, false) => Some(regs::r9()),
|
||||||
|
(0, true) => Some(regs::rcx()),
|
||||||
|
(1, true) => Some(regs::rdx()),
|
||||||
|
(2, true) => Some(regs::r8()),
|
||||||
|
(3, true) => Some(regs::r9()),
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_fltreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
|
fn get_fltreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
|
||||||
match call_conv {
|
let is_fastcall = match call_conv {
|
||||||
CallConv::Fast
|
CallConv::Fast
|
||||||
| CallConv::Cold
|
| CallConv::Cold
|
||||||
| CallConv::SystemV
|
| CallConv::SystemV
|
||||||
| CallConv::BaldrdashSystemV
|
| CallConv::BaldrdashSystemV
|
||||||
| CallConv::Baldrdash2020 => {}
|
| CallConv::Baldrdash2020 => false,
|
||||||
_ => panic!("float args only supported for SysV calling convention"),
|
CallConv::WindowsFastcall => true,
|
||||||
|
_ => panic!("float args only supported for SysV or Fastcall calling convention"),
|
||||||
};
|
};
|
||||||
match idx {
|
|
||||||
0 => Some(regs::xmm0()),
|
// Fastcall counts by absolute argument number; SysV counts by argument of
|
||||||
1 => Some(regs::xmm1()),
|
// this (floating-point) class.
|
||||||
2 => Some(regs::xmm2()),
|
let i = if is_fastcall { arg_idx } else { idx };
|
||||||
3 => Some(regs::xmm3()),
|
match (i, is_fastcall) {
|
||||||
4 => Some(regs::xmm4()),
|
(0, false) => Some(regs::xmm0()),
|
||||||
5 => Some(regs::xmm5()),
|
(1, false) => Some(regs::xmm1()),
|
||||||
6 => Some(regs::xmm6()),
|
(2, false) => Some(regs::xmm2()),
|
||||||
7 => Some(regs::xmm7()),
|
(3, false) => Some(regs::xmm3()),
|
||||||
|
(4, false) => Some(regs::xmm4()),
|
||||||
|
(5, false) => Some(regs::xmm5()),
|
||||||
|
(6, false) => Some(regs::xmm6()),
|
||||||
|
(7, false) => Some(regs::xmm7()),
|
||||||
|
(0, true) => Some(regs::xmm0()),
|
||||||
|
(1, true) => Some(regs::xmm1()),
|
||||||
|
(2, true) => Some(regs::xmm2()),
|
||||||
|
(3, true) => Some(regs::xmm3()),
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_intreg_for_retval_systemv(
|
fn get_intreg_for_retval(
|
||||||
call_conv: &CallConv,
|
call_conv: &CallConv,
|
||||||
intreg_idx: usize,
|
intreg_idx: usize,
|
||||||
retval_idx: usize,
|
retval_idx: usize,
|
||||||
@@ -799,11 +869,16 @@ fn get_intreg_for_retval_systemv(
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
|
CallConv::WindowsFastcall => match intreg_idx {
|
||||||
|
0 => Some(regs::rax()),
|
||||||
|
1 => Some(regs::rdx()), // The Rust ABI for i128s needs this.
|
||||||
|
_ => None,
|
||||||
|
},
|
||||||
|
CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_fltreg_for_retval_systemv(
|
fn get_fltreg_for_retval(
|
||||||
call_conv: &CallConv,
|
call_conv: &CallConv,
|
||||||
fltreg_idx: usize,
|
fltreg_idx: usize,
|
||||||
retval_idx: usize,
|
retval_idx: usize,
|
||||||
@@ -821,7 +896,11 @@ fn get_fltreg_for_retval_systemv(
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
|
CallConv::WindowsFastcall => match fltreg_idx {
|
||||||
|
0 => Some(regs::xmm0()),
|
||||||
|
_ => None,
|
||||||
|
},
|
||||||
|
CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -854,6 +933,21 @@ fn is_callee_save_baldrdash(r: RealReg) -> bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn is_callee_save_fastcall(r: RealReg) -> bool {
|
||||||
|
use regs::*;
|
||||||
|
match r.get_class() {
|
||||||
|
RegClass::I64 => match r.get_hw_encoding() as u8 {
|
||||||
|
ENC_RBX | ENC_RBP | ENC_RSI | ENC_RDI | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true,
|
||||||
|
_ => false,
|
||||||
|
},
|
||||||
|
RegClass::V128 => match r.get_hw_encoding() as u8 {
|
||||||
|
6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 => true,
|
||||||
|
_ => false,
|
||||||
|
},
|
||||||
|
_ => panic!("Unknown register class: {:?}", r.get_class()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
|
fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
|
||||||
let mut regs: Vec<Writable<RealReg>> = match call_conv {
|
let mut regs: Vec<Writable<RealReg>> = match call_conv {
|
||||||
CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => regs
|
CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => regs
|
||||||
@@ -869,7 +963,11 @@ fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<
|
|||||||
.cloned()
|
.cloned()
|
||||||
.filter(|r| is_callee_save_systemv(r.to_reg()))
|
.filter(|r| is_callee_save_systemv(r.to_reg()))
|
||||||
.collect(),
|
.collect(),
|
||||||
CallConv::WindowsFastcall => todo!("windows fastcall"),
|
CallConv::WindowsFastcall => regs
|
||||||
|
.iter()
|
||||||
|
.cloned()
|
||||||
|
.filter(|r| is_callee_save_fastcall(r.to_reg()))
|
||||||
|
.collect(),
|
||||||
CallConv::Probestack => todo!("probestack?"),
|
CallConv::Probestack => todo!("probestack?"),
|
||||||
};
|
};
|
||||||
// Sort registers for deterministic code output. We can do an unstable sort because the
|
// Sort registers for deterministic code output. We can do an unstable sort because the
|
||||||
@@ -877,3 +975,20 @@ fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<
|
|||||||
regs.sort_unstable_by_key(|r| r.to_reg().get_index());
|
regs.sort_unstable_by_key(|r| r.to_reg().get_index());
|
||||||
regs
|
regs
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn compute_clobber_size(clobbers: &Vec<Writable<RealReg>>) -> u32 {
|
||||||
|
let mut clobbered_size = 0;
|
||||||
|
for reg in clobbers {
|
||||||
|
match reg.to_reg().get_class() {
|
||||||
|
RegClass::I64 => {
|
||||||
|
clobbered_size += 8;
|
||||||
|
}
|
||||||
|
RegClass::V128 => {
|
||||||
|
clobbered_size = align_to(clobbered_size, 16);
|
||||||
|
clobbered_size += 16;
|
||||||
|
}
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
clobbered_size
|
||||||
|
}
|
||||||
|
|||||||
@@ -456,6 +456,7 @@ pub(crate) enum InstructionSet {
|
|||||||
Popcnt,
|
Popcnt,
|
||||||
Lzcnt,
|
Lzcnt,
|
||||||
BMI1,
|
BMI1,
|
||||||
|
#[allow(dead_code)] // never constructed (yet).
|
||||||
BMI2,
|
BMI2,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -23,11 +23,20 @@ use regalloc::{
|
|||||||
};
|
};
|
||||||
use std::string::String;
|
use std::string::String;
|
||||||
|
|
||||||
// Hardware encodings for a few registers.
|
// Hardware encodings (note the special rax, rcx, rdx, rbx order).
|
||||||
|
|
||||||
|
pub const ENC_RAX: u8 = 0;
|
||||||
|
pub const ENC_RCX: u8 = 1;
|
||||||
|
pub const ENC_RDX: u8 = 2;
|
||||||
pub const ENC_RBX: u8 = 3;
|
pub const ENC_RBX: u8 = 3;
|
||||||
pub const ENC_RSP: u8 = 4;
|
pub const ENC_RSP: u8 = 4;
|
||||||
pub const ENC_RBP: u8 = 5;
|
pub const ENC_RBP: u8 = 5;
|
||||||
|
pub const ENC_RSI: u8 = 6;
|
||||||
|
pub const ENC_RDI: u8 = 7;
|
||||||
|
pub const ENC_R8: u8 = 8;
|
||||||
|
pub const ENC_R9: u8 = 9;
|
||||||
|
pub const ENC_R10: u8 = 10;
|
||||||
|
pub const ENC_R11: u8 = 11;
|
||||||
pub const ENC_R12: u8 = 12;
|
pub const ENC_R12: u8 = 12;
|
||||||
pub const ENC_R13: u8 = 13;
|
pub const ENC_R13: u8 = 13;
|
||||||
pub const ENC_R14: u8 = 14;
|
pub const ENC_R14: u8 = 14;
|
||||||
@@ -38,31 +47,31 @@ fn gpr(enc: u8, index: u8) -> Reg {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn rsi() -> Reg {
|
pub(crate) fn rsi() -> Reg {
|
||||||
gpr(6, 16)
|
gpr(ENC_RSI, 16)
|
||||||
}
|
}
|
||||||
pub(crate) fn rdi() -> Reg {
|
pub(crate) fn rdi() -> Reg {
|
||||||
gpr(7, 17)
|
gpr(ENC_RDI, 17)
|
||||||
}
|
}
|
||||||
pub(crate) fn rax() -> Reg {
|
pub(crate) fn rax() -> Reg {
|
||||||
gpr(0, 18)
|
gpr(ENC_RAX, 18)
|
||||||
}
|
}
|
||||||
pub(crate) fn rcx() -> Reg {
|
pub(crate) fn rcx() -> Reg {
|
||||||
gpr(1, 19)
|
gpr(ENC_RCX, 19)
|
||||||
}
|
}
|
||||||
pub(crate) fn rdx() -> Reg {
|
pub(crate) fn rdx() -> Reg {
|
||||||
gpr(2, 20)
|
gpr(ENC_RDX, 20)
|
||||||
}
|
}
|
||||||
pub(crate) fn r8() -> Reg {
|
pub(crate) fn r8() -> Reg {
|
||||||
gpr(8, 21)
|
gpr(ENC_R8, 21)
|
||||||
}
|
}
|
||||||
pub(crate) fn r9() -> Reg {
|
pub(crate) fn r9() -> Reg {
|
||||||
gpr(9, 22)
|
gpr(ENC_R9, 22)
|
||||||
}
|
}
|
||||||
pub(crate) fn r10() -> Reg {
|
pub(crate) fn r10() -> Reg {
|
||||||
gpr(10, 23)
|
gpr(ENC_R10, 23)
|
||||||
}
|
}
|
||||||
pub(crate) fn r11() -> Reg {
|
pub(crate) fn r11() -> Reg {
|
||||||
gpr(11, 24)
|
gpr(ENC_R11, 24)
|
||||||
}
|
}
|
||||||
pub(crate) fn r12() -> Reg {
|
pub(crate) fn r12() -> Reg {
|
||||||
gpr(ENC_R12, 25)
|
gpr(ENC_R12, 25)
|
||||||
|
|||||||
@@ -124,19 +124,18 @@ use std::convert::TryFrom;
|
|||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
use std::mem;
|
use std::mem;
|
||||||
|
|
||||||
/// A location for an argument or return value.
|
/// A location for (part of) an argument or return value. These "storage slots"
|
||||||
#[derive(Clone, Copy, Debug)]
|
/// are specified for each register-sized part of an argument.
|
||||||
pub enum ABIArg {
|
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||||
/// In a real register (or set of registers).
|
pub enum ABIArgSlot {
|
||||||
|
/// In a real register.
|
||||||
Reg {
|
Reg {
|
||||||
/// Register(s) that hold this arg.
|
/// Register that holds this arg.
|
||||||
regs: ValueRegs<RealReg>,
|
reg: RealReg,
|
||||||
/// Value type of this arg.
|
/// Value type of this arg.
|
||||||
ty: ir::Type,
|
ty: ir::Type,
|
||||||
/// Should this arg be zero- or sign-extended?
|
/// Should this arg be zero- or sign-extended?
|
||||||
extension: ir::ArgumentExtension,
|
extension: ir::ArgumentExtension,
|
||||||
/// Purpose of this arg.
|
|
||||||
purpose: ir::ArgumentPurpose,
|
|
||||||
},
|
},
|
||||||
/// Arguments only: on stack, at given offset from SP at entry.
|
/// Arguments only: on stack, at given offset from SP at entry.
|
||||||
Stack {
|
Stack {
|
||||||
@@ -146,6 +145,26 @@ pub enum ABIArg {
|
|||||||
ty: ir::Type,
|
ty: ir::Type,
|
||||||
/// Should this arg be zero- or sign-extended?
|
/// Should this arg be zero- or sign-extended?
|
||||||
extension: ir::ArgumentExtension,
|
extension: ir::ArgumentExtension,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An ABIArg is composed of one or more parts. This allows for a CLIF-level
|
||||||
|
/// Value to be passed with its parts in more than one location at the ABI
|
||||||
|
/// level. For example, a 128-bit integer may be passed in two 64-bit registers,
|
||||||
|
/// or even a 64-bit register and a 64-bit stack slot, on a 64-bit machine. The
|
||||||
|
/// number of "parts" should correspond to the number of registers used to store
|
||||||
|
/// this type according to the machine backend.
|
||||||
|
///
|
||||||
|
/// As an invariant, the `purpose` for every part must match. As a further
|
||||||
|
/// invariant, a `StructArg` part cannot appear with any other part.
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub enum ABIArg {
|
||||||
|
/// Storage slots (registers or stack locations) for each part of the
|
||||||
|
/// argument value. The number of slots must equal the number of register
|
||||||
|
/// parts used to store a value of this type.
|
||||||
|
Slots {
|
||||||
|
/// Slots, one per register part.
|
||||||
|
slots: Vec<ABIArgSlot>,
|
||||||
/// Purpose of this arg.
|
/// Purpose of this arg.
|
||||||
purpose: ir::ArgumentPurpose,
|
purpose: ir::ArgumentPurpose,
|
||||||
},
|
},
|
||||||
@@ -167,21 +186,50 @@ pub enum ABIArg {
|
|||||||
|
|
||||||
impl ABIArg {
|
impl ABIArg {
|
||||||
/// Get the purpose of this arg.
|
/// Get the purpose of this arg.
|
||||||
fn get_purpose(self) -> ir::ArgumentPurpose {
|
fn get_purpose(&self) -> ir::ArgumentPurpose {
|
||||||
match self {
|
match self {
|
||||||
ABIArg::Reg { purpose, .. } => purpose,
|
&ABIArg::Slots { purpose, .. } => purpose,
|
||||||
ABIArg::Stack { purpose, .. } => purpose,
|
&ABIArg::StructArg { purpose, .. } => purpose,
|
||||||
ABIArg::StructArg { purpose, .. } => purpose,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Is this a StructArg?
|
/// Is this a StructArg?
|
||||||
fn is_struct_arg(self) -> bool {
|
fn is_struct_arg(&self) -> bool {
|
||||||
match self {
|
match self {
|
||||||
ABIArg::StructArg { .. } => true,
|
&ABIArg::StructArg { .. } => true,
|
||||||
_ => false,
|
_ => false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Create an ABIArg from one register.
|
||||||
|
pub fn reg(
|
||||||
|
reg: RealReg,
|
||||||
|
ty: ir::Type,
|
||||||
|
extension: ir::ArgumentExtension,
|
||||||
|
purpose: ir::ArgumentPurpose,
|
||||||
|
) -> ABIArg {
|
||||||
|
ABIArg::Slots {
|
||||||
|
slots: vec![ABIArgSlot::Reg { reg, ty, extension }],
|
||||||
|
purpose,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create an ABIArg from one stack slot.
|
||||||
|
pub fn stack(
|
||||||
|
offset: i64,
|
||||||
|
ty: ir::Type,
|
||||||
|
extension: ir::ArgumentExtension,
|
||||||
|
purpose: ir::ArgumentPurpose,
|
||||||
|
) -> ABIArg {
|
||||||
|
ABIArg::Slots {
|
||||||
|
slots: vec![ABIArgSlot::Stack {
|
||||||
|
offset,
|
||||||
|
ty,
|
||||||
|
extension,
|
||||||
|
}],
|
||||||
|
purpose,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Are we computing information about arguments or return values? Much of the
|
/// Are we computing information about arguments or return values? Much of the
|
||||||
@@ -275,6 +323,7 @@ pub trait ABIMachineSpec {
|
|||||||
/// index of the extra synthetic arg that was added.
|
/// index of the extra synthetic arg that was added.
|
||||||
fn compute_arg_locs(
|
fn compute_arg_locs(
|
||||||
call_conv: isa::CallConv,
|
call_conv: isa::CallConv,
|
||||||
|
flags: &settings::Flags,
|
||||||
params: &[ir::AbiParam],
|
params: &[ir::AbiParam],
|
||||||
args_or_rets: ArgsOrRets,
|
args_or_rets: ArgsOrRets,
|
||||||
add_ret_area_ptr: bool,
|
add_ret_area_ptr: bool,
|
||||||
@@ -461,11 +510,15 @@ struct ABISig {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl ABISig {
|
impl ABISig {
|
||||||
fn from_func_sig<M: ABIMachineSpec>(sig: &ir::Signature) -> CodegenResult<ABISig> {
|
fn from_func_sig<M: ABIMachineSpec>(
|
||||||
|
sig: &ir::Signature,
|
||||||
|
flags: &settings::Flags,
|
||||||
|
) -> CodegenResult<ABISig> {
|
||||||
// Compute args and retvals from signature. Handle retvals first,
|
// Compute args and retvals from signature. Handle retvals first,
|
||||||
// because we may need to add a return-area arg to the args.
|
// because we may need to add a return-area arg to the args.
|
||||||
let (rets, stack_ret_space, _) = M::compute_arg_locs(
|
let (rets, stack_ret_space, _) = M::compute_arg_locs(
|
||||||
sig.call_conv,
|
sig.call_conv,
|
||||||
|
flags,
|
||||||
&sig.returns,
|
&sig.returns,
|
||||||
ArgsOrRets::Rets,
|
ArgsOrRets::Rets,
|
||||||
/* extra ret-area ptr = */ false,
|
/* extra ret-area ptr = */ false,
|
||||||
@@ -473,6 +526,7 @@ impl ABISig {
|
|||||||
let need_stack_return_area = stack_ret_space > 0;
|
let need_stack_return_area = stack_ret_space > 0;
|
||||||
let (args, stack_arg_space, stack_ret_arg) = M::compute_arg_locs(
|
let (args, stack_arg_space, stack_ret_arg) = M::compute_arg_locs(
|
||||||
sig.call_conv,
|
sig.call_conv,
|
||||||
|
flags,
|
||||||
&sig.params,
|
&sig.params,
|
||||||
ArgsOrRets::Args,
|
ArgsOrRets::Args,
|
||||||
need_stack_return_area,
|
need_stack_return_area,
|
||||||
@@ -557,8 +611,11 @@ fn get_special_purpose_param_register(
|
|||||||
purpose: ir::ArgumentPurpose,
|
purpose: ir::ArgumentPurpose,
|
||||||
) -> Option<Reg> {
|
) -> Option<Reg> {
|
||||||
let idx = f.signature.special_param_index(purpose)?;
|
let idx = f.signature.special_param_index(purpose)?;
|
||||||
match abi.args[idx] {
|
match &abi.args[idx] {
|
||||||
ABIArg::Reg { regs, .. } => Some(regs.only_reg().unwrap().to_reg()),
|
&ABIArg::Slots { ref slots, .. } => match &slots[0] {
|
||||||
|
&ABIArgSlot::Reg { reg, .. } => Some(reg.to_reg()),
|
||||||
|
_ => None,
|
||||||
|
},
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -569,7 +626,7 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
|
|||||||
debug!("ABI: func signature {:?}", f.signature);
|
debug!("ABI: func signature {:?}", f.signature);
|
||||||
|
|
||||||
let ir_sig = ensure_struct_return_ptr_is_returned(&f.signature);
|
let ir_sig = ensure_struct_return_ptr_is_returned(&f.signature);
|
||||||
let sig = ABISig::from_func_sig::<M>(&ir_sig)?;
|
let sig = ABISig::from_func_sig::<M>(&ir_sig, &flags)?;
|
||||||
|
|
||||||
let call_conv = f.signature.call_conv;
|
let call_conv = f.signature.call_conv;
|
||||||
// Only these calling conventions are supported.
|
// Only these calling conventions are supported.
|
||||||
@@ -577,7 +634,8 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
|
|||||||
call_conv == isa::CallConv::SystemV
|
call_conv == isa::CallConv::SystemV
|
||||||
|| call_conv == isa::CallConv::Fast
|
|| call_conv == isa::CallConv::Fast
|
||||||
|| call_conv == isa::CallConv::Cold
|
|| call_conv == isa::CallConv::Cold
|
||||||
|| call_conv.extends_baldrdash(),
|
|| call_conv.extends_baldrdash()
|
||||||
|
|| call_conv.extends_windows_fastcall(),
|
||||||
"Unsupported calling convention: {:?}",
|
"Unsupported calling convention: {:?}",
|
||||||
call_conv
|
call_conv
|
||||||
);
|
);
|
||||||
@@ -776,19 +834,6 @@ fn ty_from_ty_hint_or_reg_class<M: ABIMachineSpec>(r: Reg, ty: Option<Type>) ->
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn gen_move_multi<M: ABIMachineSpec>(
|
|
||||||
dst: ValueRegs<Writable<Reg>>,
|
|
||||||
src: ValueRegs<Reg>,
|
|
||||||
ty: Type,
|
|
||||||
) -> SmallInstVec<M::I> {
|
|
||||||
let mut ret = smallvec![];
|
|
||||||
let (_, tys) = M::I::rc_for_type(ty).unwrap();
|
|
||||||
for ((&dst, &src), &ty) in dst.regs().iter().zip(src.regs().iter()).zip(tys.iter()) {
|
|
||||||
ret.push(M::gen_move(dst, src, ty));
|
|
||||||
}
|
|
||||||
ret
|
|
||||||
}
|
|
||||||
|
|
||||||
fn gen_load_stack_multi<M: ABIMachineSpec>(
|
fn gen_load_stack_multi<M: ABIMachineSpec>(
|
||||||
from: StackAMode,
|
from: StackAMode,
|
||||||
dst: ValueRegs<Writable<Reg>>,
|
dst: ValueRegs<Writable<Reg>>,
|
||||||
@@ -821,22 +866,6 @@ fn gen_store_stack_multi<M: ABIMachineSpec>(
|
|||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
|
|
||||||
fn gen_store_base_offset_multi<M: ABIMachineSpec>(
|
|
||||||
base: Reg,
|
|
||||||
mut offset: i32,
|
|
||||||
src: ValueRegs<Reg>,
|
|
||||||
ty: Type,
|
|
||||||
) -> SmallInstVec<M::I> {
|
|
||||||
let mut ret = smallvec![];
|
|
||||||
let (_, tys) = M::I::rc_for_type(ty).unwrap();
|
|
||||||
// N.B.: registers are given in the `ValueRegs` in target endian order.
|
|
||||||
for (&src, &ty) in src.regs().iter().zip(tys.iter()) {
|
|
||||||
ret.push(M::gen_store_base_offset(base, offset, src, ty));
|
|
||||||
offset += ty.bytes() as i32;
|
|
||||||
}
|
|
||||||
ret
|
|
||||||
}
|
|
||||||
|
|
||||||
fn ensure_struct_return_ptr_is_returned(sig: &ir::Signature) -> ir::Signature {
|
fn ensure_struct_return_ptr_is_returned(sig: &ir::Signature) -> ir::Signature {
|
||||||
let params_structret = sig
|
let params_structret = sig
|
||||||
.params
|
.params
|
||||||
@@ -892,10 +921,12 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
|||||||
|
|
||||||
fn liveins(&self) -> Set<RealReg> {
|
fn liveins(&self) -> Set<RealReg> {
|
||||||
let mut set: Set<RealReg> = Set::empty();
|
let mut set: Set<RealReg> = Set::empty();
|
||||||
for &arg in &self.sig.args {
|
for arg in &self.sig.args {
|
||||||
if let ABIArg::Reg { regs, .. } = arg {
|
if let &ABIArg::Slots { ref slots, .. } = arg {
|
||||||
for &r in regs.regs() {
|
for slot in slots {
|
||||||
set.insert(r);
|
if let ABIArgSlot::Reg { reg, .. } = slot {
|
||||||
|
set.insert(*reg);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -904,10 +935,12 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
|||||||
|
|
||||||
fn liveouts(&self) -> Set<RealReg> {
|
fn liveouts(&self) -> Set<RealReg> {
|
||||||
let mut set: Set<RealReg> = Set::empty();
|
let mut set: Set<RealReg> = Set::empty();
|
||||||
for &ret in &self.sig.rets {
|
for ret in &self.sig.rets {
|
||||||
if let ABIArg::Reg { regs, .. } = ret {
|
if let &ABIArg::Slots { ref slots, .. } = ret {
|
||||||
for &r in regs.regs() {
|
for slot in slots {
|
||||||
set.insert(r);
|
if let ABIArgSlot::Reg { reg, .. } = slot {
|
||||||
|
set.insert(*reg);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -935,30 +968,44 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
|||||||
idx: usize,
|
idx: usize,
|
||||||
into_regs: ValueRegs<Writable<Reg>>,
|
into_regs: ValueRegs<Writable<Reg>>,
|
||||||
) -> SmallInstVec<Self::I> {
|
) -> SmallInstVec<Self::I> {
|
||||||
|
let mut insts = smallvec![];
|
||||||
match &self.sig.args[idx] {
|
match &self.sig.args[idx] {
|
||||||
|
&ABIArg::Slots { ref slots, .. } => {
|
||||||
|
assert_eq!(into_regs.len(), slots.len());
|
||||||
|
for (slot, into_reg) in slots.iter().zip(into_regs.regs().iter()) {
|
||||||
|
match slot {
|
||||||
// Extension mode doesn't matter (we're copying out, not in; we
|
// Extension mode doesn't matter (we're copying out, not in; we
|
||||||
// ignore high bits by convention).
|
// ignore high bits by convention).
|
||||||
&ABIArg::Reg { regs, ty, .. } => {
|
&ABIArgSlot::Reg { reg, ty, .. } => {
|
||||||
gen_move_multi::<M>(into_regs, regs.map(|r| r.to_reg()), ty)
|
insts.push(M::gen_move(*into_reg, reg.to_reg(), ty));
|
||||||
}
|
}
|
||||||
&ABIArg::Stack { offset, ty, .. } => gen_load_stack_multi::<M>(
|
&ABIArgSlot::Stack { offset, ty, .. } => {
|
||||||
|
insts.push(M::gen_load_stack(
|
||||||
StackAMode::FPOffset(
|
StackAMode::FPOffset(
|
||||||
M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
|
M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
|
||||||
ty,
|
ty,
|
||||||
),
|
),
|
||||||
into_regs,
|
*into_reg,
|
||||||
ty,
|
ty,
|
||||||
),
|
));
|
||||||
&ABIArg::StructArg { offset, .. } => smallvec![M::gen_get_stack_addr(
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
&ABIArg::StructArg { offset, .. } => {
|
||||||
|
let into_reg = into_regs.only_reg().unwrap();
|
||||||
|
insts.push(M::gen_get_stack_addr(
|
||||||
StackAMode::FPOffset(
|
StackAMode::FPOffset(
|
||||||
M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
|
M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
|
||||||
I8,
|
I8,
|
||||||
),
|
),
|
||||||
into_regs.only_reg().unwrap(),
|
into_reg,
|
||||||
I8,
|
I8,
|
||||||
)],
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
insts
|
||||||
|
}
|
||||||
|
|
||||||
fn arg_is_needed_in_body(&self, idx: usize) -> bool {
|
fn arg_is_needed_in_body(&self, idx: usize) -> bool {
|
||||||
match self.sig.args[idx].get_purpose() {
|
match self.sig.args[idx].get_purpose() {
|
||||||
@@ -978,41 +1025,38 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
|||||||
let mut ret = smallvec![];
|
let mut ret = smallvec![];
|
||||||
let word_bits = M::word_bits() as u8;
|
let word_bits = M::word_bits() as u8;
|
||||||
match &self.sig.rets[idx] {
|
match &self.sig.rets[idx] {
|
||||||
&ABIArg::Reg {
|
&ABIArg::Slots { ref slots, .. } => {
|
||||||
regs,
|
assert_eq!(from_regs.len(), slots.len());
|
||||||
ty,
|
for (slot, from_reg) in slots.iter().zip(from_regs.regs().iter()) {
|
||||||
extension,
|
match slot {
|
||||||
..
|
&ABIArgSlot::Reg {
|
||||||
|
reg, ty, extension, ..
|
||||||
} => {
|
} => {
|
||||||
let from_bits = ty_bits(ty) as u8;
|
let from_bits = ty_bits(ty) as u8;
|
||||||
let dest_regs = writable_value_regs(regs.map(|r| r.to_reg()));
|
|
||||||
let ext = M::get_ext_mode(self.sig.call_conv, extension);
|
let ext = M::get_ext_mode(self.sig.call_conv, extension);
|
||||||
match (ext, from_bits) {
|
match (ext, from_bits) {
|
||||||
(ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
|
(ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
|
||||||
if n < word_bits =>
|
if n < word_bits =>
|
||||||
{
|
{
|
||||||
let signed = ext == ArgumentExtension::Sext;
|
let signed = ext == ArgumentExtension::Sext;
|
||||||
let dest_reg = dest_regs
|
|
||||||
.only_reg()
|
|
||||||
.expect("extension only possible from one-reg value");
|
|
||||||
let from_reg = from_regs
|
|
||||||
.only_reg()
|
|
||||||
.expect("extension only possible from one-reg value");
|
|
||||||
ret.push(M::gen_extend(
|
ret.push(M::gen_extend(
|
||||||
dest_reg,
|
Writable::from_reg(reg.to_reg()),
|
||||||
from_reg.to_reg(),
|
from_reg.to_reg(),
|
||||||
signed,
|
signed,
|
||||||
from_bits,
|
from_bits,
|
||||||
/* to_bits = */ word_bits,
|
/* to_bits = */ word_bits,
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
_ => ret.extend(
|
_ => {
|
||||||
gen_move_multi::<M>(dest_regs, non_writable_value_regs(from_regs), ty)
|
ret.push(M::gen_move(
|
||||||
.into_iter(),
|
Writable::from_reg(reg.to_reg()),
|
||||||
),
|
from_reg.to_reg(),
|
||||||
|
ty,
|
||||||
|
));
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
&ABIArg::Stack {
|
&ABIArgSlot::Stack {
|
||||||
offset,
|
offset,
|
||||||
ty,
|
ty,
|
||||||
extension,
|
extension,
|
||||||
@@ -1023,21 +1067,19 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
|||||||
// A machine ABI implementation should ensure that stack frames
|
// A machine ABI implementation should ensure that stack frames
|
||||||
// have "reasonable" size. All current ABIs for machinst
|
// have "reasonable" size. All current ABIs for machinst
|
||||||
// backends (aarch64 and x64) enforce a 128MB limit.
|
// backends (aarch64 and x64) enforce a 128MB limit.
|
||||||
let off = i32::try_from(offset)
|
let off = i32::try_from(offset).expect(
|
||||||
.expect("Argument stack offset greater than 2GB; should hit impl limit first");
|
"Argument stack offset greater than 2GB; should hit impl limit first",
|
||||||
|
);
|
||||||
let ext = M::get_ext_mode(self.sig.call_conv, extension);
|
let ext = M::get_ext_mode(self.sig.call_conv, extension);
|
||||||
// Trash the from_reg; it should be its last use.
|
// Trash the from_reg; it should be its last use.
|
||||||
match (ext, from_bits) {
|
match (ext, from_bits) {
|
||||||
(ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
|
(ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
|
||||||
if n < word_bits =>
|
if n < word_bits =>
|
||||||
{
|
{
|
||||||
let from_reg = from_regs
|
|
||||||
.only_reg()
|
|
||||||
.expect("extension only possible from one-reg value");
|
|
||||||
assert_eq!(M::word_reg_class(), from_reg.to_reg().get_class());
|
assert_eq!(M::word_reg_class(), from_reg.to_reg().get_class());
|
||||||
let signed = ext == ArgumentExtension::Sext;
|
let signed = ext == ArgumentExtension::Sext;
|
||||||
ret.push(M::gen_extend(
|
ret.push(M::gen_extend(
|
||||||
from_reg,
|
Writable::from_reg(from_reg.to_reg()),
|
||||||
from_reg.to_reg(),
|
from_reg.to_reg(),
|
||||||
signed,
|
signed,
|
||||||
from_bits,
|
from_bits,
|
||||||
@@ -1048,17 +1090,19 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
|||||||
}
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
};
|
};
|
||||||
ret.extend(
|
ret.push(M::gen_store_base_offset(
|
||||||
gen_store_base_offset_multi::<M>(
|
|
||||||
self.ret_area_ptr.unwrap().to_reg(),
|
self.ret_area_ptr.unwrap().to_reg(),
|
||||||
off,
|
off,
|
||||||
non_writable_value_regs(from_regs),
|
from_reg.to_reg(),
|
||||||
ty,
|
ty,
|
||||||
)
|
));
|
||||||
.into_iter(),
|
}
|
||||||
);
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
&ABIArg::StructArg { .. } => {
|
||||||
|
panic!("StructArg in return position is unsupported");
|
||||||
}
|
}
|
||||||
&ABIArg::StructArg { .. } => panic!("Unexpected StructArg location for return value"),
|
|
||||||
}
|
}
|
||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
@@ -1345,22 +1389,32 @@ fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Wr
|
|||||||
// Compute uses: all arg regs.
|
// Compute uses: all arg regs.
|
||||||
let mut uses = Vec::new();
|
let mut uses = Vec::new();
|
||||||
for arg in &sig.args {
|
for arg in &sig.args {
|
||||||
match arg {
|
if let &ABIArg::Slots { ref slots, .. } = arg {
|
||||||
&ABIArg::Reg { regs, .. } => uses.extend(regs.regs().iter().map(|r| r.to_reg())),
|
for slot in slots {
|
||||||
|
match slot {
|
||||||
|
&ABIArgSlot::Reg { reg, .. } => {
|
||||||
|
uses.push(reg.to_reg());
|
||||||
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Compute defs: all retval regs, and all caller-save (clobbered) regs.
|
// Compute defs: all retval regs, and all caller-save (clobbered) regs.
|
||||||
let mut defs = M::get_regs_clobbered_by_call(sig.call_conv);
|
let mut defs = M::get_regs_clobbered_by_call(sig.call_conv);
|
||||||
for ret in &sig.rets {
|
for ret in &sig.rets {
|
||||||
match ret {
|
if let &ABIArg::Slots { ref slots, .. } = ret {
|
||||||
&ABIArg::Reg { regs, .. } => {
|
for slot in slots {
|
||||||
defs.extend(regs.regs().iter().map(|r| Writable::from_reg(r.to_reg())))
|
match slot {
|
||||||
|
&ABIArgSlot::Reg { reg, .. } => {
|
||||||
|
defs.push(Writable::from_reg(reg.to_reg()));
|
||||||
}
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
(uses, defs)
|
(uses, defs)
|
||||||
}
|
}
|
||||||
@@ -1406,7 +1460,7 @@ impl<M: ABIMachineSpec> ABICallerImpl<M> {
|
|||||||
flags: &settings::Flags,
|
flags: &settings::Flags,
|
||||||
) -> CodegenResult<ABICallerImpl<M>> {
|
) -> CodegenResult<ABICallerImpl<M>> {
|
||||||
let ir_sig = ensure_struct_return_ptr_is_returned(sig);
|
let ir_sig = ensure_struct_return_ptr_is_returned(sig);
|
||||||
let sig = ABISig::from_func_sig::<M>(&ir_sig)?;
|
let sig = ABISig::from_func_sig::<M>(&ir_sig, flags)?;
|
||||||
let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig);
|
let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig);
|
||||||
Ok(ABICallerImpl {
|
Ok(ABICallerImpl {
|
||||||
ir_sig,
|
ir_sig,
|
||||||
@@ -1431,7 +1485,7 @@ impl<M: ABIMachineSpec> ABICallerImpl<M> {
|
|||||||
flags: &settings::Flags,
|
flags: &settings::Flags,
|
||||||
) -> CodegenResult<ABICallerImpl<M>> {
|
) -> CodegenResult<ABICallerImpl<M>> {
|
||||||
let ir_sig = ensure_struct_return_ptr_is_returned(sig);
|
let ir_sig = ensure_struct_return_ptr_is_returned(sig);
|
||||||
let sig = ABISig::from_func_sig::<M>(&ir_sig)?;
|
let sig = ABISig::from_func_sig::<M>(&ir_sig, flags)?;
|
||||||
let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig);
|
let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig);
|
||||||
Ok(ABICallerImpl {
|
Ok(ABICallerImpl {
|
||||||
ir_sig,
|
ir_sig,
|
||||||
@@ -1501,15 +1555,15 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
|
|||||||
let word_rc = M::word_reg_class();
|
let word_rc = M::word_reg_class();
|
||||||
let word_bits = M::word_bits() as usize;
|
let word_bits = M::word_bits() as usize;
|
||||||
match &self.sig.args[idx] {
|
match &self.sig.args[idx] {
|
||||||
&ABIArg::Reg {
|
&ABIArg::Slots { ref slots, .. } => {
|
||||||
regs,
|
assert_eq!(from_regs.len(), slots.len());
|
||||||
ty,
|
for (slot, from_reg) in slots.iter().zip(from_regs.regs().iter()) {
|
||||||
extension,
|
match slot {
|
||||||
..
|
&ABIArgSlot::Reg {
|
||||||
|
reg, ty, extension, ..
|
||||||
} => {
|
} => {
|
||||||
let ext = M::get_ext_mode(self.sig.call_conv, extension);
|
let ext = M::get_ext_mode(self.sig.call_conv, extension);
|
||||||
if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
|
if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
|
||||||
let reg = regs.only_reg().unwrap();
|
|
||||||
assert_eq!(word_rc, reg.get_class());
|
assert_eq!(word_rc, reg.get_class());
|
||||||
let signed = match ext {
|
let signed = match ext {
|
||||||
ir::ArgumentExtension::Uext => false,
|
ir::ArgumentExtension::Uext => false,
|
||||||
@@ -1518,22 +1572,20 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
|
|||||||
};
|
};
|
||||||
ctx.emit(M::gen_extend(
|
ctx.emit(M::gen_extend(
|
||||||
Writable::from_reg(reg.to_reg()),
|
Writable::from_reg(reg.to_reg()),
|
||||||
from_regs.only_reg().unwrap(),
|
*from_reg,
|
||||||
signed,
|
signed,
|
||||||
ty_bits(ty) as u8,
|
ty_bits(ty) as u8,
|
||||||
word_bits as u8,
|
word_bits as u8,
|
||||||
));
|
));
|
||||||
} else {
|
} else {
|
||||||
for insn in gen_move_multi::<M>(
|
ctx.emit(M::gen_move(
|
||||||
writable_value_regs(regs.map(|r| r.to_reg())),
|
Writable::from_reg(reg.to_reg()),
|
||||||
from_regs,
|
*from_reg,
|
||||||
ty,
|
ty,
|
||||||
) {
|
));
|
||||||
ctx.emit(insn);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
&ABIArgSlot::Stack {
|
||||||
&ABIArg::Stack {
|
|
||||||
offset,
|
offset,
|
||||||
ty,
|
ty,
|
||||||
extension,
|
extension,
|
||||||
@@ -1542,9 +1594,6 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
|
|||||||
let mut ty = ty;
|
let mut ty = ty;
|
||||||
let ext = M::get_ext_mode(self.sig.call_conv, extension);
|
let ext = M::get_ext_mode(self.sig.call_conv, extension);
|
||||||
if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
|
if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
|
||||||
let from_reg = from_regs
|
|
||||||
.only_reg()
|
|
||||||
.expect("only one reg for sub-word value width");
|
|
||||||
assert_eq!(word_rc, from_reg.get_class());
|
assert_eq!(word_rc, from_reg.get_class());
|
||||||
let signed = match ext {
|
let signed = match ext {
|
||||||
ir::ArgumentExtension::Uext => false,
|
ir::ArgumentExtension::Uext => false,
|
||||||
@@ -1555,8 +1604,8 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
|
|||||||
// treat high bits as undefined for values in registers, so this
|
// treat high bits as undefined for values in registers, so this
|
||||||
// is safe, even for an argument that is nominally read-only.
|
// is safe, even for an argument that is nominally read-only.
|
||||||
ctx.emit(M::gen_extend(
|
ctx.emit(M::gen_extend(
|
||||||
Writable::from_reg(from_reg),
|
Writable::from_reg(*from_reg),
|
||||||
from_reg,
|
*from_reg,
|
||||||
signed,
|
signed,
|
||||||
ty_bits(ty) as u8,
|
ty_bits(ty) as u8,
|
||||||
word_bits as u8,
|
word_bits as u8,
|
||||||
@@ -1564,10 +1613,13 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
|
|||||||
// Store the extended version.
|
// Store the extended version.
|
||||||
ty = M::word_type();
|
ty = M::word_type();
|
||||||
}
|
}
|
||||||
for insn in
|
ctx.emit(M::gen_store_stack(
|
||||||
gen_store_stack_multi::<M>(StackAMode::SPOffset(offset, ty), from_regs, ty)
|
StackAMode::SPOffset(offset, ty),
|
||||||
{
|
*from_reg,
|
||||||
ctx.emit(insn);
|
ty,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
&ABIArg::StructArg { offset, size, .. } => {
|
&ABIArg::StructArg { offset, size, .. } => {
|
||||||
@@ -1618,24 +1670,29 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
|
|||||||
into_regs: ValueRegs<Writable<Reg>>,
|
into_regs: ValueRegs<Writable<Reg>>,
|
||||||
) {
|
) {
|
||||||
match &self.sig.rets[idx] {
|
match &self.sig.rets[idx] {
|
||||||
|
&ABIArg::Slots { ref slots, .. } => {
|
||||||
|
assert_eq!(into_regs.len(), slots.len());
|
||||||
|
for (slot, into_reg) in slots.iter().zip(into_regs.regs().iter()) {
|
||||||
|
match slot {
|
||||||
// Extension mode doesn't matter because we're copying out, not in,
|
// Extension mode doesn't matter because we're copying out, not in,
|
||||||
// and we ignore high bits in our own registers by convention.
|
// and we ignore high bits in our own registers by convention.
|
||||||
&ABIArg::Reg { regs, ty, .. } => {
|
&ABIArgSlot::Reg { reg, ty, .. } => {
|
||||||
for insn in gen_move_multi::<M>(into_regs, regs.map(|r| r.to_reg()), ty) {
|
ctx.emit(M::gen_move(*into_reg, reg.to_reg(), ty));
|
||||||
ctx.emit(insn);
|
|
||||||
}
|
}
|
||||||
}
|
&ABIArgSlot::Stack { offset, ty, .. } => {
|
||||||
&ABIArg::Stack { offset, ty, .. } => {
|
|
||||||
let ret_area_base = self.sig.stack_arg_space;
|
let ret_area_base = self.sig.stack_arg_space;
|
||||||
for insn in gen_load_stack_multi::<M>(
|
ctx.emit(M::gen_load_stack(
|
||||||
StackAMode::SPOffset(offset + ret_area_base, ty),
|
StackAMode::SPOffset(offset + ret_area_base, ty),
|
||||||
into_regs,
|
*into_reg,
|
||||||
ty,
|
ty,
|
||||||
) {
|
));
|
||||||
ctx.emit(insn);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
&ABIArg::StructArg { .. } => panic!("Unexpected StructArg location for return value"),
|
}
|
||||||
|
}
|
||||||
|
&ABIArg::StructArg { .. } => {
|
||||||
|
panic!("StructArg not supported in return position");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
use super::{InsnOutput, LowerCtx, VCodeInst, ValueRegs};
|
use super::{InsnOutput, LowerCtx, VCodeInst, ValueRegs};
|
||||||
use crate::ir::Type;
|
use crate::ir::Type;
|
||||||
use regalloc::{Reg, Writable};
|
use regalloc::{Reg, Writable};
|
||||||
|
use std::ops::{Add, BitAnd, Not, Sub};
|
||||||
|
|
||||||
/// Returns the size (in bits) of a given type.
|
/// Returns the size (in bits) of a given type.
|
||||||
pub fn ty_bits(ty: Type) -> usize {
|
pub fn ty_bits(ty: Type) -> usize {
|
||||||
@@ -26,3 +27,17 @@ pub(crate) fn get_output_reg<I: VCodeInst, C: LowerCtx<I = I>>(
|
|||||||
) -> ValueRegs<Writable<Reg>> {
|
) -> ValueRegs<Writable<Reg>> {
|
||||||
ctx.get_output(spec.insn, spec.output)
|
ctx.get_output(spec.insn, spec.output)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Align a size up to a power-of-two alignment.
|
||||||
|
pub(crate) fn align_to<N>(x: N, alignment: N) -> N
|
||||||
|
where
|
||||||
|
N: Not<Output = N>
|
||||||
|
+ BitAnd<N, Output = N>
|
||||||
|
+ Add<N, Output = N>
|
||||||
|
+ Sub<N, Output = N>
|
||||||
|
+ From<u8>
|
||||||
|
+ Copy,
|
||||||
|
{
|
||||||
|
let alignment_mask = alignment - 1.into();
|
||||||
|
(x + alignment_mask) & !alignment_mask
|
||||||
|
}
|
||||||
|
|||||||
@@ -175,11 +175,13 @@ impl<R: Clone + Copy + Debug + PartialEq + Eq + InvalidSentinel> ValueRegs<R> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Create a writable ValueRegs.
|
/// Create a writable ValueRegs.
|
||||||
|
#[allow(dead_code)]
|
||||||
pub(crate) fn writable_value_regs(regs: ValueRegs<Reg>) -> ValueRegs<Writable<Reg>> {
|
pub(crate) fn writable_value_regs(regs: ValueRegs<Reg>) -> ValueRegs<Writable<Reg>> {
|
||||||
regs.map(|r| Writable::from_reg(r))
|
regs.map(|r| Writable::from_reg(r))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Strip a writable ValueRegs down to a readonly ValueRegs.
|
/// Strip a writable ValueRegs down to a readonly ValueRegs.
|
||||||
|
#[allow(dead_code)]
|
||||||
pub(crate) fn non_writable_value_regs(regs: ValueRegs<Writable<Reg>>) -> ValueRegs<Reg> {
|
pub(crate) fn non_writable_value_regs(regs: ValueRegs<Writable<Reg>>) -> ValueRegs<Reg> {
|
||||||
regs.map(|r| r.to_reg())
|
regs.map(|r| r.to_reg())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -398,6 +398,7 @@ use_pinned_reg_as_heap_base = false
|
|||||||
enable_simd = false
|
enable_simd = false
|
||||||
enable_atomics = true
|
enable_atomics = true
|
||||||
enable_safepoints = false
|
enable_safepoints = false
|
||||||
|
enable_llvm_abi_extensions = false
|
||||||
emit_all_ones_funcaddrs = false
|
emit_all_ones_funcaddrs = false
|
||||||
enable_probestack = true
|
enable_probestack = true
|
||||||
probestack_func_adjusts_sp = false
|
probestack_func_adjusts_sp = false
|
||||||
|
|||||||
299
cranelift/filetests/filetests/isa/x64/fastcall.clif
Normal file
299
cranelift/filetests/filetests/isa/x64/fastcall.clif
Normal file
@@ -0,0 +1,299 @@
|
|||||||
|
test compile
|
||||||
|
set enable_llvm_abi_extensions=true
|
||||||
|
target x86_64
|
||||||
|
feature "experimental_x64"
|
||||||
|
|
||||||
|
function %f0(i64, i64, i64, i64) -> i64 windows_fastcall {
|
||||||
|
block0(v0: i64, v1: i64, v2: i64, v3: i64):
|
||||||
|
return v0
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: pushq %rbp
|
||||||
|
; nextln: movq %rsp, %rbp
|
||||||
|
; nextln: movq %rcx, %rax
|
||||||
|
; nextln: movq %rbp, %rsp
|
||||||
|
; nextln: popq %rbp
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %f1(i64, i64, i64, i64) -> i64 windows_fastcall {
|
||||||
|
block0(v0: i64, v1: i64, v2: i64, v3: i64):
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: pushq %rbp
|
||||||
|
; nextln: movq %rsp, %rbp
|
||||||
|
; nextln: movq %rdx, %rax
|
||||||
|
; nextln: movq %rbp, %rsp
|
||||||
|
; nextln: popq %rbp
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %f2(i64, i64, i64, i64) -> i64 windows_fastcall {
|
||||||
|
block0(v0: i64, v1: i64, v2: i64, v3: i64):
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: pushq %rbp
|
||||||
|
; nextln: movq %rsp, %rbp
|
||||||
|
; nextln: movq %r8, %rax
|
||||||
|
; nextln: movq %rbp, %rsp
|
||||||
|
; nextln: popq %rbp
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %f3(i64, i64, i64, i64) -> i64 windows_fastcall {
|
||||||
|
block0(v0: i64, v1: i64, v2: i64, v3: i64):
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: pushq %rbp
|
||||||
|
; nextln: movq %rsp, %rbp
|
||||||
|
; nextln: movq %r9, %rax
|
||||||
|
; nextln: movq %rbp, %rsp
|
||||||
|
; nextln: popq %rbp
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %f4(i64, i64, f64, i64) -> f64 windows_fastcall {
|
||||||
|
block0(v0: i64, v1: i64, v2: f64, v3: i64):
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: pushq %rbp
|
||||||
|
; nextln: movq %rsp, %rbp
|
||||||
|
; nextln: movaps %xmm2, %xmm0
|
||||||
|
; nextln: movq %rbp, %rsp
|
||||||
|
; nextln: popq %rbp
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %f5(i64, i64, f64, i64) -> i64 windows_fastcall {
|
||||||
|
block0(v0: i64, v1: i64, v2: f64, v3: i64):
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: pushq %rbp
|
||||||
|
; nextln: movq %rsp, %rbp
|
||||||
|
; nextln: movq %r9, %rax
|
||||||
|
; nextln: movq %rbp, %rsp
|
||||||
|
; nextln: popq %rbp
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %f6(i64, i64, i64, i64, i64, i64) -> i64 windows_fastcall {
|
||||||
|
block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64):
|
||||||
|
return v5
|
||||||
|
}
|
||||||
|
|
||||||
|
;; This is truly odd (because of the regalloc ordering), but it works. Note
|
||||||
|
;; that we're spilling and using rsi, which is a callee-save in fastcall, because
|
||||||
|
;; the regalloc order is optimized for SysV. Also note that because we copy args
|
||||||
|
;; out of their input locations to separate vregs, we have a spurious load
|
||||||
|
;; from [rbp+48]. Ordinarily these moves are coalesced because the dest vreg
|
||||||
|
;; is allocated as a caller-save (volatile), but here again we allocate rsi
|
||||||
|
;; first and so have to spill it (and consequently don't coalesce).
|
||||||
|
;;
|
||||||
|
;; TODO(#2704): fix regalloc's register priority ordering!
|
||||||
|
|
||||||
|
; check: pushq %rbp
|
||||||
|
; nextln: movq %rsp, %rbp
|
||||||
|
; nextln: subq $$16, %rsp
|
||||||
|
; nextln: movq %rsi, 0(%rsp)
|
||||||
|
; nextln: virtual_sp_offset_adjust 16
|
||||||
|
; nextln: movq 48(%rbp), %rsi
|
||||||
|
; nextln: movq 56(%rbp), %rsi
|
||||||
|
; nextln: movq %rsi, %rax
|
||||||
|
; nextln: movq 0(%rsp), %rsi
|
||||||
|
; nextln: addq $$16, %rsp
|
||||||
|
; nextln: movq %rbp, %rsp
|
||||||
|
; nextln: popq %rbp
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %f7(i128, i64, i128, i128) -> i128 windows_fastcall {
|
||||||
|
block0(v0: i128, v1: i64, v2: i128, v3: i128):
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
|
||||||
|
;; Again, terrible regalloc behavior. The important part is that `v3` comes
|
||||||
|
;; from [rbp+56] and [rbp+64], i.e., the second and third non-shadow
|
||||||
|
;; stack slots.
|
||||||
|
|
||||||
|
; check: pushq %rbp
|
||||||
|
; nextln: movq %rsp, %rbp
|
||||||
|
; nextln: subq $$16, %rsp
|
||||||
|
; nextln: movq %rsi, 0(%rsp)
|
||||||
|
; nextln: movq %rdi, 8(%rsp)
|
||||||
|
; nextln: virtual_sp_offset_adjust 16
|
||||||
|
; nextln: movq 48(%rbp), %rsi
|
||||||
|
; nextln: movq 56(%rbp), %rsi
|
||||||
|
; nextln: movq 64(%rbp), %rdi
|
||||||
|
; nextln: movq %rsi, %rax
|
||||||
|
; nextln: movq %rdi, %rdx
|
||||||
|
; nextln: movq 0(%rsp), %rsi
|
||||||
|
; nextln: movq 8(%rsp), %rdi
|
||||||
|
; nextln: addq $$16, %rsp
|
||||||
|
; nextln: movq %rbp, %rsp
|
||||||
|
; nextln: popq %rbp
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %f8(i64) -> i64 windows_fastcall {
|
||||||
|
sig0 = (i64, i64, f64, f64, i64, i64) -> i64 windows_fastcall
|
||||||
|
fn0 = %g sig0
|
||||||
|
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = fcvt_from_sint.f64 v0
|
||||||
|
v2 = call fn0(v0, v0, v1, v1, v0, v0)
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: pushq %rbp
|
||||||
|
; nextln: movq %rsp, %rbp
|
||||||
|
; nextln: subq $$16, %rsp
|
||||||
|
; nextln: movq %rsi, 0(%rsp)
|
||||||
|
; nextln: virtual_sp_offset_adjust 16
|
||||||
|
; nextln: movq %rcx, %rsi
|
||||||
|
; nextln: cvtsi2sd %rsi, %xmm3
|
||||||
|
; nextln: subq $$48, %rsp
|
||||||
|
; nextln: virtual_sp_offset_adjust 48
|
||||||
|
; nextln: movq %rsi, %rcx
|
||||||
|
; nextln: movq %rsi, %rdx
|
||||||
|
; nextln: movaps %xmm3, %xmm2
|
||||||
|
; nextln: movq %rsi, 32(%rsp)
|
||||||
|
; nextln: movq %rsi, 40(%rsp)
|
||||||
|
; nextln: load_ext_name %g+0, %rsi
|
||||||
|
; nextln: call *%rsi
|
||||||
|
; nextln: addq $$48, %rsp
|
||||||
|
; nextln: virtual_sp_offset_adjust -48
|
||||||
|
; nextln: movq 0(%rsp), %rsi
|
||||||
|
; nextln: addq $$16, %rsp
|
||||||
|
; nextln: movq %rbp, %rsp
|
||||||
|
; nextln: popq %rbp
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %f9(i64) -> f64 windows_fastcall {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = load.f64 v0+0
|
||||||
|
v2 = load.f64 v0+8
|
||||||
|
v3 = load.f64 v0+16
|
||||||
|
v4 = load.f64 v0+24
|
||||||
|
v5 = load.f64 v0+32
|
||||||
|
v6 = load.f64 v0+40
|
||||||
|
v7 = load.f64 v0+48
|
||||||
|
v8 = load.f64 v0+56
|
||||||
|
v9 = load.f64 v0+64
|
||||||
|
v10 = load.f64 v0+72
|
||||||
|
v11 = load.f64 v0+80
|
||||||
|
v12 = load.f64 v0+88
|
||||||
|
v13 = load.f64 v0+96
|
||||||
|
v14 = load.f64 v0+104
|
||||||
|
v15 = load.f64 v0+112
|
||||||
|
v16 = load.f64 v0+120
|
||||||
|
v17 = load.f64 v0+128
|
||||||
|
v18 = load.f64 v0+136
|
||||||
|
v19 = load.f64 v0+144
|
||||||
|
v20 = load.f64 v0+152
|
||||||
|
|
||||||
|
v21 = fadd.f64 v1, v2
|
||||||
|
v22 = fadd.f64 v3, v4
|
||||||
|
v23 = fadd.f64 v5, v6
|
||||||
|
v24 = fadd.f64 v7, v8
|
||||||
|
v25 = fadd.f64 v9, v10
|
||||||
|
v26 = fadd.f64 v11, v12
|
||||||
|
v27 = fadd.f64 v13, v14
|
||||||
|
v28 = fadd.f64 v15, v16
|
||||||
|
v29 = fadd.f64 v17, v18
|
||||||
|
v30 = fadd.f64 v19, v20
|
||||||
|
|
||||||
|
v31 = fadd.f64 v21, v22
|
||||||
|
v32 = fadd.f64 v23, v24
|
||||||
|
v33 = fadd.f64 v25, v26
|
||||||
|
v34 = fadd.f64 v27, v28
|
||||||
|
v35 = fadd.f64 v29, v30
|
||||||
|
|
||||||
|
v36 = fadd.f64 v31, v32
|
||||||
|
v37 = fadd.f64 v33, v34
|
||||||
|
|
||||||
|
v38 = fadd.f64 v36, v37
|
||||||
|
|
||||||
|
v39 = fadd.f64 v38, v35
|
||||||
|
|
||||||
|
return v39
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: pushq %rbp
|
||||||
|
; nextln: movq %rsp, %rbp
|
||||||
|
; nextln: subq $$208, %rsp
|
||||||
|
; nextln: movdqu %xmm6, 0(%rsp)
|
||||||
|
; nextln: movdqu %xmm7, 16(%rsp)
|
||||||
|
; nextln: movdqu %xmm8, 32(%rsp)
|
||||||
|
; nextln: movdqu %xmm9, 48(%rsp)
|
||||||
|
; nextln: movdqu %xmm10, 64(%rsp)
|
||||||
|
; nextln: movdqu %xmm11, 80(%rsp)
|
||||||
|
; nextln: movdqu %xmm12, 96(%rsp)
|
||||||
|
; nextln: movdqu %xmm13, 112(%rsp)
|
||||||
|
; nextln: movdqu %xmm14, 128(%rsp)
|
||||||
|
; nextln: movdqu %xmm15, 144(%rsp)
|
||||||
|
; nextln: virtual_sp_offset_adjust 160
|
||||||
|
; nextln: movsd 0(%rcx), %xmm0
|
||||||
|
; nextln: movsd %xmm0, rsp(16 + virtual offset)
|
||||||
|
; nextln: movsd 8(%rcx), %xmm1
|
||||||
|
; nextln: movsd 16(%rcx), %xmm0
|
||||||
|
; nextln: movsd %xmm0, rsp(24 + virtual offset)
|
||||||
|
; nextln: movsd 24(%rcx), %xmm3
|
||||||
|
; nextln: movsd 32(%rcx), %xmm0
|
||||||
|
; nextln: movsd %xmm0, rsp(32 + virtual offset)
|
||||||
|
; nextln: movsd 40(%rcx), %xmm5
|
||||||
|
; nextln: movsd 48(%rcx), %xmm6
|
||||||
|
; nextln: movsd 56(%rcx), %xmm7
|
||||||
|
; nextln: movsd 64(%rcx), %xmm8
|
||||||
|
; nextln: movsd 72(%rcx), %xmm9
|
||||||
|
; nextln: movsd 80(%rcx), %xmm10
|
||||||
|
; nextln: movsd 88(%rcx), %xmm11
|
||||||
|
; nextln: movsd 96(%rcx), %xmm12
|
||||||
|
; nextln: movsd 104(%rcx), %xmm13
|
||||||
|
; nextln: movsd 112(%rcx), %xmm14
|
||||||
|
; nextln: movsd 120(%rcx), %xmm15
|
||||||
|
; nextln: movsd 128(%rcx), %xmm0
|
||||||
|
; nextln: movsd %xmm0, rsp(0 + virtual offset)
|
||||||
|
; nextln: movsd 136(%rcx), %xmm0
|
||||||
|
; nextln: movsd 144(%rcx), %xmm2
|
||||||
|
; nextln: movsd %xmm2, rsp(8 + virtual offset)
|
||||||
|
; nextln: movsd 152(%rcx), %xmm2
|
||||||
|
; nextln: nop len=0
|
||||||
|
; nextln: movsd rsp(16 + virtual offset), %xmm4
|
||||||
|
; nextln: addsd %xmm1, %xmm4
|
||||||
|
; nextln: movsd %xmm4, rsp(16 + virtual offset)
|
||||||
|
; nextln: movsd rsp(24 + virtual offset), %xmm1
|
||||||
|
; nextln: addsd %xmm3, %xmm1
|
||||||
|
; nextln: movsd rsp(32 + virtual offset), %xmm4
|
||||||
|
; nextln: addsd %xmm5, %xmm4
|
||||||
|
; nextln: addsd %xmm7, %xmm6
|
||||||
|
; nextln: addsd %xmm9, %xmm8
|
||||||
|
; nextln: addsd %xmm11, %xmm10
|
||||||
|
; nextln: addsd %xmm13, %xmm12
|
||||||
|
; nextln: addsd %xmm15, %xmm14
|
||||||
|
; nextln: movsd rsp(0 + virtual offset), %xmm3
|
||||||
|
; nextln: addsd %xmm0, %xmm3
|
||||||
|
; nextln: movsd rsp(8 + virtual offset), %xmm0
|
||||||
|
; nextln: addsd %xmm2, %xmm0
|
||||||
|
; nextln: movsd rsp(16 + virtual offset), %xmm2
|
||||||
|
; nextln: addsd %xmm1, %xmm2
|
||||||
|
; nextln: addsd %xmm6, %xmm4
|
||||||
|
; nextln: addsd %xmm10, %xmm8
|
||||||
|
; nextln: addsd %xmm14, %xmm12
|
||||||
|
; nextln: addsd %xmm0, %xmm3
|
||||||
|
; nextln: addsd %xmm4, %xmm2
|
||||||
|
; nextln: addsd %xmm12, %xmm8
|
||||||
|
; nextln: addsd %xmm8, %xmm2
|
||||||
|
; nextln: addsd %xmm3, %xmm2
|
||||||
|
; nextln: movaps %xmm2, %xmm0
|
||||||
|
; nextln: movdqu 0(%rsp), %xmm6
|
||||||
|
; nextln: movdqu 16(%rsp), %xmm7
|
||||||
|
; nextln: movdqu 32(%rsp), %xmm8
|
||||||
|
; nextln: movdqu 48(%rsp), %xmm9
|
||||||
|
; nextln: movdqu 64(%rsp), %xmm10
|
||||||
|
; nextln: movdqu 80(%rsp), %xmm11
|
||||||
|
; nextln: movdqu 96(%rsp), %xmm12
|
||||||
|
; nextln: movdqu 112(%rsp), %xmm13
|
||||||
|
; nextln: movdqu 128(%rsp), %xmm14
|
||||||
|
; nextln: movdqu 144(%rsp), %xmm15
|
||||||
|
; nextln: addq $$160, %rsp
|
||||||
|
; nextln: movq %rbp, %rsp
|
||||||
|
; nextln: popq %rbp
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
test compile
|
test compile
|
||||||
|
set enable_llvm_abi_extensions=true
|
||||||
target x86_64
|
target x86_64
|
||||||
feature "experimental_x64"
|
feature "experimental_x64"
|
||||||
|
|
||||||
@@ -941,17 +942,17 @@ block0(v0: i128, v1: i128, v2: i64, v3: i128, v4: i128, v5: i128):
|
|||||||
v11 = iadd.i128 v9, v10
|
v11 = iadd.i128 v9, v10
|
||||||
return v11
|
return v11
|
||||||
|
|
||||||
; check: movq %rsp, %rbp
|
; check: pushq %rbp
|
||||||
|
; nextln: movq %rsp, %rbp
|
||||||
; nextln: subq $$16, %rsp
|
; nextln: subq $$16, %rsp
|
||||||
; nextln: movq %r12, 0(%rsp)
|
; nextln: movq %r12, 0(%rsp)
|
||||||
; nextln: movq %r13, 8(%rsp)
|
; nextln: movq %r13, 8(%rsp)
|
||||||
; nextln: virtual_sp_offset_adjust 16
|
; nextln: virtual_sp_offset_adjust 16
|
||||||
; nextln: movq 16(%rbp), %r9
|
; nextln: movq 16(%rbp), %r10
|
||||||
; nextln: movq 24(%rbp), %r10
|
; nextln: movq 24(%rbp), %r12
|
||||||
; nextln: movq 32(%rbp), %r12
|
; nextln: movq 32(%rbp), %r11
|
||||||
; nextln: movq 40(%rbp), %r11
|
; nextln: movq 40(%rbp), %rax
|
||||||
; nextln: movq 48(%rbp), %rax
|
; nextln: movq 48(%rbp), %r13
|
||||||
; nextln: movq 56(%rbp), %r13
|
|
||||||
; nextln: addq %rdx, %rdi
|
; nextln: addq %rdx, %rdi
|
||||||
; nextln: adcq %rcx, %rsi
|
; nextln: adcq %rcx, %rsi
|
||||||
; nextln: xorq %rcx, %rcx
|
; nextln: xorq %rcx, %rcx
|
||||||
@@ -989,10 +990,10 @@ block0(v0: i128):
|
|||||||
; nextln: movq %r10, 16(%rsi)
|
; nextln: movq %r10, 16(%rsi)
|
||||||
; nextln: movq %r11, 24(%rsi)
|
; nextln: movq %r11, 24(%rsi)
|
||||||
; nextln: movq %r12, 32(%rsi)
|
; nextln: movq %r12, 32(%rsi)
|
||||||
; nextln: movq %r13, 48(%rsi)
|
; nextln: movq %r13, 40(%rsi)
|
||||||
; nextln: movq %r14, 56(%rsi)
|
; nextln: movq %r14, 48(%rsi)
|
||||||
; nextln: movq %rdi, 64(%rsi)
|
; nextln: movq %rdi, 56(%rsi)
|
||||||
; nextln: movq %rbx, 72(%rsi)
|
; nextln: movq %rbx, 64(%rsi)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
test compile
|
test compile
|
||||||
|
set enable_llvm_abi_extensions=true
|
||||||
target x86_64
|
target x86_64
|
||||||
feature "experimental_x64"
|
feature "experimental_x64"
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user