Merge pull request #2678 from cfallin/x64-fastcall
x86-64 Windows fastcall ABI support.
This commit is contained in:
@@ -216,6 +216,25 @@ pub(crate) fn define() -> SettingGroup {
|
||||
0,
|
||||
);
|
||||
|
||||
settings.add_bool(
|
||||
"enable_llvm_abi_extensions",
|
||||
r#"
|
||||
Enable various ABI extensions defined by LLVM's behavior.
|
||||
|
||||
In some cases, LLVM's implementation of an ABI (calling convention)
|
||||
goes beyond a standard and supports additional argument types or
|
||||
behavior. This option instructs Cranelift codegen to follow LLVM's
|
||||
behavior where applicable.
|
||||
|
||||
Currently, this applies only to Windows Fastcall on x86-64, and
|
||||
allows an `i128` argument to be spread across two 64-bit integer
|
||||
registers. The Fastcall implementation otherwise does not support
|
||||
`i128` arguments, and will panic if they are present and this
|
||||
option is not set.
|
||||
"#,
|
||||
false,
|
||||
);
|
||||
|
||||
// BaldrMonkey requires that not-yet-relocated function addresses be encoded
|
||||
// as all-ones bitpatterns.
|
||||
settings.add_bool(
|
||||
|
||||
@@ -78,41 +78,41 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt
|
||||
match &param.purpose {
|
||||
&ir::ArgumentPurpose::VMContext => {
|
||||
// This is SpiderMonkey's `WasmTlsReg`.
|
||||
Some(ABIArg::Reg {
|
||||
regs: ValueRegs::one(xreg(BALDRDASH_TLS_REG).to_real_reg()),
|
||||
ty: ir::types::I64,
|
||||
extension: param.extension,
|
||||
purpose: param.purpose,
|
||||
})
|
||||
Some(ABIArg::reg(
|
||||
xreg(BALDRDASH_TLS_REG).to_real_reg(),
|
||||
ir::types::I64,
|
||||
param.extension,
|
||||
param.purpose,
|
||||
))
|
||||
}
|
||||
&ir::ArgumentPurpose::SignatureId => {
|
||||
// This is SpiderMonkey's `WasmTableCallSigReg`.
|
||||
Some(ABIArg::Reg {
|
||||
regs: ValueRegs::one(xreg(BALDRDASH_SIG_REG).to_real_reg()),
|
||||
ty: ir::types::I64,
|
||||
extension: param.extension,
|
||||
purpose: param.purpose,
|
||||
})
|
||||
Some(ABIArg::reg(
|
||||
xreg(BALDRDASH_SIG_REG).to_real_reg(),
|
||||
ir::types::I64,
|
||||
param.extension,
|
||||
param.purpose,
|
||||
))
|
||||
}
|
||||
&ir::ArgumentPurpose::CalleeTLS => {
|
||||
// This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020.
|
||||
assert!(call_conv == isa::CallConv::Baldrdash2020);
|
||||
Some(ABIArg::Stack {
|
||||
offset: BALDRDASH_CALLEE_TLS_OFFSET,
|
||||
ty: ir::types::I64,
|
||||
extension: ir::ArgumentExtension::None,
|
||||
purpose: param.purpose,
|
||||
})
|
||||
Some(ABIArg::stack(
|
||||
BALDRDASH_CALLEE_TLS_OFFSET,
|
||||
ir::types::I64,
|
||||
ir::ArgumentExtension::None,
|
||||
param.purpose,
|
||||
))
|
||||
}
|
||||
&ir::ArgumentPurpose::CallerTLS => {
|
||||
// This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020.
|
||||
assert!(call_conv == isa::CallConv::Baldrdash2020);
|
||||
Some(ABIArg::Stack {
|
||||
offset: BALDRDASH_CALLER_TLS_OFFSET,
|
||||
ty: ir::types::I64,
|
||||
extension: ir::ArgumentExtension::None,
|
||||
purpose: param.purpose,
|
||||
})
|
||||
Some(ABIArg::stack(
|
||||
BALDRDASH_CALLER_TLS_OFFSET,
|
||||
ir::types::I64,
|
||||
ir::ArgumentExtension::None,
|
||||
param.purpose,
|
||||
))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
@@ -161,6 +161,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
|
||||
fn compute_arg_locs(
|
||||
call_conv: isa::CallConv,
|
||||
_flags: &settings::Flags,
|
||||
params: &[ir::AbiParam],
|
||||
args_or_rets: ArgsOrRets,
|
||||
add_ret_area_ptr: bool,
|
||||
@@ -253,12 +254,12 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
RegClass::V128 => vreg(*next_reg),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
ret.push(ABIArg::Reg {
|
||||
regs: ValueRegs::one(reg.to_real_reg()),
|
||||
ty: param.value_type,
|
||||
extension: param.extension,
|
||||
purpose: param.purpose,
|
||||
});
|
||||
ret.push(ABIArg::reg(
|
||||
reg.to_real_reg(),
|
||||
param.value_type,
|
||||
param.extension,
|
||||
param.purpose,
|
||||
));
|
||||
*next_reg += 1;
|
||||
remaining_reg_vals -= 1;
|
||||
} else {
|
||||
@@ -268,13 +269,13 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
let size = std::cmp::max(size, 8);
|
||||
// Align.
|
||||
debug_assert!(size.is_power_of_two());
|
||||
next_stack = (next_stack + size - 1) & !(size - 1);
|
||||
ret.push(ABIArg::Stack {
|
||||
offset: next_stack as i64,
|
||||
ty: param.value_type,
|
||||
extension: param.extension,
|
||||
purpose: param.purpose,
|
||||
});
|
||||
next_stack = align_to(next_stack, size);
|
||||
ret.push(ABIArg::stack(
|
||||
next_stack as i64,
|
||||
param.value_type,
|
||||
param.extension,
|
||||
param.purpose,
|
||||
));
|
||||
next_stack += size;
|
||||
}
|
||||
}
|
||||
@@ -286,19 +287,19 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
let extra_arg = if add_ret_area_ptr {
|
||||
debug_assert!(args_or_rets == ArgsOrRets::Args);
|
||||
if next_xreg < max_per_class_reg_vals && remaining_reg_vals > 0 {
|
||||
ret.push(ABIArg::Reg {
|
||||
regs: ValueRegs::one(xreg(next_xreg).to_real_reg()),
|
||||
ty: I64,
|
||||
extension: ir::ArgumentExtension::None,
|
||||
purpose: ir::ArgumentPurpose::Normal,
|
||||
});
|
||||
ret.push(ABIArg::reg(
|
||||
xreg(next_xreg).to_real_reg(),
|
||||
I64,
|
||||
ir::ArgumentExtension::None,
|
||||
ir::ArgumentPurpose::Normal,
|
||||
));
|
||||
} else {
|
||||
ret.push(ABIArg::Stack {
|
||||
offset: next_stack as i64,
|
||||
ty: I64,
|
||||
extension: ir::ArgumentExtension::None,
|
||||
purpose: ir::ArgumentPurpose::Normal,
|
||||
});
|
||||
ret.push(ABIArg::stack(
|
||||
next_stack as i64,
|
||||
I64,
|
||||
ir::ArgumentExtension::None,
|
||||
ir::ArgumentPurpose::Normal,
|
||||
));
|
||||
next_stack += 8;
|
||||
}
|
||||
Some(ret.len() - 1)
|
||||
@@ -306,7 +307,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
None
|
||||
};
|
||||
|
||||
next_stack = (next_stack + 15) & !15;
|
||||
next_stack = align_to(next_stack, 16);
|
||||
|
||||
// To avoid overflow issues, limit the arg/return size to something
|
||||
// reasonable -- here, 128 MB.
|
||||
|
||||
@@ -51,6 +51,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
|
||||
|
||||
fn compute_arg_locs(
|
||||
_call_conv: isa::CallConv,
|
||||
_flags: &settings::Flags,
|
||||
params: &[ir::AbiParam],
|
||||
args_or_rets: ArgsOrRets,
|
||||
add_ret_area_ptr: bool,
|
||||
@@ -81,12 +82,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
|
||||
if next_rreg < max_reg_val {
|
||||
let reg = rreg(next_rreg);
|
||||
|
||||
ret.push(ABIArg::Reg {
|
||||
regs: ValueRegs::one(reg.to_real_reg()),
|
||||
ty: param.value_type,
|
||||
extension: param.extension,
|
||||
purpose: param.purpose,
|
||||
});
|
||||
ret.push(ABIArg::reg(
|
||||
reg.to_real_reg(),
|
||||
param.value_type,
|
||||
param.extension,
|
||||
param.purpose,
|
||||
));
|
||||
next_rreg += 1;
|
||||
} else {
|
||||
// Arguments are stored on stack in reversed order.
|
||||
@@ -101,12 +102,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
|
||||
let extra_arg = if add_ret_area_ptr {
|
||||
debug_assert!(args_or_rets == ArgsOrRets::Args);
|
||||
if next_rreg < max_reg_val {
|
||||
ret.push(ABIArg::Reg {
|
||||
regs: ValueRegs::one(rreg(next_rreg).to_real_reg()),
|
||||
ty: I32,
|
||||
extension: ir::ArgumentExtension::None,
|
||||
purpose: ir::ArgumentPurpose::Normal,
|
||||
});
|
||||
ret.push(ABIArg::reg(
|
||||
rreg(next_rreg).to_real_reg(),
|
||||
I32,
|
||||
ir::ArgumentExtension::None,
|
||||
ir::ArgumentPurpose::Normal,
|
||||
));
|
||||
} else {
|
||||
stack_args.push((
|
||||
I32,
|
||||
@@ -124,12 +125,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
|
||||
let max_stack = next_stack;
|
||||
for (ty, ext, purpose) in stack_args.into_iter().rev() {
|
||||
next_stack -= 4;
|
||||
ret.push(ABIArg::Stack {
|
||||
offset: (max_stack - next_stack) as i64,
|
||||
ret.push(ABIArg::stack(
|
||||
(max_stack - next_stack) as i64,
|
||||
ty,
|
||||
extension: ext,
|
||||
ext,
|
||||
purpose,
|
||||
});
|
||||
));
|
||||
}
|
||||
assert_eq!(next_stack, 0);
|
||||
|
||||
|
||||
@@ -31,41 +31,41 @@ fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option<A
|
||||
match &param.purpose {
|
||||
&ir::ArgumentPurpose::VMContext => {
|
||||
// This is SpiderMonkey's `WasmTlsReg`.
|
||||
Some(ABIArg::Reg {
|
||||
regs: ValueRegs::one(regs::r14().to_real_reg()),
|
||||
ty: types::I64,
|
||||
extension: param.extension,
|
||||
purpose: param.purpose,
|
||||
})
|
||||
Some(ABIArg::reg(
|
||||
regs::r14().to_real_reg(),
|
||||
types::I64,
|
||||
param.extension,
|
||||
param.purpose,
|
||||
))
|
||||
}
|
||||
&ir::ArgumentPurpose::SignatureId => {
|
||||
// This is SpiderMonkey's `WasmTableCallSigReg`.
|
||||
Some(ABIArg::Reg {
|
||||
regs: ValueRegs::one(regs::r10().to_real_reg()),
|
||||
ty: types::I64,
|
||||
extension: param.extension,
|
||||
purpose: param.purpose,
|
||||
})
|
||||
Some(ABIArg::reg(
|
||||
regs::r10().to_real_reg(),
|
||||
types::I64,
|
||||
param.extension,
|
||||
param.purpose,
|
||||
))
|
||||
}
|
||||
&ir::ArgumentPurpose::CalleeTLS => {
|
||||
// This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020.
|
||||
assert!(call_conv == isa::CallConv::Baldrdash2020);
|
||||
Some(ABIArg::Stack {
|
||||
offset: BALDRDASH_CALLEE_TLS_OFFSET,
|
||||
ty: ir::types::I64,
|
||||
extension: ir::ArgumentExtension::None,
|
||||
purpose: param.purpose,
|
||||
})
|
||||
Some(ABIArg::stack(
|
||||
BALDRDASH_CALLEE_TLS_OFFSET,
|
||||
ir::types::I64,
|
||||
ir::ArgumentExtension::None,
|
||||
param.purpose,
|
||||
))
|
||||
}
|
||||
&ir::ArgumentPurpose::CallerTLS => {
|
||||
// This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020.
|
||||
assert!(call_conv == isa::CallConv::Baldrdash2020);
|
||||
Some(ABIArg::Stack {
|
||||
offset: BALDRDASH_CALLER_TLS_OFFSET,
|
||||
ty: ir::types::I64,
|
||||
extension: ir::ArgumentExtension::None,
|
||||
purpose: param.purpose,
|
||||
})
|
||||
Some(ABIArg::stack(
|
||||
BALDRDASH_CALLER_TLS_OFFSET,
|
||||
ir::types::I64,
|
||||
ir::ArgumentExtension::None,
|
||||
param.purpose,
|
||||
))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
@@ -97,18 +97,30 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
|
||||
fn compute_arg_locs(
|
||||
call_conv: isa::CallConv,
|
||||
flags: &settings::Flags,
|
||||
params: &[ir::AbiParam],
|
||||
args_or_rets: ArgsOrRets,
|
||||
add_ret_area_ptr: bool,
|
||||
) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
|
||||
let is_baldrdash = call_conv.extends_baldrdash();
|
||||
let is_fastcall = call_conv.extends_windows_fastcall();
|
||||
let has_baldrdash_tls = call_conv == isa::CallConv::Baldrdash2020;
|
||||
|
||||
let mut next_gpr = 0;
|
||||
let mut next_vreg = 0;
|
||||
let mut next_stack: u64 = 0;
|
||||
let mut next_param_idx = 0; // Fastcall cares about overall param index
|
||||
let mut ret = vec![];
|
||||
|
||||
if args_or_rets == ArgsOrRets::Args && is_fastcall {
|
||||
// Fastcall always reserves 32 bytes of shadow space corresponding to
|
||||
// the four initial in-arg parameters.
|
||||
//
|
||||
// (See:
|
||||
// https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-160)
|
||||
next_stack = 32;
|
||||
}
|
||||
|
||||
if args_or_rets == ArgsOrRets::Args && has_baldrdash_tls {
|
||||
// Baldrdash ABI-2020 always has two stack-arg slots reserved, for the callee and
|
||||
// caller TLS-register values, respectively.
|
||||
@@ -159,94 +171,114 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
}
|
||||
|
||||
// Find regclass(es) of the register(s) used to store a value of this type.
|
||||
let (rcs, _) = Inst::rc_for_type(param.value_type)?;
|
||||
let intreg = rcs[0] == RegClass::I64;
|
||||
let num_regs = rcs.len();
|
||||
assert!(num_regs <= 2);
|
||||
if num_regs == 2 {
|
||||
assert_eq!(rcs[0], rcs[1]);
|
||||
let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;
|
||||
|
||||
// Now assign ABIArgSlots for each register-sized part.
|
||||
//
|
||||
// Note that the handling of `i128` values is unique here:
|
||||
//
|
||||
// - If `enable_llvm_abi_extensions` is set in the flags, each
|
||||
// `i128` is split into two `i64`s and assigned exactly as if it
|
||||
// were two consecutive 64-bit args. This is consistent with LLVM's
|
||||
// behavior, and is needed for some uses of Cranelift (e.g., the
|
||||
// rustc backend).
|
||||
//
|
||||
// - Otherwise, both SysV and Fastcall specify behavior (use of
|
||||
// vector register, a register pair, or passing by reference
|
||||
// depending on the case), but for simplicity, we will just panic if
|
||||
// an i128 type appears in a signature and the LLVM extensions flag
|
||||
// is not set.
|
||||
//
|
||||
// For examples of how rustc compiles i128 args and return values on
|
||||
// both SysV and Fastcall platforms, see:
|
||||
// https://godbolt.org/z/PhG3ob
|
||||
|
||||
if param.value_type.bits() > 64
|
||||
&& !param.value_type.is_vector()
|
||||
&& !flags.enable_llvm_abi_extensions()
|
||||
{
|
||||
panic!(
|
||||
"i128 args/return values not supported unless LLVM ABI extensions are enabled"
|
||||
);
|
||||
}
|
||||
|
||||
let mut regs: SmallVec<[RealReg; 2]> = smallvec![];
|
||||
for j in 0..num_regs {
|
||||
let mut slots = vec![];
|
||||
for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) {
|
||||
let intreg = *rc == RegClass::I64;
|
||||
let nextreg = if intreg {
|
||||
match args_or_rets {
|
||||
ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr + j),
|
||||
ArgsOrRets::Args => {
|
||||
get_intreg_for_arg(&call_conv, next_gpr, next_param_idx)
|
||||
}
|
||||
ArgsOrRets::Rets => {
|
||||
get_intreg_for_retval_systemv(&call_conv, next_gpr + j, i + j)
|
||||
get_intreg_for_retval(&call_conv, next_gpr, next_param_idx)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
match args_or_rets {
|
||||
ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg + j),
|
||||
ArgsOrRets::Args => {
|
||||
get_fltreg_for_arg(&call_conv, next_vreg, next_param_idx)
|
||||
}
|
||||
ArgsOrRets::Rets => {
|
||||
get_fltreg_for_retval_systemv(&call_conv, next_vreg + j, i + j)
|
||||
get_fltreg_for_retval(&call_conv, next_vreg, next_param_idx)
|
||||
}
|
||||
}
|
||||
};
|
||||
next_param_idx += 1;
|
||||
if let Some(reg) = nextreg {
|
||||
regs.push(reg.to_real_reg());
|
||||
} else {
|
||||
regs.clear();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if regs.len() > 0 {
|
||||
let regs = match num_regs {
|
||||
1 => ValueRegs::one(regs[0]),
|
||||
2 => ValueRegs::two(regs[0], regs[1]),
|
||||
_ => panic!("More than two registers unexpected"),
|
||||
};
|
||||
ret.push(ABIArg::Reg {
|
||||
regs,
|
||||
ty: param.value_type,
|
||||
extension: param.extension,
|
||||
purpose: param.purpose,
|
||||
});
|
||||
if intreg {
|
||||
next_gpr += num_regs;
|
||||
next_gpr += 1;
|
||||
} else {
|
||||
next_vreg += num_regs;
|
||||
next_vreg += 1;
|
||||
}
|
||||
slots.push(ABIArgSlot::Reg {
|
||||
reg: reg.to_real_reg(),
|
||||
ty: *reg_ty,
|
||||
extension: param.extension,
|
||||
});
|
||||
} else {
|
||||
// Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
|
||||
// stack alignment happens separately after all args.)
|
||||
let size = (param.value_type.bits() / 8) as u64;
|
||||
let size = (reg_ty.bits() / 8) as u64;
|
||||
let size = std::cmp::max(size, 8);
|
||||
// Align.
|
||||
debug_assert!(size.is_power_of_two());
|
||||
next_stack = (next_stack + size - 1) & !(size - 1);
|
||||
ret.push(ABIArg::Stack {
|
||||
next_stack = align_to(next_stack, size);
|
||||
slots.push(ABIArgSlot::Stack {
|
||||
offset: next_stack as i64,
|
||||
ty: param.value_type,
|
||||
ty: *reg_ty,
|
||||
extension: param.extension,
|
||||
purpose: param.purpose,
|
||||
});
|
||||
next_stack += size;
|
||||
}
|
||||
}
|
||||
|
||||
ret.push(ABIArg::Slots {
|
||||
slots,
|
||||
purpose: param.purpose,
|
||||
});
|
||||
}
|
||||
|
||||
if args_or_rets == ArgsOrRets::Rets && is_baldrdash {
|
||||
ret.reverse();
|
||||
}
|
||||
|
||||
let extra_arg = if add_ret_area_ptr {
|
||||
debug_assert!(args_or_rets == ArgsOrRets::Args);
|
||||
if let Some(reg) = get_intreg_for_arg_systemv(&call_conv, next_gpr) {
|
||||
ret.push(ABIArg::Reg {
|
||||
regs: ValueRegs::one(reg.to_real_reg()),
|
||||
ty: types::I64,
|
||||
extension: ir::ArgumentExtension::None,
|
||||
purpose: ir::ArgumentPurpose::Normal,
|
||||
});
|
||||
if let Some(reg) = get_intreg_for_arg(&call_conv, next_gpr, next_param_idx) {
|
||||
ret.push(ABIArg::reg(
|
||||
reg.to_real_reg(),
|
||||
types::I64,
|
||||
ir::ArgumentExtension::None,
|
||||
ir::ArgumentPurpose::Normal,
|
||||
));
|
||||
} else {
|
||||
ret.push(ABIArg::Stack {
|
||||
offset: next_stack as i64,
|
||||
ty: types::I64,
|
||||
extension: ir::ArgumentExtension::None,
|
||||
purpose: ir::ArgumentPurpose::Normal,
|
||||
});
|
||||
ret.push(ABIArg::stack(
|
||||
next_stack as i64,
|
||||
types::I64,
|
||||
ir::ArgumentExtension::None,
|
||||
ir::ArgumentPurpose::Normal,
|
||||
));
|
||||
next_stack += 8;
|
||||
}
|
||||
Some(ret.len() - 1)
|
||||
@@ -254,7 +286,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
None
|
||||
};
|
||||
|
||||
next_stack = (next_stack + 15) & !15;
|
||||
next_stack = align_to(next_stack, 16);
|
||||
|
||||
// To avoid overflow issues, limit the arg/return size to something reasonable.
|
||||
if next_stack > STACK_ARG_RET_SIZE_LIMIT {
|
||||
@@ -452,10 +484,9 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
// registers (all XMM registers are caller-save) so we can compute the
|
||||
// total size of the needed stack space easily.
|
||||
let clobbered = get_callee_saves(&call_conv, clobbers);
|
||||
let clobbered_size = 8 * clobbered.len() as u32;
|
||||
let stack_size = clobbered_size + fixed_frame_storage_size;
|
||||
let stack_size = compute_clobber_size(&clobbered) + fixed_frame_storage_size;
|
||||
// Align to 16 bytes.
|
||||
let stack_size = (stack_size + 15) & !15;
|
||||
let stack_size = align_to(stack_size, 16);
|
||||
let clobbered_size = stack_size - fixed_frame_storage_size;
|
||||
// Adjust the stack pointer downward with one `sub rsp, IMM`
|
||||
// instruction.
|
||||
@@ -473,16 +504,23 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
let r_reg = reg.to_reg();
|
||||
match r_reg.get_class() {
|
||||
RegClass::I64 => {
|
||||
insts.push(Inst::mov_r_m(
|
||||
OperandSize::Size64,
|
||||
insts.push(Inst::store(
|
||||
types::I64,
|
||||
r_reg.to_reg(),
|
||||
Amode::imm_reg(cur_offset, regs::rsp()),
|
||||
));
|
||||
cur_offset += 8;
|
||||
}
|
||||
// No XMM regs are callee-save, so we do not need to implement
|
||||
// this.
|
||||
_ => unimplemented!(),
|
||||
RegClass::V128 => {
|
||||
cur_offset = align_to(cur_offset, 16);
|
||||
insts.push(Inst::store(
|
||||
types::I8X16,
|
||||
r_reg.to_reg(),
|
||||
Amode::imm_reg(cur_offset, regs::rsp()),
|
||||
));
|
||||
cur_offset += 16;
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -499,8 +537,8 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
let mut insts = SmallVec::new();
|
||||
|
||||
let clobbered = get_callee_saves(&call_conv, clobbers);
|
||||
let stack_size = 8 * clobbered.len() as u32;
|
||||
let stack_size = (stack_size + 15) & !15;
|
||||
let stack_size = compute_clobber_size(&clobbered);
|
||||
let stack_size = align_to(stack_size, 16);
|
||||
|
||||
// Restore regs by loading from offsets of RSP.
|
||||
let mut cur_offset = 0;
|
||||
@@ -514,7 +552,17 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
));
|
||||
cur_offset += 8;
|
||||
}
|
||||
_ => unimplemented!(),
|
||||
RegClass::V128 => {
|
||||
cur_offset = align_to(cur_offset, 16);
|
||||
insts.push(Inst::load(
|
||||
types::I8X16,
|
||||
Amode::imm_reg(cur_offset, regs::rsp()),
|
||||
Writable::from_reg(rreg.to_reg()),
|
||||
ExtKind::None,
|
||||
));
|
||||
cur_offset += 16;
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
// Adjust RSP back upward.
|
||||
@@ -592,14 +640,14 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
// Baldrdash should not use struct args.
|
||||
assert!(!call_conv.extends_baldrdash());
|
||||
let mut insts = SmallVec::new();
|
||||
let arg0 = get_intreg_for_arg_systemv(&call_conv, 0).unwrap();
|
||||
let arg1 = get_intreg_for_arg_systemv(&call_conv, 1).unwrap();
|
||||
let arg2 = get_intreg_for_arg_systemv(&call_conv, 2).unwrap();
|
||||
let arg0 = get_intreg_for_arg(&call_conv, 0, 0).unwrap();
|
||||
let arg1 = get_intreg_for_arg(&call_conv, 1, 1).unwrap();
|
||||
let arg2 = get_intreg_for_arg(&call_conv, 2, 2).unwrap();
|
||||
// We need a register to load the address of `memcpy()` below and we
|
||||
// don't have a lowering context to allocate a temp here; so just use a
|
||||
// register we know we are free to mutate as part of this sequence
|
||||
// (because it is clobbered by the call as per the ABI anyway).
|
||||
let memcpy_addr = get_intreg_for_arg_systemv(&call_conv, 3).unwrap();
|
||||
let memcpy_addr = get_intreg_for_arg(&call_conv, 3, 3).unwrap();
|
||||
insts.push(Inst::gen_move(Writable::from_reg(arg0), dst, I64));
|
||||
insts.push(Inst::gen_move(Writable::from_reg(arg1), src, I64));
|
||||
insts.extend(
|
||||
@@ -648,10 +696,9 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
|
||||
fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec<Writable<Reg>> {
|
||||
let mut caller_saved = vec![
|
||||
// Systemv calling convention:
|
||||
// - GPR: all except RBX, RBP, R12 to R15 (which are callee-saved).
|
||||
Writable::from_reg(regs::rsi()),
|
||||
Writable::from_reg(regs::rdi()),
|
||||
// intersection of Systemv and FastCall calling conventions:
|
||||
// - GPR: all except RDI, RSI, RBX, RBP, R12 to R15.
|
||||
// SysV adds RDI, RSI (FastCall makes these callee-saved).
|
||||
Writable::from_reg(regs::rax()),
|
||||
Writable::from_reg(regs::rcx()),
|
||||
Writable::from_reg(regs::rdx()),
|
||||
@@ -659,25 +706,30 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
Writable::from_reg(regs::r9()),
|
||||
Writable::from_reg(regs::r10()),
|
||||
Writable::from_reg(regs::r11()),
|
||||
// - XMM: all the registers!
|
||||
// - XMM: XMM0-5. SysV adds the rest (XMM6-XMM15).
|
||||
Writable::from_reg(regs::xmm0()),
|
||||
Writable::from_reg(regs::xmm1()),
|
||||
Writable::from_reg(regs::xmm2()),
|
||||
Writable::from_reg(regs::xmm3()),
|
||||
Writable::from_reg(regs::xmm4()),
|
||||
Writable::from_reg(regs::xmm5()),
|
||||
Writable::from_reg(regs::xmm6()),
|
||||
Writable::from_reg(regs::xmm7()),
|
||||
Writable::from_reg(regs::xmm8()),
|
||||
Writable::from_reg(regs::xmm9()),
|
||||
Writable::from_reg(regs::xmm10()),
|
||||
Writable::from_reg(regs::xmm11()),
|
||||
Writable::from_reg(regs::xmm12()),
|
||||
Writable::from_reg(regs::xmm13()),
|
||||
Writable::from_reg(regs::xmm14()),
|
||||
Writable::from_reg(regs::xmm15()),
|
||||
];
|
||||
|
||||
if !call_conv_of_callee.extends_windows_fastcall() {
|
||||
caller_saved.push(Writable::from_reg(regs::rsi()));
|
||||
caller_saved.push(Writable::from_reg(regs::rdi()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm6()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm7()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm8()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm9()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm10()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm11()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm12()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm13()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm14()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm15()));
|
||||
}
|
||||
|
||||
if call_conv_of_callee.extends_baldrdash() {
|
||||
caller_saved.push(Writable::from_reg(regs::r12()));
|
||||
caller_saved.push(Writable::from_reg(regs::r13()));
|
||||
@@ -739,49 +791,67 @@ impl From<StackAMode> for SyntheticAmode {
|
||||
}
|
||||
}
|
||||
|
||||
fn get_intreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
|
||||
match call_conv {
|
||||
fn get_intreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
|
||||
let is_fastcall = match call_conv {
|
||||
CallConv::Fast
|
||||
| CallConv::Cold
|
||||
| CallConv::SystemV
|
||||
| CallConv::BaldrdashSystemV
|
||||
| CallConv::Baldrdash2020 => {}
|
||||
_ => panic!("int args only supported for SysV calling convention"),
|
||||
| CallConv::Baldrdash2020 => false,
|
||||
CallConv::WindowsFastcall => true,
|
||||
_ => panic!("int args only supported for SysV or Fastcall calling convention"),
|
||||
};
|
||||
match idx {
|
||||
0 => Some(regs::rdi()),
|
||||
1 => Some(regs::rsi()),
|
||||
2 => Some(regs::rdx()),
|
||||
3 => Some(regs::rcx()),
|
||||
4 => Some(regs::r8()),
|
||||
5 => Some(regs::r9()),
|
||||
|
||||
// Fastcall counts by absolute argument number; SysV counts by argument of
|
||||
// this (integer) class.
|
||||
let i = if is_fastcall { arg_idx } else { idx };
|
||||
match (i, is_fastcall) {
|
||||
(0, false) => Some(regs::rdi()),
|
||||
(1, false) => Some(regs::rsi()),
|
||||
(2, false) => Some(regs::rdx()),
|
||||
(3, false) => Some(regs::rcx()),
|
||||
(4, false) => Some(regs::r8()),
|
||||
(5, false) => Some(regs::r9()),
|
||||
(0, true) => Some(regs::rcx()),
|
||||
(1, true) => Some(regs::rdx()),
|
||||
(2, true) => Some(regs::r8()),
|
||||
(3, true) => Some(regs::r9()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_fltreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
|
||||
match call_conv {
|
||||
fn get_fltreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
|
||||
let is_fastcall = match call_conv {
|
||||
CallConv::Fast
|
||||
| CallConv::Cold
|
||||
| CallConv::SystemV
|
||||
| CallConv::BaldrdashSystemV
|
||||
| CallConv::Baldrdash2020 => {}
|
||||
_ => panic!("float args only supported for SysV calling convention"),
|
||||
| CallConv::Baldrdash2020 => false,
|
||||
CallConv::WindowsFastcall => true,
|
||||
_ => panic!("float args only supported for SysV or Fastcall calling convention"),
|
||||
};
|
||||
match idx {
|
||||
0 => Some(regs::xmm0()),
|
||||
1 => Some(regs::xmm1()),
|
||||
2 => Some(regs::xmm2()),
|
||||
3 => Some(regs::xmm3()),
|
||||
4 => Some(regs::xmm4()),
|
||||
5 => Some(regs::xmm5()),
|
||||
6 => Some(regs::xmm6()),
|
||||
7 => Some(regs::xmm7()),
|
||||
|
||||
// Fastcall counts by absolute argument number; SysV counts by argument of
|
||||
// this (floating-point) class.
|
||||
let i = if is_fastcall { arg_idx } else { idx };
|
||||
match (i, is_fastcall) {
|
||||
(0, false) => Some(regs::xmm0()),
|
||||
(1, false) => Some(regs::xmm1()),
|
||||
(2, false) => Some(regs::xmm2()),
|
||||
(3, false) => Some(regs::xmm3()),
|
||||
(4, false) => Some(regs::xmm4()),
|
||||
(5, false) => Some(regs::xmm5()),
|
||||
(6, false) => Some(regs::xmm6()),
|
||||
(7, false) => Some(regs::xmm7()),
|
||||
(0, true) => Some(regs::xmm0()),
|
||||
(1, true) => Some(regs::xmm1()),
|
||||
(2, true) => Some(regs::xmm2()),
|
||||
(3, true) => Some(regs::xmm3()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_intreg_for_retval_systemv(
|
||||
fn get_intreg_for_retval(
|
||||
call_conv: &CallConv,
|
||||
intreg_idx: usize,
|
||||
retval_idx: usize,
|
||||
@@ -799,11 +869,16 @@ fn get_intreg_for_retval_systemv(
|
||||
None
|
||||
}
|
||||
}
|
||||
CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
|
||||
CallConv::WindowsFastcall => match intreg_idx {
|
||||
0 => Some(regs::rax()),
|
||||
1 => Some(regs::rdx()), // The Rust ABI for i128s needs this.
|
||||
_ => None,
|
||||
},
|
||||
CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_fltreg_for_retval_systemv(
|
||||
fn get_fltreg_for_retval(
|
||||
call_conv: &CallConv,
|
||||
fltreg_idx: usize,
|
||||
retval_idx: usize,
|
||||
@@ -821,7 +896,11 @@ fn get_fltreg_for_retval_systemv(
|
||||
None
|
||||
}
|
||||
}
|
||||
CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
|
||||
CallConv::WindowsFastcall => match fltreg_idx {
|
||||
0 => Some(regs::xmm0()),
|
||||
_ => None,
|
||||
},
|
||||
CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -854,6 +933,21 @@ fn is_callee_save_baldrdash(r: RealReg) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
fn is_callee_save_fastcall(r: RealReg) -> bool {
|
||||
use regs::*;
|
||||
match r.get_class() {
|
||||
RegClass::I64 => match r.get_hw_encoding() as u8 {
|
||||
ENC_RBX | ENC_RBP | ENC_RSI | ENC_RDI | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true,
|
||||
_ => false,
|
||||
},
|
||||
RegClass::V128 => match r.get_hw_encoding() as u8 {
|
||||
6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 => true,
|
||||
_ => false,
|
||||
},
|
||||
_ => panic!("Unknown register class: {:?}", r.get_class()),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
|
||||
let mut regs: Vec<Writable<RealReg>> = match call_conv {
|
||||
CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => regs
|
||||
@@ -869,7 +963,11 @@ fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<
|
||||
.cloned()
|
||||
.filter(|r| is_callee_save_systemv(r.to_reg()))
|
||||
.collect(),
|
||||
CallConv::WindowsFastcall => todo!("windows fastcall"),
|
||||
CallConv::WindowsFastcall => regs
|
||||
.iter()
|
||||
.cloned()
|
||||
.filter(|r| is_callee_save_fastcall(r.to_reg()))
|
||||
.collect(),
|
||||
CallConv::Probestack => todo!("probestack?"),
|
||||
};
|
||||
// Sort registers for deterministic code output. We can do an unstable sort because the
|
||||
@@ -877,3 +975,20 @@ fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<
|
||||
regs.sort_unstable_by_key(|r| r.to_reg().get_index());
|
||||
regs
|
||||
}
|
||||
|
||||
fn compute_clobber_size(clobbers: &Vec<Writable<RealReg>>) -> u32 {
|
||||
let mut clobbered_size = 0;
|
||||
for reg in clobbers {
|
||||
match reg.to_reg().get_class() {
|
||||
RegClass::I64 => {
|
||||
clobbered_size += 8;
|
||||
}
|
||||
RegClass::V128 => {
|
||||
clobbered_size = align_to(clobbered_size, 16);
|
||||
clobbered_size += 16;
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
clobbered_size
|
||||
}
|
||||
|
||||
@@ -456,6 +456,7 @@ pub(crate) enum InstructionSet {
|
||||
Popcnt,
|
||||
Lzcnt,
|
||||
BMI1,
|
||||
#[allow(dead_code)] // never constructed (yet).
|
||||
BMI2,
|
||||
}
|
||||
|
||||
|
||||
@@ -23,11 +23,20 @@ use regalloc::{
|
||||
};
|
||||
use std::string::String;
|
||||
|
||||
// Hardware encodings for a few registers.
|
||||
// Hardware encodings (note the special rax, rcx, rdx, rbx order).
|
||||
|
||||
pub const ENC_RAX: u8 = 0;
|
||||
pub const ENC_RCX: u8 = 1;
|
||||
pub const ENC_RDX: u8 = 2;
|
||||
pub const ENC_RBX: u8 = 3;
|
||||
pub const ENC_RSP: u8 = 4;
|
||||
pub const ENC_RBP: u8 = 5;
|
||||
pub const ENC_RSI: u8 = 6;
|
||||
pub const ENC_RDI: u8 = 7;
|
||||
pub const ENC_R8: u8 = 8;
|
||||
pub const ENC_R9: u8 = 9;
|
||||
pub const ENC_R10: u8 = 10;
|
||||
pub const ENC_R11: u8 = 11;
|
||||
pub const ENC_R12: u8 = 12;
|
||||
pub const ENC_R13: u8 = 13;
|
||||
pub const ENC_R14: u8 = 14;
|
||||
@@ -38,31 +47,31 @@ fn gpr(enc: u8, index: u8) -> Reg {
|
||||
}
|
||||
|
||||
pub(crate) fn rsi() -> Reg {
|
||||
gpr(6, 16)
|
||||
gpr(ENC_RSI, 16)
|
||||
}
|
||||
pub(crate) fn rdi() -> Reg {
|
||||
gpr(7, 17)
|
||||
gpr(ENC_RDI, 17)
|
||||
}
|
||||
pub(crate) fn rax() -> Reg {
|
||||
gpr(0, 18)
|
||||
gpr(ENC_RAX, 18)
|
||||
}
|
||||
pub(crate) fn rcx() -> Reg {
|
||||
gpr(1, 19)
|
||||
gpr(ENC_RCX, 19)
|
||||
}
|
||||
pub(crate) fn rdx() -> Reg {
|
||||
gpr(2, 20)
|
||||
gpr(ENC_RDX, 20)
|
||||
}
|
||||
pub(crate) fn r8() -> Reg {
|
||||
gpr(8, 21)
|
||||
gpr(ENC_R8, 21)
|
||||
}
|
||||
pub(crate) fn r9() -> Reg {
|
||||
gpr(9, 22)
|
||||
gpr(ENC_R9, 22)
|
||||
}
|
||||
pub(crate) fn r10() -> Reg {
|
||||
gpr(10, 23)
|
||||
gpr(ENC_R10, 23)
|
||||
}
|
||||
pub(crate) fn r11() -> Reg {
|
||||
gpr(11, 24)
|
||||
gpr(ENC_R11, 24)
|
||||
}
|
||||
pub(crate) fn r12() -> Reg {
|
||||
gpr(ENC_R12, 25)
|
||||
|
||||
@@ -124,19 +124,18 @@ use std::convert::TryFrom;
|
||||
use std::marker::PhantomData;
|
||||
use std::mem;
|
||||
|
||||
/// A location for an argument or return value.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub enum ABIArg {
|
||||
/// In a real register (or set of registers).
|
||||
/// A location for (part of) an argument or return value. These "storage slots"
|
||||
/// are specified for each register-sized part of an argument.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum ABIArgSlot {
|
||||
/// In a real register.
|
||||
Reg {
|
||||
/// Register(s) that hold this arg.
|
||||
regs: ValueRegs<RealReg>,
|
||||
/// Register that holds this arg.
|
||||
reg: RealReg,
|
||||
/// Value type of this arg.
|
||||
ty: ir::Type,
|
||||
/// Should this arg be zero- or sign-extended?
|
||||
extension: ir::ArgumentExtension,
|
||||
/// Purpose of this arg.
|
||||
purpose: ir::ArgumentPurpose,
|
||||
},
|
||||
/// Arguments only: on stack, at given offset from SP at entry.
|
||||
Stack {
|
||||
@@ -146,6 +145,26 @@ pub enum ABIArg {
|
||||
ty: ir::Type,
|
||||
/// Should this arg be zero- or sign-extended?
|
||||
extension: ir::ArgumentExtension,
|
||||
},
|
||||
}
|
||||
|
||||
/// An ABIArg is composed of one or more parts. This allows for a CLIF-level
|
||||
/// Value to be passed with its parts in more than one location at the ABI
|
||||
/// level. For example, a 128-bit integer may be passed in two 64-bit registers,
|
||||
/// or even a 64-bit register and a 64-bit stack slot, on a 64-bit machine. The
|
||||
/// number of "parts" should correspond to the number of registers used to store
|
||||
/// this type according to the machine backend.
|
||||
///
|
||||
/// As an invariant, the `purpose` for every part must match. As a further
|
||||
/// invariant, a `StructArg` part cannot appear with any other part.
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum ABIArg {
|
||||
/// Storage slots (registers or stack locations) for each part of the
|
||||
/// argument value. The number of slots must equal the number of register
|
||||
/// parts used to store a value of this type.
|
||||
Slots {
|
||||
/// Slots, one per register part.
|
||||
slots: Vec<ABIArgSlot>,
|
||||
/// Purpose of this arg.
|
||||
purpose: ir::ArgumentPurpose,
|
||||
},
|
||||
@@ -167,21 +186,50 @@ pub enum ABIArg {
|
||||
|
||||
impl ABIArg {
|
||||
/// Get the purpose of this arg.
|
||||
fn get_purpose(self) -> ir::ArgumentPurpose {
|
||||
fn get_purpose(&self) -> ir::ArgumentPurpose {
|
||||
match self {
|
||||
ABIArg::Reg { purpose, .. } => purpose,
|
||||
ABIArg::Stack { purpose, .. } => purpose,
|
||||
ABIArg::StructArg { purpose, .. } => purpose,
|
||||
&ABIArg::Slots { purpose, .. } => purpose,
|
||||
&ABIArg::StructArg { purpose, .. } => purpose,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this a StructArg?
|
||||
fn is_struct_arg(self) -> bool {
|
||||
fn is_struct_arg(&self) -> bool {
|
||||
match self {
|
||||
ABIArg::StructArg { .. } => true,
|
||||
&ABIArg::StructArg { .. } => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an ABIArg from one register.
|
||||
pub fn reg(
|
||||
reg: RealReg,
|
||||
ty: ir::Type,
|
||||
extension: ir::ArgumentExtension,
|
||||
purpose: ir::ArgumentPurpose,
|
||||
) -> ABIArg {
|
||||
ABIArg::Slots {
|
||||
slots: vec![ABIArgSlot::Reg { reg, ty, extension }],
|
||||
purpose,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an ABIArg from one stack slot.
|
||||
pub fn stack(
|
||||
offset: i64,
|
||||
ty: ir::Type,
|
||||
extension: ir::ArgumentExtension,
|
||||
purpose: ir::ArgumentPurpose,
|
||||
) -> ABIArg {
|
||||
ABIArg::Slots {
|
||||
slots: vec![ABIArgSlot::Stack {
|
||||
offset,
|
||||
ty,
|
||||
extension,
|
||||
}],
|
||||
purpose,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Are we computing information about arguments or return values? Much of the
|
||||
@@ -275,6 +323,7 @@ pub trait ABIMachineSpec {
|
||||
/// index of the extra synthetic arg that was added.
|
||||
fn compute_arg_locs(
|
||||
call_conv: isa::CallConv,
|
||||
flags: &settings::Flags,
|
||||
params: &[ir::AbiParam],
|
||||
args_or_rets: ArgsOrRets,
|
||||
add_ret_area_ptr: bool,
|
||||
@@ -461,11 +510,15 @@ struct ABISig {
|
||||
}
|
||||
|
||||
impl ABISig {
|
||||
fn from_func_sig<M: ABIMachineSpec>(sig: &ir::Signature) -> CodegenResult<ABISig> {
|
||||
fn from_func_sig<M: ABIMachineSpec>(
|
||||
sig: &ir::Signature,
|
||||
flags: &settings::Flags,
|
||||
) -> CodegenResult<ABISig> {
|
||||
// Compute args and retvals from signature. Handle retvals first,
|
||||
// because we may need to add a return-area arg to the args.
|
||||
let (rets, stack_ret_space, _) = M::compute_arg_locs(
|
||||
sig.call_conv,
|
||||
flags,
|
||||
&sig.returns,
|
||||
ArgsOrRets::Rets,
|
||||
/* extra ret-area ptr = */ false,
|
||||
@@ -473,6 +526,7 @@ impl ABISig {
|
||||
let need_stack_return_area = stack_ret_space > 0;
|
||||
let (args, stack_arg_space, stack_ret_arg) = M::compute_arg_locs(
|
||||
sig.call_conv,
|
||||
flags,
|
||||
&sig.params,
|
||||
ArgsOrRets::Args,
|
||||
need_stack_return_area,
|
||||
@@ -557,8 +611,11 @@ fn get_special_purpose_param_register(
|
||||
purpose: ir::ArgumentPurpose,
|
||||
) -> Option<Reg> {
|
||||
let idx = f.signature.special_param_index(purpose)?;
|
||||
match abi.args[idx] {
|
||||
ABIArg::Reg { regs, .. } => Some(regs.only_reg().unwrap().to_reg()),
|
||||
match &abi.args[idx] {
|
||||
&ABIArg::Slots { ref slots, .. } => match &slots[0] {
|
||||
&ABIArgSlot::Reg { reg, .. } => Some(reg.to_reg()),
|
||||
_ => None,
|
||||
},
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -569,7 +626,7 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
|
||||
debug!("ABI: func signature {:?}", f.signature);
|
||||
|
||||
let ir_sig = ensure_struct_return_ptr_is_returned(&f.signature);
|
||||
let sig = ABISig::from_func_sig::<M>(&ir_sig)?;
|
||||
let sig = ABISig::from_func_sig::<M>(&ir_sig, &flags)?;
|
||||
|
||||
let call_conv = f.signature.call_conv;
|
||||
// Only these calling conventions are supported.
|
||||
@@ -577,7 +634,8 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
|
||||
call_conv == isa::CallConv::SystemV
|
||||
|| call_conv == isa::CallConv::Fast
|
||||
|| call_conv == isa::CallConv::Cold
|
||||
|| call_conv.extends_baldrdash(),
|
||||
|| call_conv.extends_baldrdash()
|
||||
|| call_conv.extends_windows_fastcall(),
|
||||
"Unsupported calling convention: {:?}",
|
||||
call_conv
|
||||
);
|
||||
@@ -776,19 +834,6 @@ fn ty_from_ty_hint_or_reg_class<M: ABIMachineSpec>(r: Reg, ty: Option<Type>) ->
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_move_multi<M: ABIMachineSpec>(
|
||||
dst: ValueRegs<Writable<Reg>>,
|
||||
src: ValueRegs<Reg>,
|
||||
ty: Type,
|
||||
) -> SmallInstVec<M::I> {
|
||||
let mut ret = smallvec![];
|
||||
let (_, tys) = M::I::rc_for_type(ty).unwrap();
|
||||
for ((&dst, &src), &ty) in dst.regs().iter().zip(src.regs().iter()).zip(tys.iter()) {
|
||||
ret.push(M::gen_move(dst, src, ty));
|
||||
}
|
||||
ret
|
||||
}
|
||||
|
||||
fn gen_load_stack_multi<M: ABIMachineSpec>(
|
||||
from: StackAMode,
|
||||
dst: ValueRegs<Writable<Reg>>,
|
||||
@@ -821,22 +866,6 @@ fn gen_store_stack_multi<M: ABIMachineSpec>(
|
||||
ret
|
||||
}
|
||||
|
||||
fn gen_store_base_offset_multi<M: ABIMachineSpec>(
|
||||
base: Reg,
|
||||
mut offset: i32,
|
||||
src: ValueRegs<Reg>,
|
||||
ty: Type,
|
||||
) -> SmallInstVec<M::I> {
|
||||
let mut ret = smallvec![];
|
||||
let (_, tys) = M::I::rc_for_type(ty).unwrap();
|
||||
// N.B.: registers are given in the `ValueRegs` in target endian order.
|
||||
for (&src, &ty) in src.regs().iter().zip(tys.iter()) {
|
||||
ret.push(M::gen_store_base_offset(base, offset, src, ty));
|
||||
offset += ty.bytes() as i32;
|
||||
}
|
||||
ret
|
||||
}
|
||||
|
||||
fn ensure_struct_return_ptr_is_returned(sig: &ir::Signature) -> ir::Signature {
|
||||
let params_structret = sig
|
||||
.params
|
||||
@@ -892,10 +921,12 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
|
||||
fn liveins(&self) -> Set<RealReg> {
|
||||
let mut set: Set<RealReg> = Set::empty();
|
||||
for &arg in &self.sig.args {
|
||||
if let ABIArg::Reg { regs, .. } = arg {
|
||||
for &r in regs.regs() {
|
||||
set.insert(r);
|
||||
for arg in &self.sig.args {
|
||||
if let &ABIArg::Slots { ref slots, .. } = arg {
|
||||
for slot in slots {
|
||||
if let ABIArgSlot::Reg { reg, .. } = slot {
|
||||
set.insert(*reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -904,10 +935,12 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
|
||||
fn liveouts(&self) -> Set<RealReg> {
|
||||
let mut set: Set<RealReg> = Set::empty();
|
||||
for &ret in &self.sig.rets {
|
||||
if let ABIArg::Reg { regs, .. } = ret {
|
||||
for &r in regs.regs() {
|
||||
set.insert(r);
|
||||
for ret in &self.sig.rets {
|
||||
if let &ABIArg::Slots { ref slots, .. } = ret {
|
||||
for slot in slots {
|
||||
if let ABIArgSlot::Reg { reg, .. } = slot {
|
||||
set.insert(*reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -935,30 +968,44 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
idx: usize,
|
||||
into_regs: ValueRegs<Writable<Reg>>,
|
||||
) -> SmallInstVec<Self::I> {
|
||||
let mut insts = smallvec![];
|
||||
match &self.sig.args[idx] {
|
||||
&ABIArg::Slots { ref slots, .. } => {
|
||||
assert_eq!(into_regs.len(), slots.len());
|
||||
for (slot, into_reg) in slots.iter().zip(into_regs.regs().iter()) {
|
||||
match slot {
|
||||
// Extension mode doesn't matter (we're copying out, not in; we
|
||||
// ignore high bits by convention).
|
||||
&ABIArg::Reg { regs, ty, .. } => {
|
||||
gen_move_multi::<M>(into_regs, regs.map(|r| r.to_reg()), ty)
|
||||
&ABIArgSlot::Reg { reg, ty, .. } => {
|
||||
insts.push(M::gen_move(*into_reg, reg.to_reg(), ty));
|
||||
}
|
||||
&ABIArg::Stack { offset, ty, .. } => gen_load_stack_multi::<M>(
|
||||
&ABIArgSlot::Stack { offset, ty, .. } => {
|
||||
insts.push(M::gen_load_stack(
|
||||
StackAMode::FPOffset(
|
||||
M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
|
||||
ty,
|
||||
),
|
||||
into_regs,
|
||||
*into_reg,
|
||||
ty,
|
||||
),
|
||||
&ABIArg::StructArg { offset, .. } => smallvec![M::gen_get_stack_addr(
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
&ABIArg::StructArg { offset, .. } => {
|
||||
let into_reg = into_regs.only_reg().unwrap();
|
||||
insts.push(M::gen_get_stack_addr(
|
||||
StackAMode::FPOffset(
|
||||
M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
|
||||
I8,
|
||||
),
|
||||
into_regs.only_reg().unwrap(),
|
||||
into_reg,
|
||||
I8,
|
||||
)],
|
||||
));
|
||||
}
|
||||
}
|
||||
insts
|
||||
}
|
||||
|
||||
fn arg_is_needed_in_body(&self, idx: usize) -> bool {
|
||||
match self.sig.args[idx].get_purpose() {
|
||||
@@ -978,41 +1025,38 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
let mut ret = smallvec![];
|
||||
let word_bits = M::word_bits() as u8;
|
||||
match &self.sig.rets[idx] {
|
||||
&ABIArg::Reg {
|
||||
regs,
|
||||
ty,
|
||||
extension,
|
||||
..
|
||||
&ABIArg::Slots { ref slots, .. } => {
|
||||
assert_eq!(from_regs.len(), slots.len());
|
||||
for (slot, from_reg) in slots.iter().zip(from_regs.regs().iter()) {
|
||||
match slot {
|
||||
&ABIArgSlot::Reg {
|
||||
reg, ty, extension, ..
|
||||
} => {
|
||||
let from_bits = ty_bits(ty) as u8;
|
||||
let dest_regs = writable_value_regs(regs.map(|r| r.to_reg()));
|
||||
let ext = M::get_ext_mode(self.sig.call_conv, extension);
|
||||
match (ext, from_bits) {
|
||||
(ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
|
||||
if n < word_bits =>
|
||||
{
|
||||
let signed = ext == ArgumentExtension::Sext;
|
||||
let dest_reg = dest_regs
|
||||
.only_reg()
|
||||
.expect("extension only possible from one-reg value");
|
||||
let from_reg = from_regs
|
||||
.only_reg()
|
||||
.expect("extension only possible from one-reg value");
|
||||
ret.push(M::gen_extend(
|
||||
dest_reg,
|
||||
Writable::from_reg(reg.to_reg()),
|
||||
from_reg.to_reg(),
|
||||
signed,
|
||||
from_bits,
|
||||
/* to_bits = */ word_bits,
|
||||
));
|
||||
}
|
||||
_ => ret.extend(
|
||||
gen_move_multi::<M>(dest_regs, non_writable_value_regs(from_regs), ty)
|
||||
.into_iter(),
|
||||
),
|
||||
_ => {
|
||||
ret.push(M::gen_move(
|
||||
Writable::from_reg(reg.to_reg()),
|
||||
from_reg.to_reg(),
|
||||
ty,
|
||||
));
|
||||
}
|
||||
};
|
||||
}
|
||||
&ABIArg::Stack {
|
||||
&ABIArgSlot::Stack {
|
||||
offset,
|
||||
ty,
|
||||
extension,
|
||||
@@ -1023,21 +1067,19 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
// A machine ABI implementation should ensure that stack frames
|
||||
// have "reasonable" size. All current ABIs for machinst
|
||||
// backends (aarch64 and x64) enforce a 128MB limit.
|
||||
let off = i32::try_from(offset)
|
||||
.expect("Argument stack offset greater than 2GB; should hit impl limit first");
|
||||
let off = i32::try_from(offset).expect(
|
||||
"Argument stack offset greater than 2GB; should hit impl limit first",
|
||||
);
|
||||
let ext = M::get_ext_mode(self.sig.call_conv, extension);
|
||||
// Trash the from_reg; it should be its last use.
|
||||
match (ext, from_bits) {
|
||||
(ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
|
||||
if n < word_bits =>
|
||||
{
|
||||
let from_reg = from_regs
|
||||
.only_reg()
|
||||
.expect("extension only possible from one-reg value");
|
||||
assert_eq!(M::word_reg_class(), from_reg.to_reg().get_class());
|
||||
let signed = ext == ArgumentExtension::Sext;
|
||||
ret.push(M::gen_extend(
|
||||
from_reg,
|
||||
Writable::from_reg(from_reg.to_reg()),
|
||||
from_reg.to_reg(),
|
||||
signed,
|
||||
from_bits,
|
||||
@@ -1048,17 +1090,19 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
}
|
||||
_ => {}
|
||||
};
|
||||
ret.extend(
|
||||
gen_store_base_offset_multi::<M>(
|
||||
ret.push(M::gen_store_base_offset(
|
||||
self.ret_area_ptr.unwrap().to_reg(),
|
||||
off,
|
||||
non_writable_value_regs(from_regs),
|
||||
from_reg.to_reg(),
|
||||
ty,
|
||||
)
|
||||
.into_iter(),
|
||||
);
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
&ABIArg::StructArg { .. } => {
|
||||
panic!("StructArg in return position is unsupported");
|
||||
}
|
||||
&ABIArg::StructArg { .. } => panic!("Unexpected StructArg location for return value"),
|
||||
}
|
||||
ret
|
||||
}
|
||||
@@ -1345,22 +1389,32 @@ fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Wr
|
||||
// Compute uses: all arg regs.
|
||||
let mut uses = Vec::new();
|
||||
for arg in &sig.args {
|
||||
match arg {
|
||||
&ABIArg::Reg { regs, .. } => uses.extend(regs.regs().iter().map(|r| r.to_reg())),
|
||||
if let &ABIArg::Slots { ref slots, .. } = arg {
|
||||
for slot in slots {
|
||||
match slot {
|
||||
&ABIArgSlot::Reg { reg, .. } => {
|
||||
uses.push(reg.to_reg());
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute defs: all retval regs, and all caller-save (clobbered) regs.
|
||||
let mut defs = M::get_regs_clobbered_by_call(sig.call_conv);
|
||||
for ret in &sig.rets {
|
||||
match ret {
|
||||
&ABIArg::Reg { regs, .. } => {
|
||||
defs.extend(regs.regs().iter().map(|r| Writable::from_reg(r.to_reg())))
|
||||
if let &ABIArg::Slots { ref slots, .. } = ret {
|
||||
for slot in slots {
|
||||
match slot {
|
||||
&ABIArgSlot::Reg { reg, .. } => {
|
||||
defs.push(Writable::from_reg(reg.to_reg()));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(uses, defs)
|
||||
}
|
||||
@@ -1406,7 +1460,7 @@ impl<M: ABIMachineSpec> ABICallerImpl<M> {
|
||||
flags: &settings::Flags,
|
||||
) -> CodegenResult<ABICallerImpl<M>> {
|
||||
let ir_sig = ensure_struct_return_ptr_is_returned(sig);
|
||||
let sig = ABISig::from_func_sig::<M>(&ir_sig)?;
|
||||
let sig = ABISig::from_func_sig::<M>(&ir_sig, flags)?;
|
||||
let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig);
|
||||
Ok(ABICallerImpl {
|
||||
ir_sig,
|
||||
@@ -1431,7 +1485,7 @@ impl<M: ABIMachineSpec> ABICallerImpl<M> {
|
||||
flags: &settings::Flags,
|
||||
) -> CodegenResult<ABICallerImpl<M>> {
|
||||
let ir_sig = ensure_struct_return_ptr_is_returned(sig);
|
||||
let sig = ABISig::from_func_sig::<M>(&ir_sig)?;
|
||||
let sig = ABISig::from_func_sig::<M>(&ir_sig, flags)?;
|
||||
let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig);
|
||||
Ok(ABICallerImpl {
|
||||
ir_sig,
|
||||
@@ -1501,15 +1555,15 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
|
||||
let word_rc = M::word_reg_class();
|
||||
let word_bits = M::word_bits() as usize;
|
||||
match &self.sig.args[idx] {
|
||||
&ABIArg::Reg {
|
||||
regs,
|
||||
ty,
|
||||
extension,
|
||||
..
|
||||
&ABIArg::Slots { ref slots, .. } => {
|
||||
assert_eq!(from_regs.len(), slots.len());
|
||||
for (slot, from_reg) in slots.iter().zip(from_regs.regs().iter()) {
|
||||
match slot {
|
||||
&ABIArgSlot::Reg {
|
||||
reg, ty, extension, ..
|
||||
} => {
|
||||
let ext = M::get_ext_mode(self.sig.call_conv, extension);
|
||||
if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
|
||||
let reg = regs.only_reg().unwrap();
|
||||
assert_eq!(word_rc, reg.get_class());
|
||||
let signed = match ext {
|
||||
ir::ArgumentExtension::Uext => false,
|
||||
@@ -1518,22 +1572,20 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
|
||||
};
|
||||
ctx.emit(M::gen_extend(
|
||||
Writable::from_reg(reg.to_reg()),
|
||||
from_regs.only_reg().unwrap(),
|
||||
*from_reg,
|
||||
signed,
|
||||
ty_bits(ty) as u8,
|
||||
word_bits as u8,
|
||||
));
|
||||
} else {
|
||||
for insn in gen_move_multi::<M>(
|
||||
writable_value_regs(regs.map(|r| r.to_reg())),
|
||||
from_regs,
|
||||
ctx.emit(M::gen_move(
|
||||
Writable::from_reg(reg.to_reg()),
|
||||
*from_reg,
|
||||
ty,
|
||||
) {
|
||||
ctx.emit(insn);
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
&ABIArg::Stack {
|
||||
&ABIArgSlot::Stack {
|
||||
offset,
|
||||
ty,
|
||||
extension,
|
||||
@@ -1542,9 +1594,6 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
|
||||
let mut ty = ty;
|
||||
let ext = M::get_ext_mode(self.sig.call_conv, extension);
|
||||
if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
|
||||
let from_reg = from_regs
|
||||
.only_reg()
|
||||
.expect("only one reg for sub-word value width");
|
||||
assert_eq!(word_rc, from_reg.get_class());
|
||||
let signed = match ext {
|
||||
ir::ArgumentExtension::Uext => false,
|
||||
@@ -1555,8 +1604,8 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
|
||||
// treat high bits as undefined for values in registers, so this
|
||||
// is safe, even for an argument that is nominally read-only.
|
||||
ctx.emit(M::gen_extend(
|
||||
Writable::from_reg(from_reg),
|
||||
from_reg,
|
||||
Writable::from_reg(*from_reg),
|
||||
*from_reg,
|
||||
signed,
|
||||
ty_bits(ty) as u8,
|
||||
word_bits as u8,
|
||||
@@ -1564,10 +1613,13 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
|
||||
// Store the extended version.
|
||||
ty = M::word_type();
|
||||
}
|
||||
for insn in
|
||||
gen_store_stack_multi::<M>(StackAMode::SPOffset(offset, ty), from_regs, ty)
|
||||
{
|
||||
ctx.emit(insn);
|
||||
ctx.emit(M::gen_store_stack(
|
||||
StackAMode::SPOffset(offset, ty),
|
||||
*from_reg,
|
||||
ty,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
&ABIArg::StructArg { offset, size, .. } => {
|
||||
@@ -1618,24 +1670,29 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
|
||||
into_regs: ValueRegs<Writable<Reg>>,
|
||||
) {
|
||||
match &self.sig.rets[idx] {
|
||||
&ABIArg::Slots { ref slots, .. } => {
|
||||
assert_eq!(into_regs.len(), slots.len());
|
||||
for (slot, into_reg) in slots.iter().zip(into_regs.regs().iter()) {
|
||||
match slot {
|
||||
// Extension mode doesn't matter because we're copying out, not in,
|
||||
// and we ignore high bits in our own registers by convention.
|
||||
&ABIArg::Reg { regs, ty, .. } => {
|
||||
for insn in gen_move_multi::<M>(into_regs, regs.map(|r| r.to_reg()), ty) {
|
||||
ctx.emit(insn);
|
||||
&ABIArgSlot::Reg { reg, ty, .. } => {
|
||||
ctx.emit(M::gen_move(*into_reg, reg.to_reg(), ty));
|
||||
}
|
||||
}
|
||||
&ABIArg::Stack { offset, ty, .. } => {
|
||||
&ABIArgSlot::Stack { offset, ty, .. } => {
|
||||
let ret_area_base = self.sig.stack_arg_space;
|
||||
for insn in gen_load_stack_multi::<M>(
|
||||
ctx.emit(M::gen_load_stack(
|
||||
StackAMode::SPOffset(offset + ret_area_base, ty),
|
||||
into_regs,
|
||||
*into_reg,
|
||||
ty,
|
||||
) {
|
||||
ctx.emit(insn);
|
||||
));
|
||||
}
|
||||
}
|
||||
&ABIArg::StructArg { .. } => panic!("Unexpected StructArg location for return value"),
|
||||
}
|
||||
}
|
||||
&ABIArg::StructArg { .. } => {
|
||||
panic!("StructArg not supported in return position");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
use super::{InsnOutput, LowerCtx, VCodeInst, ValueRegs};
|
||||
use crate::ir::Type;
|
||||
use regalloc::{Reg, Writable};
|
||||
use std::ops::{Add, BitAnd, Not, Sub};
|
||||
|
||||
/// Returns the size (in bits) of a given type.
|
||||
pub fn ty_bits(ty: Type) -> usize {
|
||||
@@ -26,3 +27,17 @@ pub(crate) fn get_output_reg<I: VCodeInst, C: LowerCtx<I = I>>(
|
||||
) -> ValueRegs<Writable<Reg>> {
|
||||
ctx.get_output(spec.insn, spec.output)
|
||||
}
|
||||
|
||||
/// Round `x` up to the next multiple of `alignment`.
///
/// `alignment` is expected to be a power of two: the result is computed by
/// adding `alignment - 1` to `x` and then masking off those low bits, which
/// only yields a multiple of `alignment` for power-of-two alignments.
pub(crate) fn align_to<N>(x: N, alignment: N) -> N
where
    N: Not<Output = N>
        + BitAnd<N, Output = N>
        + Add<N, Output = N>
        + Sub<N, Output = N>
        + From<u8>
        + Copy,
{
    // For a power-of-two alignment, `alignment - 1` has exactly the low bits set.
    let low_bits = alignment - N::from(1u8);
    let bumped = x + low_bits;
    bumped & !low_bits
}
|
||||
|
||||
@@ -175,11 +175,13 @@ impl<R: Clone + Copy + Debug + PartialEq + Eq + InvalidSentinel> ValueRegs<R> {
|
||||
}
|
||||
|
||||
/// Create a writable ValueRegs.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn writable_value_regs(regs: ValueRegs<Reg>) -> ValueRegs<Writable<Reg>> {
|
||||
regs.map(|r| Writable::from_reg(r))
|
||||
}
|
||||
|
||||
/// Strip a writable ValueRegs down to a readonly ValueRegs.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn non_writable_value_regs(regs: ValueRegs<Writable<Reg>>) -> ValueRegs<Reg> {
|
||||
regs.map(|r| r.to_reg())
|
||||
}
|
||||
|
||||
@@ -398,6 +398,7 @@ use_pinned_reg_as_heap_base = false
|
||||
enable_simd = false
|
||||
enable_atomics = true
|
||||
enable_safepoints = false
|
||||
enable_llvm_abi_extensions = false
|
||||
emit_all_ones_funcaddrs = false
|
||||
enable_probestack = true
|
||||
probestack_func_adjusts_sp = false
|
||||
|
||||
299
cranelift/filetests/filetests/isa/x64/fastcall.clif
Normal file
299
cranelift/filetests/filetests/isa/x64/fastcall.clif
Normal file
@@ -0,0 +1,299 @@
|
||||
test compile
|
||||
set enable_llvm_abi_extensions=true
|
||||
target x86_64
|
||||
feature "experimental_x64"
|
||||
|
||||
function %f0(i64, i64, i64, i64) -> i64 windows_fastcall {
|
||||
block0(v0: i64, v1: i64, v2: i64, v3: i64):
|
||||
return v0
|
||||
}
|
||||
|
||||
; check: pushq %rbp
|
||||
; nextln: movq %rsp, %rbp
|
||||
; nextln: movq %rcx, %rax
|
||||
; nextln: movq %rbp, %rsp
|
||||
; nextln: popq %rbp
|
||||
; nextln: ret
|
||||
|
||||
function %f1(i64, i64, i64, i64) -> i64 windows_fastcall {
|
||||
block0(v0: i64, v1: i64, v2: i64, v3: i64):
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: pushq %rbp
|
||||
; nextln: movq %rsp, %rbp
|
||||
; nextln: movq %rdx, %rax
|
||||
; nextln: movq %rbp, %rsp
|
||||
; nextln: popq %rbp
|
||||
; nextln: ret
|
||||
|
||||
function %f2(i64, i64, i64, i64) -> i64 windows_fastcall {
|
||||
block0(v0: i64, v1: i64, v2: i64, v3: i64):
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: pushq %rbp
|
||||
; nextln: movq %rsp, %rbp
|
||||
; nextln: movq %r8, %rax
|
||||
; nextln: movq %rbp, %rsp
|
||||
; nextln: popq %rbp
|
||||
; nextln: ret
|
||||
|
||||
function %f3(i64, i64, i64, i64) -> i64 windows_fastcall {
|
||||
block0(v0: i64, v1: i64, v2: i64, v3: i64):
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: pushq %rbp
|
||||
; nextln: movq %rsp, %rbp
|
||||
; nextln: movq %r9, %rax
|
||||
; nextln: movq %rbp, %rsp
|
||||
; nextln: popq %rbp
|
||||
; nextln: ret
|
||||
|
||||
function %f4(i64, i64, f64, i64) -> f64 windows_fastcall {
|
||||
block0(v0: i64, v1: i64, v2: f64, v3: i64):
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: pushq %rbp
|
||||
; nextln: movq %rsp, %rbp
|
||||
; nextln: movaps %xmm2, %xmm0
|
||||
; nextln: movq %rbp, %rsp
|
||||
; nextln: popq %rbp
|
||||
; nextln: ret
|
||||
|
||||
function %f5(i64, i64, f64, i64) -> i64 windows_fastcall {
|
||||
block0(v0: i64, v1: i64, v2: f64, v3: i64):
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: pushq %rbp
|
||||
; nextln: movq %rsp, %rbp
|
||||
; nextln: movq %r9, %rax
|
||||
; nextln: movq %rbp, %rsp
|
||||
; nextln: popq %rbp
|
||||
; nextln: ret
|
||||
|
||||
function %f6(i64, i64, i64, i64, i64, i64) -> i64 windows_fastcall {
|
||||
block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64):
|
||||
return v5
|
||||
}
|
||||
|
||||
;; This is truly odd (because of the regalloc ordering), but it works. Note
|
||||
;; that we're spilling and using rsi, which is a callee-save in fastcall, because
|
||||
;; the regalloc order is optimized for SysV. Also note that because we copy args
|
||||
;; out of their input locations to separate vregs, we have a spurious load
|
||||
;; from [rbp+48]. Ordinarily these moves are coalesced because the dest vreg
|
||||
;; is allocated as a caller-save (volatile), but here again we allocate rsi
|
||||
;; first and so have to spill it (and consequently don't coalesce).
|
||||
;;
|
||||
;; TODO(#2704): fix regalloc's register priority ordering!
|
||||
|
||||
; check: pushq %rbp
|
||||
; nextln: movq %rsp, %rbp
|
||||
; nextln: subq $$16, %rsp
|
||||
; nextln: movq %rsi, 0(%rsp)
|
||||
; nextln: virtual_sp_offset_adjust 16
|
||||
; nextln: movq 48(%rbp), %rsi
|
||||
; nextln: movq 56(%rbp), %rsi
|
||||
; nextln: movq %rsi, %rax
|
||||
; nextln: movq 0(%rsp), %rsi
|
||||
; nextln: addq $$16, %rsp
|
||||
; nextln: movq %rbp, %rsp
|
||||
; nextln: popq %rbp
|
||||
; nextln: ret
|
||||
|
||||
function %f7(i128, i64, i128, i128) -> i128 windows_fastcall {
|
||||
block0(v0: i128, v1: i64, v2: i128, v3: i128):
|
||||
return v3
|
||||
}
|
||||
|
||||
;; Again, terrible regalloc behavior. The important part is that `v3` comes
|
||||
;; from [rbp+56] and [rbp+64], i.e., the second and third non-shadow
|
||||
;; stack slots.
|
||||
|
||||
; check: pushq %rbp
|
||||
; nextln: movq %rsp, %rbp
|
||||
; nextln: subq $$16, %rsp
|
||||
; nextln: movq %rsi, 0(%rsp)
|
||||
; nextln: movq %rdi, 8(%rsp)
|
||||
; nextln: virtual_sp_offset_adjust 16
|
||||
; nextln: movq 48(%rbp), %rsi
|
||||
; nextln: movq 56(%rbp), %rsi
|
||||
; nextln: movq 64(%rbp), %rdi
|
||||
; nextln: movq %rsi, %rax
|
||||
; nextln: movq %rdi, %rdx
|
||||
; nextln: movq 0(%rsp), %rsi
|
||||
; nextln: movq 8(%rsp), %rdi
|
||||
; nextln: addq $$16, %rsp
|
||||
; nextln: movq %rbp, %rsp
|
||||
; nextln: popq %rbp
|
||||
; nextln: ret
|
||||
|
||||
function %f8(i64) -> i64 windows_fastcall {
|
||||
sig0 = (i64, i64, f64, f64, i64, i64) -> i64 windows_fastcall
|
||||
fn0 = %g sig0
|
||||
|
||||
block0(v0: i64):
|
||||
v1 = fcvt_from_sint.f64 v0
|
||||
v2 = call fn0(v0, v0, v1, v1, v0, v0)
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: pushq %rbp
|
||||
; nextln: movq %rsp, %rbp
|
||||
; nextln: subq $$16, %rsp
|
||||
; nextln: movq %rsi, 0(%rsp)
|
||||
; nextln: virtual_sp_offset_adjust 16
|
||||
; nextln: movq %rcx, %rsi
|
||||
; nextln: cvtsi2sd %rsi, %xmm3
|
||||
; nextln: subq $$48, %rsp
|
||||
; nextln: virtual_sp_offset_adjust 48
|
||||
; nextln: movq %rsi, %rcx
|
||||
; nextln: movq %rsi, %rdx
|
||||
; nextln: movaps %xmm3, %xmm2
|
||||
; nextln: movq %rsi, 32(%rsp)
|
||||
; nextln: movq %rsi, 40(%rsp)
|
||||
; nextln: load_ext_name %g+0, %rsi
|
||||
; nextln: call *%rsi
|
||||
; nextln: addq $$48, %rsp
|
||||
; nextln: virtual_sp_offset_adjust -48
|
||||
; nextln: movq 0(%rsp), %rsi
|
||||
; nextln: addq $$16, %rsp
|
||||
; nextln: movq %rbp, %rsp
|
||||
; nextln: popq %rbp
|
||||
; nextln: ret
|
||||
|
||||
function %f9(i64) -> f64 windows_fastcall {
|
||||
block0(v0: i64):
|
||||
v1 = load.f64 v0+0
|
||||
v2 = load.f64 v0+8
|
||||
v3 = load.f64 v0+16
|
||||
v4 = load.f64 v0+24
|
||||
v5 = load.f64 v0+32
|
||||
v6 = load.f64 v0+40
|
||||
v7 = load.f64 v0+48
|
||||
v8 = load.f64 v0+56
|
||||
v9 = load.f64 v0+64
|
||||
v10 = load.f64 v0+72
|
||||
v11 = load.f64 v0+80
|
||||
v12 = load.f64 v0+88
|
||||
v13 = load.f64 v0+96
|
||||
v14 = load.f64 v0+104
|
||||
v15 = load.f64 v0+112
|
||||
v16 = load.f64 v0+120
|
||||
v17 = load.f64 v0+128
|
||||
v18 = load.f64 v0+136
|
||||
v19 = load.f64 v0+144
|
||||
v20 = load.f64 v0+152
|
||||
|
||||
v21 = fadd.f64 v1, v2
|
||||
v22 = fadd.f64 v3, v4
|
||||
v23 = fadd.f64 v5, v6
|
||||
v24 = fadd.f64 v7, v8
|
||||
v25 = fadd.f64 v9, v10
|
||||
v26 = fadd.f64 v11, v12
|
||||
v27 = fadd.f64 v13, v14
|
||||
v28 = fadd.f64 v15, v16
|
||||
v29 = fadd.f64 v17, v18
|
||||
v30 = fadd.f64 v19, v20
|
||||
|
||||
v31 = fadd.f64 v21, v22
|
||||
v32 = fadd.f64 v23, v24
|
||||
v33 = fadd.f64 v25, v26
|
||||
v34 = fadd.f64 v27, v28
|
||||
v35 = fadd.f64 v29, v30
|
||||
|
||||
v36 = fadd.f64 v31, v32
|
||||
v37 = fadd.f64 v33, v34
|
||||
|
||||
v38 = fadd.f64 v36, v37
|
||||
|
||||
v39 = fadd.f64 v38, v35
|
||||
|
||||
return v39
|
||||
}
|
||||
|
||||
; check: pushq %rbp
|
||||
; nextln: movq %rsp, %rbp
|
||||
; nextln: subq $$208, %rsp
|
||||
; nextln: movdqu %xmm6, 0(%rsp)
|
||||
; nextln: movdqu %xmm7, 16(%rsp)
|
||||
; nextln: movdqu %xmm8, 32(%rsp)
|
||||
; nextln: movdqu %xmm9, 48(%rsp)
|
||||
; nextln: movdqu %xmm10, 64(%rsp)
|
||||
; nextln: movdqu %xmm11, 80(%rsp)
|
||||
; nextln: movdqu %xmm12, 96(%rsp)
|
||||
; nextln: movdqu %xmm13, 112(%rsp)
|
||||
; nextln: movdqu %xmm14, 128(%rsp)
|
||||
; nextln: movdqu %xmm15, 144(%rsp)
|
||||
; nextln: virtual_sp_offset_adjust 160
|
||||
; nextln: movsd 0(%rcx), %xmm0
|
||||
; nextln: movsd %xmm0, rsp(16 + virtual offset)
|
||||
; nextln: movsd 8(%rcx), %xmm1
|
||||
; nextln: movsd 16(%rcx), %xmm0
|
||||
; nextln: movsd %xmm0, rsp(24 + virtual offset)
|
||||
; nextln: movsd 24(%rcx), %xmm3
|
||||
; nextln: movsd 32(%rcx), %xmm0
|
||||
; nextln: movsd %xmm0, rsp(32 + virtual offset)
|
||||
; nextln: movsd 40(%rcx), %xmm5
|
||||
; nextln: movsd 48(%rcx), %xmm6
|
||||
; nextln: movsd 56(%rcx), %xmm7
|
||||
; nextln: movsd 64(%rcx), %xmm8
|
||||
; nextln: movsd 72(%rcx), %xmm9
|
||||
; nextln: movsd 80(%rcx), %xmm10
|
||||
; nextln: movsd 88(%rcx), %xmm11
|
||||
; nextln: movsd 96(%rcx), %xmm12
|
||||
; nextln: movsd 104(%rcx), %xmm13
|
||||
; nextln: movsd 112(%rcx), %xmm14
|
||||
; nextln: movsd 120(%rcx), %xmm15
|
||||
; nextln: movsd 128(%rcx), %xmm0
|
||||
; nextln: movsd %xmm0, rsp(0 + virtual offset)
|
||||
; nextln: movsd 136(%rcx), %xmm0
|
||||
; nextln: movsd 144(%rcx), %xmm2
|
||||
; nextln: movsd %xmm2, rsp(8 + virtual offset)
|
||||
; nextln: movsd 152(%rcx), %xmm2
|
||||
; nextln: nop len=0
|
||||
; nextln: movsd rsp(16 + virtual offset), %xmm4
|
||||
; nextln: addsd %xmm1, %xmm4
|
||||
; nextln: movsd %xmm4, rsp(16 + virtual offset)
|
||||
; nextln: movsd rsp(24 + virtual offset), %xmm1
|
||||
; nextln: addsd %xmm3, %xmm1
|
||||
; nextln: movsd rsp(32 + virtual offset), %xmm4
|
||||
; nextln: addsd %xmm5, %xmm4
|
||||
; nextln: addsd %xmm7, %xmm6
|
||||
; nextln: addsd %xmm9, %xmm8
|
||||
; nextln: addsd %xmm11, %xmm10
|
||||
; nextln: addsd %xmm13, %xmm12
|
||||
; nextln: addsd %xmm15, %xmm14
|
||||
; nextln: movsd rsp(0 + virtual offset), %xmm3
|
||||
; nextln: addsd %xmm0, %xmm3
|
||||
; nextln: movsd rsp(8 + virtual offset), %xmm0
|
||||
; nextln: addsd %xmm2, %xmm0
|
||||
; nextln: movsd rsp(16 + virtual offset), %xmm2
|
||||
; nextln: addsd %xmm1, %xmm2
|
||||
; nextln: addsd %xmm6, %xmm4
|
||||
; nextln: addsd %xmm10, %xmm8
|
||||
; nextln: addsd %xmm14, %xmm12
|
||||
; nextln: addsd %xmm0, %xmm3
|
||||
; nextln: addsd %xmm4, %xmm2
|
||||
; nextln: addsd %xmm12, %xmm8
|
||||
; nextln: addsd %xmm8, %xmm2
|
||||
; nextln: addsd %xmm3, %xmm2
|
||||
; nextln: movaps %xmm2, %xmm0
|
||||
; nextln: movdqu 0(%rsp), %xmm6
|
||||
; nextln: movdqu 16(%rsp), %xmm7
|
||||
; nextln: movdqu 32(%rsp), %xmm8
|
||||
; nextln: movdqu 48(%rsp), %xmm9
|
||||
; nextln: movdqu 64(%rsp), %xmm10
|
||||
; nextln: movdqu 80(%rsp), %xmm11
|
||||
; nextln: movdqu 96(%rsp), %xmm12
|
||||
; nextln: movdqu 112(%rsp), %xmm13
|
||||
; nextln: movdqu 128(%rsp), %xmm14
|
||||
; nextln: movdqu 144(%rsp), %xmm15
|
||||
; nextln: addq $$160, %rsp
|
||||
; nextln: movq %rbp, %rsp
|
||||
; nextln: popq %rbp
|
||||
; nextln: ret
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
test compile
|
||||
set enable_llvm_abi_extensions=true
|
||||
target x86_64
|
||||
feature "experimental_x64"
|
||||
|
||||
@@ -738,17 +739,17 @@ block0(v0: i128, v1: i128, v2: i64, v3: i128, v4: i128, v5: i128):
|
||||
v11 = iadd.i128 v9, v10
|
||||
return v11
|
||||
|
||||
; check: movq %rsp, %rbp
|
||||
; check: pushq %rbp
|
||||
; nextln: movq %rsp, %rbp
|
||||
; nextln: subq $$16, %rsp
|
||||
; nextln: movq %r12, 0(%rsp)
|
||||
; nextln: movq %r13, 8(%rsp)
|
||||
; nextln: virtual_sp_offset_adjust 16
|
||||
; nextln: movq 16(%rbp), %r9
|
||||
; nextln: movq 24(%rbp), %r10
|
||||
; nextln: movq 32(%rbp), %r12
|
||||
; nextln: movq 40(%rbp), %r11
|
||||
; nextln: movq 48(%rbp), %rax
|
||||
; nextln: movq 56(%rbp), %r13
|
||||
; nextln: movq 16(%rbp), %r10
|
||||
; nextln: movq 24(%rbp), %r12
|
||||
; nextln: movq 32(%rbp), %r11
|
||||
; nextln: movq 40(%rbp), %rax
|
||||
; nextln: movq 48(%rbp), %r13
|
||||
; nextln: addq %rdx, %rdi
|
||||
; nextln: adcq %rcx, %rsi
|
||||
; nextln: xorq %rcx, %rcx
|
||||
@@ -786,10 +787,10 @@ block0(v0: i128):
|
||||
; nextln: movq %r10, 16(%rsi)
|
||||
; nextln: movq %r11, 24(%rsi)
|
||||
; nextln: movq %r12, 32(%rsi)
|
||||
; nextln: movq %r13, 48(%rsi)
|
||||
; nextln: movq %r14, 56(%rsi)
|
||||
; nextln: movq %rdi, 64(%rsi)
|
||||
; nextln: movq %rbx, 72(%rsi)
|
||||
; nextln: movq %r13, 40(%rsi)
|
||||
; nextln: movq %r14, 48(%rsi)
|
||||
; nextln: movq %rdi, 56(%rsi)
|
||||
; nextln: movq %rbx, 64(%rsi)
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
test compile
|
||||
set enable_llvm_abi_extensions=true
|
||||
target x86_64
|
||||
feature "experimental_x64"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user