x86-64 Windows fastcall ABI support.

This adds support for the "fastcall" ABI, which is the native C/C++ ABI
on x86-64 Windows. It is similar to, but not exactly like, System V:
primarily, its argument-register assignments are different, and it
requires the caller to reserve stack shadow space.
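
As an illustrative sketch (a summary of the two conventions, not text
from this change), consider a call `f(a, b, c, d, e)` with five `i64`
arguments, viewed just before the `call` instruction:

    SysV/x64:     a=rdi  b=rsi  c=rdx  d=rcx  e=r8        (no shadow space)
    fastcall/x64: a=rcx  b=rdx  c=r8   d=r9   e=[rsp+32]  ([rsp+0..32) is shadow space)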

Note that this also adjusts the handling of multi-register values in the
shared ABI implementation, and with it the handling of `i128`s on *both*
Fastcall/x64 *and* SysV/x64 platforms. This was done to align with the
actual behavior of the "rustc ABI" on both platforms, as mapped out
experimentally (Compiler Explorer link in comments). This behavior is
gated under the `enable_llvm_abi_extensions` flag.

Note also that this does *not* add x64 unwind info on Windows. That will
come in a future PR (but is planned!).
Chris Fallin
2021-02-22 20:28:49 -08:00
parent 98d3e6823f
commit 6c94eb82aa
13 changed files with 997 additions and 475 deletions


@@ -216,6 +216,25 @@ pub(crate) fn define() -> SettingGroup {
0,
);
settings.add_bool(
"enable_llvm_abi_extensions",
r#"
Enable various ABI extensions defined by LLVM's behavior.
In some cases, LLVM's implementation of an ABI (calling convention)
goes beyond a standard and supports additional argument types or
behavior. This option instructs Cranelift codegen to follow LLVM's
behavior where applicable.
Currently, this applies only to Windows Fastcall on x86-64, and
allows an `i128` argument to be spread across two 64-bit integer
registers. The Fastcall implementation otherwise does not support
`i128` arguments, and will panic if they are present and this
option is not set.
"#,
false,
);
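
For reference, a minimal sketch of how an embedder would turn this flag
on, assuming the standard `cranelift-codegen` settings API
(`Configurable::set` plus the accessor generated for this setting):

    use cranelift_codegen::settings::{self, Configurable};

    let mut builder = settings::builder();
    builder.set("enable_llvm_abi_extensions", "true").unwrap();
    let flags = settings::Flags::new(builder);
    assert!(flags.enable_llvm_abi_extensions());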
// BaldrMonkey requires that not-yet-relocated function addresses be encoded
// as all-ones bitpatterns.
settings.add_bool(


@@ -78,41 +78,41 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt
match &param.purpose {
&ir::ArgumentPurpose::VMContext => {
// This is SpiderMonkey's `WasmTlsReg`.
Some(ABIArg::Reg {
regs: ValueRegs::one(xreg(BALDRDASH_TLS_REG).to_real_reg()),
ty: ir::types::I64,
extension: param.extension,
purpose: param.purpose,
})
Some(ABIArg::reg(
xreg(BALDRDASH_TLS_REG).to_real_reg(),
ir::types::I64,
param.extension,
param.purpose,
))
}
&ir::ArgumentPurpose::SignatureId => {
// This is SpiderMonkey's `WasmTableCallSigReg`.
Some(ABIArg::Reg {
regs: ValueRegs::one(xreg(BALDRDASH_SIG_REG).to_real_reg()),
ty: ir::types::I64,
extension: param.extension,
purpose: param.purpose,
})
Some(ABIArg::reg(
xreg(BALDRDASH_SIG_REG).to_real_reg(),
ir::types::I64,
param.extension,
param.purpose,
))
}
&ir::ArgumentPurpose::CalleeTLS => {
// This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020.
assert!(call_conv == isa::CallConv::Baldrdash2020);
Some(ABIArg::Stack {
offset: BALDRDASH_CALLEE_TLS_OFFSET,
ty: ir::types::I64,
extension: ir::ArgumentExtension::None,
purpose: param.purpose,
})
Some(ABIArg::stack(
BALDRDASH_CALLEE_TLS_OFFSET,
ir::types::I64,
ir::ArgumentExtension::None,
param.purpose,
))
}
&ir::ArgumentPurpose::CallerTLS => {
// This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020.
assert!(call_conv == isa::CallConv::Baldrdash2020);
Some(ABIArg::Stack {
offset: BALDRDASH_CALLER_TLS_OFFSET,
ty: ir::types::I64,
extension: ir::ArgumentExtension::None,
purpose: param.purpose,
})
Some(ABIArg::stack(
BALDRDASH_CALLER_TLS_OFFSET,
ir::types::I64,
ir::ArgumentExtension::None,
param.purpose,
))
}
_ => None,
}
@@ -161,6 +161,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
fn compute_arg_locs(
call_conv: isa::CallConv,
_flags: &settings::Flags,
params: &[ir::AbiParam],
args_or_rets: ArgsOrRets,
add_ret_area_ptr: bool,
@@ -253,12 +254,12 @@ impl ABIMachineSpec for AArch64MachineDeps {
RegClass::V128 => vreg(*next_reg),
_ => unreachable!(),
};
ret.push(ABIArg::Reg {
regs: ValueRegs::one(reg.to_real_reg()),
ty: param.value_type,
extension: param.extension,
purpose: param.purpose,
});
ret.push(ABIArg::reg(
reg.to_real_reg(),
param.value_type,
param.extension,
param.purpose,
));
*next_reg += 1;
remaining_reg_vals -= 1;
} else {
@@ -268,13 +269,13 @@ impl ABIMachineSpec for AArch64MachineDeps {
let size = std::cmp::max(size, 8);
// Align.
debug_assert!(size.is_power_of_two());
next_stack = (next_stack + size - 1) & !(size - 1);
ret.push(ABIArg::Stack {
offset: next_stack as i64,
ty: param.value_type,
extension: param.extension,
purpose: param.purpose,
});
next_stack = align_to(next_stack, size);
ret.push(ABIArg::stack(
next_stack as i64,
param.value_type,
param.extension,
param.purpose,
));
next_stack += size;
}
}
@@ -286,19 +287,19 @@ impl ABIMachineSpec for AArch64MachineDeps {
let extra_arg = if add_ret_area_ptr {
debug_assert!(args_or_rets == ArgsOrRets::Args);
if next_xreg < max_per_class_reg_vals && remaining_reg_vals > 0 {
ret.push(ABIArg::Reg {
regs: ValueRegs::one(xreg(next_xreg).to_real_reg()),
ty: I64,
extension: ir::ArgumentExtension::None,
purpose: ir::ArgumentPurpose::Normal,
});
ret.push(ABIArg::reg(
xreg(next_xreg).to_real_reg(),
I64,
ir::ArgumentExtension::None,
ir::ArgumentPurpose::Normal,
));
} else {
ret.push(ABIArg::Stack {
offset: next_stack as i64,
ty: I64,
extension: ir::ArgumentExtension::None,
purpose: ir::ArgumentPurpose::Normal,
});
ret.push(ABIArg::stack(
next_stack as i64,
I64,
ir::ArgumentExtension::None,
ir::ArgumentPurpose::Normal,
));
next_stack += 8;
}
Some(ret.len() - 1)
@@ -306,7 +307,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
None
};
next_stack = (next_stack + 15) & !15;
next_stack = align_to(next_stack, 16);
// To avoid overflow issues, limit the arg/return size to something
// reasonable -- here, 128 MB.


@@ -51,6 +51,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
fn compute_arg_locs(
_call_conv: isa::CallConv,
_flags: &settings::Flags,
params: &[ir::AbiParam],
args_or_rets: ArgsOrRets,
add_ret_area_ptr: bool,
@@ -81,12 +82,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
if next_rreg < max_reg_val {
let reg = rreg(next_rreg);
ret.push(ABIArg::Reg {
regs: ValueRegs::one(reg.to_real_reg()),
ty: param.value_type,
extension: param.extension,
purpose: param.purpose,
});
ret.push(ABIArg::reg(
reg.to_real_reg(),
param.value_type,
param.extension,
param.purpose,
));
next_rreg += 1;
} else {
// Arguments are stored on stack in reversed order.
@@ -101,12 +102,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
let extra_arg = if add_ret_area_ptr {
debug_assert!(args_or_rets == ArgsOrRets::Args);
if next_rreg < max_reg_val {
ret.push(ABIArg::Reg {
regs: ValueRegs::one(rreg(next_rreg).to_real_reg()),
ty: I32,
extension: ir::ArgumentExtension::None,
purpose: ir::ArgumentPurpose::Normal,
});
ret.push(ABIArg::reg(
rreg(next_rreg).to_real_reg(),
I32,
ir::ArgumentExtension::None,
ir::ArgumentPurpose::Normal,
));
} else {
stack_args.push((
I32,
@@ -124,12 +125,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
let max_stack = next_stack;
for (ty, ext, purpose) in stack_args.into_iter().rev() {
next_stack -= 4;
ret.push(ABIArg::Stack {
offset: (max_stack - next_stack) as i64,
ret.push(ABIArg::stack(
(max_stack - next_stack) as i64,
ty,
extension: ext,
ext,
purpose,
});
));
}
assert_eq!(next_stack, 0);


@@ -31,41 +31,41 @@ fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option<A
match &param.purpose {
&ir::ArgumentPurpose::VMContext => {
// This is SpiderMonkey's `WasmTlsReg`.
Some(ABIArg::Reg {
regs: ValueRegs::one(regs::r14().to_real_reg()),
ty: types::I64,
extension: param.extension,
purpose: param.purpose,
})
Some(ABIArg::reg(
regs::r14().to_real_reg(),
types::I64,
param.extension,
param.purpose,
))
}
&ir::ArgumentPurpose::SignatureId => {
// This is SpiderMonkey's `WasmTableCallSigReg`.
Some(ABIArg::Reg {
regs: ValueRegs::one(regs::r10().to_real_reg()),
ty: types::I64,
extension: param.extension,
purpose: param.purpose,
})
Some(ABIArg::reg(
regs::r10().to_real_reg(),
types::I64,
param.extension,
param.purpose,
))
}
&ir::ArgumentPurpose::CalleeTLS => {
// This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020.
assert!(call_conv == isa::CallConv::Baldrdash2020);
Some(ABIArg::Stack {
offset: BALDRDASH_CALLEE_TLS_OFFSET,
ty: ir::types::I64,
extension: ir::ArgumentExtension::None,
purpose: param.purpose,
})
Some(ABIArg::stack(
BALDRDASH_CALLEE_TLS_OFFSET,
ir::types::I64,
ir::ArgumentExtension::None,
param.purpose,
))
}
&ir::ArgumentPurpose::CallerTLS => {
// This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020.
assert!(call_conv == isa::CallConv::Baldrdash2020);
Some(ABIArg::Stack {
offset: BALDRDASH_CALLER_TLS_OFFSET,
ty: ir::types::I64,
extension: ir::ArgumentExtension::None,
purpose: param.purpose,
})
Some(ABIArg::stack(
BALDRDASH_CALLER_TLS_OFFSET,
ir::types::I64,
ir::ArgumentExtension::None,
param.purpose,
))
}
_ => None,
}
@@ -97,18 +97,30 @@ impl ABIMachineSpec for X64ABIMachineSpec {
fn compute_arg_locs(
call_conv: isa::CallConv,
flags: &settings::Flags,
params: &[ir::AbiParam],
args_or_rets: ArgsOrRets,
add_ret_area_ptr: bool,
) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
let is_baldrdash = call_conv.extends_baldrdash();
let is_fastcall = call_conv.extends_windows_fastcall();
let has_baldrdash_tls = call_conv == isa::CallConv::Baldrdash2020;
let mut next_gpr = 0;
let mut next_vreg = 0;
let mut next_stack: u64 = 0;
let mut next_param_idx = 0; // Fastcall cares about overall param index
let mut ret = vec![];
if args_or_rets == ArgsOrRets::Args && is_fastcall {
// Fastcall always reserves 32 bytes of shadow space corresponding to
// the four initial in-arg parameters.
//
// (See:
// https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-160)
next_stack = 32;
}
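// Editorial sketch (not part of the original diff): the caller's stack
// just before `call` under fastcall, for a five-argument call:
//
//   [rsp + 0 .. 32)  shadow space (home slots for rcx, rdx, r8, r9)
//   [rsp + 32]       fifth argument -- hence `next_stack` starts at 32.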
if args_or_rets == ArgsOrRets::Args && has_baldrdash_tls {
// Baldrdash ABI-2020 always has two stack-arg slots reserved, for the callee and
// caller TLS-register values, respectively.
@@ -159,72 +171,92 @@ impl ABIMachineSpec for X64ABIMachineSpec {
}
// Find regclass(es) of the register(s) used to store a value of this type.
let (rcs, _) = Inst::rc_for_type(param.value_type)?;
let intreg = rcs[0] == RegClass::I64;
let num_regs = rcs.len();
assert!(num_regs <= 2);
if num_regs == 2 {
assert_eq!(rcs[0], rcs[1]);
let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;
// Now assign ABIArgSlots for each register-sized part.
//
// Note that the handling of `i128` values is unique here:
//
// - If `enable_llvm_abi_extensions` is set in the flags, each
// `i128` is split into two `i64`s and assigned exactly as if it
// were two consecutive 64-bit args. This is consistent with LLVM's
// behavior, and is needed for some uses of Cranelift (e.g., the
// rustc backend).
//
// - Otherwise, both SysV and Fastcall specify behavior (use of
// vector register, a register pair, or passing by reference
// depending on the case), but for simplicity, we will just panic if
// an i128 type appears in a signature and the LLVM extensions flag
// is not set.
//
// For examples of how rustc compiles i128 args and return values on
// both SysV and Fastcall platforms, see:
// https://godbolt.org/z/PhG3ob
if param.value_type.bits() > 64
&& !param.value_type.is_vector()
&& !flags.enable_llvm_abi_extensions()
{
panic!(
"i128 args/return values not supported unless LLVM ABI extensions are enabled"
);
}
let mut regs: SmallVec<[RealReg; 2]> = smallvec![];
for j in 0..num_regs {
let mut slots = vec![];
for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) {
let intreg = *rc == RegClass::I64;
let nextreg = if intreg {
match args_or_rets {
ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr + j),
ArgsOrRets::Args => {
get_intreg_for_arg(&call_conv, next_gpr, next_param_idx)
}
ArgsOrRets::Rets => {
get_intreg_for_retval_systemv(&call_conv, next_gpr + j, i + j)
get_intreg_for_retval(&call_conv, next_gpr, next_param_idx)
}
}
} else {
match args_or_rets {
ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg + j),
ArgsOrRets::Args => {
get_fltreg_for_arg(&call_conv, next_vreg, next_param_idx)
}
ArgsOrRets::Rets => {
get_fltreg_for_retval_systemv(&call_conv, next_vreg + j, i + j)
get_fltreg_for_retval(&call_conv, next_vreg, next_param_idx)
}
}
};
next_param_idx += 1;
if let Some(reg) = nextreg {
regs.push(reg.to_real_reg());
if intreg {
next_gpr += 1;
} else {
next_vreg += 1;
}
slots.push(ABIArgSlot::Reg {
reg: reg.to_real_reg(),
ty: *reg_ty,
extension: param.extension,
});
} else {
regs.clear();
break;
// Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
// stack alignment happens separately after all args.)
let size = (reg_ty.bits() / 8) as u64;
let size = std::cmp::max(size, 8);
// Align.
debug_assert!(size.is_power_of_two());
next_stack = align_to(next_stack, size);
slots.push(ABIArgSlot::Stack {
offset: next_stack as i64,
ty: *reg_ty,
extension: param.extension,
});
next_stack += size;
}
}
if regs.len() > 0 {
let regs = match num_regs {
1 => ValueRegs::one(regs[0]),
2 => ValueRegs::two(regs[0], regs[1]),
_ => panic!("More than two registers unexpected"),
};
ret.push(ABIArg::Reg {
regs,
ty: param.value_type,
extension: param.extension,
purpose: param.purpose,
});
if intreg {
next_gpr += num_regs;
} else {
next_vreg += num_regs;
}
} else {
// Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
// stack alignment happens separately after all args.)
let size = (param.value_type.bits() / 8) as u64;
let size = std::cmp::max(size, 8);
// Align.
debug_assert!(size.is_power_of_two());
next_stack = (next_stack + size - 1) & !(size - 1);
ret.push(ABIArg::Stack {
offset: next_stack as i64,
ty: param.value_type,
extension: param.extension,
purpose: param.purpose,
});
next_stack += size;
}
ret.push(ABIArg::Slots {
slots,
purpose: param.purpose,
});
}
if args_or_rets == ArgsOrRets::Rets && is_baldrdash {
@@ -233,20 +265,20 @@ impl ABIMachineSpec for X64ABIMachineSpec {
let extra_arg = if add_ret_area_ptr {
debug_assert!(args_or_rets == ArgsOrRets::Args);
if let Some(reg) = get_intreg_for_arg_systemv(&call_conv, next_gpr) {
ret.push(ABIArg::Reg {
regs: ValueRegs::one(reg.to_real_reg()),
ty: types::I64,
extension: ir::ArgumentExtension::None,
purpose: ir::ArgumentPurpose::Normal,
});
if let Some(reg) = get_intreg_for_arg(&call_conv, next_gpr, next_param_idx) {
ret.push(ABIArg::reg(
reg.to_real_reg(),
types::I64,
ir::ArgumentExtension::None,
ir::ArgumentPurpose::Normal,
));
} else {
ret.push(ABIArg::Stack {
offset: next_stack as i64,
ty: types::I64,
extension: ir::ArgumentExtension::None,
purpose: ir::ArgumentPurpose::Normal,
});
ret.push(ABIArg::stack(
next_stack as i64,
types::I64,
ir::ArgumentExtension::None,
ir::ArgumentPurpose::Normal,
));
next_stack += 8;
}
Some(ret.len() - 1)
@@ -254,7 +286,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
None
};
next_stack = (next_stack + 15) & !15;
next_stack = align_to(next_stack, 16);
// To avoid overflow issues, limit the arg/return size to something reasonable.
if next_stack > STACK_ARG_RET_SIZE_LIMIT {
@@ -452,10 +484,9 @@ impl ABIMachineSpec for X64ABIMachineSpec {
// registers (all XMM registers are caller-save) so we can compute the
// total size of the needed stack space easily.
let clobbered = get_callee_saves(&call_conv, clobbers);
let clobbered_size = 8 * clobbered.len() as u32;
let stack_size = clobbered_size + fixed_frame_storage_size;
let stack_size = compute_clobber_size(&clobbered) + fixed_frame_storage_size;
// Align to 16 bytes.
let stack_size = (stack_size + 15) & !15;
let stack_size = align_to(stack_size, 16);
let clobbered_size = stack_size - fixed_frame_storage_size;
// Adjust the stack pointer downward with one `sub rsp, IMM`
// instruction.
@@ -473,16 +504,23 @@ impl ABIMachineSpec for X64ABIMachineSpec {
let r_reg = reg.to_reg();
match r_reg.get_class() {
RegClass::I64 => {
insts.push(Inst::mov_r_m(
OperandSize::Size64,
insts.push(Inst::store(
types::I64,
r_reg.to_reg(),
Amode::imm_reg(cur_offset, regs::rsp()),
));
cur_offset += 8;
}
// No XMM regs are callee-save, so we do not need to implement
// this.
_ => unimplemented!(),
RegClass::V128 => {
cur_offset = align_to(cur_offset, 16);
insts.push(Inst::store(
types::I8X16,
r_reg.to_reg(),
Amode::imm_reg(cur_offset, regs::rsp()),
));
cur_offset += 16;
}
_ => unreachable!(),
}
}
@@ -499,8 +537,8 @@ impl ABIMachineSpec for X64ABIMachineSpec {
let mut insts = SmallVec::new();
let clobbered = get_callee_saves(&call_conv, clobbers);
let stack_size = 8 * clobbered.len() as u32;
let stack_size = (stack_size + 15) & !15;
let stack_size = compute_clobber_size(&clobbered);
let stack_size = align_to(stack_size, 16);
// Restore regs by loading from offsets of RSP.
let mut cur_offset = 0;
@@ -514,7 +552,17 @@ impl ABIMachineSpec for X64ABIMachineSpec {
));
cur_offset += 8;
}
_ => unimplemented!(),
RegClass::V128 => {
cur_offset = align_to(cur_offset, 16);
insts.push(Inst::load(
types::I8X16,
Amode::imm_reg(cur_offset, regs::rsp()),
Writable::from_reg(rreg.to_reg()),
ExtKind::None,
));
cur_offset += 16;
}
_ => unreachable!(),
}
}
// Adjust RSP back upward.
@@ -592,14 +640,14 @@ impl ABIMachineSpec for X64ABIMachineSpec {
// Baldrdash should not use struct args.
assert!(!call_conv.extends_baldrdash());
let mut insts = SmallVec::new();
let arg0 = get_intreg_for_arg_systemv(&call_conv, 0).unwrap();
let arg1 = get_intreg_for_arg_systemv(&call_conv, 1).unwrap();
let arg2 = get_intreg_for_arg_systemv(&call_conv, 2).unwrap();
let arg0 = get_intreg_for_arg(&call_conv, 0, 0).unwrap();
let arg1 = get_intreg_for_arg(&call_conv, 1, 1).unwrap();
let arg2 = get_intreg_for_arg(&call_conv, 2, 2).unwrap();
// We need a register to load the address of `memcpy()` below and we
// don't have a lowering context to allocate a temp here; so just use a
// register we know we are free to mutate as part of this sequence
// (because it is clobbered by the call as per the ABI anyway).
let memcpy_addr = get_intreg_for_arg_systemv(&call_conv, 3).unwrap();
let memcpy_addr = get_intreg_for_arg(&call_conv, 3, 3).unwrap();
insts.push(Inst::gen_move(Writable::from_reg(arg0), dst, I64));
insts.push(Inst::gen_move(Writable::from_reg(arg1), src, I64));
insts.extend(
@@ -648,10 +696,9 @@ impl ABIMachineSpec for X64ABIMachineSpec {
fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec<Writable<Reg>> {
let mut caller_saved = vec![
// Systemv calling convention:
// - GPR: all except RBX, RBP, R12 to R15 (which are callee-saved).
Writable::from_reg(regs::rsi()),
Writable::from_reg(regs::rdi()),
// Intersection of SystemV and FastCall calling conventions:
// - GPR: all except RDI, RSI, RBX, RBP, R12 to R15.
// SysV adds RDI, RSI (FastCall makes these callee-saved).
Writable::from_reg(regs::rax()),
Writable::from_reg(regs::rcx()),
Writable::from_reg(regs::rdx()),
@@ -659,25 +706,30 @@ impl ABIMachineSpec for X64ABIMachineSpec {
Writable::from_reg(regs::r9()),
Writable::from_reg(regs::r10()),
Writable::from_reg(regs::r11()),
// - XMM: all the registers!
// - XMM: XMM0-5. SysV adds the rest (XMM6-XMM15).
Writable::from_reg(regs::xmm0()),
Writable::from_reg(regs::xmm1()),
Writable::from_reg(regs::xmm2()),
Writable::from_reg(regs::xmm3()),
Writable::from_reg(regs::xmm4()),
Writable::from_reg(regs::xmm5()),
Writable::from_reg(regs::xmm6()),
Writable::from_reg(regs::xmm7()),
Writable::from_reg(regs::xmm8()),
Writable::from_reg(regs::xmm9()),
Writable::from_reg(regs::xmm10()),
Writable::from_reg(regs::xmm11()),
Writable::from_reg(regs::xmm12()),
Writable::from_reg(regs::xmm13()),
Writable::from_reg(regs::xmm14()),
Writable::from_reg(regs::xmm15()),
];
if !call_conv_of_callee.extends_windows_fastcall() {
caller_saved.push(Writable::from_reg(regs::rsi()));
caller_saved.push(Writable::from_reg(regs::rdi()));
caller_saved.push(Writable::from_reg(regs::xmm6()));
caller_saved.push(Writable::from_reg(regs::xmm7()));
caller_saved.push(Writable::from_reg(regs::xmm8()));
caller_saved.push(Writable::from_reg(regs::xmm9()));
caller_saved.push(Writable::from_reg(regs::xmm10()));
caller_saved.push(Writable::from_reg(regs::xmm11()));
caller_saved.push(Writable::from_reg(regs::xmm12()));
caller_saved.push(Writable::from_reg(regs::xmm13()));
caller_saved.push(Writable::from_reg(regs::xmm14()));
caller_saved.push(Writable::from_reg(regs::xmm15()));
}
if call_conv_of_callee.extends_baldrdash() {
caller_saved.push(Writable::from_reg(regs::r12()));
caller_saved.push(Writable::from_reg(regs::r13()));
@@ -739,49 +791,67 @@ impl From<StackAMode> for SyntheticAmode {
}
}
fn get_intreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
match call_conv {
fn get_intreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
let is_fastcall = match call_conv {
CallConv::Fast
| CallConv::Cold
| CallConv::SystemV
| CallConv::BaldrdashSystemV
| CallConv::Baldrdash2020 => {}
_ => panic!("int args only supported for SysV calling convention"),
| CallConv::Baldrdash2020 => false,
CallConv::WindowsFastcall => true,
_ => panic!("int args only supported for SysV or Fastcall calling convention"),
};
match idx {
0 => Some(regs::rdi()),
1 => Some(regs::rsi()),
2 => Some(regs::rdx()),
3 => Some(regs::rcx()),
4 => Some(regs::r8()),
5 => Some(regs::r9()),
// Fastcall counts by absolute argument number; SysV counts by argument of
// this (integer) class.
let i = if is_fastcall { arg_idx } else { idx };
match (i, is_fastcall) {
(0, false) => Some(regs::rdi()),
(1, false) => Some(regs::rsi()),
(2, false) => Some(regs::rdx()),
(3, false) => Some(regs::rcx()),
(4, false) => Some(regs::r8()),
(5, false) => Some(regs::r9()),
(0, true) => Some(regs::rcx()),
(1, true) => Some(regs::rdx()),
(2, true) => Some(regs::r8()),
(3, true) => Some(regs::r9()),
_ => None,
}
}
fn get_fltreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
match call_conv {
fn get_fltreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
let is_fastcall = match call_conv {
CallConv::Fast
| CallConv::Cold
| CallConv::SystemV
| CallConv::BaldrdashSystemV
| CallConv::Baldrdash2020 => {}
_ => panic!("float args only supported for SysV calling convention"),
| CallConv::Baldrdash2020 => false,
CallConv::WindowsFastcall => true,
_ => panic!("float args only supported for SysV or Fastcall calling convention"),
};
match idx {
0 => Some(regs::xmm0()),
1 => Some(regs::xmm1()),
2 => Some(regs::xmm2()),
3 => Some(regs::xmm3()),
4 => Some(regs::xmm4()),
5 => Some(regs::xmm5()),
6 => Some(regs::xmm6()),
7 => Some(regs::xmm7()),
// Fastcall counts by absolute argument number; SysV counts by argument of
// this (floating-point) class.
let i = if is_fastcall { arg_idx } else { idx };
match (i, is_fastcall) {
(0, false) => Some(regs::xmm0()),
(1, false) => Some(regs::xmm1()),
(2, false) => Some(regs::xmm2()),
(3, false) => Some(regs::xmm3()),
(4, false) => Some(regs::xmm4()),
(5, false) => Some(regs::xmm5()),
(6, false) => Some(regs::xmm6()),
(7, false) => Some(regs::xmm7()),
(0, true) => Some(regs::xmm0()),
(1, true) => Some(regs::xmm1()),
(2, true) => Some(regs::xmm2()),
(3, true) => Some(regs::xmm3()),
_ => None,
}
}
fn get_intreg_for_retval_systemv(
fn get_intreg_for_retval(
call_conv: &CallConv,
intreg_idx: usize,
retval_idx: usize,
@@ -799,11 +869,16 @@ fn get_intreg_for_retval_systemv(
None
}
}
CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
CallConv::WindowsFastcall => match intreg_idx {
0 => Some(regs::rax()),
1 => Some(regs::rdx()), // The Rust ABI for i128s needs this.
_ => None,
},
CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
}
}
fn get_fltreg_for_retval_systemv(
fn get_fltreg_for_retval(
call_conv: &CallConv,
fltreg_idx: usize,
retval_idx: usize,
@@ -821,7 +896,11 @@ fn get_fltreg_for_retval_systemv(
None
}
}
CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
CallConv::WindowsFastcall => match fltreg_idx {
0 => Some(regs::xmm0()),
_ => None,
},
CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
}
}
@@ -854,6 +933,21 @@ fn is_callee_save_baldrdash(r: RealReg) -> bool {
}
}
fn is_callee_save_fastcall(r: RealReg) -> bool {
use regs::*;
match r.get_class() {
RegClass::I64 => match r.get_hw_encoding() as u8 {
ENC_RBX | ENC_RBP | ENC_RSI | ENC_RDI | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true,
_ => false,
},
RegClass::V128 => match r.get_hw_encoding() as u8 {
6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 => true,
_ => false,
},
_ => panic!("Unknown register class: {:?}", r.get_class()),
}
}
fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
let mut regs: Vec<Writable<RealReg>> = match call_conv {
CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => regs
@@ -869,7 +963,11 @@ fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<
.cloned()
.filter(|r| is_callee_save_systemv(r.to_reg()))
.collect(),
CallConv::WindowsFastcall => todo!("windows fastcall"),
CallConv::WindowsFastcall => regs
.iter()
.cloned()
.filter(|r| is_callee_save_fastcall(r.to_reg()))
.collect(),
CallConv::Probestack => todo!("probestack?"),
};
// Sort registers for deterministic code output. We can do an unstable sort because the
@@ -877,3 +975,20 @@ fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<
regs.sort_unstable_by_key(|r| r.to_reg().get_index());
regs
}
fn compute_clobber_size(clobbers: &Vec<Writable<RealReg>>) -> u32 {
let mut clobbered_size = 0;
for reg in clobbers {
match reg.to_reg().get_class() {
RegClass::I64 => {
clobbered_size += 8;
}
RegClass::V128 => {
clobbered_size = align_to(clobbered_size, 16);
clobbered_size += 16;
}
_ => unreachable!(),
}
}
clobbered_size
}
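
The `arg_idx` vs. `idx` distinction above is the crux of the fastcall
assignment. A standalone sketch of the two counting schemes (the register
names are the real ones; the helper functions are hypothetical, for
illustration only):

    fn sysv_int_arg(int_class_idx: usize) -> Option<&'static str> {
        // SysV counts per register class: the Nth *integer* argument.
        ["rdi", "rsi", "rdx", "rcx", "r8", "r9"].get(int_class_idx).copied()
    }

    fn fastcall_int_arg(abs_param_idx: usize) -> Option<&'static str> {
        // Fastcall counts absolute parameter positions, shared with XMM args.
        ["rcx", "rdx", "r8", "r9"].get(abs_param_idx).copied()
    }

    // E.g., for (i64, f64, i64): SysV assigns rdi, xmm0, rsi;
    // fastcall assigns rcx, xmm1, r8 (the f64 consumed absolute slot 1).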


@@ -456,6 +456,7 @@ pub(crate) enum InstructionSet {
Popcnt,
Lzcnt,
BMI1,
#[allow(dead_code)] // never constructed (yet).
BMI2,
}


@@ -23,11 +23,20 @@ use regalloc::{
};
use std::string::String;
// Hardware encodings for a few registers.
// Hardware encodings (note the special rax, rcx, rdx, rbx order).
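// (Editorial sketch, for context: in x86-64 machine code, the low three
// bits of these encodings select the register within a ModRM/SIB byte,
// and encodings 8..15 additionally require a REX prefix bit, so the
// exact numbering matters for instruction emission.)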
pub const ENC_RAX: u8 = 0;
pub const ENC_RCX: u8 = 1;
pub const ENC_RDX: u8 = 2;
pub const ENC_RBX: u8 = 3;
pub const ENC_RSP: u8 = 4;
pub const ENC_RBP: u8 = 5;
pub const ENC_RSI: u8 = 6;
pub const ENC_RDI: u8 = 7;
pub const ENC_R8: u8 = 8;
pub const ENC_R9: u8 = 9;
pub const ENC_R10: u8 = 10;
pub const ENC_R11: u8 = 11;
pub const ENC_R12: u8 = 12;
pub const ENC_R13: u8 = 13;
pub const ENC_R14: u8 = 14;
@@ -38,31 +47,31 @@ fn gpr(enc: u8, index: u8) -> Reg {
}
pub(crate) fn rsi() -> Reg {
gpr(6, 16)
gpr(ENC_RSI, 16)
}
pub(crate) fn rdi() -> Reg {
gpr(7, 17)
gpr(ENC_RDI, 17)
}
pub(crate) fn rax() -> Reg {
gpr(0, 18)
gpr(ENC_RAX, 18)
}
pub(crate) fn rcx() -> Reg {
gpr(1, 19)
gpr(ENC_RCX, 19)
}
pub(crate) fn rdx() -> Reg {
gpr(2, 20)
gpr(ENC_RDX, 20)
}
pub(crate) fn r8() -> Reg {
gpr(8, 21)
gpr(ENC_R8, 21)
}
pub(crate) fn r9() -> Reg {
gpr(9, 22)
gpr(ENC_R9, 22)
}
pub(crate) fn r10() -> Reg {
gpr(10, 23)
gpr(ENC_R10, 23)
}
pub(crate) fn r11() -> Reg {
gpr(11, 24)
gpr(ENC_R11, 24)
}
pub(crate) fn r12() -> Reg {
gpr(ENC_R12, 25)


@@ -124,19 +124,18 @@ use std::convert::TryFrom;
use std::marker::PhantomData;
use std::mem;
/// A location for an argument or return value.
#[derive(Clone, Copy, Debug)]
pub enum ABIArg {
/// In a real register (or set of registers).
/// A location for (part of) an argument or return value. These "storage slots"
/// are specified for each register-sized part of an argument.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ABIArgSlot {
/// In a real register.
Reg {
/// Register(s) that hold this arg.
regs: ValueRegs<RealReg>,
/// Register that holds this arg.
reg: RealReg,
/// Value type of this arg.
ty: ir::Type,
/// Should this arg be zero- or sign-extended?
extension: ir::ArgumentExtension,
/// Purpose of this arg.
purpose: ir::ArgumentPurpose,
},
/// Arguments only: on stack, at given offset from SP at entry.
Stack {
@@ -146,6 +145,26 @@ pub enum ABIArg {
ty: ir::Type,
/// Should this arg be zero- or sign-extended?
extension: ir::ArgumentExtension,
},
}
/// An ABIArg is composed of one or more parts. This allows for a CLIF-level
/// Value to be passed with its parts in more than one location at the ABI
/// level. For example, a 128-bit integer may be passed in two 64-bit registers,
/// or even a 64-bit register and a 64-bit stack slot, on a 64-bit machine. The
/// number of "parts" should correspond to the number of registers used to store
/// this type according to the machine backend.
///
/// As an invariant, the `purpose` for every part must match. As a further
/// invariant, a `StructArg` part cannot appear with any other part.
#[derive(Clone, Debug)]
pub enum ABIArg {
/// Storage slots (registers or stack locations) for each part of the
/// argument value. The number of slots must equal the number of register
/// parts used to store a value of this type.
Slots {
/// Slots, one per register part.
slots: Vec<ABIArgSlot>,
/// Purpose of this arg.
purpose: ir::ArgumentPurpose,
},
@@ -167,21 +186,50 @@ pub enum ABIArg {
impl ABIArg {
/// Get the purpose of this arg.
fn get_purpose(self) -> ir::ArgumentPurpose {
fn get_purpose(&self) -> ir::ArgumentPurpose {
match self {
ABIArg::Reg { purpose, .. } => purpose,
ABIArg::Stack { purpose, .. } => purpose,
ABIArg::StructArg { purpose, .. } => purpose,
&ABIArg::Slots { purpose, .. } => purpose,
&ABIArg::StructArg { purpose, .. } => purpose,
}
}
/// Is this a StructArg?
fn is_struct_arg(self) -> bool {
fn is_struct_arg(&self) -> bool {
match self {
ABIArg::StructArg { .. } => true,
&ABIArg::StructArg { .. } => true,
_ => false,
}
}
/// Create an ABIArg from one register.
pub fn reg(
reg: RealReg,
ty: ir::Type,
extension: ir::ArgumentExtension,
purpose: ir::ArgumentPurpose,
) -> ABIArg {
ABIArg::Slots {
slots: vec![ABIArgSlot::Reg { reg, ty, extension }],
purpose,
}
}
/// Create an ABIArg from one stack slot.
pub fn stack(
offset: i64,
ty: ir::Type,
extension: ir::ArgumentExtension,
purpose: ir::ArgumentPurpose,
) -> ABIArg {
ABIArg::Slots {
slots: vec![ABIArgSlot::Stack {
offset,
ty,
extension,
}],
purpose,
}
}
}
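
As a sketch of the multi-slot case these convenience constructors do not
cover (the offsets and purpose here are illustrative only), a 128-bit
value passed entirely on the stack would be a single `ABIArg` with two
64-bit slots:

    let arg = ABIArg::Slots {
        slots: vec![
            ABIArgSlot::Stack { offset: 0, ty: ir::types::I64, extension: ir::ArgumentExtension::None },
            ABIArgSlot::Stack { offset: 8, ty: ir::types::I64, extension: ir::ArgumentExtension::None },
        ],
        purpose: ir::ArgumentPurpose::Normal,
    };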
/// Are we computing information about arguments or return values? Much of the
@@ -275,6 +323,7 @@ pub trait ABIMachineSpec {
/// index of the extra synthetic arg that was added.
fn compute_arg_locs(
call_conv: isa::CallConv,
flags: &settings::Flags,
params: &[ir::AbiParam],
args_or_rets: ArgsOrRets,
add_ret_area_ptr: bool,
@@ -461,11 +510,15 @@ struct ABISig {
}
impl ABISig {
fn from_func_sig<M: ABIMachineSpec>(sig: &ir::Signature) -> CodegenResult<ABISig> {
fn from_func_sig<M: ABIMachineSpec>(
sig: &ir::Signature,
flags: &settings::Flags,
) -> CodegenResult<ABISig> {
// Compute args and retvals from signature. Handle retvals first,
// because we may need to add a return-area arg to the args.
let (rets, stack_ret_space, _) = M::compute_arg_locs(
sig.call_conv,
flags,
&sig.returns,
ArgsOrRets::Rets,
/* extra ret-area ptr = */ false,
@@ -473,6 +526,7 @@ impl ABISig {
let need_stack_return_area = stack_ret_space > 0;
let (args, stack_arg_space, stack_ret_arg) = M::compute_arg_locs(
sig.call_conv,
flags,
&sig.params,
ArgsOrRets::Args,
need_stack_return_area,
@@ -557,8 +611,11 @@ fn get_special_purpose_param_register(
purpose: ir::ArgumentPurpose,
) -> Option<Reg> {
let idx = f.signature.special_param_index(purpose)?;
match abi.args[idx] {
ABIArg::Reg { regs, .. } => Some(regs.only_reg().unwrap().to_reg()),
match &abi.args[idx] {
&ABIArg::Slots { ref slots, .. } => match &slots[0] {
&ABIArgSlot::Reg { reg, .. } => Some(reg.to_reg()),
_ => None,
},
_ => None,
}
}
@@ -569,7 +626,7 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
debug!("ABI: func signature {:?}", f.signature);
let ir_sig = ensure_struct_return_ptr_is_returned(&f.signature);
let sig = ABISig::from_func_sig::<M>(&ir_sig)?;
let sig = ABISig::from_func_sig::<M>(&ir_sig, &flags)?;
let call_conv = f.signature.call_conv;
// Only these calling conventions are supported.
@@ -577,7 +634,8 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
call_conv == isa::CallConv::SystemV
|| call_conv == isa::CallConv::Fast
|| call_conv == isa::CallConv::Cold
|| call_conv.extends_baldrdash(),
|| call_conv.extends_baldrdash()
|| call_conv.extends_windows_fastcall(),
"Unsupported calling convention: {:?}",
call_conv
);
@@ -776,19 +834,6 @@ fn ty_from_ty_hint_or_reg_class<M: ABIMachineSpec>(r: Reg, ty: Option<Type>) ->
}
}
fn gen_move_multi<M: ABIMachineSpec>(
dst: ValueRegs<Writable<Reg>>,
src: ValueRegs<Reg>,
ty: Type,
) -> SmallInstVec<M::I> {
let mut ret = smallvec![];
let (_, tys) = M::I::rc_for_type(ty).unwrap();
for ((&dst, &src), &ty) in dst.regs().iter().zip(src.regs().iter()).zip(tys.iter()) {
ret.push(M::gen_move(dst, src, ty));
}
ret
}
fn gen_load_stack_multi<M: ABIMachineSpec>(
from: StackAMode,
dst: ValueRegs<Writable<Reg>>,
@@ -821,22 +866,6 @@ fn gen_store_stack_multi<M: ABIMachineSpec>(
ret
}
fn gen_store_base_offset_multi<M: ABIMachineSpec>(
base: Reg,
mut offset: i32,
src: ValueRegs<Reg>,
ty: Type,
) -> SmallInstVec<M::I> {
let mut ret = smallvec![];
let (_, tys) = M::I::rc_for_type(ty).unwrap();
// N.B.: registers are given in the `ValueRegs` in target endian order.
for (&src, &ty) in src.regs().iter().zip(tys.iter()) {
ret.push(M::gen_store_base_offset(base, offset, src, ty));
offset += ty.bytes() as i32;
}
ret
}
fn ensure_struct_return_ptr_is_returned(sig: &ir::Signature) -> ir::Signature {
let params_structret = sig
.params
@@ -892,10 +921,12 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
fn liveins(&self) -> Set<RealReg> {
let mut set: Set<RealReg> = Set::empty();
for &arg in &self.sig.args {
if let ABIArg::Reg { regs, .. } = arg {
for &r in regs.regs() {
set.insert(r);
for arg in &self.sig.args {
if let &ABIArg::Slots { ref slots, .. } = arg {
for slot in slots {
if let ABIArgSlot::Reg { reg, .. } = slot {
set.insert(*reg);
}
}
}
}
@@ -904,10 +935,12 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
fn liveouts(&self) -> Set<RealReg> {
let mut set: Set<RealReg> = Set::empty();
for &ret in &self.sig.rets {
if let ABIArg::Reg { regs, .. } = ret {
for &r in regs.regs() {
set.insert(r);
for ret in &self.sig.rets {
if let &ABIArg::Slots { ref slots, .. } = ret {
for slot in slots {
if let ABIArgSlot::Reg { reg, .. } = slot {
set.insert(*reg);
}
}
}
}
@@ -935,29 +968,43 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
idx: usize,
into_regs: ValueRegs<Writable<Reg>>,
) -> SmallInstVec<Self::I> {
let mut insts = smallvec![];
match &self.sig.args[idx] {
// Extension mode doesn't matter (we're copying out, not in; we
// ignore high bits by convention).
&ABIArg::Reg { regs, ty, .. } => {
gen_move_multi::<M>(into_regs, regs.map(|r| r.to_reg()), ty)
&ABIArg::Slots { ref slots, .. } => {
assert_eq!(into_regs.len(), slots.len());
for (slot, into_reg) in slots.iter().zip(into_regs.regs().iter()) {
match slot {
// Extension mode doesn't matter (we're copying out, not in; we
// ignore high bits by convention).
&ABIArgSlot::Reg { reg, ty, .. } => {
insts.push(M::gen_move(*into_reg, reg.to_reg(), ty));
}
&ABIArgSlot::Stack { offset, ty, .. } => {
insts.push(M::gen_load_stack(
StackAMode::FPOffset(
M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
ty,
),
*into_reg,
ty,
));
}
}
}
}
&ABIArg::Stack { offset, ty, .. } => gen_load_stack_multi::<M>(
StackAMode::FPOffset(
M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
ty,
),
into_regs,
ty,
),
&ABIArg::StructArg { offset, .. } => smallvec![M::gen_get_stack_addr(
StackAMode::FPOffset(
M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
&ABIArg::StructArg { offset, .. } => {
let into_reg = into_regs.only_reg().unwrap();
insts.push(M::gen_get_stack_addr(
StackAMode::FPOffset(
M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
I8,
),
into_reg,
I8,
),
into_regs.only_reg().unwrap(),
I8,
)],
));
}
}
insts
}
fn arg_is_needed_in_body(&self, idx: usize) -> bool {
@@ -978,87 +1025,84 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
let mut ret = smallvec![];
let word_bits = M::word_bits() as u8;
match &self.sig.rets[idx] {
&ABIArg::Reg {
regs,
ty,
extension,
..
} => {
let from_bits = ty_bits(ty) as u8;
let dest_regs = writable_value_regs(regs.map(|r| r.to_reg()));
let ext = M::get_ext_mode(self.sig.call_conv, extension);
match (ext, from_bits) {
(ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
if n < word_bits =>
{
let signed = ext == ArgumentExtension::Sext;
let dest_reg = dest_regs
.only_reg()
.expect("extension only possible from one-reg value");
let from_reg = from_regs
.only_reg()
.expect("extension only possible from one-reg value");
ret.push(M::gen_extend(
dest_reg,
from_reg.to_reg(),
signed,
from_bits,
/* to_bits = */ word_bits,
));
&ABIArg::Slots { ref slots, .. } => {
assert_eq!(from_regs.len(), slots.len());
for (slot, from_reg) in slots.iter().zip(from_regs.regs().iter()) {
match slot {
&ABIArgSlot::Reg {
reg, ty, extension, ..
} => {
let from_bits = ty_bits(ty) as u8;
let ext = M::get_ext_mode(self.sig.call_conv, extension);
match (ext, from_bits) {
(ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
if n < word_bits =>
{
let signed = ext == ArgumentExtension::Sext;
ret.push(M::gen_extend(
Writable::from_reg(reg.to_reg()),
from_reg.to_reg(),
signed,
from_bits,
/* to_bits = */ word_bits,
));
}
_ => {
ret.push(M::gen_move(
Writable::from_reg(reg.to_reg()),
from_reg.to_reg(),
ty,
));
}
};
}
&ABIArgSlot::Stack {
offset,
ty,
extension,
..
} => {
let mut ty = ty;
let from_bits = ty_bits(ty) as u8;
// A machine ABI implementation should ensure that stack frames
// have "reasonable" size. All current ABIs for machinst
// backends (aarch64 and x64) enforce a 128MB limit.
let off = i32::try_from(offset).expect(
"Argument stack offset greater than 2GB; should hit impl limit first",
);
let ext = M::get_ext_mode(self.sig.call_conv, extension);
// Trash the from_reg; it should be its last use.
match (ext, from_bits) {
(ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
if n < word_bits =>
{
assert_eq!(M::word_reg_class(), from_reg.to_reg().get_class());
let signed = ext == ArgumentExtension::Sext;
ret.push(M::gen_extend(
Writable::from_reg(from_reg.to_reg()),
from_reg.to_reg(),
signed,
from_bits,
/* to_bits = */ word_bits,
));
// Store the extended version.
ty = M::word_type();
}
_ => {}
};
ret.push(M::gen_store_base_offset(
self.ret_area_ptr.unwrap().to_reg(),
off,
from_reg.to_reg(),
ty,
));
}
}
_ => ret.extend(
gen_move_multi::<M>(dest_regs, non_writable_value_regs(from_regs), ty)
.into_iter(),
),
};
}
}
&ABIArg::Stack {
offset,
ty,
extension,
..
} => {
let mut ty = ty;
let from_bits = ty_bits(ty) as u8;
// A machine ABI implementation should ensure that stack frames
// have "reasonable" size. All current ABIs for machinst
// backends (aarch64 and x64) enforce a 128MB limit.
let off = i32::try_from(offset)
.expect("Argument stack offset greater than 2GB; should hit impl limit first");
let ext = M::get_ext_mode(self.sig.call_conv, extension);
// Trash the from_reg; it should be its last use.
match (ext, from_bits) {
(ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
if n < word_bits =>
{
let from_reg = from_regs
.only_reg()
.expect("extension only possible from one-reg value");
assert_eq!(M::word_reg_class(), from_reg.to_reg().get_class());
let signed = ext == ArgumentExtension::Sext;
ret.push(M::gen_extend(
from_reg,
from_reg.to_reg(),
signed,
from_bits,
/* to_bits = */ word_bits,
));
// Store the extended version.
ty = M::word_type();
}
_ => {}
};
ret.extend(
gen_store_base_offset_multi::<M>(
self.ret_area_ptr.unwrap().to_reg(),
off,
non_writable_value_regs(from_regs),
ty,
)
.into_iter(),
);
&ABIArg::StructArg { .. } => {
panic!("StructArg in return position is unsupported");
}
&ABIArg::StructArg { .. } => panic!("Unexpected StructArg location for return value"),
}
ret
}
@@ -1345,20 +1389,30 @@ fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Wr
// Compute uses: all arg regs.
let mut uses = Vec::new();
for arg in &sig.args {
match arg {
&ABIArg::Reg { regs, .. } => uses.extend(regs.regs().iter().map(|r| r.to_reg())),
_ => {}
if let &ABIArg::Slots { ref slots, .. } = arg {
for slot in slots {
match slot {
&ABIArgSlot::Reg { reg, .. } => {
uses.push(reg.to_reg());
}
_ => {}
}
}
}
}
// Compute defs: all retval regs, and all caller-save (clobbered) regs.
let mut defs = M::get_regs_clobbered_by_call(sig.call_conv);
for ret in &sig.rets {
match ret {
&ABIArg::Reg { regs, .. } => {
defs.extend(regs.regs().iter().map(|r| Writable::from_reg(r.to_reg())))
if let &ABIArg::Slots { ref slots, .. } = ret {
for slot in slots {
match slot {
&ABIArgSlot::Reg { reg, .. } => {
defs.push(Writable::from_reg(reg.to_reg()));
}
_ => {}
}
}
_ => {}
}
}
@@ -1406,7 +1460,7 @@ impl<M: ABIMachineSpec> ABICallerImpl<M> {
flags: &settings::Flags,
) -> CodegenResult<ABICallerImpl<M>> {
let ir_sig = ensure_struct_return_ptr_is_returned(sig);
let sig = ABISig::from_func_sig::<M>(&ir_sig)?;
let sig = ABISig::from_func_sig::<M>(&ir_sig, flags)?;
let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig);
Ok(ABICallerImpl {
ir_sig,
@@ -1431,7 +1485,7 @@ impl<M: ABIMachineSpec> ABICallerImpl<M> {
flags: &settings::Flags,
) -> CodegenResult<ABICallerImpl<M>> {
let ir_sig = ensure_struct_return_ptr_is_returned(sig);
let sig = ABISig::from_func_sig::<M>(&ir_sig)?;
let sig = ABISig::from_func_sig::<M>(&ir_sig, flags)?;
let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig);
Ok(ABICallerImpl {
ir_sig,
@@ -1501,75 +1555,73 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
let word_rc = M::word_reg_class();
let word_bits = M::word_bits() as usize;
match &self.sig.args[idx] {
&ABIArg::Reg {
regs,
ty,
extension,
..
} => {
let ext = M::get_ext_mode(self.sig.call_conv, extension);
if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
let reg = regs.only_reg().unwrap();
assert_eq!(word_rc, reg.get_class());
let signed = match ext {
ir::ArgumentExtension::Uext => false,
ir::ArgumentExtension::Sext => true,
_ => unreachable!(),
};
ctx.emit(M::gen_extend(
Writable::from_reg(reg.to_reg()),
from_regs.only_reg().unwrap(),
signed,
ty_bits(ty) as u8,
word_bits as u8,
));
} else {
for insn in gen_move_multi::<M>(
writable_value_regs(regs.map(|r| r.to_reg())),
from_regs,
ty,
) {
ctx.emit(insn);
&ABIArg::Slots { ref slots, .. } => {
assert_eq!(from_regs.len(), slots.len());
for (slot, from_reg) in slots.iter().zip(from_regs.regs().iter()) {
match slot {
&ABIArgSlot::Reg {
reg, ty, extension, ..
} => {
let ext = M::get_ext_mode(self.sig.call_conv, extension);
if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
assert_eq!(word_rc, reg.get_class());
let signed = match ext {
ir::ArgumentExtension::Uext => false,
ir::ArgumentExtension::Sext => true,
_ => unreachable!(),
};
ctx.emit(M::gen_extend(
Writable::from_reg(reg.to_reg()),
*from_reg,
signed,
ty_bits(ty) as u8,
word_bits as u8,
));
} else {
ctx.emit(M::gen_move(
Writable::from_reg(reg.to_reg()),
*from_reg,
ty,
));
}
}
&ABIArgSlot::Stack {
offset,
ty,
extension,
..
} => {
let mut ty = ty;
let ext = M::get_ext_mode(self.sig.call_conv, extension);
if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
assert_eq!(word_rc, from_reg.get_class());
let signed = match ext {
ir::ArgumentExtension::Uext => false,
ir::ArgumentExtension::Sext => true,
_ => unreachable!(),
};
// Extend in place in the source register. Our convention is to
// treat high bits as undefined for values in registers, so this
// is safe, even for an argument that is nominally read-only.
ctx.emit(M::gen_extend(
Writable::from_reg(*from_reg),
*from_reg,
signed,
ty_bits(ty) as u8,
word_bits as u8,
));
// Store the extended version.
ty = M::word_type();
}
ctx.emit(M::gen_store_stack(
StackAMode::SPOffset(offset, ty),
*from_reg,
ty,
));
}
}
}
}
&ABIArg::Stack {
offset,
ty,
extension,
..
} => {
let mut ty = ty;
let ext = M::get_ext_mode(self.sig.call_conv, extension);
if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
let from_reg = from_regs
.only_reg()
.expect("only one reg for sub-word value width");
assert_eq!(word_rc, from_reg.get_class());
let signed = match ext {
ir::ArgumentExtension::Uext => false,
ir::ArgumentExtension::Sext => true,
_ => unreachable!(),
};
// Extend in place in the source register. Our convention is to
// treat high bits as undefined for values in registers, so this
// is safe, even for an argument that is nominally read-only.
ctx.emit(M::gen_extend(
Writable::from_reg(from_reg),
from_reg,
signed,
ty_bits(ty) as u8,
word_bits as u8,
));
// Store the extended version.
ty = M::word_type();
}
for insn in
gen_store_stack_multi::<M>(StackAMode::SPOffset(offset, ty), from_regs, ty)
{
ctx.emit(insn);
}
}
&ABIArg::StructArg { offset, size, .. } => {
let src_ptr = from_regs.only_reg().unwrap();
let dst_ptr = ctx.alloc_tmp(M::word_type()).only_reg().unwrap();
@@ -1618,24 +1670,29 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
into_regs: ValueRegs<Writable<Reg>>,
) {
match &self.sig.rets[idx] {
// Extension mode doesn't matter because we're copying out, not in,
// and we ignore high bits in our own registers by convention.
&ABIArg::Reg { regs, ty, .. } => {
for insn in gen_move_multi::<M>(into_regs, regs.map(|r| r.to_reg()), ty) {
ctx.emit(insn);
&ABIArg::Slots { ref slots, .. } => {
assert_eq!(into_regs.len(), slots.len());
for (slot, into_reg) in slots.iter().zip(into_regs.regs().iter()) {
match slot {
// Extension mode doesn't matter because we're copying out, not in,
// and we ignore high bits in our own registers by convention.
&ABIArgSlot::Reg { reg, ty, .. } => {
ctx.emit(M::gen_move(*into_reg, reg.to_reg(), ty));
}
&ABIArgSlot::Stack { offset, ty, .. } => {
let ret_area_base = self.sig.stack_arg_space;
ctx.emit(M::gen_load_stack(
StackAMode::SPOffset(offset + ret_area_base, ty),
*into_reg,
ty,
));
}
}
}
}
&ABIArg::Stack { offset, ty, .. } => {
let ret_area_base = self.sig.stack_arg_space;
for insn in gen_load_stack_multi::<M>(
StackAMode::SPOffset(offset + ret_area_base, ty),
into_regs,
ty,
) {
ctx.emit(insn);
}
&ABIArg::StructArg { .. } => {
panic!("StructArg not supported in return position");
}
&ABIArg::StructArg { .. } => panic!("Unexpected StructArg location for return value"),
}
}


@@ -3,6 +3,7 @@
use super::{InsnOutput, LowerCtx, VCodeInst, ValueRegs};
use crate::ir::Type;
use regalloc::{Reg, Writable};
use std::ops::{Add, BitAnd, Not, Sub};
/// Returns the size (in bits) of a given type.
pub fn ty_bits(ty: Type) -> usize {
@@ -26,3 +27,17 @@ pub(crate) fn get_output_reg<I: VCodeInst, C: LowerCtx<I = I>>(
) -> ValueRegs<Writable<Reg>> {
ctx.get_output(spec.insn, spec.output)
}
/// Align a size up to a power-of-two alignment.
pub(crate) fn align_to<N>(x: N, alignment: N) -> N
where
N: Not<Output = N>
+ BitAnd<N, Output = N>
+ Add<N, Output = N>
+ Sub<N, Output = N>
+ From<u8>
+ Copy,
{
let alignment_mask = alignment - 1.into();
(x + alignment_mask) & !alignment_mask
}
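
A quick usage sketch of this helper (the alignment must be a power of
two; the result rounds up to the next multiple):

    assert_eq!(align_to(8u32, 16u32), 16);
    assert_eq!(align_to(17u32, 16u32), 32);
    assert_eq!(align_to(40u32, 8u32), 40);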


@@ -175,11 +175,13 @@ impl<R: Clone + Copy + Debug + PartialEq + Eq + InvalidSentinel> ValueRegs<R> {
}
/// Create a writable ValueRegs.
#[allow(dead_code)]
pub(crate) fn writable_value_regs(regs: ValueRegs<Reg>) -> ValueRegs<Writable<Reg>> {
regs.map(|r| Writable::from_reg(r))
}
/// Strip a writable ValueRegs down to a readonly ValueRegs.
#[allow(dead_code)]
pub(crate) fn non_writable_value_regs(regs: ValueRegs<Writable<Reg>>) -> ValueRegs<Reg> {
regs.map(|r| r.to_reg())
}


@@ -398,6 +398,7 @@ use_pinned_reg_as_heap_base = false
enable_simd = false
enable_atomics = true
enable_safepoints = false
enable_llvm_abi_extensions = false
emit_all_ones_funcaddrs = false
enable_probestack = true
probestack_func_adjusts_sp = false


@@ -0,0 +1,299 @@
test compile
set enable_llvm_abi_extensions=true
target x86_64
feature "experimental_x64"
function %f0(i64, i64, i64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64):
return v0
}
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movq %rcx, %rax
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
function %f1(i64, i64, i64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64):
return v1
}
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movq %rdx, %rax
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
function %f2(i64, i64, i64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64):
return v2
}
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movq %r8, %rax
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
function %f3(i64, i64, i64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64):
return v3
}
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movq %r9, %rax
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
function %f4(i64, i64, f64, i64) -> f64 windows_fastcall {
block0(v0: i64, v1: i64, v2: f64, v3: i64):
return v2
}
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movaps %xmm2, %xmm0
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
function %f5(i64, i64, f64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: f64, v3: i64):
return v3
}
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movq %r9, %rax
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
function %f6(i64, i64, i64, i64, i64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64):
return v5
}
;; This is truly odd (because of the regalloc ordering), but it works. Note
;; that we're spilling and using rsi, which is a callee-save in fastcall, because
;; the regalloc order is optimized for SysV. Also note that because we copy args
;; out of their input locations to separate vregs, we have a spurious load
;; from [rbp+48]. Ordinarily these moves are coalesced because the dest vreg
;; is allocated as a caller-save (volatile), but here again we allocate rsi
;; first and so have to spill it (and consequently don't coalesce).
;;
;; TODO(#2704): fix regalloc's register priority ordering!
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: subq $$16, %rsp
; nextln: movq %rsi, 0(%rsp)
; nextln: virtual_sp_offset_adjust 16
; nextln: movq 48(%rbp), %rsi
; nextln: movq 56(%rbp), %rsi
; nextln: movq %rsi, %rax
; nextln: movq 0(%rsp), %rsi
; nextln: addq $$16, %rsp
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
function %f7(i128, i64, i128, i128) -> i128 windows_fastcall {
block0(v0: i128, v1: i64, v2: i128, v3: i128):
return v3
}
;; Again, terrible regalloc behavior. The important part is that `v3` comes
;; from [rbp+56] and [rbp+64], i.e., the second and third non-shadow
;; stack slot.
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: subq $$16, %rsp
; nextln: movq %rsi, 0(%rsp)
; nextln: movq %rdi, 8(%rsp)
; nextln: virtual_sp_offset_adjust 16
; nextln: movq 48(%rbp), %rsi
; nextln: movq 56(%rbp), %rsi
; nextln: movq 64(%rbp), %rdi
; nextln: movq %rsi, %rax
; nextln: movq %rdi, %rdx
; nextln: movq 0(%rsp), %rsi
; nextln: movq 8(%rsp), %rdi
; nextln: addq $$16, %rsp
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
function %f8(i64) -> i64 windows_fastcall {
sig0 = (i64, i64, f64, f64, i64, i64) -> i64 windows_fastcall
fn0 = %g sig0
block0(v0: i64):
v1 = fcvt_from_sint.f64 v0
v2 = call fn0(v0, v0, v1, v1, v0, v0)
return v2
}
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: subq $$16, %rsp
; nextln: movq %rsi, 0(%rsp)
; nextln: virtual_sp_offset_adjust 16
; nextln: movq %rcx, %rsi
; nextln: cvtsi2sd %rsi, %xmm3
; nextln: subq $$48, %rsp
; nextln: virtual_sp_offset_adjust 48
; nextln: movq %rsi, %rcx
; nextln: movq %rsi, %rdx
; nextln: movaps %xmm3, %xmm2
; nextln: movq %rsi, 32(%rsp)
; nextln: movq %rsi, 40(%rsp)
; nextln: load_ext_name %g+0, %rsi
; nextln: call *%rsi
; nextln: addq $$48, %rsp
; nextln: virtual_sp_offset_adjust -48
; nextln: movq 0(%rsp), %rsi
; nextln: addq $$16, %rsp
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
function %f9(i64) -> f64 windows_fastcall {
block0(v0: i64):
v1 = load.f64 v0+0
v2 = load.f64 v0+8
v3 = load.f64 v0+16
v4 = load.f64 v0+24
v5 = load.f64 v0+32
v6 = load.f64 v0+40
v7 = load.f64 v0+48
v8 = load.f64 v0+56
v9 = load.f64 v0+64
v10 = load.f64 v0+72
v11 = load.f64 v0+80
v12 = load.f64 v0+88
v13 = load.f64 v0+96
v14 = load.f64 v0+104
v15 = load.f64 v0+112
v16 = load.f64 v0+120
v17 = load.f64 v0+128
v18 = load.f64 v0+136
v19 = load.f64 v0+144
v20 = load.f64 v0+152
v21 = fadd.f64 v1, v2
v22 = fadd.f64 v3, v4
v23 = fadd.f64 v5, v6
v24 = fadd.f64 v7, v8
v25 = fadd.f64 v9, v10
v26 = fadd.f64 v11, v12
v27 = fadd.f64 v13, v14
v28 = fadd.f64 v15, v16
v29 = fadd.f64 v17, v18
v30 = fadd.f64 v19, v20
v31 = fadd.f64 v21, v22
v32 = fadd.f64 v23, v24
v33 = fadd.f64 v25, v26
v34 = fadd.f64 v27, v28
v35 = fadd.f64 v29, v30
v36 = fadd.f64 v31, v32
v37 = fadd.f64 v33, v34
v38 = fadd.f64 v36, v37
v39 = fadd.f64 v38, v35
return v39
}
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: subq $$208, %rsp
; nextln: movdqu %xmm6, 0(%rsp)
; nextln: movdqu %xmm7, 16(%rsp)
; nextln: movdqu %xmm8, 32(%rsp)
; nextln: movdqu %xmm9, 48(%rsp)
; nextln: movdqu %xmm10, 64(%rsp)
; nextln: movdqu %xmm11, 80(%rsp)
; nextln: movdqu %xmm12, 96(%rsp)
; nextln: movdqu %xmm13, 112(%rsp)
; nextln: movdqu %xmm14, 128(%rsp)
; nextln: movdqu %xmm15, 144(%rsp)
; nextln: virtual_sp_offset_adjust 160
; nextln: movsd 0(%rcx), %xmm0
; nextln: movsd %xmm0, rsp(16 + virtual offset)
; nextln: movsd 8(%rcx), %xmm1
; nextln: movsd 16(%rcx), %xmm0
; nextln: movsd %xmm0, rsp(24 + virtual offset)
; nextln: movsd 24(%rcx), %xmm3
; nextln: movsd 32(%rcx), %xmm0
; nextln: movsd %xmm0, rsp(32 + virtual offset)
; nextln: movsd 40(%rcx), %xmm5
; nextln: movsd 48(%rcx), %xmm6
; nextln: movsd 56(%rcx), %xmm7
; nextln: movsd 64(%rcx), %xmm8
; nextln: movsd 72(%rcx), %xmm9
; nextln: movsd 80(%rcx), %xmm10
; nextln: movsd 88(%rcx), %xmm11
; nextln: movsd 96(%rcx), %xmm12
; nextln: movsd 104(%rcx), %xmm13
; nextln: movsd 112(%rcx), %xmm14
; nextln: movsd 120(%rcx), %xmm15
; nextln: movsd 128(%rcx), %xmm0
; nextln: movsd %xmm0, rsp(0 + virtual offset)
; nextln: movsd 136(%rcx), %xmm0
; nextln: movsd 144(%rcx), %xmm2
; nextln: movsd %xmm2, rsp(8 + virtual offset)
; nextln: movsd 152(%rcx), %xmm2
; nextln: nop len=0
; nextln: movsd rsp(16 + virtual offset), %xmm4
; nextln: addsd %xmm1, %xmm4
; nextln: movsd %xmm4, rsp(16 + virtual offset)
; nextln: movsd rsp(24 + virtual offset), %xmm1
; nextln: addsd %xmm3, %xmm1
; nextln: movsd rsp(32 + virtual offset), %xmm4
; nextln: addsd %xmm5, %xmm4
; nextln: addsd %xmm7, %xmm6
; nextln: addsd %xmm9, %xmm8
; nextln: addsd %xmm11, %xmm10
; nextln: addsd %xmm13, %xmm12
; nextln: addsd %xmm15, %xmm14
; nextln: movsd rsp(0 + virtual offset), %xmm3
; nextln: addsd %xmm0, %xmm3
; nextln: movsd rsp(8 + virtual offset), %xmm0
; nextln: addsd %xmm2, %xmm0
; nextln: movsd rsp(16 + virtual offset), %xmm2
; nextln: addsd %xmm1, %xmm2
; nextln: addsd %xmm6, %xmm4
; nextln: addsd %xmm10, %xmm8
; nextln: addsd %xmm14, %xmm12
; nextln: addsd %xmm0, %xmm3
; nextln: addsd %xmm4, %xmm2
; nextln: addsd %xmm12, %xmm8
; nextln: addsd %xmm8, %xmm2
; nextln: addsd %xmm3, %xmm2
; nextln: movaps %xmm2, %xmm0
; nextln: movdqu 0(%rsp), %xmm6
; nextln: movdqu 16(%rsp), %xmm7
; nextln: movdqu 32(%rsp), %xmm8
; nextln: movdqu 48(%rsp), %xmm9
; nextln: movdqu 64(%rsp), %xmm10
; nextln: movdqu 80(%rsp), %xmm11
; nextln: movdqu 96(%rsp), %xmm12
; nextln: movdqu 112(%rsp), %xmm13
; nextln: movdqu 128(%rsp), %xmm14
; nextln: movdqu 144(%rsp), %xmm15
; nextln: addq $$160, %rsp
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret


@@ -1,4 +1,5 @@
test compile
set enable_llvm_abi_extensions=true
target x86_64
feature "experimental_x64"
@@ -941,17 +942,17 @@ block0(v0: i128, v1: i128, v2: i64, v3: i128, v4: i128, v5: i128):
v11 = iadd.i128 v9, v10
return v11
; check: movq %rsp, %rbp
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: subq $$16, %rsp
; nextln: movq %r12, 0(%rsp)
; nextln: movq %r13, 8(%rsp)
; nextln: virtual_sp_offset_adjust 16
; nextln: movq 16(%rbp), %r9
; nextln: movq 24(%rbp), %r10
; nextln: movq 32(%rbp), %r12
; nextln: movq 40(%rbp), %r11
; nextln: movq 48(%rbp), %rax
; nextln: movq 56(%rbp), %r13
; nextln: movq 16(%rbp), %r10
; nextln: movq 24(%rbp), %r12
; nextln: movq 32(%rbp), %r11
; nextln: movq 40(%rbp), %rax
; nextln: movq 48(%rbp), %r13
; nextln: addq %rdx, %rdi
; nextln: adcq %rcx, %rsi
; nextln: xorq %rcx, %rcx
@@ -989,10 +990,10 @@ block0(v0: i128):
; nextln: movq %r10, 16(%rsi)
; nextln: movq %r11, 24(%rsi)
; nextln: movq %r12, 32(%rsi)
; nextln: movq %r13, 48(%rsi)
; nextln: movq %r14, 56(%rsi)
; nextln: movq %rdi, 64(%rsi)
; nextln: movq %rbx, 72(%rsi)
; nextln: movq %r13, 40(%rsi)
; nextln: movq %r14, 48(%rsi)
; nextln: movq %rdi, 56(%rsi)
; nextln: movq %rbx, 64(%rsi)
}


@@ -1,4 +1,5 @@
test compile
set enable_llvm_abi_extensions=true
target x86_64
feature "experimental_x64"