Merge pull request #2678 from cfallin/x64-fastcall

x86-64 Windows fastcall ABI support.
Chris Fallin, 2021-03-05 10:46:47 -08:00 (committed by GitHub)
13 changed files with 997 additions and 475 deletions


@@ -216,6 +216,25 @@ pub(crate) fn define() -> SettingGroup {
         0,
     );

+    settings.add_bool(
+        "enable_llvm_abi_extensions",
+        r#"
+        Enable various ABI extensions defined by LLVM's behavior.
+
+        In some cases, LLVM's implementation of an ABI (calling convention)
+        goes beyond a standard and supports additional argument types or
+        behavior. This option instructs Cranelift codegen to follow LLVM's
+        behavior where applicable.
+
+        Currently, this applies only to Windows Fastcall on x86-64, and
+        allows an `i128` argument to be spread across two 64-bit integer
+        registers. The Fastcall implementation otherwise does not support
+        `i128` arguments, and will panic if they are present and this
+        option is not set.
+        "#,
+        false,
+    );
+
     // BaldrMonkey requires that not-yet-relocated function addresses be encoded
     // as all-ones bitpatterns.
     settings.add_bool(
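The new flag defaults to false; an embedder opts in through the shared settings builder. A minimal sketch of doing so (assuming the standard cranelift_codegen::settings API; error handling elided):

    use cranelift_codegen::settings::{self, Configurable};

    fn flags_with_llvm_extensions() -> settings::Flags {
        let mut builder = settings::builder();
        // Opt in to LLVM-style ABI behavior, e.g. i128 args under fastcall.
        builder.enable("enable_llvm_abi_extensions").unwrap();
        settings::Flags::new(builder)
    }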


@@ -78,41 +78,41 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt
     match &param.purpose {
         &ir::ArgumentPurpose::VMContext => {
             // This is SpiderMonkey's `WasmTlsReg`.
-            Some(ABIArg::Reg {
-                regs: ValueRegs::one(xreg(BALDRDASH_TLS_REG).to_real_reg()),
-                ty: ir::types::I64,
-                extension: param.extension,
-                purpose: param.purpose,
-            })
+            Some(ABIArg::reg(
+                xreg(BALDRDASH_TLS_REG).to_real_reg(),
+                ir::types::I64,
+                param.extension,
+                param.purpose,
+            ))
         }
         &ir::ArgumentPurpose::SignatureId => {
             // This is SpiderMonkey's `WasmTableCallSigReg`.
-            Some(ABIArg::Reg {
-                regs: ValueRegs::one(xreg(BALDRDASH_SIG_REG).to_real_reg()),
-                ty: ir::types::I64,
-                extension: param.extension,
-                purpose: param.purpose,
-            })
+            Some(ABIArg::reg(
+                xreg(BALDRDASH_SIG_REG).to_real_reg(),
+                ir::types::I64,
+                param.extension,
+                param.purpose,
+            ))
         }
         &ir::ArgumentPurpose::CalleeTLS => {
             // This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020.
             assert!(call_conv == isa::CallConv::Baldrdash2020);
-            Some(ABIArg::Stack {
-                offset: BALDRDASH_CALLEE_TLS_OFFSET,
-                ty: ir::types::I64,
-                extension: ir::ArgumentExtension::None,
-                purpose: param.purpose,
-            })
+            Some(ABIArg::stack(
+                BALDRDASH_CALLEE_TLS_OFFSET,
+                ir::types::I64,
+                ir::ArgumentExtension::None,
+                param.purpose,
+            ))
         }
         &ir::ArgumentPurpose::CallerTLS => {
             // This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020.
             assert!(call_conv == isa::CallConv::Baldrdash2020);
-            Some(ABIArg::Stack {
-                offset: BALDRDASH_CALLER_TLS_OFFSET,
-                ty: ir::types::I64,
-                extension: ir::ArgumentExtension::None,
-                purpose: param.purpose,
-            })
+            Some(ABIArg::stack(
+                BALDRDASH_CALLER_TLS_OFFSET,
+                ir::types::I64,
+                ir::ArgumentExtension::None,
+                param.purpose,
+            ))
         }
         _ => None,
     }

@@ -161,6 +161,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
     fn compute_arg_locs(
         call_conv: isa::CallConv,
+        _flags: &settings::Flags,
         params: &[ir::AbiParam],
         args_or_rets: ArgsOrRets,
         add_ret_area_ptr: bool,

@@ -253,12 +254,12 @@ impl ABIMachineSpec for AArch64MachineDeps {
                 RegClass::V128 => vreg(*next_reg),
                 _ => unreachable!(),
             };
-            ret.push(ABIArg::Reg {
-                regs: ValueRegs::one(reg.to_real_reg()),
-                ty: param.value_type,
-                extension: param.extension,
-                purpose: param.purpose,
-            });
+            ret.push(ABIArg::reg(
+                reg.to_real_reg(),
+                param.value_type,
+                param.extension,
+                param.purpose,
+            ));
             *next_reg += 1;
             remaining_reg_vals -= 1;
         } else {

@@ -268,13 +269,13 @@ impl ABIMachineSpec for AArch64MachineDeps {
             let size = std::cmp::max(size, 8);
             // Align.
             debug_assert!(size.is_power_of_two());
-            next_stack = (next_stack + size - 1) & !(size - 1);
-            ret.push(ABIArg::Stack {
-                offset: next_stack as i64,
-                ty: param.value_type,
-                extension: param.extension,
-                purpose: param.purpose,
-            });
+            next_stack = align_to(next_stack, size);
+            ret.push(ABIArg::stack(
+                next_stack as i64,
+                param.value_type,
+                param.extension,
+                param.purpose,
+            ));
             next_stack += size;
         }
     }

@@ -286,19 +287,19 @@ impl ABIMachineSpec for AArch64MachineDeps {
     let extra_arg = if add_ret_area_ptr {
         debug_assert!(args_or_rets == ArgsOrRets::Args);
         if next_xreg < max_per_class_reg_vals && remaining_reg_vals > 0 {
-            ret.push(ABIArg::Reg {
-                regs: ValueRegs::one(xreg(next_xreg).to_real_reg()),
-                ty: I64,
-                extension: ir::ArgumentExtension::None,
-                purpose: ir::ArgumentPurpose::Normal,
-            });
+            ret.push(ABIArg::reg(
+                xreg(next_xreg).to_real_reg(),
+                I64,
+                ir::ArgumentExtension::None,
+                ir::ArgumentPurpose::Normal,
+            ));
         } else {
-            ret.push(ABIArg::Stack {
-                offset: next_stack as i64,
-                ty: I64,
-                extension: ir::ArgumentExtension::None,
-                purpose: ir::ArgumentPurpose::Normal,
-            });
+            ret.push(ABIArg::stack(
+                next_stack as i64,
+                I64,
+                ir::ArgumentExtension::None,
+                ir::ArgumentPurpose::Normal,
+            ));
             next_stack += 8;
         }
         Some(ret.len() - 1)

@@ -306,7 +307,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
         None
     };

-    next_stack = (next_stack + 15) & !15;
+    next_stack = align_to(next_stack, 16);

    // To avoid overflow issues, limit the arg/return size to something
    // reasonable -- here, 128 MB.


@@ -51,6 +51,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
     fn compute_arg_locs(
         _call_conv: isa::CallConv,
+        _flags: &settings::Flags,
         params: &[ir::AbiParam],
         args_or_rets: ArgsOrRets,
         add_ret_area_ptr: bool,

@@ -81,12 +82,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
         if next_rreg < max_reg_val {
             let reg = rreg(next_rreg);
-            ret.push(ABIArg::Reg {
-                regs: ValueRegs::one(reg.to_real_reg()),
-                ty: param.value_type,
-                extension: param.extension,
-                purpose: param.purpose,
-            });
+            ret.push(ABIArg::reg(
+                reg.to_real_reg(),
+                param.value_type,
+                param.extension,
+                param.purpose,
+            ));
             next_rreg += 1;
         } else {
             // Arguments are stored on stack in reversed order.

@@ -101,12 +102,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
     let extra_arg = if add_ret_area_ptr {
         debug_assert!(args_or_rets == ArgsOrRets::Args);
         if next_rreg < max_reg_val {
-            ret.push(ABIArg::Reg {
-                regs: ValueRegs::one(rreg(next_rreg).to_real_reg()),
-                ty: I32,
-                extension: ir::ArgumentExtension::None,
-                purpose: ir::ArgumentPurpose::Normal,
-            });
+            ret.push(ABIArg::reg(
+                rreg(next_rreg).to_real_reg(),
+                I32,
+                ir::ArgumentExtension::None,
+                ir::ArgumentPurpose::Normal,
+            ));
         } else {
             stack_args.push((
                 I32,

@@ -124,12 +125,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
     let max_stack = next_stack;
     for (ty, ext, purpose) in stack_args.into_iter().rev() {
         next_stack -= 4;
-        ret.push(ABIArg::Stack {
-            offset: (max_stack - next_stack) as i64,
-            ty,
-            extension: ext,
-            purpose,
-        });
+        ret.push(ABIArg::stack(
+            (max_stack - next_stack) as i64,
+            ty,
+            ext,
+            purpose,
+        ));
     }
     assert_eq!(next_stack, 0);


@@ -31,41 +31,41 @@ fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option<A
     match &param.purpose {
         &ir::ArgumentPurpose::VMContext => {
             // This is SpiderMonkey's `WasmTlsReg`.
-            Some(ABIArg::Reg {
-                regs: ValueRegs::one(regs::r14().to_real_reg()),
-                ty: types::I64,
-                extension: param.extension,
-                purpose: param.purpose,
-            })
+            Some(ABIArg::reg(
+                regs::r14().to_real_reg(),
+                types::I64,
+                param.extension,
+                param.purpose,
+            ))
         }
         &ir::ArgumentPurpose::SignatureId => {
             // This is SpiderMonkey's `WasmTableCallSigReg`.
-            Some(ABIArg::Reg {
-                regs: ValueRegs::one(regs::r10().to_real_reg()),
-                ty: types::I64,
-                extension: param.extension,
-                purpose: param.purpose,
-            })
+            Some(ABIArg::reg(
+                regs::r10().to_real_reg(),
+                types::I64,
+                param.extension,
+                param.purpose,
+            ))
         }
         &ir::ArgumentPurpose::CalleeTLS => {
             // This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020.
             assert!(call_conv == isa::CallConv::Baldrdash2020);
-            Some(ABIArg::Stack {
-                offset: BALDRDASH_CALLEE_TLS_OFFSET,
-                ty: ir::types::I64,
-                extension: ir::ArgumentExtension::None,
-                purpose: param.purpose,
-            })
+            Some(ABIArg::stack(
+                BALDRDASH_CALLEE_TLS_OFFSET,
+                ir::types::I64,
+                ir::ArgumentExtension::None,
+                param.purpose,
+            ))
         }
         &ir::ArgumentPurpose::CallerTLS => {
             // This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020.
             assert!(call_conv == isa::CallConv::Baldrdash2020);
-            Some(ABIArg::Stack {
-                offset: BALDRDASH_CALLER_TLS_OFFSET,
-                ty: ir::types::I64,
-                extension: ir::ArgumentExtension::None,
-                purpose: param.purpose,
-            })
+            Some(ABIArg::stack(
+                BALDRDASH_CALLER_TLS_OFFSET,
+                ir::types::I64,
+                ir::ArgumentExtension::None,
+                param.purpose,
+            ))
         }
         _ => None,
     }
@@ -97,18 +97,30 @@ impl ABIMachineSpec for X64ABIMachineSpec {
     fn compute_arg_locs(
         call_conv: isa::CallConv,
+        flags: &settings::Flags,
         params: &[ir::AbiParam],
         args_or_rets: ArgsOrRets,
         add_ret_area_ptr: bool,
     ) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
         let is_baldrdash = call_conv.extends_baldrdash();
+        let is_fastcall = call_conv.extends_windows_fastcall();
         let has_baldrdash_tls = call_conv == isa::CallConv::Baldrdash2020;

         let mut next_gpr = 0;
         let mut next_vreg = 0;
         let mut next_stack: u64 = 0;
+        let mut next_param_idx = 0; // Fastcall cares about overall param index
         let mut ret = vec![];

+        if args_or_rets == ArgsOrRets::Args && is_fastcall {
+            // Fastcall always reserves 32 bytes of shadow space corresponding to
+            // the four initial in-arg parameters.
+            //
+            // (See:
+            // https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-160)
+            next_stack = 32;
+        }
+
         if args_or_rets == ArgsOrRets::Args && has_baldrdash_tls {
             // Baldrdash ABI-2020 always has two stack-arg slots reserved, for the callee and
             // caller TLS-register values, respectively.
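Concretely, the reserved shadow space means the first stack-passed argument of a fastcall signature sits at offset 32 within the on-stack argument area (the area beginning just above the return address). A hedged sketch of the offset arithmetic for an all-i64 signature (illustrative helper, not part of the patch):

    /// Offset of the idx'th i64 parameter within the argument area, or None
    /// if it travels in a register (rcx, rdx, r8, r9). Bytes 0..32 of the
    /// area are the shadow slots for those four registers.
    fn fastcall_arg_offset(idx: usize) -> Option<u64> {
        if idx < 4 {
            None
        } else {
            Some(32 + 8 * (idx as u64 - 4))
        }
    }

With Cranelift's frame layout (16 bytes of saved RBP plus return address), the fifth argument is addressed as 16 + 32 = 48(%rbp), which is exactly the load that appears in the compile test at the end of this change.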
@@ -159,72 +171,92 @@ impl ABIMachineSpec for X64ABIMachineSpec {
         }

         // Find regclass(es) of the register(s) used to store a value of this type.
-        let (rcs, _) = Inst::rc_for_type(param.value_type)?;
-        let intreg = rcs[0] == RegClass::I64;
-        let num_regs = rcs.len();
-        assert!(num_regs <= 2);
-        if num_regs == 2 {
-            assert_eq!(rcs[0], rcs[1]);
-        }
-        let mut regs: SmallVec<[RealReg; 2]> = smallvec![];
-        for j in 0..num_regs {
+        let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;
+
+        // Now assign ABIArgSlots for each register-sized part.
+        //
+        // Note that the handling of `i128` values is unique here:
+        //
+        // - If `enable_llvm_abi_extensions` is set in the flags, each
+        //   `i128` is split into two `i64`s and assigned exactly as if it
+        //   were two consecutive 64-bit args. This is consistent with LLVM's
+        //   behavior, and is needed for some uses of Cranelift (e.g., the
+        //   rustc backend).
+        //
+        // - Otherwise, both SysV and Fastcall specify behavior (use of
+        //   vector register, a register pair, or passing by reference
+        //   depending on the case), but for simplicity, we will just panic if
+        //   an i128 type appears in a signature and the LLVM extensions flag
+        //   is not set.
+        //
+        // For examples of how rustc compiles i128 args and return values on
+        // both SysV and Fastcall platforms, see:
+        // https://godbolt.org/z/PhG3ob
+        if param.value_type.bits() > 64
+            && !param.value_type.is_vector()
+            && !flags.enable_llvm_abi_extensions()
+        {
+            panic!(
+                "i128 args/return values not supported unless LLVM ABI extensions are enabled"
+            );
+        }
+
+        let mut slots = vec![];
+        for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) {
+            let intreg = *rc == RegClass::I64;
             let nextreg = if intreg {
                 match args_or_rets {
-                    ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr + j),
+                    ArgsOrRets::Args => {
+                        get_intreg_for_arg(&call_conv, next_gpr, next_param_idx)
+                    }
                     ArgsOrRets::Rets => {
-                        get_intreg_for_retval_systemv(&call_conv, next_gpr + j, i + j)
+                        get_intreg_for_retval(&call_conv, next_gpr, next_param_idx)
                     }
                 }
             } else {
                 match args_or_rets {
-                    ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg + j),
+                    ArgsOrRets::Args => {
+                        get_fltreg_for_arg(&call_conv, next_vreg, next_param_idx)
+                    }
                     ArgsOrRets::Rets => {
-                        get_fltreg_for_retval_systemv(&call_conv, next_vreg + j, i + j)
+                        get_fltreg_for_retval(&call_conv, next_vreg, next_param_idx)
                    }
                 }
             };
+            next_param_idx += 1;
             if let Some(reg) = nextreg {
-                regs.push(reg.to_real_reg());
+                if intreg {
+                    next_gpr += 1;
+                } else {
+                    next_vreg += 1;
+                }
+                slots.push(ABIArgSlot::Reg {
+                    reg: reg.to_real_reg(),
+                    ty: *reg_ty,
+                    extension: param.extension,
+                });
             } else {
-                regs.clear();
-                break;
-            }
-        }
-        if regs.len() > 0 {
-            let regs = match num_regs {
-                1 => ValueRegs::one(regs[0]),
-                2 => ValueRegs::two(regs[0], regs[1]),
-                _ => panic!("More than two registers unexpected"),
-            };
-            ret.push(ABIArg::Reg {
-                regs,
-                ty: param.value_type,
-                extension: param.extension,
-                purpose: param.purpose,
-            });
-            if intreg {
-                next_gpr += num_regs;
-            } else {
-                next_vreg += num_regs;
-            }
-        } else {
-            // Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
-            // stack alignment happens separately after all args.)
-            let size = (param.value_type.bits() / 8) as u64;
-            let size = std::cmp::max(size, 8);
-            // Align.
-            debug_assert!(size.is_power_of_two());
-            next_stack = (next_stack + size - 1) & !(size - 1);
-            ret.push(ABIArg::Stack {
-                offset: next_stack as i64,
-                ty: param.value_type,
-                extension: param.extension,
-                purpose: param.purpose,
-            });
-            next_stack += size;
-        }
+                // Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
+                // stack alignment happens separately after all args.)
+                let size = (reg_ty.bits() / 8) as u64;
+                let size = std::cmp::max(size, 8);
+                // Align.
+                debug_assert!(size.is_power_of_two());
+                next_stack = align_to(next_stack, size);
+                slots.push(ABIArgSlot::Stack {
+                    offset: next_stack as i64,
+                    ty: *reg_ty,
+                    extension: param.extension,
+                });
+                next_stack += size;
+            }
+        }
+
+        ret.push(ABIArg::Slots {
+            slots,
+            purpose: param.purpose,
+        });
     }

     if args_or_rets == ArgsOrRets::Rets && is_baldrdash {
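With the extensions enabled, an i128 is simply two consecutive 64-bit slots, so register assignment proceeds as if the signature had been rewritten with two i64s. A small sketch of the resulting integer-slot assignment under fastcall (hypothetical helper, for intuition only):

    // E.g. f(x: i128, y: i64): x.lo -> rcx, x.hi -> rdx, y -> r8.
    fn fastcall_int_slots(param_bits: &[u32]) -> Vec<&'static str> {
        const GPRS: [&str; 4] = ["rcx", "rdx", "r8", "r9"];
        let mut out = Vec::new();
        let mut idx = 0;
        for &bits in param_bits {
            let parts = if bits > 64 { 2 } else { 1 }; // i128 -> two 64-bit parts
            for _ in 0..parts {
                out.push(*GPRS.get(idx).unwrap_or(&"stack"));
                idx += 1;
            }
        }
        out
    }

    // fastcall_int_slots(&[128, 64]) == ["rcx", "rdx", "r8"]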
@@ -233,20 +265,20 @@ impl ABIMachineSpec for X64ABIMachineSpec {
     let extra_arg = if add_ret_area_ptr {
         debug_assert!(args_or_rets == ArgsOrRets::Args);
-        if let Some(reg) = get_intreg_for_arg_systemv(&call_conv, next_gpr) {
-            ret.push(ABIArg::Reg {
-                regs: ValueRegs::one(reg.to_real_reg()),
-                ty: types::I64,
-                extension: ir::ArgumentExtension::None,
-                purpose: ir::ArgumentPurpose::Normal,
-            });
+        if let Some(reg) = get_intreg_for_arg(&call_conv, next_gpr, next_param_idx) {
+            ret.push(ABIArg::reg(
+                reg.to_real_reg(),
+                types::I64,
+                ir::ArgumentExtension::None,
+                ir::ArgumentPurpose::Normal,
+            ));
         } else {
-            ret.push(ABIArg::Stack {
-                offset: next_stack as i64,
-                ty: types::I64,
-                extension: ir::ArgumentExtension::None,
-                purpose: ir::ArgumentPurpose::Normal,
-            });
+            ret.push(ABIArg::stack(
+                next_stack as i64,
+                types::I64,
+                ir::ArgumentExtension::None,
+                ir::ArgumentPurpose::Normal,
+            ));
             next_stack += 8;
         }
         Some(ret.len() - 1)

@@ -254,7 +286,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
         None
     };

-    next_stack = (next_stack + 15) & !15;
+    next_stack = align_to(next_stack, 16);

     // To avoid overflow issues, limit the arg/return size to something reasonable.
     if next_stack > STACK_ARG_RET_SIZE_LIMIT {
@@ -452,10 +484,9 @@ impl ABIMachineSpec for X64ABIMachineSpec {
     // registers (all XMM registers are caller-save) so we can compute the
     // total size of the needed stack space easily.
     let clobbered = get_callee_saves(&call_conv, clobbers);
-    let clobbered_size = 8 * clobbered.len() as u32;
-    let stack_size = clobbered_size + fixed_frame_storage_size;
+    let stack_size = compute_clobber_size(&clobbered) + fixed_frame_storage_size;
     // Align to 16 bytes.
-    let stack_size = (stack_size + 15) & !15;
+    let stack_size = align_to(stack_size, 16);
     let clobbered_size = stack_size - fixed_frame_storage_size;
     // Adjust the stack pointer downward with one `sub rsp, IMM`
     // instruction.

@@ -473,16 +504,23 @@ impl ABIMachineSpec for X64ABIMachineSpec {
         let r_reg = reg.to_reg();
         match r_reg.get_class() {
             RegClass::I64 => {
-                insts.push(Inst::mov_r_m(
-                    OperandSize::Size64,
+                insts.push(Inst::store(
+                    types::I64,
                     r_reg.to_reg(),
                     Amode::imm_reg(cur_offset, regs::rsp()),
                 ));
                 cur_offset += 8;
             }
-            // No XMM regs are callee-save, so we do not need to implement
-            // this.
-            _ => unimplemented!(),
+            RegClass::V128 => {
+                cur_offset = align_to(cur_offset, 16);
+                insts.push(Inst::store(
+                    types::I8X16,
+                    r_reg.to_reg(),
+                    Amode::imm_reg(cur_offset, regs::rsp()),
+                ));
+                cur_offset += 16;
+            }
+            _ => unreachable!(),
         }
     }
@@ -499,8 +537,8 @@ impl ABIMachineSpec for X64ABIMachineSpec {
     let mut insts = SmallVec::new();
     let clobbered = get_callee_saves(&call_conv, clobbers);
-    let stack_size = 8 * clobbered.len() as u32;
-    let stack_size = (stack_size + 15) & !15;
+    let stack_size = compute_clobber_size(&clobbered);
+    let stack_size = align_to(stack_size, 16);

     // Restore regs by loading from offsets of RSP.
     let mut cur_offset = 0;

@@ -514,7 +552,17 @@ impl ABIMachineSpec for X64ABIMachineSpec {
             ));
             cur_offset += 8;
         }
-        _ => unimplemented!(),
+        RegClass::V128 => {
+            cur_offset = align_to(cur_offset, 16);
+            insts.push(Inst::load(
+                types::I8X16,
+                Amode::imm_reg(cur_offset, regs::rsp()),
+                Writable::from_reg(rreg.to_reg()),
+                ExtKind::None,
+            ));
+            cur_offset += 16;
+        }
+        _ => unreachable!(),
     }
 }

 // Adjust RSP back upward.
@@ -592,14 +640,14 @@ impl ABIMachineSpec for X64ABIMachineSpec {
     // Baldrdash should not use struct args.
     assert!(!call_conv.extends_baldrdash());
     let mut insts = SmallVec::new();
-    let arg0 = get_intreg_for_arg_systemv(&call_conv, 0).unwrap();
-    let arg1 = get_intreg_for_arg_systemv(&call_conv, 1).unwrap();
-    let arg2 = get_intreg_for_arg_systemv(&call_conv, 2).unwrap();
+    let arg0 = get_intreg_for_arg(&call_conv, 0, 0).unwrap();
+    let arg1 = get_intreg_for_arg(&call_conv, 1, 1).unwrap();
+    let arg2 = get_intreg_for_arg(&call_conv, 2, 2).unwrap();

     // We need a register to load the address of `memcpy()` below and we
     // don't have a lowering context to allocate a temp here; so just use a
     // register we know we are free to mutate as part of this sequence
     // (because it is clobbered by the call as per the ABI anyway).
-    let memcpy_addr = get_intreg_for_arg_systemv(&call_conv, 3).unwrap();
+    let memcpy_addr = get_intreg_for_arg(&call_conv, 3, 3).unwrap();
     insts.push(Inst::gen_move(Writable::from_reg(arg0), dst, I64));
     insts.push(Inst::gen_move(Writable::from_reg(arg1), src, I64));
     insts.extend(
@@ -648,10 +696,9 @@ impl ABIMachineSpec for X64ABIMachineSpec {
 fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec<Writable<Reg>> {
     let mut caller_saved = vec![
-        // Systemv calling convention:
-        // - GPR: all except RBX, RBP, R12 to R15 (which are callee-saved).
-        Writable::from_reg(regs::rsi()),
-        Writable::from_reg(regs::rdi()),
+        // intersection of Systemv and FastCall calling conventions:
+        // - GPR: all except RDI, RSI, RBX, RBP, R12 to R15.
+        //   SysV adds RDI, RSI (FastCall makes these callee-saved).
         Writable::from_reg(regs::rax()),
         Writable::from_reg(regs::rcx()),
         Writable::from_reg(regs::rdx()),

@@ -659,25 +706,30 @@ impl ABIMachineSpec for X64ABIMachineSpec {
         Writable::from_reg(regs::r9()),
         Writable::from_reg(regs::r10()),
         Writable::from_reg(regs::r11()),
-        // - XMM: all the registers!
+        // - XMM: XMM0-5. SysV adds the rest (XMM6-XMM15).
         Writable::from_reg(regs::xmm0()),
         Writable::from_reg(regs::xmm1()),
         Writable::from_reg(regs::xmm2()),
         Writable::from_reg(regs::xmm3()),
         Writable::from_reg(regs::xmm4()),
         Writable::from_reg(regs::xmm5()),
-        Writable::from_reg(regs::xmm6()),
-        Writable::from_reg(regs::xmm7()),
-        Writable::from_reg(regs::xmm8()),
-        Writable::from_reg(regs::xmm9()),
-        Writable::from_reg(regs::xmm10()),
-        Writable::from_reg(regs::xmm11()),
-        Writable::from_reg(regs::xmm12()),
-        Writable::from_reg(regs::xmm13()),
-        Writable::from_reg(regs::xmm14()),
-        Writable::from_reg(regs::xmm15()),
     ];

+    if !call_conv_of_callee.extends_windows_fastcall() {
+        caller_saved.push(Writable::from_reg(regs::rsi()));
+        caller_saved.push(Writable::from_reg(regs::rdi()));
+        caller_saved.push(Writable::from_reg(regs::xmm6()));
+        caller_saved.push(Writable::from_reg(regs::xmm7()));
+        caller_saved.push(Writable::from_reg(regs::xmm8()));
+        caller_saved.push(Writable::from_reg(regs::xmm9()));
+        caller_saved.push(Writable::from_reg(regs::xmm10()));
+        caller_saved.push(Writable::from_reg(regs::xmm11()));
+        caller_saved.push(Writable::from_reg(regs::xmm12()));
+        caller_saved.push(Writable::from_reg(regs::xmm13()));
+        caller_saved.push(Writable::from_reg(regs::xmm14()));
+        caller_saved.push(Writable::from_reg(regs::xmm15()));
+    }
+
     if call_conv_of_callee.extends_baldrdash() {
         caller_saved.push(Writable::from_reg(regs::r12()));
         caller_saved.push(Writable::from_reg(regs::r13()));
@@ -739,49 +791,67 @@ impl From<StackAMode> for SyntheticAmode {
     }
 }

-fn get_intreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
-    match call_conv {
+fn get_intreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
+    let is_fastcall = match call_conv {
         CallConv::Fast
         | CallConv::Cold
         | CallConv::SystemV
         | CallConv::BaldrdashSystemV
-        | CallConv::Baldrdash2020 => {}
-        _ => panic!("int args only supported for SysV calling convention"),
+        | CallConv::Baldrdash2020 => false,
+        CallConv::WindowsFastcall => true,
+        _ => panic!("int args only supported for SysV or Fastcall calling convention"),
     };
-    match idx {
-        0 => Some(regs::rdi()),
-        1 => Some(regs::rsi()),
-        2 => Some(regs::rdx()),
-        3 => Some(regs::rcx()),
-        4 => Some(regs::r8()),
-        5 => Some(regs::r9()),
+
+    // Fastcall counts by absolute argument number; SysV counts by argument of
+    // this (integer) class.
+    let i = if is_fastcall { arg_idx } else { idx };
+    match (i, is_fastcall) {
+        (0, false) => Some(regs::rdi()),
+        (1, false) => Some(regs::rsi()),
+        (2, false) => Some(regs::rdx()),
+        (3, false) => Some(regs::rcx()),
+        (4, false) => Some(regs::r8()),
+        (5, false) => Some(regs::r9()),
+        (0, true) => Some(regs::rcx()),
+        (1, true) => Some(regs::rdx()),
+        (2, true) => Some(regs::r8()),
+        (3, true) => Some(regs::r9()),
         _ => None,
     }
 }

-fn get_fltreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
-    match call_conv {
+fn get_fltreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
+    let is_fastcall = match call_conv {
         CallConv::Fast
         | CallConv::Cold
         | CallConv::SystemV
         | CallConv::BaldrdashSystemV
-        | CallConv::Baldrdash2020 => {}
-        _ => panic!("float args only supported for SysV calling convention"),
+        | CallConv::Baldrdash2020 => false,
+        CallConv::WindowsFastcall => true,
+        _ => panic!("float args only supported for SysV or Fastcall calling convention"),
     };
-    match idx {
-        0 => Some(regs::xmm0()),
-        1 => Some(regs::xmm1()),
-        2 => Some(regs::xmm2()),
-        3 => Some(regs::xmm3()),
-        4 => Some(regs::xmm4()),
-        5 => Some(regs::xmm5()),
-        6 => Some(regs::xmm6()),
-        7 => Some(regs::xmm7()),
+
+    // Fastcall counts by absolute argument number; SysV counts by argument of
+    // this (floating-point) class.
+    let i = if is_fastcall { arg_idx } else { idx };
+    match (i, is_fastcall) {
+        (0, false) => Some(regs::xmm0()),
+        (1, false) => Some(regs::xmm1()),
+        (2, false) => Some(regs::xmm2()),
+        (3, false) => Some(regs::xmm3()),
+        (4, false) => Some(regs::xmm4()),
+        (5, false) => Some(regs::xmm5()),
+        (6, false) => Some(regs::xmm6()),
+        (7, false) => Some(regs::xmm7()),
+        (0, true) => Some(regs::xmm0()),
+        (1, true) => Some(regs::xmm1()),
+        (2, true) => Some(regs::xmm2()),
+        (3, true) => Some(regs::xmm3()),
         _ => None,
     }
 }
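The class-index versus absolute-index distinction matters as soon as integer and float args are mixed. A minimal sketch of the two counting schemes (illustrative only; the real code consults the match tables above):

    fn assign(is_float: &[bool], fastcall: bool) -> Vec<&'static str> {
        const SYSV_INT: [&str; 6] = ["rdi", "rsi", "rdx", "rcx", "r8", "r9"];
        const SYSV_FLT: [&str; 8] = [
            "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
        ];
        const FASTCALL_INT: [&str; 4] = ["rcx", "rdx", "r8", "r9"];
        const FASTCALL_FLT: [&str; 4] = ["xmm0", "xmm1", "xmm2", "xmm3"];
        let (mut next_int, mut next_flt) = (0, 0);
        let mut out = Vec::new();
        for (abs_idx, &is_flt) in is_float.iter().enumerate() {
            let reg = match (is_flt, fastcall) {
                // Fastcall indexes by absolute parameter position...
                (true, true) => FASTCALL_FLT.get(abs_idx),
                (false, true) => FASTCALL_INT.get(abs_idx),
                // ...SysV by position within the register class.
                (true, false) => SYSV_FLT.get(next_flt),
                (false, false) => SYSV_INT.get(next_int),
            };
            if is_flt { next_flt += 1 } else { next_int += 1 };
            out.push(reg.copied().unwrap_or("stack"));
        }
        out
    }

    // (f64, i64, f64): fastcall -> ["xmm0", "rdx", "xmm2"];
    //                  SysV     -> ["xmm0", "rdi", "xmm1"].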
-fn get_intreg_for_retval_systemv(
+fn get_intreg_for_retval(
     call_conv: &CallConv,
     intreg_idx: usize,
     retval_idx: usize,

@@ -799,11 +869,16 @@ fn get_intreg_for_retval_systemv(
                 None
             }
         }
-        CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
+        CallConv::WindowsFastcall => match intreg_idx {
+            0 => Some(regs::rax()),
+            1 => Some(regs::rdx()), // The Rust ABI for i128s needs this.
+            _ => None,
+        },
+        CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
     }
 }

-fn get_fltreg_for_retval_systemv(
+fn get_fltreg_for_retval(
     call_conv: &CallConv,
     fltreg_idx: usize,
     retval_idx: usize,

@@ -821,7 +896,11 @@ fn get_fltreg_for_retval_systemv(
                 None
             }
         }
-        CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
+        CallConv::WindowsFastcall => match fltreg_idx {
+            0 => Some(regs::xmm0()),
+            _ => None,
+        },
+        CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
     }
 }
@@ -854,6 +933,21 @@ fn is_callee_save_baldrdash(r: RealReg) -> bool {
     }
 }

+fn is_callee_save_fastcall(r: RealReg) -> bool {
+    use regs::*;
+    match r.get_class() {
+        RegClass::I64 => match r.get_hw_encoding() as u8 {
+            ENC_RBX | ENC_RBP | ENC_RSI | ENC_RDI | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true,
+            _ => false,
+        },
+        RegClass::V128 => match r.get_hw_encoding() as u8 {
+            6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 => true,
+            _ => false,
+        },
+        _ => panic!("Unknown register class: {:?}", r.get_class()),
+    }
+}
 fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
     let mut regs: Vec<Writable<RealReg>> = match call_conv {
         CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => regs

@@ -869,7 +963,11 @@ fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<
             .cloned()
             .filter(|r| is_callee_save_systemv(r.to_reg()))
             .collect(),
-        CallConv::WindowsFastcall => todo!("windows fastcall"),
+        CallConv::WindowsFastcall => regs
+            .iter()
+            .cloned()
+            .filter(|r| is_callee_save_fastcall(r.to_reg()))
+            .collect(),
         CallConv::Probestack => todo!("probestack?"),
     };

     // Sort registers for deterministic code output. We can do an unstable sort because the

@@ -877,3 +975,20 @@ fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<
     regs.sort_unstable_by_key(|r| r.to_reg().get_index());
     regs
 }
+
+fn compute_clobber_size(clobbers: &Vec<Writable<RealReg>>) -> u32 {
+    let mut clobbered_size = 0;
+    for reg in clobbers {
+        match reg.to_reg().get_class() {
+            RegClass::I64 => {
+                clobbered_size += 8;
+            }
+            RegClass::V128 => {
+                clobbered_size = align_to(clobbered_size, 16);
+                clobbered_size += 16;
+            }
+            _ => unreachable!(),
+        }
+    }
+    clobbered_size
+}
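For intuition: a clobber list of rbx, xmm6, r12 yields 8, then align-to-16 plus 16 (= 32), then 40; the prologue afterwards rounds the whole frame up to 16 bytes. A hedged mirror of the computation (hypothetical, for illustration only):

    fn clobber_size(is_vector: &[bool]) -> u32 {
        let mut size = 0u32;
        for &v in is_vector {
            if v {
                size = (size + 15) & !15; // align_to(size, 16)
                size += 16; // a 128-bit XMM save slot
            } else {
                size += 8; // a 64-bit GPR save slot
            }
        }
        size
    }

    // clobber_size(&[false, true, false]) == 40  // rbx, xmm6, r12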


@@ -456,6 +456,7 @@ pub(crate) enum InstructionSet {
     Popcnt,
     Lzcnt,
     BMI1,
+    #[allow(dead_code)] // never constructed (yet).
     BMI2,
 }


@@ -23,11 +23,20 @@ use regalloc::{
 };
 use std::string::String;

-// Hardware encodings for a few registers.
+// Hardware encodings (note the special rax, rcx, rdx, rbx order).
+pub const ENC_RAX: u8 = 0;
+pub const ENC_RCX: u8 = 1;
+pub const ENC_RDX: u8 = 2;
 pub const ENC_RBX: u8 = 3;
 pub const ENC_RSP: u8 = 4;
 pub const ENC_RBP: u8 = 5;
+pub const ENC_RSI: u8 = 6;
+pub const ENC_RDI: u8 = 7;
+pub const ENC_R8: u8 = 8;
+pub const ENC_R9: u8 = 9;
+pub const ENC_R10: u8 = 10;
+pub const ENC_R11: u8 = 11;
 pub const ENC_R12: u8 = 12;
 pub const ENC_R13: u8 = 13;
 pub const ENC_R14: u8 = 14;

@@ -38,31 +47,31 @@ fn gpr(enc: u8, index: u8) -> Reg {
 }

 pub(crate) fn rsi() -> Reg {
-    gpr(6, 16)
+    gpr(ENC_RSI, 16)
 }

 pub(crate) fn rdi() -> Reg {
-    gpr(7, 17)
+    gpr(ENC_RDI, 17)
 }

 pub(crate) fn rax() -> Reg {
-    gpr(0, 18)
+    gpr(ENC_RAX, 18)
 }

 pub(crate) fn rcx() -> Reg {
-    gpr(1, 19)
+    gpr(ENC_RCX, 19)
 }

 pub(crate) fn rdx() -> Reg {
-    gpr(2, 20)
+    gpr(ENC_RDX, 20)
 }

 pub(crate) fn r8() -> Reg {
-    gpr(8, 21)
+    gpr(ENC_R8, 21)
 }

 pub(crate) fn r9() -> Reg {
-    gpr(9, 22)
+    gpr(ENC_R9, 22)
 }

 pub(crate) fn r10() -> Reg {
-    gpr(10, 23)
+    gpr(ENC_R10, 23)
 }

 pub(crate) fn r11() -> Reg {
-    gpr(11, 24)
+    gpr(ENC_R11, 24)
 }

 pub(crate) fn r12() -> Reg {
     gpr(ENC_R12, 25)

@@ -124,19 +124,18 @@ use std::convert::TryFrom;
 use std::marker::PhantomData;
 use std::mem;

-/// A location for an argument or return value.
-#[derive(Clone, Copy, Debug)]
-pub enum ABIArg {
-    /// In a real register (or set of registers).
+/// A location for (part of) an argument or return value. These "storage slots"
+/// are specified for each register-sized part of an argument.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum ABIArgSlot {
+    /// In a real register.
     Reg {
-        /// Register(s) that hold this arg.
-        regs: ValueRegs<RealReg>,
+        /// Register that holds this arg.
+        reg: RealReg,
         /// Value type of this arg.
         ty: ir::Type,
         /// Should this arg be zero- or sign-extended?
         extension: ir::ArgumentExtension,
-        /// Purpose of this arg.
-        purpose: ir::ArgumentPurpose,
     },
     /// Arguments only: on stack, at given offset from SP at entry.
     Stack {

@@ -146,6 +145,26 @@ pub enum ABIArg {
         ty: ir::Type,
         /// Should this arg be zero- or sign-extended?
         extension: ir::ArgumentExtension,
+    },
+}
+
+/// An ABIArg is composed of one or more parts. This allows for a CLIF-level
+/// Value to be passed with its parts in more than one location at the ABI
+/// level. For example, a 128-bit integer may be passed in two 64-bit registers,
+/// or even a 64-bit register and a 64-bit stack slot, on a 64-bit machine. The
+/// number of "parts" should correspond to the number of registers used to store
+/// this type according to the machine backend.
+///
+/// As an invariant, the `purpose` for every part must match. As a further
+/// invariant, a `StructArg` part cannot appear with any other part.
+#[derive(Clone, Debug)]
+pub enum ABIArg {
+    /// Storage slots (registers or stack locations) for each part of the
+    /// argument value. The number of slots must equal the number of register
+    /// parts used to store a value of this type.
+    Slots {
+        /// Slots, one per register part.
+        slots: Vec<ABIArgSlot>,
         /// Purpose of this arg.
         purpose: ir::ArgumentPurpose,
     },
@@ -167,21 +186,50 @@ pub enum ABIArg {
 impl ABIArg {
     /// Get the purpose of this arg.
-    fn get_purpose(self) -> ir::ArgumentPurpose {
+    fn get_purpose(&self) -> ir::ArgumentPurpose {
         match self {
-            ABIArg::Reg { purpose, .. } => purpose,
-            ABIArg::Stack { purpose, .. } => purpose,
-            ABIArg::StructArg { purpose, .. } => purpose,
+            &ABIArg::Slots { purpose, .. } => purpose,
+            &ABIArg::StructArg { purpose, .. } => purpose,
         }
     }

     /// Is this a StructArg?
-    fn is_struct_arg(self) -> bool {
+    fn is_struct_arg(&self) -> bool {
         match self {
-            ABIArg::StructArg { .. } => true,
+            &ABIArg::StructArg { .. } => true,
             _ => false,
         }
     }
+
+    /// Create an ABIArg from one register.
+    pub fn reg(
+        reg: RealReg,
+        ty: ir::Type,
+        extension: ir::ArgumentExtension,
+        purpose: ir::ArgumentPurpose,
+    ) -> ABIArg {
+        ABIArg::Slots {
+            slots: vec![ABIArgSlot::Reg { reg, ty, extension }],
+            purpose,
+        }
+    }
+
+    /// Create an ABIArg from one stack slot.
+    pub fn stack(
+        offset: i64,
+        ty: ir::Type,
+        extension: ir::ArgumentExtension,
+        purpose: ir::ArgumentPurpose,
+    ) -> ABIArg {
+        ABIArg::Slots {
+            slots: vec![ABIArgSlot::Stack {
+                offset,
+                ty,
+                extension,
+            }],
+            purpose,
+        }
+    }
 }
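A multi-part value is just a Slots variant with more than one entry. For intuition, an i128 that the ABI assigned to two 64-bit integer registers would be built roughly like this (hedged sketch using the types above; `lo`/`hi` stand in for whatever registers were chosen):

    fn i128_in_two_gprs(lo: RealReg, hi: RealReg) -> ABIArg {
        let slot = |reg| ABIArgSlot::Reg {
            reg,
            ty: ir::types::I64,
            extension: ir::ArgumentExtension::None,
        };
        ABIArg::Slots {
            slots: vec![slot(lo), slot(hi)],
            purpose: ir::ArgumentPurpose::Normal,
        }
    }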
 /// Are we computing information about arguments or return values? Much of the

@@ -275,6 +323,7 @@ pub trait ABIMachineSpec {
     /// index of the extra synthetic arg that was added.
     fn compute_arg_locs(
         call_conv: isa::CallConv,
+        flags: &settings::Flags,
         params: &[ir::AbiParam],
         args_or_rets: ArgsOrRets,
         add_ret_area_ptr: bool,
@@ -461,11 +510,15 @@ struct ABISig {
 }

 impl ABISig {
-    fn from_func_sig<M: ABIMachineSpec>(sig: &ir::Signature) -> CodegenResult<ABISig> {
+    fn from_func_sig<M: ABIMachineSpec>(
+        sig: &ir::Signature,
+        flags: &settings::Flags,
+    ) -> CodegenResult<ABISig> {
         // Compute args and retvals from signature. Handle retvals first,
         // because we may need to add a return-area arg to the args.
         let (rets, stack_ret_space, _) = M::compute_arg_locs(
             sig.call_conv,
+            flags,
             &sig.returns,
             ArgsOrRets::Rets,
             /* extra ret-area ptr = */ false,

@@ -473,6 +526,7 @@ impl ABISig {
         let need_stack_return_area = stack_ret_space > 0;
         let (args, stack_arg_space, stack_ret_arg) = M::compute_arg_locs(
             sig.call_conv,
+            flags,
             &sig.params,
             ArgsOrRets::Args,
             need_stack_return_area,
@@ -557,8 +611,11 @@ fn get_special_purpose_param_register(
     purpose: ir::ArgumentPurpose,
 ) -> Option<Reg> {
     let idx = f.signature.special_param_index(purpose)?;
-    match abi.args[idx] {
-        ABIArg::Reg { regs, .. } => Some(regs.only_reg().unwrap().to_reg()),
+    match &abi.args[idx] {
+        &ABIArg::Slots { ref slots, .. } => match &slots[0] {
+            &ABIArgSlot::Reg { reg, .. } => Some(reg.to_reg()),
+            _ => None,
+        },
         _ => None,
     }
 }

@@ -569,7 +626,7 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
         debug!("ABI: func signature {:?}", f.signature);

         let ir_sig = ensure_struct_return_ptr_is_returned(&f.signature);
-        let sig = ABISig::from_func_sig::<M>(&ir_sig)?;
+        let sig = ABISig::from_func_sig::<M>(&ir_sig, &flags)?;

         let call_conv = f.signature.call_conv;
         // Only these calling conventions are supported.

@@ -577,7 +634,8 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
             call_conv == isa::CallConv::SystemV
                 || call_conv == isa::CallConv::Fast
                 || call_conv == isa::CallConv::Cold
-                || call_conv.extends_baldrdash(),
+                || call_conv.extends_baldrdash()
+                || call_conv.extends_windows_fastcall(),
             "Unsupported calling convention: {:?}",
             call_conv
         );
@@ -776,19 +834,6 @@ fn ty_from_ty_hint_or_reg_class<M: ABIMachineSpec>(r: Reg, ty: Option<Type>) ->
     }
 }

-fn gen_move_multi<M: ABIMachineSpec>(
-    dst: ValueRegs<Writable<Reg>>,
-    src: ValueRegs<Reg>,
-    ty: Type,
-) -> SmallInstVec<M::I> {
-    let mut ret = smallvec![];
-    let (_, tys) = M::I::rc_for_type(ty).unwrap();
-    for ((&dst, &src), &ty) in dst.regs().iter().zip(src.regs().iter()).zip(tys.iter()) {
-        ret.push(M::gen_move(dst, src, ty));
-    }
-    ret
-}
-
 fn gen_load_stack_multi<M: ABIMachineSpec>(
     from: StackAMode,
     dst: ValueRegs<Writable<Reg>>,

@@ -821,22 +866,6 @@ fn gen_store_stack_multi<M: ABIMachineSpec>(
     ret
 }

-fn gen_store_base_offset_multi<M: ABIMachineSpec>(
-    base: Reg,
-    mut offset: i32,
-    src: ValueRegs<Reg>,
-    ty: Type,
-) -> SmallInstVec<M::I> {
-    let mut ret = smallvec![];
-    let (_, tys) = M::I::rc_for_type(ty).unwrap();
-    // N.B.: registers are given in the `ValueRegs` in target endian order.
-    for (&src, &ty) in src.regs().iter().zip(tys.iter()) {
-        ret.push(M::gen_store_base_offset(base, offset, src, ty));
-        offset += ty.bytes() as i32;
-    }
-    ret
-}
-
 fn ensure_struct_return_ptr_is_returned(sig: &ir::Signature) -> ir::Signature {
     let params_structret = sig
         .params
@@ -892,10 +921,12 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
     fn liveins(&self) -> Set<RealReg> {
         let mut set: Set<RealReg> = Set::empty();
-        for &arg in &self.sig.args {
-            if let ABIArg::Reg { regs, .. } = arg {
-                for &r in regs.regs() {
-                    set.insert(r);
+        for arg in &self.sig.args {
+            if let &ABIArg::Slots { ref slots, .. } = arg {
+                for slot in slots {
+                    if let ABIArgSlot::Reg { reg, .. } = slot {
+                        set.insert(*reg);
+                    }
                 }
             }
         }

@@ -904,10 +935,12 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
     fn liveouts(&self) -> Set<RealReg> {
         let mut set: Set<RealReg> = Set::empty();
-        for &ret in &self.sig.rets {
-            if let ABIArg::Reg { regs, .. } = ret {
-                for &r in regs.regs() {
-                    set.insert(r);
+        for ret in &self.sig.rets {
+            if let &ABIArg::Slots { ref slots, .. } = ret {
+                for slot in slots {
+                    if let ABIArgSlot::Reg { reg, .. } = slot {
+                        set.insert(*reg);
+                    }
                 }
             }
         }
@@ -935,29 +968,43 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
         idx: usize,
         into_regs: ValueRegs<Writable<Reg>>,
     ) -> SmallInstVec<Self::I> {
+        let mut insts = smallvec![];
         match &self.sig.args[idx] {
-            // Extension mode doesn't matter (we're copying out, not in; we
-            // ignore high bits by convention).
-            &ABIArg::Reg { regs, ty, .. } => {
-                gen_move_multi::<M>(into_regs, regs.map(|r| r.to_reg()), ty)
+            &ABIArg::Slots { ref slots, .. } => {
+                assert_eq!(into_regs.len(), slots.len());
+                for (slot, into_reg) in slots.iter().zip(into_regs.regs().iter()) {
+                    match slot {
+                        // Extension mode doesn't matter (we're copying out, not in; we
+                        // ignore high bits by convention).
+                        &ABIArgSlot::Reg { reg, ty, .. } => {
+                            insts.push(M::gen_move(*into_reg, reg.to_reg(), ty));
+                        }
+                        &ABIArgSlot::Stack { offset, ty, .. } => {
+                            insts.push(M::gen_load_stack(
+                                StackAMode::FPOffset(
+                                    M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
+                                    ty,
+                                ),
+                                *into_reg,
+                                ty,
+                            ));
+                        }
+                    }
+                }
             }
-            &ABIArg::Stack { offset, ty, .. } => gen_load_stack_multi::<M>(
-                StackAMode::FPOffset(
-                    M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
-                    ty,
-                ),
-                into_regs,
-                ty,
-            ),
-            &ABIArg::StructArg { offset, .. } => smallvec![M::gen_get_stack_addr(
-                StackAMode::FPOffset(
-                    M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
-                    I8,
-                ),
-                into_regs.only_reg().unwrap(),
-                I8,
-            )],
+            &ABIArg::StructArg { offset, .. } => {
+                let into_reg = into_regs.only_reg().unwrap();
+                insts.push(M::gen_get_stack_addr(
+                    StackAMode::FPOffset(
+                        M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
+                        I8,
+                    ),
+                    into_reg,
+                    I8,
+                ));
+            }
         }
+        insts
     }

     fn arg_is_needed_in_body(&self, idx: usize) -> bool {
@@ -978,87 +1025,84 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
         let mut ret = smallvec![];
         let word_bits = M::word_bits() as u8;
         match &self.sig.rets[idx] {
-            &ABIArg::Reg {
-                regs,
-                ty,
-                extension,
-                ..
-            } => {
-                let from_bits = ty_bits(ty) as u8;
-                let dest_regs = writable_value_regs(regs.map(|r| r.to_reg()));
-                let ext = M::get_ext_mode(self.sig.call_conv, extension);
-                match (ext, from_bits) {
-                    (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
-                        if n < word_bits =>
-                    {
-                        let signed = ext == ArgumentExtension::Sext;
-                        let dest_reg = dest_regs
-                            .only_reg()
-                            .expect("extension only possible from one-reg value");
-                        let from_reg = from_regs
-                            .only_reg()
-                            .expect("extension only possible from one-reg value");
-                        ret.push(M::gen_extend(
-                            dest_reg,
-                            from_reg.to_reg(),
-                            signed,
-                            from_bits,
-                            /* to_bits = */ word_bits,
-                        ));
-                    }
-                    _ => ret.extend(
-                        gen_move_multi::<M>(dest_regs, non_writable_value_regs(from_regs), ty)
-                            .into_iter(),
-                    ),
-                };
-            }
-            &ABIArg::Stack {
-                offset,
-                ty,
-                extension,
-                ..
-            } => {
-                let mut ty = ty;
-                let from_bits = ty_bits(ty) as u8;
-                // A machine ABI implementation should ensure that stack frames
-                // have "reasonable" size. All current ABIs for machinst
-                // backends (aarch64 and x64) enforce a 128MB limit.
-                let off = i32::try_from(offset)
-                    .expect("Argument stack offset greater than 2GB; should hit impl limit first");
-                let ext = M::get_ext_mode(self.sig.call_conv, extension);
-                // Trash the from_reg; it should be its last use.
-                match (ext, from_bits) {
-                    (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
-                        if n < word_bits =>
-                    {
-                        let from_reg = from_regs
-                            .only_reg()
-                            .expect("extension only possible from one-reg value");
-                        assert_eq!(M::word_reg_class(), from_reg.to_reg().get_class());
-                        let signed = ext == ArgumentExtension::Sext;
-                        ret.push(M::gen_extend(
-                            from_reg,
-                            from_reg.to_reg(),
-                            signed,
-                            from_bits,
-                            /* to_bits = */ word_bits,
-                        ));
-                        // Store the extended version.
-                        ty = M::word_type();
-                    }
-                    _ => {}
-                };
-                ret.extend(
-                    gen_store_base_offset_multi::<M>(
-                        self.ret_area_ptr.unwrap().to_reg(),
-                        off,
-                        non_writable_value_regs(from_regs),
-                        ty,
-                    )
-                    .into_iter(),
-                );
-            }
-            &ABIArg::StructArg { .. } => panic!("Unexpected StructArg location for return value"),
+            &ABIArg::Slots { ref slots, .. } => {
+                assert_eq!(from_regs.len(), slots.len());
+                for (slot, from_reg) in slots.iter().zip(from_regs.regs().iter()) {
+                    match slot {
+                        &ABIArgSlot::Reg {
+                            reg, ty, extension, ..
+                        } => {
+                            let from_bits = ty_bits(ty) as u8;
+                            let ext = M::get_ext_mode(self.sig.call_conv, extension);
+                            match (ext, from_bits) {
+                                (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
+                                    if n < word_bits =>
+                                {
+                                    let signed = ext == ArgumentExtension::Sext;
+                                    ret.push(M::gen_extend(
+                                        Writable::from_reg(reg.to_reg()),
+                                        from_reg.to_reg(),
+                                        signed,
+                                        from_bits,
+                                        /* to_bits = */ word_bits,
+                                    ));
+                                }
+                                _ => {
+                                    ret.push(M::gen_move(
+                                        Writable::from_reg(reg.to_reg()),
+                                        from_reg.to_reg(),
+                                        ty,
+                                    ));
+                                }
+                            };
+                        }
+                        &ABIArgSlot::Stack {
+                            offset,
+                            ty,
+                            extension,
+                            ..
+                        } => {
+                            let mut ty = ty;
+                            let from_bits = ty_bits(ty) as u8;
+                            // A machine ABI implementation should ensure that stack frames
+                            // have "reasonable" size. All current ABIs for machinst
+                            // backends (aarch64 and x64) enforce a 128MB limit.
+                            let off = i32::try_from(offset).expect(
+                                "Argument stack offset greater than 2GB; should hit impl limit first",
+                            );
+                            let ext = M::get_ext_mode(self.sig.call_conv, extension);
+                            // Trash the from_reg; it should be its last use.
+                            match (ext, from_bits) {
+                                (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
+                                    if n < word_bits =>
+                                {
+                                    assert_eq!(M::word_reg_class(), from_reg.to_reg().get_class());
+                                    let signed = ext == ArgumentExtension::Sext;
+                                    ret.push(M::gen_extend(
+                                        Writable::from_reg(from_reg.to_reg()),
+                                        from_reg.to_reg(),
+                                        signed,
+                                        from_bits,
+                                        /* to_bits = */ word_bits,
+                                    ));
+                                    // Store the extended version.
+                                    ty = M::word_type();
+                                }
+                                _ => {}
+                            };
+                            ret.push(M::gen_store_base_offset(
+                                self.ret_area_ptr.unwrap().to_reg(),
+                                off,
+                                from_reg.to_reg(),
+                                ty,
+                            ));
+                        }
+                    }
+                }
+            }
+            &ABIArg::StructArg { .. } => {
+                panic!("StructArg in return position is unsupported");
+            }
         }
         ret
     }
@@ -1345,20 +1389,30 @@ fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Wr
     // Compute uses: all arg regs.
     let mut uses = Vec::new();
     for arg in &sig.args {
-        match arg {
-            &ABIArg::Reg { regs, .. } => uses.extend(regs.regs().iter().map(|r| r.to_reg())),
-            _ => {}
+        if let &ABIArg::Slots { ref slots, .. } = arg {
+            for slot in slots {
+                match slot {
+                    &ABIArgSlot::Reg { reg, .. } => {
+                        uses.push(reg.to_reg());
+                    }
+                    _ => {}
+                }
+            }
         }
     }

     // Compute defs: all retval regs, and all caller-save (clobbered) regs.
     let mut defs = M::get_regs_clobbered_by_call(sig.call_conv);
     for ret in &sig.rets {
-        match ret {
-            &ABIArg::Reg { regs, .. } => {
-                defs.extend(regs.regs().iter().map(|r| Writable::from_reg(r.to_reg())))
-            }
-            _ => {}
+        if let &ABIArg::Slots { ref slots, .. } = ret {
+            for slot in slots {
+                match slot {
+                    &ABIArgSlot::Reg { reg, .. } => {
+                        defs.push(Writable::from_reg(reg.to_reg()));
+                    }
+                    _ => {}
+                }
+            }
         }
     }
@@ -1406,7 +1460,7 @@ impl<M: ABIMachineSpec> ABICallerImpl<M> {
         flags: &settings::Flags,
     ) -> CodegenResult<ABICallerImpl<M>> {
         let ir_sig = ensure_struct_return_ptr_is_returned(sig);
-        let sig = ABISig::from_func_sig::<M>(&ir_sig)?;
+        let sig = ABISig::from_func_sig::<M>(&ir_sig, flags)?;

         let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig);
         Ok(ABICallerImpl {
             ir_sig,

@@ -1431,7 +1485,7 @@ impl<M: ABIMachineSpec> ABICallerImpl<M> {
         flags: &settings::Flags,
     ) -> CodegenResult<ABICallerImpl<M>> {
         let ir_sig = ensure_struct_return_ptr_is_returned(sig);
-        let sig = ABISig::from_func_sig::<M>(&ir_sig)?;
+        let sig = ABISig::from_func_sig::<M>(&ir_sig, flags)?;

         let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig);
         Ok(ABICallerImpl {
             ir_sig,
@@ -1501,75 +1555,73 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
         let word_rc = M::word_reg_class();
         let word_bits = M::word_bits() as usize;
         match &self.sig.args[idx] {
-            &ABIArg::Reg {
-                regs,
-                ty,
-                extension,
-                ..
-            } => {
-                let ext = M::get_ext_mode(self.sig.call_conv, extension);
-                if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
-                    let reg = regs.only_reg().unwrap();
-                    assert_eq!(word_rc, reg.get_class());
-                    let signed = match ext {
-                        ir::ArgumentExtension::Uext => false,
-                        ir::ArgumentExtension::Sext => true,
-                        _ => unreachable!(),
-                    };
-                    ctx.emit(M::gen_extend(
-                        Writable::from_reg(reg.to_reg()),
-                        from_regs.only_reg().unwrap(),
-                        signed,
-                        ty_bits(ty) as u8,
-                        word_bits as u8,
-                    ));
-                } else {
-                    for insn in gen_move_multi::<M>(
-                        writable_value_regs(regs.map(|r| r.to_reg())),
-                        from_regs,
-                        ty,
-                    ) {
-                        ctx.emit(insn);
-                    }
-                }
-            }
-            &ABIArg::Stack {
-                offset,
-                ty,
-                extension,
-                ..
-            } => {
-                let mut ty = ty;
-                let ext = M::get_ext_mode(self.sig.call_conv, extension);
-                if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
-                    let from_reg = from_regs
-                        .only_reg()
-                        .expect("only one reg for sub-word value width");
-                    assert_eq!(word_rc, from_reg.get_class());
-                    let signed = match ext {
-                        ir::ArgumentExtension::Uext => false,
-                        ir::ArgumentExtension::Sext => true,
-                        _ => unreachable!(),
-                    };
-                    // Extend in place in the source register. Our convention is to
-                    // treat high bits as undefined for values in registers, so this
-                    // is safe, even for an argument that is nominally read-only.
-                    ctx.emit(M::gen_extend(
-                        Writable::from_reg(from_reg),
-                        from_reg,
-                        signed,
-                        ty_bits(ty) as u8,
-                        word_bits as u8,
-                    ));
-                    // Store the extended version.
-                    ty = M::word_type();
-                }
-                for insn in
-                    gen_store_stack_multi::<M>(StackAMode::SPOffset(offset, ty), from_regs, ty)
-                {
-                    ctx.emit(insn);
-                }
-            }
+            &ABIArg::Slots { ref slots, .. } => {
+                assert_eq!(from_regs.len(), slots.len());
+                for (slot, from_reg) in slots.iter().zip(from_regs.regs().iter()) {
+                    match slot {
+                        &ABIArgSlot::Reg {
+                            reg, ty, extension, ..
+                        } => {
+                            let ext = M::get_ext_mode(self.sig.call_conv, extension);
+                            if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
+                                assert_eq!(word_rc, reg.get_class());
+                                let signed = match ext {
+                                    ir::ArgumentExtension::Uext => false,
+                                    ir::ArgumentExtension::Sext => true,
+                                    _ => unreachable!(),
+                                };
+                                ctx.emit(M::gen_extend(
+                                    Writable::from_reg(reg.to_reg()),
+                                    *from_reg,
+                                    signed,
+                                    ty_bits(ty) as u8,
+                                    word_bits as u8,
+                                ));
+                            } else {
+                                ctx.emit(M::gen_move(
+                                    Writable::from_reg(reg.to_reg()),
+                                    *from_reg,
+                                    ty,
+                                ));
+                            }
+                        }
+                        &ABIArgSlot::Stack {
+                            offset,
+                            ty,
+                            extension,
+                            ..
+                        } => {
+                            let mut ty = ty;
+                            let ext = M::get_ext_mode(self.sig.call_conv, extension);
+                            if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
+                                assert_eq!(word_rc, from_reg.get_class());
+                                let signed = match ext {
+                                    ir::ArgumentExtension::Uext => false,
+                                    ir::ArgumentExtension::Sext => true,
+                                    _ => unreachable!(),
+                                };
+                                // Extend in place in the source register. Our convention is to
+                                // treat high bits as undefined for values in registers, so this
+                                // is safe, even for an argument that is nominally read-only.
+                                ctx.emit(M::gen_extend(
+                                    Writable::from_reg(*from_reg),
+                                    *from_reg,
+                                    signed,
+                                    ty_bits(ty) as u8,
+                                    word_bits as u8,
+                                ));
+                                // Store the extended version.
+                                ty = M::word_type();
+                            }
+                            ctx.emit(M::gen_store_stack(
+                                StackAMode::SPOffset(offset, ty),
+                                *from_reg,
+                                ty,
+                            ));
+                        }
+                    }
+                }
+            }
             &ABIArg::StructArg { offset, size, .. } => {
                 let src_ptr = from_regs.only_reg().unwrap();
                 let dst_ptr = ctx.alloc_tmp(M::word_type()).only_reg().unwrap();
@@ -1618,24 +1670,29 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
         into_regs: ValueRegs<Writable<Reg>>,
     ) {
         match &self.sig.rets[idx] {
-            // Extension mode doesn't matter because we're copying out, not in,
-            // and we ignore high bits in our own registers by convention.
-            &ABIArg::Reg { regs, ty, .. } => {
-                for insn in gen_move_multi::<M>(into_regs, regs.map(|r| r.to_reg()), ty) {
-                    ctx.emit(insn);
+            &ABIArg::Slots { ref slots, .. } => {
+                assert_eq!(into_regs.len(), slots.len());
+                for (slot, into_reg) in slots.iter().zip(into_regs.regs().iter()) {
+                    match slot {
+                        // Extension mode doesn't matter because we're copying out, not in,
+                        // and we ignore high bits in our own registers by convention.
+                        &ABIArgSlot::Reg { reg, ty, .. } => {
+                            ctx.emit(M::gen_move(*into_reg, reg.to_reg(), ty));
+                        }
+                        &ABIArgSlot::Stack { offset, ty, .. } => {
+                            let ret_area_base = self.sig.stack_arg_space;
+                            ctx.emit(M::gen_load_stack(
+                                StackAMode::SPOffset(offset + ret_area_base, ty),
+                                *into_reg,
+                                ty,
+                            ));
+                        }
+                    }
                 }
             }
-            &ABIArg::Stack { offset, ty, .. } => {
-                let ret_area_base = self.sig.stack_arg_space;
-                for insn in gen_load_stack_multi::<M>(
-                    StackAMode::SPOffset(offset + ret_area_base, ty),
-                    into_regs,
-                    ty,
-                ) {
-                    ctx.emit(insn);
-                }
-            }
-            &ABIArg::StructArg { .. } => panic!("Unexpected StructArg location for return value"),
+            &ABIArg::StructArg { .. } => {
+                panic!("StructArg not supported in return position");
+            }
         }
     }


@@ -3,6 +3,7 @@
 use super::{InsnOutput, LowerCtx, VCodeInst, ValueRegs};
 use crate::ir::Type;
 use regalloc::{Reg, Writable};
+use std::ops::{Add, BitAnd, Not, Sub};

 /// Returns the size (in bits) of a given type.
 pub fn ty_bits(ty: Type) -> usize {

@@ -26,3 +27,17 @@ pub(crate) fn get_output_reg<I: VCodeInst, C: LowerCtx<I = I>>(
 ) -> ValueRegs<Writable<Reg>> {
     ctx.get_output(spec.insn, spec.output)
 }
+
+/// Align a size up to a power-of-two alignment.
+pub(crate) fn align_to<N>(x: N, alignment: N) -> N
+where
+    N: Not<Output = N>
+        + BitAnd<N, Output = N>
+        + Add<N, Output = N>
+        + Sub<N, Output = N>
+        + From<u8>
+        + Copy,
+{
+    let alignment_mask = alignment - 1.into();
+    (x + alignment_mask) & !alignment_mask
+}
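A quick sanity check of the helper (hypothetical asserts; `alignment` must be a power of two, as the call sites debug_assert):

    assert_eq!(align_to(0u32, 16), 0);
    assert_eq!(align_to(9u32, 8), 16);
    assert_eq!(align_to(40u32, 16), 48); // the clobber-area rounding seen earlier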


@@ -175,11 +175,13 @@ impl<R: Clone + Copy + Debug + PartialEq + Eq + InvalidSentinel> ValueRegs<R> {
 }

 /// Create a writable ValueRegs.
+#[allow(dead_code)]
 pub(crate) fn writable_value_regs(regs: ValueRegs<Reg>) -> ValueRegs<Writable<Reg>> {
     regs.map(|r| Writable::from_reg(r))
 }

 /// Strip a writable ValueRegs down to a readonly ValueRegs.
+#[allow(dead_code)]
 pub(crate) fn non_writable_value_regs(regs: ValueRegs<Writable<Reg>>) -> ValueRegs<Reg> {
     regs.map(|r| r.to_reg())
 }


@@ -398,6 +398,7 @@ use_pinned_reg_as_heap_base = false
enable_simd = false
enable_atomics = true
enable_safepoints = false
+enable_llvm_abi_extensions = false
emit_all_ones_funcaddrs = false
enable_probestack = true
probestack_func_adjusts_sp = false
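
For embedders, a minimal sketch (not part of the diff) of flipping this flag through Cranelift's settings builder; `Configurable::set` is the standard way to set a named flag, and error handling is elided for brevity:

    use cranelift_codegen::settings::{self, Configurable};

    fn flags_with_llvm_abi_extensions() -> settings::Flags {
        let mut builder = settings::builder();
        // Opt into LLVM-compatible ABI behavior, e.g. i128 args spread
        // across two GPRs under Windows fastcall.
        builder.set("enable_llvm_abi_extensions", "true").unwrap();
        settings::Flags::new(builder)
    }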


@@ -0,0 +1,299 @@
test compile
set enable_llvm_abi_extensions=true
target x86_64
feature "experimental_x64"
function %f0(i64, i64, i64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64):
return v0
}
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movq %rcx, %rax
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
function %f1(i64, i64, i64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64):
return v1
}
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movq %rdx, %rax
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
function %f2(i64, i64, i64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64):
return v2
}
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movq %r8, %rax
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
function %f3(i64, i64, i64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64):
return v3
}
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movq %r9, %rax
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
function %f4(i64, i64, f64, i64) -> f64 windows_fastcall {
block0(v0: i64, v1: i64, v2: f64, v3: i64):
return v2
}
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movaps %xmm2, %xmm0
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
function %f5(i64, i64, f64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: f64, v3: i64):
return v3
}
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movq %r9, %rax
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
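
The pattern across %f0..%f5 is the fastcall parameter convention: the first four arguments share a single sequence of slots, with integer args in rcx/rdx/r8/r9 and float args in xmm0..xmm3 by position (hence v2:f64 landing in xmm2 and v3:i64 in r9 above, unlike SysV's separate GPR and XMM sequences). A sketch of that mapping, not part of the diff:

    // Position-based fastcall parameter registers; None means a stack slot.
    fn fastcall_param_reg(idx: usize, is_float: bool) -> Option<&'static str> {
        let int_regs = ["rcx", "rdx", "r8", "r9"];
        let flt_regs = ["xmm0", "xmm1", "xmm2", "xmm3"];
        let regs = if is_float { &flt_regs } else { &int_regs };
        regs.get(idx).copied()
    }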
function %f6(i64, i64, i64, i64, i64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64):
return v5
}
;; This is truly odd (because of the regalloc ordering), but it works. Note
;; that we're spilling and using rsi, which is a callee-save in fastcall, because
;; the regalloc order is optimized for SysV. Also note that because we copy args
;; out of their input locations to separate vregs, we have a spurious load
;; from [rbp+48]. Ordinarily these moves are coalesced because the dest vreg
;; is allocated as a caller-save (volatile), but here again we allocate rsi
;; first and so have to spill it (and consequently don't coalesce).
;;
;; TODO(#2704): fix regalloc's register priority ordering!
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: subq $$16, %rsp
; nextln: movq %rsi, 0(%rsp)
; nextln: virtual_sp_offset_adjust 16
; nextln: movq 48(%rbp), %rsi
; nextln: movq 56(%rbp), %rsi
; nextln: movq %rsi, %rax
; nextln: movq 0(%rsp), %rsi
; nextln: addq $$16, %rsp
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
function %f7(i128, i64, i128, i128) -> i128 windows_fastcall {
block0(v0: i128, v1: i64, v2: i128, v3: i128):
return v3
}
;; Again, terrible regalloc behavior. The important part is that `v3` comes
;; from [rbp+56] and [rbp+64], i.e., the second and third non-shadow
;; stack slots.
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: subq $$16, %rsp
; nextln: movq %rsi, 0(%rsp)
; nextln: movq %rdi, 8(%rsp)
; nextln: virtual_sp_offset_adjust 16
; nextln: movq 48(%rbp), %rsi
; nextln: movq 56(%rbp), %rsi
; nextln: movq 64(%rbp), %rdi
; nextln: movq %rsi, %rax
; nextln: movq %rdi, %rdx
; nextln: movq 0(%rsp), %rsi
; nextln: movq 8(%rsp), %rdi
; nextln: addq $$16, %rsp
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
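
Those offsets follow from the standard fastcall frame layout after the `pushq %rbp; movq %rsp, %rbp` prologue: saved RBP at rbp+0, return address at rbp+8, the 32-byte shadow area at rbp+16..48, and stack arguments from rbp+48 upward. A sketch of the arithmetic, not from the diff:

    // Offset from RBP of the n-th (0-based) non-shadow stack-argument slot.
    fn fastcall_stack_arg_offset(n: i64) -> i64 {
        let saved_rbp_and_ret_addr = 16; // [rbp+0] = old RBP, [rbp+8] = return address
        let shadow_space = 32;           // four 8-byte home slots for rcx/rdx/r8/r9
        saved_rbp_and_ret_addr + shadow_space + 8 * n
    }
    // fastcall_stack_arg_offset(1) == 56 and fastcall_stack_arg_offset(2) == 64,
    // matching the two loads that materialize v3 in the checks above.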
function %f8(i64) -> i64 windows_fastcall {
sig0 = (i64, i64, f64, f64, i64, i64) -> i64 windows_fastcall
fn0 = %g sig0
block0(v0: i64):
v1 = fcvt_from_sint.f64 v0
v2 = call fn0(v0, v0, v1, v1, v0, v0)
return v2
}
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: subq $$16, %rsp
; nextln: movq %rsi, 0(%rsp)
; nextln: virtual_sp_offset_adjust 16
; nextln: movq %rcx, %rsi
; nextln: cvtsi2sd %rsi, %xmm3
; nextln: subq $$48, %rsp
; nextln: virtual_sp_offset_adjust 48
; nextln: movq %rsi, %rcx
; nextln: movq %rsi, %rdx
; nextln: movaps %xmm3, %xmm2
; nextln: movq %rsi, 32(%rsp)
; nextln: movq %rsi, 40(%rsp)
; nextln: load_ext_name %g+0, %rsi
; nextln: call *%rsi
; nextln: addq $$48, %rsp
; nextln: virtual_sp_offset_adjust -48
; nextln: movq 0(%rsp), %rsi
; nextln: addq $$16, %rsp
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
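
The `subq $$48, %rsp` in %f8's call sequence is the outgoing argument area: fastcall callers always reserve a 32-byte shadow area, and the two stack-passed i64 args land just above it at 32(%rsp) and 40(%rsp). A sketch of that sizing (ignoring the 16-byte stack-alignment rounding a real implementation also applies; here 48 is already aligned):

    fn fastcall_outgoing_area(num_stack_args: u64) -> u64 {
        let shadow = 32;             // always reserved, even for <= 4 args
        shadow + 8 * num_stack_args  // %f8: two stack args -> 48 bytes
    }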
function %f9(i64) -> f64 windows_fastcall {
block0(v0: i64):
v1 = load.f64 v0+0
v2 = load.f64 v0+8
v3 = load.f64 v0+16
v4 = load.f64 v0+24
v5 = load.f64 v0+32
v6 = load.f64 v0+40
v7 = load.f64 v0+48
v8 = load.f64 v0+56
v9 = load.f64 v0+64
v10 = load.f64 v0+72
v11 = load.f64 v0+80
v12 = load.f64 v0+88
v13 = load.f64 v0+96
v14 = load.f64 v0+104
v15 = load.f64 v0+112
v16 = load.f64 v0+120
v17 = load.f64 v0+128
v18 = load.f64 v0+136
v19 = load.f64 v0+144
v20 = load.f64 v0+152
v21 = fadd.f64 v1, v2
v22 = fadd.f64 v3, v4
v23 = fadd.f64 v5, v6
v24 = fadd.f64 v7, v8
v25 = fadd.f64 v9, v10
v26 = fadd.f64 v11, v12
v27 = fadd.f64 v13, v14
v28 = fadd.f64 v15, v16
v29 = fadd.f64 v17, v18
v30 = fadd.f64 v19, v20
v31 = fadd.f64 v21, v22
v32 = fadd.f64 v23, v24
v33 = fadd.f64 v25, v26
v34 = fadd.f64 v27, v28
v35 = fadd.f64 v29, v30
v36 = fadd.f64 v31, v32
v37 = fadd.f64 v33, v34
v38 = fadd.f64 v36, v37
v39 = fadd.f64 v38, v35
return v39
}
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: subq $$208, %rsp
; nextln: movdqu %xmm6, 0(%rsp)
; nextln: movdqu %xmm7, 16(%rsp)
; nextln: movdqu %xmm8, 32(%rsp)
; nextln: movdqu %xmm9, 48(%rsp)
; nextln: movdqu %xmm10, 64(%rsp)
; nextln: movdqu %xmm11, 80(%rsp)
; nextln: movdqu %xmm12, 96(%rsp)
; nextln: movdqu %xmm13, 112(%rsp)
; nextln: movdqu %xmm14, 128(%rsp)
; nextln: movdqu %xmm15, 144(%rsp)
; nextln: virtual_sp_offset_adjust 160
; nextln: movsd 0(%rcx), %xmm0
; nextln: movsd %xmm0, rsp(16 + virtual offset)
; nextln: movsd 8(%rcx), %xmm1
; nextln: movsd 16(%rcx), %xmm0
; nextln: movsd %xmm0, rsp(24 + virtual offset)
; nextln: movsd 24(%rcx), %xmm3
; nextln: movsd 32(%rcx), %xmm0
; nextln: movsd %xmm0, rsp(32 + virtual offset)
; nextln: movsd 40(%rcx), %xmm5
; nextln: movsd 48(%rcx), %xmm6
; nextln: movsd 56(%rcx), %xmm7
; nextln: movsd 64(%rcx), %xmm8
; nextln: movsd 72(%rcx), %xmm9
; nextln: movsd 80(%rcx), %xmm10
; nextln: movsd 88(%rcx), %xmm11
; nextln: movsd 96(%rcx), %xmm12
; nextln: movsd 104(%rcx), %xmm13
; nextln: movsd 112(%rcx), %xmm14
; nextln: movsd 120(%rcx), %xmm15
; nextln: movsd 128(%rcx), %xmm0
; nextln: movsd %xmm0, rsp(0 + virtual offset)
; nextln: movsd 136(%rcx), %xmm0
; nextln: movsd 144(%rcx), %xmm2
; nextln: movsd %xmm2, rsp(8 + virtual offset)
; nextln: movsd 152(%rcx), %xmm2
; nextln: nop len=0
; nextln: movsd rsp(16 + virtual offset), %xmm4
; nextln: addsd %xmm1, %xmm4
; nextln: movsd %xmm4, rsp(16 + virtual offset)
; nextln: movsd rsp(24 + virtual offset), %xmm1
; nextln: addsd %xmm3, %xmm1
; nextln: movsd rsp(32 + virtual offset), %xmm4
; nextln: addsd %xmm5, %xmm4
; nextln: addsd %xmm7, %xmm6
; nextln: addsd %xmm9, %xmm8
; nextln: addsd %xmm11, %xmm10
; nextln: addsd %xmm13, %xmm12
; nextln: addsd %xmm15, %xmm14
; nextln: movsd rsp(0 + virtual offset), %xmm3
; nextln: addsd %xmm0, %xmm3
; nextln: movsd rsp(8 + virtual offset), %xmm0
; nextln: addsd %xmm2, %xmm0
; nextln: movsd rsp(16 + virtual offset), %xmm2
; nextln: addsd %xmm1, %xmm2
; nextln: addsd %xmm6, %xmm4
; nextln: addsd %xmm10, %xmm8
; nextln: addsd %xmm14, %xmm12
; nextln: addsd %xmm0, %xmm3
; nextln: addsd %xmm4, %xmm2
; nextln: addsd %xmm12, %xmm8
; nextln: addsd %xmm8, %xmm2
; nextln: addsd %xmm3, %xmm2
; nextln: movaps %xmm2, %xmm0
; nextln: movdqu 0(%rsp), %xmm6
; nextln: movdqu 16(%rsp), %xmm7
; nextln: movdqu 32(%rsp), %xmm8
; nextln: movdqu 48(%rsp), %xmm9
; nextln: movdqu 64(%rsp), %xmm10
; nextln: movdqu 80(%rsp), %xmm11
; nextln: movdqu 96(%rsp), %xmm12
; nextln: movdqu 112(%rsp), %xmm13
; nextln: movdqu 128(%rsp), %xmm14
; nextln: movdqu 144(%rsp), %xmm15
; nextln: addq $$160, %rsp
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
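
The prologue and epilogue of %f9 show the other big fastcall difference from SysV: xmm6..xmm15 are callee-saved, so heavy FP register pressure forces ten 16-byte movdqu saves, 160 bytes in total, matching the `virtual_sp_offset_adjust 160` above. A sketch of the save-area math, not part of the diff:

    // Windows x64 callee-saved XMM registers and the resulting save-area size.
    const CALLEE_SAVED_XMM: [u8; 10] = [6, 7, 8, 9, 10, 11, 12, 13, 14, 15];

    fn xmm_save_area_bytes() -> usize {
        CALLEE_SAVED_XMM.len() * 16 // full 16-byte saves via movdqu -> 160 bytes
    }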


@@ -1,4 +1,5 @@
test compile
+set enable_llvm_abi_extensions=true
target x86_64
feature "experimental_x64"
@@ -738,17 +739,17 @@ block0(v0: i128, v1: i128, v2: i64, v3: i128, v4: i128, v5: i128):
v11 = iadd.i128 v9, v10
return v11
-; check: movq %rsp, %rbp
+; check: pushq %rbp
+; nextln: movq %rsp, %rbp
; nextln: subq $$16, %rsp
; nextln: movq %r12, 0(%rsp)
; nextln: movq %r13, 8(%rsp)
; nextln: virtual_sp_offset_adjust 16
-; nextln: movq 16(%rbp), %r9
-; nextln: movq 24(%rbp), %r10
-; nextln: movq 32(%rbp), %r12
-; nextln: movq 40(%rbp), %r11
-; nextln: movq 48(%rbp), %rax
-; nextln: movq 56(%rbp), %r13
+; nextln: movq 16(%rbp), %r10
+; nextln: movq 24(%rbp), %r12
+; nextln: movq 32(%rbp), %r11
+; nextln: movq 40(%rbp), %rax
+; nextln: movq 48(%rbp), %r13
; nextln: addq %rdx, %rdi
; nextln: adcq %rcx, %rsi
; nextln: xorq %rcx, %rcx
@@ -786,10 +787,10 @@ block0(v0: i128):
; nextln: movq %r10, 16(%rsi)
; nextln: movq %r11, 24(%rsi)
; nextln: movq %r12, 32(%rsi)
-; nextln: movq %r13, 48(%rsi)
-; nextln: movq %r14, 56(%rsi)
-; nextln: movq %rdi, 64(%rsi)
-; nextln: movq %rbx, 72(%rsi)
+; nextln: movq %r13, 40(%rsi)
+; nextln: movq %r14, 48(%rsi)
+; nextln: movq %rdi, 56(%rsi)
+; nextln: movq %rbx, 64(%rsi)
}


@@ -1,4 +1,5 @@
test compile
+set enable_llvm_abi_extensions=true
target x86_64
feature "experimental_x64"