Merge pull request #2678 from cfallin/x64-fastcall
x86-64 Windows fastcall ABI support.
This commit is contained in:
@@ -78,41 +78,41 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt
|
||||
match &param.purpose {
|
||||
&ir::ArgumentPurpose::VMContext => {
|
||||
// This is SpiderMonkey's `WasmTlsReg`.
|
||||
Some(ABIArg::Reg {
|
||||
regs: ValueRegs::one(xreg(BALDRDASH_TLS_REG).to_real_reg()),
|
||||
ty: ir::types::I64,
|
||||
extension: param.extension,
|
||||
purpose: param.purpose,
|
||||
})
|
||||
Some(ABIArg::reg(
|
||||
xreg(BALDRDASH_TLS_REG).to_real_reg(),
|
||||
ir::types::I64,
|
||||
param.extension,
|
||||
param.purpose,
|
||||
))
|
||||
}
|
||||
&ir::ArgumentPurpose::SignatureId => {
|
||||
// This is SpiderMonkey's `WasmTableCallSigReg`.
|
||||
Some(ABIArg::Reg {
|
||||
regs: ValueRegs::one(xreg(BALDRDASH_SIG_REG).to_real_reg()),
|
||||
ty: ir::types::I64,
|
||||
extension: param.extension,
|
||||
purpose: param.purpose,
|
||||
})
|
||||
Some(ABIArg::reg(
|
||||
xreg(BALDRDASH_SIG_REG).to_real_reg(),
|
||||
ir::types::I64,
|
||||
param.extension,
|
||||
param.purpose,
|
||||
))
|
||||
}
|
||||
&ir::ArgumentPurpose::CalleeTLS => {
|
||||
// This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020.
|
||||
assert!(call_conv == isa::CallConv::Baldrdash2020);
|
||||
Some(ABIArg::Stack {
|
||||
offset: BALDRDASH_CALLEE_TLS_OFFSET,
|
||||
ty: ir::types::I64,
|
||||
extension: ir::ArgumentExtension::None,
|
||||
purpose: param.purpose,
|
||||
})
|
||||
Some(ABIArg::stack(
|
||||
BALDRDASH_CALLEE_TLS_OFFSET,
|
||||
ir::types::I64,
|
||||
ir::ArgumentExtension::None,
|
||||
param.purpose,
|
||||
))
|
||||
}
|
||||
&ir::ArgumentPurpose::CallerTLS => {
|
||||
// This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020.
|
||||
assert!(call_conv == isa::CallConv::Baldrdash2020);
|
||||
Some(ABIArg::Stack {
|
||||
offset: BALDRDASH_CALLER_TLS_OFFSET,
|
||||
ty: ir::types::I64,
|
||||
extension: ir::ArgumentExtension::None,
|
||||
purpose: param.purpose,
|
||||
})
|
||||
Some(ABIArg::stack(
|
||||
BALDRDASH_CALLER_TLS_OFFSET,
|
||||
ir::types::I64,
|
||||
ir::ArgumentExtension::None,
|
||||
param.purpose,
|
||||
))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
@@ -161,6 +161,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
|
||||
fn compute_arg_locs(
|
||||
call_conv: isa::CallConv,
|
||||
_flags: &settings::Flags,
|
||||
params: &[ir::AbiParam],
|
||||
args_or_rets: ArgsOrRets,
|
||||
add_ret_area_ptr: bool,
|
||||
@@ -253,12 +254,12 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
RegClass::V128 => vreg(*next_reg),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
ret.push(ABIArg::Reg {
|
||||
regs: ValueRegs::one(reg.to_real_reg()),
|
||||
ty: param.value_type,
|
||||
extension: param.extension,
|
||||
purpose: param.purpose,
|
||||
});
|
||||
ret.push(ABIArg::reg(
|
||||
reg.to_real_reg(),
|
||||
param.value_type,
|
||||
param.extension,
|
||||
param.purpose,
|
||||
));
|
||||
*next_reg += 1;
|
||||
remaining_reg_vals -= 1;
|
||||
} else {
|
||||
@@ -268,13 +269,13 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
let size = std::cmp::max(size, 8);
|
||||
// Align.
|
||||
debug_assert!(size.is_power_of_two());
|
||||
next_stack = (next_stack + size - 1) & !(size - 1);
|
||||
ret.push(ABIArg::Stack {
|
||||
offset: next_stack as i64,
|
||||
ty: param.value_type,
|
||||
extension: param.extension,
|
||||
purpose: param.purpose,
|
||||
});
|
||||
next_stack = align_to(next_stack, size);
|
||||
ret.push(ABIArg::stack(
|
||||
next_stack as i64,
|
||||
param.value_type,
|
||||
param.extension,
|
||||
param.purpose,
|
||||
));
|
||||
next_stack += size;
|
||||
}
|
||||
}
|
||||
@@ -286,19 +287,19 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
let extra_arg = if add_ret_area_ptr {
|
||||
debug_assert!(args_or_rets == ArgsOrRets::Args);
|
||||
if next_xreg < max_per_class_reg_vals && remaining_reg_vals > 0 {
|
||||
ret.push(ABIArg::Reg {
|
||||
regs: ValueRegs::one(xreg(next_xreg).to_real_reg()),
|
||||
ty: I64,
|
||||
extension: ir::ArgumentExtension::None,
|
||||
purpose: ir::ArgumentPurpose::Normal,
|
||||
});
|
||||
ret.push(ABIArg::reg(
|
||||
xreg(next_xreg).to_real_reg(),
|
||||
I64,
|
||||
ir::ArgumentExtension::None,
|
||||
ir::ArgumentPurpose::Normal,
|
||||
));
|
||||
} else {
|
||||
ret.push(ABIArg::Stack {
|
||||
offset: next_stack as i64,
|
||||
ty: I64,
|
||||
extension: ir::ArgumentExtension::None,
|
||||
purpose: ir::ArgumentPurpose::Normal,
|
||||
});
|
||||
ret.push(ABIArg::stack(
|
||||
next_stack as i64,
|
||||
I64,
|
||||
ir::ArgumentExtension::None,
|
||||
ir::ArgumentPurpose::Normal,
|
||||
));
|
||||
next_stack += 8;
|
||||
}
|
||||
Some(ret.len() - 1)
|
||||
@@ -306,7 +307,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
None
|
||||
};
|
||||
|
||||
next_stack = (next_stack + 15) & !15;
|
||||
next_stack = align_to(next_stack, 16);
|
||||
|
||||
// To avoid overflow issues, limit the arg/return size to something
|
||||
// reasonable -- here, 128 MB.
|
||||
|
||||
@@ -51,6 +51,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
|
||||
|
||||
fn compute_arg_locs(
|
||||
_call_conv: isa::CallConv,
|
||||
_flags: &settings::Flags,
|
||||
params: &[ir::AbiParam],
|
||||
args_or_rets: ArgsOrRets,
|
||||
add_ret_area_ptr: bool,
|
||||
@@ -81,12 +82,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
|
||||
if next_rreg < max_reg_val {
|
||||
let reg = rreg(next_rreg);
|
||||
|
||||
ret.push(ABIArg::Reg {
|
||||
regs: ValueRegs::one(reg.to_real_reg()),
|
||||
ty: param.value_type,
|
||||
extension: param.extension,
|
||||
purpose: param.purpose,
|
||||
});
|
||||
ret.push(ABIArg::reg(
|
||||
reg.to_real_reg(),
|
||||
param.value_type,
|
||||
param.extension,
|
||||
param.purpose,
|
||||
));
|
||||
next_rreg += 1;
|
||||
} else {
|
||||
// Arguments are stored on stack in reversed order.
|
||||
@@ -101,12 +102,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
|
||||
let extra_arg = if add_ret_area_ptr {
|
||||
debug_assert!(args_or_rets == ArgsOrRets::Args);
|
||||
if next_rreg < max_reg_val {
|
||||
ret.push(ABIArg::Reg {
|
||||
regs: ValueRegs::one(rreg(next_rreg).to_real_reg()),
|
||||
ty: I32,
|
||||
extension: ir::ArgumentExtension::None,
|
||||
purpose: ir::ArgumentPurpose::Normal,
|
||||
});
|
||||
ret.push(ABIArg::reg(
|
||||
rreg(next_rreg).to_real_reg(),
|
||||
I32,
|
||||
ir::ArgumentExtension::None,
|
||||
ir::ArgumentPurpose::Normal,
|
||||
));
|
||||
} else {
|
||||
stack_args.push((
|
||||
I32,
|
||||
@@ -124,12 +125,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
|
||||
let max_stack = next_stack;
|
||||
for (ty, ext, purpose) in stack_args.into_iter().rev() {
|
||||
next_stack -= 4;
|
||||
ret.push(ABIArg::Stack {
|
||||
offset: (max_stack - next_stack) as i64,
|
||||
ret.push(ABIArg::stack(
|
||||
(max_stack - next_stack) as i64,
|
||||
ty,
|
||||
extension: ext,
|
||||
ext,
|
||||
purpose,
|
||||
});
|
||||
));
|
||||
}
|
||||
assert_eq!(next_stack, 0);
|
||||
|
||||
|
||||
@@ -31,41 +31,41 @@ fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option<A
|
||||
match &param.purpose {
|
||||
&ir::ArgumentPurpose::VMContext => {
|
||||
// This is SpiderMonkey's `WasmTlsReg`.
|
||||
Some(ABIArg::Reg {
|
||||
regs: ValueRegs::one(regs::r14().to_real_reg()),
|
||||
ty: types::I64,
|
||||
extension: param.extension,
|
||||
purpose: param.purpose,
|
||||
})
|
||||
Some(ABIArg::reg(
|
||||
regs::r14().to_real_reg(),
|
||||
types::I64,
|
||||
param.extension,
|
||||
param.purpose,
|
||||
))
|
||||
}
|
||||
&ir::ArgumentPurpose::SignatureId => {
|
||||
// This is SpiderMonkey's `WasmTableCallSigReg`.
|
||||
Some(ABIArg::Reg {
|
||||
regs: ValueRegs::one(regs::r10().to_real_reg()),
|
||||
ty: types::I64,
|
||||
extension: param.extension,
|
||||
purpose: param.purpose,
|
||||
})
|
||||
Some(ABIArg::reg(
|
||||
regs::r10().to_real_reg(),
|
||||
types::I64,
|
||||
param.extension,
|
||||
param.purpose,
|
||||
))
|
||||
}
|
||||
&ir::ArgumentPurpose::CalleeTLS => {
|
||||
// This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020.
|
||||
assert!(call_conv == isa::CallConv::Baldrdash2020);
|
||||
Some(ABIArg::Stack {
|
||||
offset: BALDRDASH_CALLEE_TLS_OFFSET,
|
||||
ty: ir::types::I64,
|
||||
extension: ir::ArgumentExtension::None,
|
||||
purpose: param.purpose,
|
||||
})
|
||||
Some(ABIArg::stack(
|
||||
BALDRDASH_CALLEE_TLS_OFFSET,
|
||||
ir::types::I64,
|
||||
ir::ArgumentExtension::None,
|
||||
param.purpose,
|
||||
))
|
||||
}
|
||||
&ir::ArgumentPurpose::CallerTLS => {
|
||||
// This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020.
|
||||
assert!(call_conv == isa::CallConv::Baldrdash2020);
|
||||
Some(ABIArg::Stack {
|
||||
offset: BALDRDASH_CALLER_TLS_OFFSET,
|
||||
ty: ir::types::I64,
|
||||
extension: ir::ArgumentExtension::None,
|
||||
purpose: param.purpose,
|
||||
})
|
||||
Some(ABIArg::stack(
|
||||
BALDRDASH_CALLER_TLS_OFFSET,
|
||||
ir::types::I64,
|
||||
ir::ArgumentExtension::None,
|
||||
param.purpose,
|
||||
))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
@@ -97,18 +97,30 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
|
||||
fn compute_arg_locs(
|
||||
call_conv: isa::CallConv,
|
||||
flags: &settings::Flags,
|
||||
params: &[ir::AbiParam],
|
||||
args_or_rets: ArgsOrRets,
|
||||
add_ret_area_ptr: bool,
|
||||
) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
|
||||
let is_baldrdash = call_conv.extends_baldrdash();
|
||||
let is_fastcall = call_conv.extends_windows_fastcall();
|
||||
let has_baldrdash_tls = call_conv == isa::CallConv::Baldrdash2020;
|
||||
|
||||
let mut next_gpr = 0;
|
||||
let mut next_vreg = 0;
|
||||
let mut next_stack: u64 = 0;
|
||||
let mut next_param_idx = 0; // Fastcall cares about overall param index
|
||||
let mut ret = vec![];
|
||||
|
||||
if args_or_rets == ArgsOrRets::Args && is_fastcall {
|
||||
// Fastcall always reserves 32 bytes of shadow space corresponding to
|
||||
// the four initial in-arg parameters.
|
||||
//
|
||||
// (See:
|
||||
// https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-160)
|
||||
next_stack = 32;
|
||||
}
|
||||
|
||||
if args_or_rets == ArgsOrRets::Args && has_baldrdash_tls {
|
||||
// Baldrdash ABI-2020 always has two stack-arg slots reserved, for the callee and
|
||||
// caller TLS-register values, respectively.
|
||||
@@ -159,72 +171,92 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
}
|
||||
|
||||
// Find regclass(es) of the register(s) used to store a value of this type.
|
||||
let (rcs, _) = Inst::rc_for_type(param.value_type)?;
|
||||
let intreg = rcs[0] == RegClass::I64;
|
||||
let num_regs = rcs.len();
|
||||
assert!(num_regs <= 2);
|
||||
if num_regs == 2 {
|
||||
assert_eq!(rcs[0], rcs[1]);
|
||||
let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;
|
||||
|
||||
// Now assign ABIArgSlots for each register-sized part.
|
||||
//
|
||||
// Note that the handling of `i128` values is unique here:
|
||||
//
|
||||
// - If `enable_llvm_abi_extensions` is set in the flags, each
|
||||
// `i128` is split into two `i64`s and assigned exactly as if it
|
||||
// were two consecutive 64-bit args. This is consistent with LLVM's
|
||||
// behavior, and is needed for some uses of Cranelift (e.g., the
|
||||
// rustc backend).
|
||||
//
|
||||
// - Otherwise, both SysV and Fastcall specify behavior (use of
|
||||
// vector register, a register pair, or passing by reference
|
||||
// depending on the case), but for simplicity, we will just panic if
|
||||
// an i128 type appears in a signature and the LLVM extensions flag
|
||||
// is not set.
|
||||
//
|
||||
// For examples of how rustc compiles i128 args and return values on
|
||||
// both SysV and Fastcall platforms, see:
|
||||
// https://godbolt.org/z/PhG3ob
|
||||
|
||||
if param.value_type.bits() > 64
|
||||
&& !param.value_type.is_vector()
|
||||
&& !flags.enable_llvm_abi_extensions()
|
||||
{
|
||||
panic!(
|
||||
"i128 args/return values not supported unless LLVM ABI extensions are enabled"
|
||||
);
|
||||
}
|
||||
|
||||
let mut regs: SmallVec<[RealReg; 2]> = smallvec![];
|
||||
for j in 0..num_regs {
|
||||
let mut slots = vec![];
|
||||
for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) {
|
||||
let intreg = *rc == RegClass::I64;
|
||||
let nextreg = if intreg {
|
||||
match args_or_rets {
|
||||
ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr + j),
|
||||
ArgsOrRets::Args => {
|
||||
get_intreg_for_arg(&call_conv, next_gpr, next_param_idx)
|
||||
}
|
||||
ArgsOrRets::Rets => {
|
||||
get_intreg_for_retval_systemv(&call_conv, next_gpr + j, i + j)
|
||||
get_intreg_for_retval(&call_conv, next_gpr, next_param_idx)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
match args_or_rets {
|
||||
ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg + j),
|
||||
ArgsOrRets::Args => {
|
||||
get_fltreg_for_arg(&call_conv, next_vreg, next_param_idx)
|
||||
}
|
||||
ArgsOrRets::Rets => {
|
||||
get_fltreg_for_retval_systemv(&call_conv, next_vreg + j, i + j)
|
||||
get_fltreg_for_retval(&call_conv, next_vreg, next_param_idx)
|
||||
}
|
||||
}
|
||||
};
|
||||
next_param_idx += 1;
|
||||
if let Some(reg) = nextreg {
|
||||
regs.push(reg.to_real_reg());
|
||||
if intreg {
|
||||
next_gpr += 1;
|
||||
} else {
|
||||
next_vreg += 1;
|
||||
}
|
||||
slots.push(ABIArgSlot::Reg {
|
||||
reg: reg.to_real_reg(),
|
||||
ty: *reg_ty,
|
||||
extension: param.extension,
|
||||
});
|
||||
} else {
|
||||
regs.clear();
|
||||
break;
|
||||
// Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
|
||||
// stack alignment happens separately after all args.)
|
||||
let size = (reg_ty.bits() / 8) as u64;
|
||||
let size = std::cmp::max(size, 8);
|
||||
// Align.
|
||||
debug_assert!(size.is_power_of_two());
|
||||
next_stack = align_to(next_stack, size);
|
||||
slots.push(ABIArgSlot::Stack {
|
||||
offset: next_stack as i64,
|
||||
ty: *reg_ty,
|
||||
extension: param.extension,
|
||||
});
|
||||
next_stack += size;
|
||||
}
|
||||
}
|
||||
|
||||
if regs.len() > 0 {
|
||||
let regs = match num_regs {
|
||||
1 => ValueRegs::one(regs[0]),
|
||||
2 => ValueRegs::two(regs[0], regs[1]),
|
||||
_ => panic!("More than two registers unexpected"),
|
||||
};
|
||||
ret.push(ABIArg::Reg {
|
||||
regs,
|
||||
ty: param.value_type,
|
||||
extension: param.extension,
|
||||
purpose: param.purpose,
|
||||
});
|
||||
if intreg {
|
||||
next_gpr += num_regs;
|
||||
} else {
|
||||
next_vreg += num_regs;
|
||||
}
|
||||
} else {
|
||||
// Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
|
||||
// stack alignment happens separately after all args.)
|
||||
let size = (param.value_type.bits() / 8) as u64;
|
||||
let size = std::cmp::max(size, 8);
|
||||
// Align.
|
||||
debug_assert!(size.is_power_of_two());
|
||||
next_stack = (next_stack + size - 1) & !(size - 1);
|
||||
ret.push(ABIArg::Stack {
|
||||
offset: next_stack as i64,
|
||||
ty: param.value_type,
|
||||
extension: param.extension,
|
||||
purpose: param.purpose,
|
||||
});
|
||||
next_stack += size;
|
||||
}
|
||||
ret.push(ABIArg::Slots {
|
||||
slots,
|
||||
purpose: param.purpose,
|
||||
});
|
||||
}
|
||||
|
||||
if args_or_rets == ArgsOrRets::Rets && is_baldrdash {
|
||||
@@ -233,20 +265,20 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
|
||||
let extra_arg = if add_ret_area_ptr {
|
||||
debug_assert!(args_or_rets == ArgsOrRets::Args);
|
||||
if let Some(reg) = get_intreg_for_arg_systemv(&call_conv, next_gpr) {
|
||||
ret.push(ABIArg::Reg {
|
||||
regs: ValueRegs::one(reg.to_real_reg()),
|
||||
ty: types::I64,
|
||||
extension: ir::ArgumentExtension::None,
|
||||
purpose: ir::ArgumentPurpose::Normal,
|
||||
});
|
||||
if let Some(reg) = get_intreg_for_arg(&call_conv, next_gpr, next_param_idx) {
|
||||
ret.push(ABIArg::reg(
|
||||
reg.to_real_reg(),
|
||||
types::I64,
|
||||
ir::ArgumentExtension::None,
|
||||
ir::ArgumentPurpose::Normal,
|
||||
));
|
||||
} else {
|
||||
ret.push(ABIArg::Stack {
|
||||
offset: next_stack as i64,
|
||||
ty: types::I64,
|
||||
extension: ir::ArgumentExtension::None,
|
||||
purpose: ir::ArgumentPurpose::Normal,
|
||||
});
|
||||
ret.push(ABIArg::stack(
|
||||
next_stack as i64,
|
||||
types::I64,
|
||||
ir::ArgumentExtension::None,
|
||||
ir::ArgumentPurpose::Normal,
|
||||
));
|
||||
next_stack += 8;
|
||||
}
|
||||
Some(ret.len() - 1)
|
||||
@@ -254,7 +286,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
None
|
||||
};
|
||||
|
||||
next_stack = (next_stack + 15) & !15;
|
||||
next_stack = align_to(next_stack, 16);
|
||||
|
||||
// To avoid overflow issues, limit the arg/return size to something reasonable.
|
||||
if next_stack > STACK_ARG_RET_SIZE_LIMIT {
|
||||
@@ -452,10 +484,9 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
// registers (all XMM registers are caller-save) so we can compute the
|
||||
// total size of the needed stack space easily.
|
||||
let clobbered = get_callee_saves(&call_conv, clobbers);
|
||||
let clobbered_size = 8 * clobbered.len() as u32;
|
||||
let stack_size = clobbered_size + fixed_frame_storage_size;
|
||||
let stack_size = compute_clobber_size(&clobbered) + fixed_frame_storage_size;
|
||||
// Align to 16 bytes.
|
||||
let stack_size = (stack_size + 15) & !15;
|
||||
let stack_size = align_to(stack_size, 16);
|
||||
let clobbered_size = stack_size - fixed_frame_storage_size;
|
||||
// Adjust the stack pointer downward with one `sub rsp, IMM`
|
||||
// instruction.
|
||||
@@ -473,16 +504,23 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
let r_reg = reg.to_reg();
|
||||
match r_reg.get_class() {
|
||||
RegClass::I64 => {
|
||||
insts.push(Inst::mov_r_m(
|
||||
OperandSize::Size64,
|
||||
insts.push(Inst::store(
|
||||
types::I64,
|
||||
r_reg.to_reg(),
|
||||
Amode::imm_reg(cur_offset, regs::rsp()),
|
||||
));
|
||||
cur_offset += 8;
|
||||
}
|
||||
// No XMM regs are callee-save, so we do not need to implement
|
||||
// this.
|
||||
_ => unimplemented!(),
|
||||
RegClass::V128 => {
|
||||
cur_offset = align_to(cur_offset, 16);
|
||||
insts.push(Inst::store(
|
||||
types::I8X16,
|
||||
r_reg.to_reg(),
|
||||
Amode::imm_reg(cur_offset, regs::rsp()),
|
||||
));
|
||||
cur_offset += 16;
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -499,8 +537,8 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
let mut insts = SmallVec::new();
|
||||
|
||||
let clobbered = get_callee_saves(&call_conv, clobbers);
|
||||
let stack_size = 8 * clobbered.len() as u32;
|
||||
let stack_size = (stack_size + 15) & !15;
|
||||
let stack_size = compute_clobber_size(&clobbered);
|
||||
let stack_size = align_to(stack_size, 16);
|
||||
|
||||
// Restore regs by loading from offsets of RSP.
|
||||
let mut cur_offset = 0;
|
||||
@@ -514,7 +552,17 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
));
|
||||
cur_offset += 8;
|
||||
}
|
||||
_ => unimplemented!(),
|
||||
RegClass::V128 => {
|
||||
cur_offset = align_to(cur_offset, 16);
|
||||
insts.push(Inst::load(
|
||||
types::I8X16,
|
||||
Amode::imm_reg(cur_offset, regs::rsp()),
|
||||
Writable::from_reg(rreg.to_reg()),
|
||||
ExtKind::None,
|
||||
));
|
||||
cur_offset += 16;
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
// Adjust RSP back upward.
|
||||
@@ -592,14 +640,14 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
// Baldrdash should not use struct args.
|
||||
assert!(!call_conv.extends_baldrdash());
|
||||
let mut insts = SmallVec::new();
|
||||
let arg0 = get_intreg_for_arg_systemv(&call_conv, 0).unwrap();
|
||||
let arg1 = get_intreg_for_arg_systemv(&call_conv, 1).unwrap();
|
||||
let arg2 = get_intreg_for_arg_systemv(&call_conv, 2).unwrap();
|
||||
let arg0 = get_intreg_for_arg(&call_conv, 0, 0).unwrap();
|
||||
let arg1 = get_intreg_for_arg(&call_conv, 1, 1).unwrap();
|
||||
let arg2 = get_intreg_for_arg(&call_conv, 2, 2).unwrap();
|
||||
// We need a register to load the address of `memcpy()` below and we
|
||||
// don't have a lowering context to allocate a temp here; so just use a
|
||||
// register we know we are free to mutate as part of this sequence
|
||||
// (because it is clobbered by the call as per the ABI anyway).
|
||||
let memcpy_addr = get_intreg_for_arg_systemv(&call_conv, 3).unwrap();
|
||||
let memcpy_addr = get_intreg_for_arg(&call_conv, 3, 3).unwrap();
|
||||
insts.push(Inst::gen_move(Writable::from_reg(arg0), dst, I64));
|
||||
insts.push(Inst::gen_move(Writable::from_reg(arg1), src, I64));
|
||||
insts.extend(
|
||||
@@ -648,10 +696,9 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
|
||||
fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec<Writable<Reg>> {
|
||||
let mut caller_saved = vec![
|
||||
// Systemv calling convention:
|
||||
// - GPR: all except RBX, RBP, R12 to R15 (which are callee-saved).
|
||||
Writable::from_reg(regs::rsi()),
|
||||
Writable::from_reg(regs::rdi()),
|
||||
// intersection of Systemv and FastCall calling conventions:
|
||||
// - GPR: all except RDI, RSI, RBX, RBP, R12 to R15.
|
||||
// SysV adds RDI, RSI (FastCall makes these callee-saved).
|
||||
Writable::from_reg(regs::rax()),
|
||||
Writable::from_reg(regs::rcx()),
|
||||
Writable::from_reg(regs::rdx()),
|
||||
@@ -659,25 +706,30 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
Writable::from_reg(regs::r9()),
|
||||
Writable::from_reg(regs::r10()),
|
||||
Writable::from_reg(regs::r11()),
|
||||
// - XMM: all the registers!
|
||||
// - XMM: XMM0-5. SysV adds the rest (XMM6-XMM15).
|
||||
Writable::from_reg(regs::xmm0()),
|
||||
Writable::from_reg(regs::xmm1()),
|
||||
Writable::from_reg(regs::xmm2()),
|
||||
Writable::from_reg(regs::xmm3()),
|
||||
Writable::from_reg(regs::xmm4()),
|
||||
Writable::from_reg(regs::xmm5()),
|
||||
Writable::from_reg(regs::xmm6()),
|
||||
Writable::from_reg(regs::xmm7()),
|
||||
Writable::from_reg(regs::xmm8()),
|
||||
Writable::from_reg(regs::xmm9()),
|
||||
Writable::from_reg(regs::xmm10()),
|
||||
Writable::from_reg(regs::xmm11()),
|
||||
Writable::from_reg(regs::xmm12()),
|
||||
Writable::from_reg(regs::xmm13()),
|
||||
Writable::from_reg(regs::xmm14()),
|
||||
Writable::from_reg(regs::xmm15()),
|
||||
];
|
||||
|
||||
if !call_conv_of_callee.extends_windows_fastcall() {
|
||||
caller_saved.push(Writable::from_reg(regs::rsi()));
|
||||
caller_saved.push(Writable::from_reg(regs::rdi()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm6()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm7()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm8()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm9()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm10()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm11()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm12()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm13()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm14()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm15()));
|
||||
}
|
||||
|
||||
if call_conv_of_callee.extends_baldrdash() {
|
||||
caller_saved.push(Writable::from_reg(regs::r12()));
|
||||
caller_saved.push(Writable::from_reg(regs::r13()));
|
||||
@@ -739,49 +791,67 @@ impl From<StackAMode> for SyntheticAmode {
|
||||
}
|
||||
}
|
||||
|
||||
fn get_intreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
|
||||
match call_conv {
|
||||
fn get_intreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
|
||||
let is_fastcall = match call_conv {
|
||||
CallConv::Fast
|
||||
| CallConv::Cold
|
||||
| CallConv::SystemV
|
||||
| CallConv::BaldrdashSystemV
|
||||
| CallConv::Baldrdash2020 => {}
|
||||
_ => panic!("int args only supported for SysV calling convention"),
|
||||
| CallConv::Baldrdash2020 => false,
|
||||
CallConv::WindowsFastcall => true,
|
||||
_ => panic!("int args only supported for SysV or Fastcall calling convention"),
|
||||
};
|
||||
match idx {
|
||||
0 => Some(regs::rdi()),
|
||||
1 => Some(regs::rsi()),
|
||||
2 => Some(regs::rdx()),
|
||||
3 => Some(regs::rcx()),
|
||||
4 => Some(regs::r8()),
|
||||
5 => Some(regs::r9()),
|
||||
|
||||
// Fastcall counts by absolute argument number; SysV counts by argument of
|
||||
// this (integer) class.
|
||||
let i = if is_fastcall { arg_idx } else { idx };
|
||||
match (i, is_fastcall) {
|
||||
(0, false) => Some(regs::rdi()),
|
||||
(1, false) => Some(regs::rsi()),
|
||||
(2, false) => Some(regs::rdx()),
|
||||
(3, false) => Some(regs::rcx()),
|
||||
(4, false) => Some(regs::r8()),
|
||||
(5, false) => Some(regs::r9()),
|
||||
(0, true) => Some(regs::rcx()),
|
||||
(1, true) => Some(regs::rdx()),
|
||||
(2, true) => Some(regs::r8()),
|
||||
(3, true) => Some(regs::r9()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_fltreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
|
||||
match call_conv {
|
||||
fn get_fltreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
|
||||
let is_fastcall = match call_conv {
|
||||
CallConv::Fast
|
||||
| CallConv::Cold
|
||||
| CallConv::SystemV
|
||||
| CallConv::BaldrdashSystemV
|
||||
| CallConv::Baldrdash2020 => {}
|
||||
_ => panic!("float args only supported for SysV calling convention"),
|
||||
| CallConv::Baldrdash2020 => false,
|
||||
CallConv::WindowsFastcall => true,
|
||||
_ => panic!("float args only supported for SysV or Fastcall calling convention"),
|
||||
};
|
||||
match idx {
|
||||
0 => Some(regs::xmm0()),
|
||||
1 => Some(regs::xmm1()),
|
||||
2 => Some(regs::xmm2()),
|
||||
3 => Some(regs::xmm3()),
|
||||
4 => Some(regs::xmm4()),
|
||||
5 => Some(regs::xmm5()),
|
||||
6 => Some(regs::xmm6()),
|
||||
7 => Some(regs::xmm7()),
|
||||
|
||||
// Fastcall counts by absolute argument number; SysV counts by argument of
|
||||
// this (floating-point) class.
|
||||
let i = if is_fastcall { arg_idx } else { idx };
|
||||
match (i, is_fastcall) {
|
||||
(0, false) => Some(regs::xmm0()),
|
||||
(1, false) => Some(regs::xmm1()),
|
||||
(2, false) => Some(regs::xmm2()),
|
||||
(3, false) => Some(regs::xmm3()),
|
||||
(4, false) => Some(regs::xmm4()),
|
||||
(5, false) => Some(regs::xmm5()),
|
||||
(6, false) => Some(regs::xmm6()),
|
||||
(7, false) => Some(regs::xmm7()),
|
||||
(0, true) => Some(regs::xmm0()),
|
||||
(1, true) => Some(regs::xmm1()),
|
||||
(2, true) => Some(regs::xmm2()),
|
||||
(3, true) => Some(regs::xmm3()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_intreg_for_retval_systemv(
|
||||
fn get_intreg_for_retval(
|
||||
call_conv: &CallConv,
|
||||
intreg_idx: usize,
|
||||
retval_idx: usize,
|
||||
@@ -799,11 +869,16 @@ fn get_intreg_for_retval_systemv(
|
||||
None
|
||||
}
|
||||
}
|
||||
CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
|
||||
CallConv::WindowsFastcall => match intreg_idx {
|
||||
0 => Some(regs::rax()),
|
||||
1 => Some(regs::rdx()), // The Rust ABI for i128s needs this.
|
||||
_ => None,
|
||||
},
|
||||
CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_fltreg_for_retval_systemv(
|
||||
fn get_fltreg_for_retval(
|
||||
call_conv: &CallConv,
|
||||
fltreg_idx: usize,
|
||||
retval_idx: usize,
|
||||
@@ -821,7 +896,11 @@ fn get_fltreg_for_retval_systemv(
|
||||
None
|
||||
}
|
||||
}
|
||||
CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
|
||||
CallConv::WindowsFastcall => match fltreg_idx {
|
||||
0 => Some(regs::xmm0()),
|
||||
_ => None,
|
||||
},
|
||||
CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -854,6 +933,21 @@ fn is_callee_save_baldrdash(r: RealReg) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
// Reports whether the real register `r` is matched as callee-saved by this
// fastcall predicate. The decision is made from the register's class and its
// hardware encoding. Panics if `r` belongs to a class other than I64 or V128.
fn is_callee_save_fastcall(r: RealReg) -> bool {
|
||||
use regs::*;
|
||||
match r.get_class() {
|
||||
// Integer registers: RBX, RBP, RSI, RDI and R12 through R15 are callee-saved.
RegClass::I64 => match r.get_hw_encoding() as u8 {
|
||||
ENC_RBX | ENC_RBP | ENC_RSI | ENC_RDI | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true,
|
||||
_ => false,
|
||||
},
|
||||
// Vector registers: hardware encodings 6 through 15 are callee-saved.
RegClass::V128 => match r.get_hw_encoding() as u8 {
|
||||
6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 => true,
|
||||
_ => false,
|
||||
},
|
||||
_ => panic!("Unknown register class: {:?}", r.get_class()),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
|
||||
let mut regs: Vec<Writable<RealReg>> = match call_conv {
|
||||
CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => regs
|
||||
@@ -869,7 +963,11 @@ fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<
|
||||
.cloned()
|
||||
.filter(|r| is_callee_save_systemv(r.to_reg()))
|
||||
.collect(),
|
||||
CallConv::WindowsFastcall => todo!("windows fastcall"),
|
||||
CallConv::WindowsFastcall => regs
|
||||
.iter()
|
||||
.cloned()
|
||||
.filter(|r| is_callee_save_fastcall(r.to_reg()))
|
||||
.collect(),
|
||||
CallConv::Probestack => todo!("probestack?"),
|
||||
};
|
||||
// Sort registers for deterministic code output. We can do an unstable sort because the
|
||||
@@ -877,3 +975,20 @@ fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<
|
||||
regs.sort_unstable_by_key(|r| r.to_reg().get_index());
|
||||
regs
|
||||
}
|
||||
|
||||
// Walks `clobbers` in order and returns the accumulated byte count: 8 bytes
// for each I64-class register, and 16 bytes for each V128-class register
// after passing the running total through `align_to(_, 16)`. The result
// depends on the order of `clobbers`, since alignment is applied at the point
// each V128 register is encountered. Any other register class hits
// `unreachable!()`.
fn compute_clobber_size(clobbers: &Vec<Writable<RealReg>>) -> u32 {
|
||||
let mut clobbered_size = 0;
|
||||
for reg in clobbers {
|
||||
match reg.to_reg().get_class() {
|
||||
RegClass::I64 => {
|
||||
clobbered_size += 8;
|
||||
}
|
||||
RegClass::V128 => {
|
||||
// Align the running total before adding the 16-byte vector slot.
// NOTE(review): presumably `align_to` rounds up to a multiple of 16 — confirm against its definition.
clobbered_size = align_to(clobbered_size, 16);
|
||||
clobbered_size += 16;
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
clobbered_size
|
||||
}
|
||||
|
||||
@@ -456,6 +456,7 @@ pub(crate) enum InstructionSet {
|
||||
Popcnt,
|
||||
Lzcnt,
|
||||
BMI1,
|
||||
#[allow(dead_code)] // never constructed (yet).
|
||||
BMI2,
|
||||
}
|
||||
|
||||
|
||||
@@ -23,11 +23,20 @@ use regalloc::{
|
||||
};
|
||||
use std::string::String;
|
||||
|
||||
// Hardware encodings for a few registers.
|
||||
// Hardware encodings (note the special rax, rcx, rdx, rbx order).
|
||||
|
||||
pub const ENC_RAX: u8 = 0;
|
||||
pub const ENC_RCX: u8 = 1;
|
||||
pub const ENC_RDX: u8 = 2;
|
||||
pub const ENC_RBX: u8 = 3;
|
||||
pub const ENC_RSP: u8 = 4;
|
||||
pub const ENC_RBP: u8 = 5;
|
||||
pub const ENC_RSI: u8 = 6;
|
||||
pub const ENC_RDI: u8 = 7;
|
||||
pub const ENC_R8: u8 = 8;
|
||||
pub const ENC_R9: u8 = 9;
|
||||
pub const ENC_R10: u8 = 10;
|
||||
pub const ENC_R11: u8 = 11;
|
||||
pub const ENC_R12: u8 = 12;
|
||||
pub const ENC_R13: u8 = 13;
|
||||
pub const ENC_R14: u8 = 14;
|
||||
@@ -38,31 +47,31 @@ fn gpr(enc: u8, index: u8) -> Reg {
|
||||
}
|
||||
|
||||
pub(crate) fn rsi() -> Reg {
|
||||
gpr(6, 16)
|
||||
gpr(ENC_RSI, 16)
|
||||
}
|
||||
pub(crate) fn rdi() -> Reg {
|
||||
gpr(7, 17)
|
||||
gpr(ENC_RDI, 17)
|
||||
}
|
||||
pub(crate) fn rax() -> Reg {
|
||||
gpr(0, 18)
|
||||
gpr(ENC_RAX, 18)
|
||||
}
|
||||
pub(crate) fn rcx() -> Reg {
|
||||
gpr(1, 19)
|
||||
gpr(ENC_RCX, 19)
|
||||
}
|
||||
pub(crate) fn rdx() -> Reg {
|
||||
gpr(2, 20)
|
||||
gpr(ENC_RDX, 20)
|
||||
}
|
||||
pub(crate) fn r8() -> Reg {
|
||||
gpr(8, 21)
|
||||
gpr(ENC_R8, 21)
|
||||
}
|
||||
pub(crate) fn r9() -> Reg {
|
||||
gpr(9, 22)
|
||||
gpr(ENC_R9, 22)
|
||||
}
|
||||
pub(crate) fn r10() -> Reg {
|
||||
gpr(10, 23)
|
||||
gpr(ENC_R10, 23)
|
||||
}
|
||||
pub(crate) fn r11() -> Reg {
|
||||
gpr(11, 24)
|
||||
gpr(ENC_R11, 24)
|
||||
}
|
||||
pub(crate) fn r12() -> Reg {
|
||||
gpr(ENC_R12, 25)
|
||||
|
||||
Reference in New Issue
Block a user