Handle spilling i128 arguments into the stack in aarch64
This commit is contained in:
@@ -187,7 +187,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
let is_baldrdash = call_conv.extends_baldrdash();
|
||||
let has_baldrdash_tls = call_conv == isa::CallConv::Baldrdash2020;
|
||||
|
||||
// See AArch64 ABI (https://c9x.me/compile/bib/abi-arm64.pdf), sections 5.4.
|
||||
// See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#64parameter-passing), sections 6.4.
|
||||
//
|
||||
// MacOS aarch64 is slightly different, see also
|
||||
// https://developer.apple.com/documentation/xcode/writing_arm64_code_for_apple_platforms.
|
||||
@@ -265,7 +265,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
param.value_type
|
||||
);
|
||||
|
||||
let (rcs, _) = Inst::rc_for_type(param.value_type)?;
|
||||
let (rcs, reg_types) = Inst::rc_for_type(param.value_type)?;
|
||||
|
||||
if let Some(param) = try_fill_baldrdash_reg(call_conv, param) {
|
||||
assert!(rcs[0] == RegClass::I64);
|
||||
@@ -288,7 +288,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
|
||||
// Handle multi register params
|
||||
//
|
||||
// See AArch64 ABI (https://c9x.me/compile/bib/abi-arm64.pdf), (Section 5.4 Stage C).
|
||||
// See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#642parameter-passing-rules), (Section 6.4.2 Stage C).
|
||||
//
|
||||
// For arguments with alignment of 16 we round up the register number
|
||||
// to the next even value. So we can never allocate for example an i128
|
||||
@@ -301,7 +301,11 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
// restriction of passing the lower half in Xn and the upper half in Xn+1
|
||||
// (Stage C.9)
|
||||
//
|
||||
// For examples of how llvm handles this: https://godbolt.org/z/bhd3vvEfh
|
||||
// For examples of how LLVM handles this: https://godbolt.org/z/bhd3vvEfh
|
||||
//
|
||||
// On the Apple ABI it is unspecified if we can spill half the value into the stack
|
||||
// i.e. load the lower half into x7 and the upper half into the stack
|
||||
// LLVM does not seem to do this, so we are going to replicate that behaviour
|
||||
let is_multi_reg = rcs.len() >= 2;
|
||||
if is_multi_reg {
|
||||
assert!(
|
||||
@@ -348,10 +352,8 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
remaining_reg_vals -= 2;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Single Register parameters
|
||||
if !is_multi_reg {
|
||||
} else {
|
||||
// Single Register parameters
|
||||
let rc = rcs[0];
|
||||
let next_reg = match rc {
|
||||
RegClass::I64 => &mut next_xreg,
|
||||
@@ -400,12 +402,28 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
debug_assert!(size.is_power_of_two());
|
||||
next_stack = align_to(next_stack, size);
|
||||
|
||||
ret.push(ABIArg::stack(
|
||||
next_stack as i64,
|
||||
param.value_type,
|
||||
param.extension,
|
||||
param.purpose,
|
||||
));
|
||||
let slots = reg_types
|
||||
.iter()
|
||||
.copied()
|
||||
// Build the stack locations from each slot
|
||||
.scan(next_stack, |next_stack, ty| {
|
||||
let slot_offset = *next_stack as i64;
|
||||
*next_stack += (ty_bits(ty) / 8) as u64;
|
||||
|
||||
Some((ty, slot_offset))
|
||||
})
|
||||
.map(|(ty, offset)| ABIArgSlot::Stack {
|
||||
offset,
|
||||
ty,
|
||||
extension: param.extension,
|
||||
})
|
||||
.collect();
|
||||
|
||||
ret.push(ABIArg::Slots {
|
||||
slots,
|
||||
purpose: param.purpose,
|
||||
});
|
||||
|
||||
next_stack += size;
|
||||
}
|
||||
|
||||
|
||||
@@ -158,18 +158,18 @@ impl NarrowValueMode {
|
||||
}
|
||||
}
|
||||
|
||||
/// Emits instruction(s) to generate the given 64-bit constant value into a newly-allocated
|
||||
/// temporary register, returning that register.
|
||||
fn generate_constant<C: LowerCtx<I = Inst>>(ctx: &mut C, ty: Type, c: u64) -> ValueRegs<Reg> {
|
||||
/// Emits instruction(s) to generate the given constant value into newly-allocated
|
||||
/// temporary registers, returning these registers.
|
||||
fn generate_constant<C: LowerCtx<I = Inst>>(ctx: &mut C, ty: Type, c: u128) -> ValueRegs<Reg> {
|
||||
let from_bits = ty_bits(ty);
|
||||
let masked = if from_bits < 64 {
|
||||
c & ((1u64 << from_bits) - 1)
|
||||
let masked = if from_bits < 128 {
|
||||
c & ((1u128 << from_bits) - 1)
|
||||
} else {
|
||||
c
|
||||
};
|
||||
|
||||
let cst_copy = ctx.alloc_tmp(ty);
|
||||
for inst in Inst::gen_constant(cst_copy, masked as u128, ty, |ty| {
|
||||
for inst in Inst::gen_constant(cst_copy, masked, ty, |ty| {
|
||||
ctx.alloc_tmp(ty).only_reg().unwrap()
|
||||
})
|
||||
.into_iter()
|
||||
@@ -181,7 +181,7 @@ fn generate_constant<C: LowerCtx<I = Inst>>(ctx: &mut C, ty: Type, c: u64) -> Va
|
||||
|
||||
/// Extends a register according to `narrow_mode`.
|
||||
/// If extended, the value is always extended to 64 bits, for simplicity.
|
||||
fn narrow_reg<C: LowerCtx<I = Inst>>(
|
||||
fn extend_reg<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
ty: Type,
|
||||
in_reg: Reg,
|
||||
@@ -252,6 +252,26 @@ fn narrow_reg<C: LowerCtx<I = Inst>>(
|
||||
}
|
||||
}
|
||||
|
||||
/// Lower an instruction input to multiple regs, also returning the input's type and whether the input was a constant
|
||||
fn lower_input_to_regs<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
input: InsnInput,
|
||||
) -> (ValueRegs<Reg>, Type, bool) {
|
||||
debug!("lower_input_to_regs: input {:?}", input);
|
||||
let ty = ctx.input_ty(input.insn, input.input);
|
||||
let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
|
||||
let is_const = inputs.constant.is_some();
|
||||
|
||||
let in_regs = if let Some(c) = inputs.constant {
|
||||
// Generate constants fresh at each use to minimize long-range register pressure.
|
||||
generate_constant(ctx, ty, c as u128)
|
||||
} else {
|
||||
ctx.put_input_in_regs(input.insn, input.input)
|
||||
};
|
||||
|
||||
(in_regs, ty, is_const)
|
||||
}
|
||||
|
||||
/// Lower an instruction input to a register
|
||||
///
|
||||
/// The given register will be extended appropriately, according to
|
||||
@@ -262,17 +282,12 @@ pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
|
||||
input: InsnInput,
|
||||
narrow_mode: NarrowValueMode,
|
||||
) -> Reg {
|
||||
let reg = put_input_in_regs(ctx, input)
|
||||
let (in_regs, ty, is_const) = lower_input_to_regs(ctx, input);
|
||||
let reg = in_regs
|
||||
.only_reg()
|
||||
.expect("Multi-register value not expected");
|
||||
|
||||
let is_const = ctx
|
||||
.get_input_as_source_or_const(input.insn, input.input)
|
||||
.constant
|
||||
.is_some();
|
||||
|
||||
let ty = ctx.input_ty(input.insn, input.input);
|
||||
narrow_reg(ctx, ty, reg, is_const, narrow_mode)
|
||||
extend_reg(ctx, ty, reg, is_const, narrow_mode)
|
||||
}
|
||||
|
||||
/// Lower an instruction input to multiple regs
|
||||
@@ -280,17 +295,7 @@ pub(crate) fn put_input_in_regs<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
input: InsnInput,
|
||||
) -> ValueRegs<Reg> {
|
||||
debug!("put_input_in_reg: input {:?}", input);
|
||||
let ty = ctx.input_ty(input.insn, input.input);
|
||||
let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
|
||||
|
||||
let in_regs = if let Some(c) = inputs.constant {
|
||||
// Generate constants fresh at each use to minimize long-range register pressure.
|
||||
generate_constant(ctx, ty, c)
|
||||
} else {
|
||||
ctx.put_input_in_regs(input.insn, input.input)
|
||||
};
|
||||
|
||||
let (in_regs, _, _) = lower_input_to_regs(ctx, input);
|
||||
in_regs
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user