Merge pull request #2892 from afonso360/aarch64-multireg-args
Handle i128 arguments in the aarch64 ABI
This commit is contained in:
@@ -183,10 +183,11 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
args_or_rets: ArgsOrRets,
|
args_or_rets: ArgsOrRets,
|
||||||
add_ret_area_ptr: bool,
|
add_ret_area_ptr: bool,
|
||||||
) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
|
) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
|
||||||
|
let is_apple_cc = call_conv == isa::CallConv::AppleAarch64;
|
||||||
let is_baldrdash = call_conv.extends_baldrdash();
|
let is_baldrdash = call_conv.extends_baldrdash();
|
||||||
let has_baldrdash_tls = call_conv == isa::CallConv::Baldrdash2020;
|
let has_baldrdash_tls = call_conv == isa::CallConv::Baldrdash2020;
|
||||||
|
|
||||||
// See AArch64 ABI (https://c9x.me/compile/bib/abi-arm64.pdf), sections 5.4.
|
// See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#64parameter-passing), sections 6.4.
|
||||||
//
|
//
|
||||||
// MacOS aarch64 is slightly different, see also
|
// MacOS aarch64 is slightly different, see also
|
||||||
// https://developer.apple.com/documentation/xcode/writing_arm64_code_for_apple_platforms.
|
// https://developer.apple.com/documentation/xcode/writing_arm64_code_for_apple_platforms.
|
||||||
@@ -194,8 +195,6 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
// following ways:
|
// following ways:
|
||||||
// - sign- and zero- extensions of data types less than 32 bits are not
|
// - sign- and zero- extensions of data types less than 32 bits are not
|
||||||
// implemented yet.
|
// implemented yet.
|
||||||
// - i128 arguments passing isn't implemented yet in the standard (non
|
|
||||||
// MacOS) aarch64 ABI.
|
|
||||||
// - we align the arguments stack space to a 16-bytes boundary, while
|
// - we align the arguments stack space to a 16-bytes boundary, while
|
||||||
// the MacOS allows aligning only on 8 bytes. In practice it means we're
|
// the MacOS allows aligning only on 8 bytes. In practice it means we're
|
||||||
// slightly overallocating when calling, which is fine, and doesn't
|
// slightly overallocating when calling, which is fine, and doesn't
|
||||||
@@ -265,20 +264,16 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
"Invalid type for AArch64: {:?}",
|
"Invalid type for AArch64: {:?}",
|
||||||
param.value_type
|
param.value_type
|
||||||
);
|
);
|
||||||
let (rcs, _) = Inst::rc_for_type(param.value_type).unwrap();
|
|
||||||
assert!(rcs.len() == 1, "Multi-reg values not supported yet");
|
|
||||||
let rc = rcs[0];
|
|
||||||
|
|
||||||
let next_reg = match rc {
|
let (rcs, reg_types) = Inst::rc_for_type(param.value_type)?;
|
||||||
RegClass::I64 => &mut next_xreg,
|
|
||||||
RegClass::V128 => &mut next_vreg,
|
|
||||||
_ => panic!("Invalid register class: {:?}", rc),
|
|
||||||
};
|
|
||||||
|
|
||||||
if let Some(param) = try_fill_baldrdash_reg(call_conv, param) {
|
if let Some(param) = try_fill_baldrdash_reg(call_conv, param) {
|
||||||
assert!(rc == RegClass::I64);
|
assert!(rcs[0] == RegClass::I64);
|
||||||
ret.push(param);
|
ret.push(param);
|
||||||
} else if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
|
||||||
let offset = next_stack as i64;
|
let offset = next_stack as i64;
|
||||||
let size = size as u64;
|
let size = size as u64;
|
||||||
assert!(size % 8 == 0, "StructArgument size is not properly aligned");
|
assert!(size % 8 == 0, "StructArgument size is not properly aligned");
|
||||||
@@ -288,7 +283,85 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
size,
|
size,
|
||||||
purpose: param.purpose,
|
purpose: param.purpose,
|
||||||
});
|
});
|
||||||
} else if *next_reg < max_per_class_reg_vals && remaining_reg_vals > 0 {
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle multi register params
|
||||||
|
//
|
||||||
|
// See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#642parameter-passing-rules), (Section 6.4.2 Stage C).
|
||||||
|
//
|
||||||
|
// For arguments with alignment of 16 we round up the register number
|
||||||
|
// to the next even value. So we can never allocate for example an i128
|
||||||
|
// to X1 and X2, we have to skip one register and do X2, X3
|
||||||
|
// (Stage C.8)
|
||||||
|
// Note: The Apple ABI deviates a bit here. They don't respect Stage C.8
|
||||||
|
// and will happily allocate an i128 to X1 and X2
|
||||||
|
//
|
||||||
|
// For integer types with alignment of 16 we also have the additional
|
||||||
|
// restriction of passing the lower half in Xn and the upper half in Xn+1
|
||||||
|
// (Stage C.9)
|
||||||
|
//
|
||||||
|
// For examples of how LLVM handles this: https://godbolt.org/z/bhd3vvEfh
|
||||||
|
//
|
||||||
|
// On the Apple ABI it is unspecified if we can spill half the value into the stack
|
||||||
|
// i.e. load the lower half into x7 and the upper half into the stack
|
||||||
|
// LLVM does not seem to do this, so we are going to replicate that behaviour
|
||||||
|
let is_multi_reg = rcs.len() >= 2;
|
||||||
|
if is_multi_reg {
|
||||||
|
assert!(
|
||||||
|
rcs.len() == 2,
|
||||||
|
"Unable to handle multi reg params with more than 2 regs"
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
rcs == &[RegClass::I64, RegClass::I64],
|
||||||
|
"Unable to handle non i64 regs"
|
||||||
|
);
|
||||||
|
|
||||||
|
let reg_class_space = max_per_class_reg_vals - next_xreg;
|
||||||
|
let reg_space = remaining_reg_vals;
|
||||||
|
|
||||||
|
if reg_space >= 2 && reg_class_space >= 2 {
|
||||||
|
// The aarch64 ABI does not allow us to start a split argument
|
||||||
|
// at an odd numbered register. So we need to skip one register
|
||||||
|
//
|
||||||
|
// TODO: The Fast ABI should probably not skip the register
|
||||||
|
if !is_apple_cc && next_xreg % 2 != 0 {
|
||||||
|
next_xreg += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
let lower_reg = xreg(next_xreg);
|
||||||
|
let upper_reg = xreg(next_xreg + 1);
|
||||||
|
|
||||||
|
ret.push(ABIArg::Slots {
|
||||||
|
slots: vec![
|
||||||
|
ABIArgSlot::Reg {
|
||||||
|
reg: lower_reg.to_real_reg(),
|
||||||
|
ty: param.value_type,
|
||||||
|
extension: param.extension,
|
||||||
|
},
|
||||||
|
ABIArgSlot::Reg {
|
||||||
|
reg: upper_reg.to_real_reg(),
|
||||||
|
ty: param.value_type,
|
||||||
|
extension: param.extension,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
purpose: param.purpose,
|
||||||
|
});
|
||||||
|
|
||||||
|
next_xreg += 2;
|
||||||
|
remaining_reg_vals -= 2;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Single Register parameters
|
||||||
|
let rc = rcs[0];
|
||||||
|
let next_reg = match rc {
|
||||||
|
RegClass::I64 => &mut next_xreg,
|
||||||
|
RegClass::V128 => &mut next_vreg,
|
||||||
|
_ => panic!("Invalid register class: {:?}", rc),
|
||||||
|
};
|
||||||
|
|
||||||
|
if *next_reg < max_per_class_reg_vals && remaining_reg_vals > 0 {
|
||||||
let reg = match rc {
|
let reg = match rc {
|
||||||
RegClass::I64 => xreg(*next_reg),
|
RegClass::I64 => xreg(*next_reg),
|
||||||
RegClass::V128 => vreg(*next_reg),
|
RegClass::V128 => vreg(*next_reg),
|
||||||
@@ -302,11 +375,16 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
));
|
));
|
||||||
*next_reg += 1;
|
*next_reg += 1;
|
||||||
remaining_reg_vals -= 1;
|
remaining_reg_vals -= 1;
|
||||||
} else {
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Spill to the stack
|
||||||
|
|
||||||
// Compute the stack slot's size.
|
// Compute the stack slot's size.
|
||||||
let size = (ty_bits(param.value_type) / 8) as u64;
|
let size = (ty_bits(param.value_type) / 8) as u64;
|
||||||
|
|
||||||
let size = if call_conv == isa::CallConv::AppleAarch64
|
let size = if is_apple_cc
|
||||||
|| (call_conv.extends_wasmtime() && args_or_rets == ArgsOrRets::Rets)
|
|| (call_conv.extends_wasmtime() && args_or_rets == ArgsOrRets::Rets)
|
||||||
{
|
{
|
||||||
// MacOS aarch64 and Wasmtime allow stack slots with
|
// MacOS aarch64 and Wasmtime allow stack slots with
|
||||||
@@ -324,15 +402,30 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
debug_assert!(size.is_power_of_two());
|
debug_assert!(size.is_power_of_two());
|
||||||
next_stack = align_to(next_stack, size);
|
next_stack = align_to(next_stack, size);
|
||||||
|
|
||||||
ret.push(ABIArg::stack(
|
let slots = reg_types
|
||||||
next_stack as i64,
|
.iter()
|
||||||
param.value_type,
|
.copied()
|
||||||
param.extension,
|
// Build the stack locations from each slot
|
||||||
param.purpose,
|
.scan(next_stack, |next_stack, ty| {
|
||||||
));
|
let slot_offset = *next_stack as i64;
|
||||||
|
*next_stack += (ty_bits(ty) / 8) as u64;
|
||||||
|
|
||||||
|
Some((ty, slot_offset))
|
||||||
|
})
|
||||||
|
.map(|(ty, offset)| ABIArgSlot::Stack {
|
||||||
|
offset,
|
||||||
|
ty,
|
||||||
|
extension: param.extension,
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
ret.push(ABIArg::Slots {
|
||||||
|
slots,
|
||||||
|
purpose: param.purpose,
|
||||||
|
});
|
||||||
|
|
||||||
next_stack += size;
|
next_stack += size;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if args_or_rets == ArgsOrRets::Rets && is_baldrdash {
|
if args_or_rets == ArgsOrRets::Rets && is_baldrdash {
|
||||||
ret.reverse();
|
ret.reverse();
|
||||||
|
|||||||
@@ -158,42 +158,37 @@ impl NarrowValueMode {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Lower an instruction input to a reg.
|
/// Emits instruction(s) to generate the given constant value into newly-allocated
|
||||||
///
|
/// temporary registers, returning these registers.
|
||||||
/// The given register will be extended appropriately, according to
|
fn generate_constant<C: LowerCtx<I = Inst>>(ctx: &mut C, ty: Type, c: u128) -> ValueRegs<Reg> {
|
||||||
/// `narrow_mode` and the input's type. If extended, the value is
|
let from_bits = ty_bits(ty);
|
||||||
/// always extended to 64 bits, for simplicity.
|
let masked = if from_bits < 128 {
|
||||||
pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
|
c & ((1u128 << from_bits) - 1)
|
||||||
ctx: &mut C,
|
|
||||||
input: InsnInput,
|
|
||||||
narrow_mode: NarrowValueMode,
|
|
||||||
) -> Reg {
|
|
||||||
debug!("put_input_in_reg: input {:?}", input);
|
|
||||||
let ty = ctx.input_ty(input.insn, input.input);
|
|
||||||
let from_bits = ty_bits(ty) as u8;
|
|
||||||
let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
|
|
||||||
let in_reg = if let Some(c) = inputs.constant {
|
|
||||||
// Generate constants fresh at each use to minimize long-range register pressure.
|
|
||||||
let masked = if from_bits < 64 {
|
|
||||||
c & ((1u64 << from_bits) - 1)
|
|
||||||
} else {
|
} else {
|
||||||
c
|
c
|
||||||
};
|
};
|
||||||
let to_reg = ctx.alloc_tmp(ty).only_reg().unwrap();
|
|
||||||
for inst in Inst::gen_constant(ValueRegs::one(to_reg), masked as u128, ty, |ty| {
|
let cst_copy = ctx.alloc_tmp(ty);
|
||||||
|
for inst in Inst::gen_constant(cst_copy, masked, ty, |ty| {
|
||||||
ctx.alloc_tmp(ty).only_reg().unwrap()
|
ctx.alloc_tmp(ty).only_reg().unwrap()
|
||||||
})
|
})
|
||||||
.into_iter()
|
.into_iter()
|
||||||
{
|
{
|
||||||
ctx.emit(inst);
|
ctx.emit(inst);
|
||||||
}
|
}
|
||||||
to_reg.to_reg()
|
non_writable_value_regs(cst_copy)
|
||||||
} else {
|
}
|
||||||
ctx.put_input_in_regs(input.insn, input.input)
|
|
||||||
.only_reg()
|
|
||||||
.unwrap()
|
|
||||||
};
|
|
||||||
|
|
||||||
|
/// Extends a register according to `narrow_mode`.
|
||||||
|
/// If extended, the value is always extended to 64 bits, for simplicity.
|
||||||
|
fn extend_reg<C: LowerCtx<I = Inst>>(
|
||||||
|
ctx: &mut C,
|
||||||
|
ty: Type,
|
||||||
|
in_reg: Reg,
|
||||||
|
is_const: bool,
|
||||||
|
narrow_mode: NarrowValueMode,
|
||||||
|
) -> Reg {
|
||||||
|
let from_bits = ty_bits(ty) as u8;
|
||||||
match (narrow_mode, from_bits) {
|
match (narrow_mode, from_bits) {
|
||||||
(NarrowValueMode::None, _) => in_reg,
|
(NarrowValueMode::None, _) => in_reg,
|
||||||
(NarrowValueMode::ZeroExtend32, n) if n < 32 => {
|
(NarrowValueMode::ZeroExtend32, n) if n < 32 => {
|
||||||
@@ -221,7 +216,7 @@ pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
|
|||||||
(NarrowValueMode::ZeroExtend32, 32) | (NarrowValueMode::SignExtend32, 32) => in_reg,
|
(NarrowValueMode::ZeroExtend32, 32) | (NarrowValueMode::SignExtend32, 32) => in_reg,
|
||||||
|
|
||||||
(NarrowValueMode::ZeroExtend64, n) if n < 64 => {
|
(NarrowValueMode::ZeroExtend64, n) if n < 64 => {
|
||||||
if inputs.constant.is_some() {
|
if is_const {
|
||||||
// Constants are zero-extended to full 64-bit width on load already.
|
// Constants are zero-extended to full 64-bit width on load already.
|
||||||
in_reg
|
in_reg
|
||||||
} else {
|
} else {
|
||||||
@@ -257,6 +252,53 @@ pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Lowers an instruction input to multiple regs
|
||||||
|
fn lower_input_to_regs<C: LowerCtx<I = Inst>>(
|
||||||
|
ctx: &mut C,
|
||||||
|
input: InsnInput,
|
||||||
|
) -> (ValueRegs<Reg>, Type, bool) {
|
||||||
|
debug!("lower_input_to_regs: input {:?}", input);
|
||||||
|
let ty = ctx.input_ty(input.insn, input.input);
|
||||||
|
let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
|
||||||
|
let is_const = inputs.constant.is_some();
|
||||||
|
|
||||||
|
let in_regs = if let Some(c) = inputs.constant {
|
||||||
|
// Generate constants fresh at each use to minimize long-range register pressure.
|
||||||
|
generate_constant(ctx, ty, c as u128)
|
||||||
|
} else {
|
||||||
|
ctx.put_input_in_regs(input.insn, input.input)
|
||||||
|
};
|
||||||
|
|
||||||
|
(in_regs, ty, is_const)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lower an instruction input to a register
|
||||||
|
///
|
||||||
|
/// The given register will be extended appropriately, according to
|
||||||
|
/// `narrow_mode` and the input's type. If extended, the value is
|
||||||
|
/// always extended to 64 bits, for simplicity.
|
||||||
|
pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
|
||||||
|
ctx: &mut C,
|
||||||
|
input: InsnInput,
|
||||||
|
narrow_mode: NarrowValueMode,
|
||||||
|
) -> Reg {
|
||||||
|
let (in_regs, ty, is_const) = lower_input_to_regs(ctx, input);
|
||||||
|
let reg = in_regs
|
||||||
|
.only_reg()
|
||||||
|
.expect("Multi-register value not expected");
|
||||||
|
|
||||||
|
extend_reg(ctx, ty, reg, is_const, narrow_mode)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lower an instruction input to multiple regs
|
||||||
|
pub(crate) fn put_input_in_regs<C: LowerCtx<I = Inst>>(
|
||||||
|
ctx: &mut C,
|
||||||
|
input: InsnInput,
|
||||||
|
) -> ValueRegs<Reg> {
|
||||||
|
let (in_regs, _, _) = lower_input_to_regs(ctx, input);
|
||||||
|
in_regs
|
||||||
|
}
|
||||||
|
|
||||||
/// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
|
/// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
|
||||||
///
|
///
|
||||||
/// The `narrow_mode` flag indicates whether the consumer of this value needs
|
/// The `narrow_mode` flag indicates whether the consumer of this value needs
|
||||||
|
|||||||
@@ -1529,10 +1529,21 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
// N.B.: according to the AArch64 ABI, the top bits of a register
|
// N.B.: according to the AArch64 ABI, the top bits of a register
|
||||||
// (above the bits for the value's type) are undefined, so we
|
// (above the bits for the value's type) are undefined, so we
|
||||||
// need not extend the return values.
|
// need not extend the return values.
|
||||||
let reg = put_input_in_reg(ctx, *input, NarrowValueMode::None);
|
let src_regs = put_input_in_regs(ctx, *input);
|
||||||
let retval_reg = ctx.retval(i).only_reg().unwrap();
|
let retval_regs = ctx.retval(i);
|
||||||
|
|
||||||
|
assert_eq!(src_regs.len(), retval_regs.len());
|
||||||
let ty = ctx.input_ty(insn, i);
|
let ty = ctx.input_ty(insn, i);
|
||||||
ctx.emit(Inst::gen_move(retval_reg, reg, ty));
|
let (_, tys) = Inst::rc_for_type(ty)?;
|
||||||
|
|
||||||
|
src_regs
|
||||||
|
.regs()
|
||||||
|
.iter()
|
||||||
|
.zip(retval_regs.regs().iter())
|
||||||
|
.zip(tys.iter())
|
||||||
|
.for_each(|((&src, &dst), &ty)| {
|
||||||
|
ctx.emit(Inst::gen_move(dst, src, ty));
|
||||||
|
});
|
||||||
}
|
}
|
||||||
// N.B.: the Ret itself is generated by the ABI.
|
// N.B.: the Ret itself is generated by the ABI.
|
||||||
}
|
}
|
||||||
@@ -1710,13 +1721,13 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
assert!(inputs.len() == abi.num_args());
|
assert!(inputs.len() == abi.num_args());
|
||||||
for i in abi.get_copy_to_arg_order() {
|
for i in abi.get_copy_to_arg_order() {
|
||||||
let input = inputs[i];
|
let input = inputs[i];
|
||||||
let arg_reg = put_input_in_reg(ctx, input, NarrowValueMode::None);
|
let arg_regs = put_input_in_regs(ctx, input);
|
||||||
abi.emit_copy_regs_to_arg(ctx, i, ValueRegs::one(arg_reg));
|
abi.emit_copy_regs_to_arg(ctx, i, arg_regs);
|
||||||
}
|
}
|
||||||
abi.emit_call(ctx);
|
abi.emit_call(ctx);
|
||||||
for (i, output) in outputs.iter().enumerate() {
|
for (i, output) in outputs.iter().enumerate() {
|
||||||
let retval_reg = get_output_reg(ctx, *output).only_reg().unwrap();
|
let retval_regs = get_output_reg(ctx, *output);
|
||||||
abi.emit_copy_retval_to_regs(ctx, i, ValueRegs::one(retval_reg));
|
abi.emit_copy_retval_to_regs(ctx, i, retval_regs);
|
||||||
}
|
}
|
||||||
abi.emit_stack_post_adjust(ctx);
|
abi.emit_stack_post_adjust(ctx);
|
||||||
}
|
}
|
||||||
@@ -2263,7 +2274,39 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
panic!("Vector ops not implemented.");
|
panic!("Vector ops not implemented.");
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Isplit | Opcode::Iconcat => panic!("Vector ops not supported."),
|
Opcode::Isplit => {
|
||||||
|
assert_eq!(
|
||||||
|
ctx.input_ty(insn, 0),
|
||||||
|
I128,
|
||||||
|
"Isplit only implemented for i128's"
|
||||||
|
);
|
||||||
|
assert_eq!(ctx.output_ty(insn, 0), I64);
|
||||||
|
assert_eq!(ctx.output_ty(insn, 1), I64);
|
||||||
|
|
||||||
|
let src_regs = put_input_in_regs(ctx, inputs[0]);
|
||||||
|
let dst_lo = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
|
let dst_hi = get_output_reg(ctx, outputs[1]).only_reg().unwrap();
|
||||||
|
|
||||||
|
ctx.emit(Inst::gen_move(dst_lo, src_regs.regs()[0], I64));
|
||||||
|
ctx.emit(Inst::gen_move(dst_hi, src_regs.regs()[1], I64));
|
||||||
|
}
|
||||||
|
|
||||||
|
Opcode::Iconcat => {
|
||||||
|
assert_eq!(
|
||||||
|
ctx.output_ty(insn, 0),
|
||||||
|
I128,
|
||||||
|
"Iconcat only implemented for i128's"
|
||||||
|
);
|
||||||
|
assert_eq!(ctx.input_ty(insn, 0), I64);
|
||||||
|
assert_eq!(ctx.input_ty(insn, 1), I64);
|
||||||
|
|
||||||
|
let src_lo = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||||
|
let src_hi = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||||
|
let dst = get_output_reg(ctx, outputs[0]);
|
||||||
|
|
||||||
|
ctx.emit(Inst::gen_move(dst.regs()[0], src_lo, I64));
|
||||||
|
ctx.emit(Inst::gen_move(dst.regs()[1], src_hi, I64));
|
||||||
|
}
|
||||||
|
|
||||||
Opcode::Imax | Opcode::Umax | Opcode::Umin | Opcode::Imin => {
|
Opcode::Imax | Opcode::Umax | Opcode::Umin | Opcode::Imin => {
|
||||||
let alu_op = match op {
|
let alu_op = match op {
|
||||||
|
|||||||
@@ -250,3 +250,232 @@ block0:
|
|||||||
; nextln: add sp, sp, #32
|
; nextln: add sp, sp, #32
|
||||||
; nextln: ldp fp, lr, [sp], #16
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
; nextln: ret
|
; nextln: ret
|
||||||
|
|
||||||
|
|
||||||
|
; i128 tests
|
||||||
|
function %f11(i128, i64) -> i64 {
|
||||||
|
block0(v0: i128, v1: i64):
|
||||||
|
v2, v3 = isplit v0
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: mov x0, x1
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
|
||||||
|
function %f11_call(i64) -> i64 {
|
||||||
|
fn0 = %f11(i128, i64) -> i64
|
||||||
|
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = iconst.i64 42
|
||||||
|
v2 = iconcat v1, v0
|
||||||
|
v3 = call fn0(v2, v1)
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: mov x1, x0
|
||||||
|
; nextln: movz x0, #42
|
||||||
|
; nextln: movz x2, #42
|
||||||
|
; nextln: ldr x3, 8 ; b 12 ; data
|
||||||
|
; nextln: blr x3
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
|
||||||
|
; The AArch64 ABI requires that the i128 argument be aligned
|
||||||
|
; and to be passed in x2 and x3
|
||||||
|
function %f12(i64, i128) -> i64 {
|
||||||
|
block0(v0: i64, v1: i128):
|
||||||
|
v2, v3 = isplit v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: mov x0, x2
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
function %f12_call(i64) -> i64 {
|
||||||
|
fn0 = %f12(i64, i128) -> i64
|
||||||
|
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = iconst.i64 42
|
||||||
|
v2 = iconcat v0, v1
|
||||||
|
v3 = call fn0(v1, v2)
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: movz x3, #42
|
||||||
|
; nextln: mov x2, x0
|
||||||
|
; nextln: movz x0, #42
|
||||||
|
; nextln: ldr x1, 8 ; b 12 ; data
|
||||||
|
; nextln: blr x1
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
; The Apple AArch64 ABI allows the i128 argument to not be aligned
|
||||||
|
; and to be passed in x1 and x2
|
||||||
|
function %f13(i64, i128) -> i64 apple_aarch64 {
|
||||||
|
block0(v0: i64, v1: i128):
|
||||||
|
v2, v3 = isplit v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: mov x0, x1
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
|
||||||
|
function %f13_call(i64) -> i64 apple_aarch64 {
|
||||||
|
fn0 = %f13(i64, i128) -> i64 apple_aarch64
|
||||||
|
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = iconst.i64 42
|
||||||
|
v2 = iconcat v0, v1
|
||||||
|
v3 = call fn0(v1, v2)
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: movz x2, #42
|
||||||
|
; nextln: mov x1, x0
|
||||||
|
; nextln: movz x0, #42
|
||||||
|
; nextln: ldr x3, 8 ; b 12 ; data
|
||||||
|
; nextln: blr x3
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
; We only have 8 registers to pass data in
|
||||||
|
; make sure we spill the last argument even though there is one slot available
|
||||||
|
function %f14(i128, i128, i128, i64, i128) -> i128 {
|
||||||
|
block0(v0: i128, v1: i128, v2: i128, v3: i64, v4: i128):
|
||||||
|
return v4
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: ldur x0, [fp, #16]
|
||||||
|
; nextln: ldur x1, [fp, #24]
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %f14_call(i128, i64) -> i128 {
|
||||||
|
fn0 = %f14(i128, i128, i128, i64, i128) -> i128
|
||||||
|
|
||||||
|
block0(v0: i128, v1: i64):
|
||||||
|
v2 = call fn0(v0, v0, v0, v1, v0)
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
|
||||||
|
; TODO: Some codegen optimization possible here with x0,x1 moving to x7,x8 and then moving back
|
||||||
|
; nextln: mov x7, x0
|
||||||
|
; nextln: mov x8, x1
|
||||||
|
; nextln: mov x6, x2
|
||||||
|
; nextln: sub sp, sp, #16
|
||||||
|
; nextln: virtual_sp_offset_adjust 16
|
||||||
|
; nextln: mov x0, x7
|
||||||
|
; nextln: mov x1, x8
|
||||||
|
; nextln: mov x2, x7
|
||||||
|
; nextln: mov x3, x8
|
||||||
|
; nextln: mov x4, x7
|
||||||
|
; nextln: mov x5, x8
|
||||||
|
; nextln: stur x7, [sp]
|
||||||
|
; nextln: stur x8, [sp, #8]
|
||||||
|
|
||||||
|
; nextln: ldr x7, 8 ; b 12 ; data
|
||||||
|
; nextln: blr x7
|
||||||
|
; nextln: add sp, sp, #16
|
||||||
|
; nextln: virtual_sp_offset_adjust -16
|
||||||
|
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
; We have one register slot available (similar to %f14); however, Apple
|
||||||
|
; allows us to start i128 on non even numbered registers (x7 in this case).
|
||||||
|
;
|
||||||
|
; It is unspecified if we can split the i128 into x7 + the stack.
|
||||||
|
; In practice LLVM does not do this, so we are going to go with that.
|
||||||
|
function %f15(i128, i128, i128, i64, i128) -> i128 apple_aarch64{
|
||||||
|
block0(v0: i128, v1: i128, v2: i128, v3: i64, v4: i128):
|
||||||
|
return v4
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: ldur x0, [fp, #16]
|
||||||
|
; nextln: ldur x1, [fp, #24]
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %f15_call(i128, i64) -> i128 apple_aarch64 {
|
||||||
|
fn0 = %f15(i128, i128, i128, i64, i128) -> i128 apple_aarch64
|
||||||
|
|
||||||
|
block0(v0: i128, v1: i64):
|
||||||
|
v2 = call fn0(v0, v0, v0, v1, v0)
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
|
||||||
|
; nextln: mov x7, x0
|
||||||
|
; nextln: mov x8, x1
|
||||||
|
; nextln: mov x6, x2
|
||||||
|
; nextln: sub sp, sp, #16
|
||||||
|
; nextln: virtual_sp_offset_adjust 16
|
||||||
|
; nextln: mov x0, x7
|
||||||
|
; nextln: mov x1, x8
|
||||||
|
; nextln: mov x2, x7
|
||||||
|
; nextln: mov x3, x8
|
||||||
|
; nextln: mov x4, x7
|
||||||
|
; nextln: mov x5, x8
|
||||||
|
; nextln: stur x7, [sp]
|
||||||
|
; nextln: stur x8, [sp, #8]
|
||||||
|
|
||||||
|
; nextln: ldr x7, 8 ; b 12 ; data
|
||||||
|
; nextln: blr x7
|
||||||
|
; nextln: add sp, sp, #16
|
||||||
|
; nextln: virtual_sp_offset_adjust -16
|
||||||
|
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
|
||||||
|
function %f16() -> i32, i32 wasmtime_system_v {
|
||||||
|
block0:
|
||||||
|
v0 = iconst.i32 0
|
||||||
|
v1 = iconst.i32 1
|
||||||
|
return v0, v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: mov x1, x0
|
||||||
|
; nextln: movz x0, #0
|
||||||
|
; nextln: movz x2, #1
|
||||||
|
; nextln: stur w2, [x1]
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user