aarch64: Add i128 load & store tests and refactor address calculation
The previous address calculation code had a bug: it tried to add offsets into a temporary register before that register had been defined, which caused the register allocator to complain.
This commit is contained in:
@@ -701,7 +701,7 @@ pub(crate) fn lower_pair_address<C: LowerCtx<I = Inst>>(
|
|||||||
// Collect addends through an arbitrary tree of 32-to-64-bit sign/zero
|
// Collect addends through an arbitrary tree of 32-to-64-bit sign/zero
|
||||||
// extends and addition ops. We update these as we consume address
|
// extends and addition ops. We update these as we consume address
|
||||||
// components, so they represent the remaining addends not yet handled.
|
// components, so they represent the remaining addends not yet handled.
|
||||||
let (addends64, addends32, args_offset) = collect_address_addends(ctx, roots);
|
let (mut addends64, mut addends32, args_offset) = collect_address_addends(ctx, roots);
|
||||||
let offset = args_offset + (offset as i64);
|
let offset = args_offset + (offset as i64);
|
||||||
|
|
||||||
trace!(
|
trace!(
|
||||||
@@ -713,41 +713,40 @@ pub(crate) fn lower_pair_address<C: LowerCtx<I = Inst>>(
|
|||||||
|
|
||||||
// Pairs basically only have reg + imm formats so we only have to worry about those
|
// Pairs basically only have reg + imm formats so we only have to worry about those
|
||||||
|
|
||||||
let imm7_offset = SImm7Scaled::maybe_from_i64(offset, I64);
|
let base_reg = if let Some(reg64) = addends64.pop() {
|
||||||
match (&addends64[..], &addends32[..], imm7_offset) {
|
reg64
|
||||||
(&[add64], &[], Some(offset)) => PairAMode::SignedOffset(add64, offset),
|
} else if let Some((reg32, extendop)) = addends32.pop() {
|
||||||
(&[], &[add32], Some(offset)) => {
|
let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
|
||||||
let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
|
let signed = match extendop {
|
||||||
let (reg, extendop) = add32;
|
ExtendOp::SXTW => true,
|
||||||
let signed = match extendop {
|
ExtendOp::UXTW => false,
|
||||||
ExtendOp::SXTW => true,
|
_ => unreachable!(),
|
||||||
ExtendOp::UXTW => false,
|
};
|
||||||
_ => unreachable!(),
|
ctx.emit(Inst::Extend {
|
||||||
};
|
rd: tmp,
|
||||||
ctx.emit(Inst::Extend {
|
rn: reg32,
|
||||||
rd: tmp,
|
signed,
|
||||||
rn: reg,
|
from_bits: 32,
|
||||||
signed,
|
to_bits: 64,
|
||||||
from_bits: 32,
|
});
|
||||||
to_bits: 64,
|
tmp.to_reg()
|
||||||
});
|
} else {
|
||||||
PairAMode::SignedOffset(tmp.to_reg(), offset)
|
zero_reg()
|
||||||
}
|
};
|
||||||
(&[], &[], Some(offset)) => PairAMode::SignedOffset(zero_reg(), offset),
|
|
||||||
|
|
||||||
(_, _, _) => {
|
let addr = ctx.alloc_tmp(I64).only_reg().unwrap();
|
||||||
// This is the general case, we just grab all addends and sum them into a register
|
ctx.emit(Inst::gen_move(addr, base_reg, I64));
|
||||||
let addr = ctx.alloc_tmp(I64).only_reg().unwrap();
|
|
||||||
lower_add_addends(ctx, addr, addends64, addends32);
|
|
||||||
|
|
||||||
let imm7 = imm7_offset.unwrap_or_else(|| {
|
// We have the base register, if we have any others, we need to add them
|
||||||
lower_add_immediate(ctx, addr, addr.to_reg(), offset);
|
lower_add_addends(ctx, addr, addends64, addends32);
|
||||||
SImm7Scaled::maybe_from_i64(0, I64).unwrap()
|
|
||||||
});
|
|
||||||
|
|
||||||
PairAMode::SignedOffset(addr.to_reg(), imm7)
|
// Figure out what offset we should emit
|
||||||
}
|
let imm7 = SImm7Scaled::maybe_from_i64(offset, I64).unwrap_or_else(|| {
|
||||||
}
|
lower_add_immediate(ctx, addr, addr.to_reg(), offset);
|
||||||
|
SImm7Scaled::maybe_from_i64(0, I64).unwrap()
|
||||||
|
});
|
||||||
|
|
||||||
|
PairAMode::SignedOffset(addr.to_reg(), imm7)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Lower the address of a load or store.
|
/// Lower the address of a load or store.
|
||||||
|
|||||||
@@ -386,3 +386,130 @@ block0(v0: i64, v1: i64, v2: i64):
|
|||||||
; nextln: ldrsh x0, [x0]
|
; nextln: ldrsh x0, [x0]
|
||||||
; nextln: ldp fp, lr, [sp], #16
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
; nextln: ret
|
; nextln: ret
|
||||||
|
|
||||||
|
|
||||||
|
function %i128(i64) -> i128 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = load.i128 v0
|
||||||
|
store.i128 v1, v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: mov x1, x0
|
||||||
|
; nextln: ldp x2, x1, [x1]
|
||||||
|
; nextln: stp x2, x1, [x0]
|
||||||
|
; nextln: mov x0, x2
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
|
||||||
|
function %i128_imm_offset(i64) -> i128 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = load.i128 v0+16
|
||||||
|
store.i128 v1, v0+16
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: mov x1, x0
|
||||||
|
; nextln: ldp x2, x1, [x1, #16]
|
||||||
|
; nextln: stp x2, x1, [x0, #16]
|
||||||
|
; nextln: mov x0, x2
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %i128_imm_offset_large(i64) -> i128 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = load.i128 v0+504
|
||||||
|
store.i128 v1, v0+504
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: mov x1, x0
|
||||||
|
; nextln: ldp x2, x1, [x1, #504]
|
||||||
|
; nextln: stp x2, x1, [x0, #504]
|
||||||
|
; nextln: mov x0, x2
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %i128_imm_offset_negative_large(i64) -> i128 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = load.i128 v0-512
|
||||||
|
store.i128 v1, v0-512
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: mov x1, x0
|
||||||
|
; nextln: ldp x2, x1, [x1, #-512]
|
||||||
|
; nextln: stp x2, x1, [x0, #-512]
|
||||||
|
; nextln: mov x0, x2
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
|
||||||
|
function %i128_add_offset(i64) -> i128 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = iadd_imm v0, 32
|
||||||
|
v2 = load.i128 v1
|
||||||
|
store.i128 v2, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: mov x1, x0
|
||||||
|
; nextln: ldp x2, x1, [x1, #32]
|
||||||
|
; nextln: stp x2, x1, [x0, #32]
|
||||||
|
; nextln: mov x0, x2
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
|
||||||
|
function %i128_32bit_sextend_simple(i32) -> i128 {
|
||||||
|
block0(v0: i32):
|
||||||
|
v1 = sextend.i64 v0
|
||||||
|
v2 = load.i128 v1
|
||||||
|
store.i128 v2, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; TODO: The load and the store use the same address, so the second sxtw is redundant; we should be able to deduplicate it
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: sxtw x1, w0
|
||||||
|
; nextln: ldp x2, x1, [x1]
|
||||||
|
; nextln: sxtw x0, w0
|
||||||
|
; nextln: stp x2, x1, [x0]
|
||||||
|
; nextln: mov x0, x2
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
|
||||||
|
function %i128_32bit_sextend(i64, i32) -> i128 {
|
||||||
|
block0(v0: i64, v1: i32):
|
||||||
|
v2 = sextend.i64 v1
|
||||||
|
v3 = iadd.i64 v0, v2
|
||||||
|
v4 = iadd_imm.i64 v3, 24
|
||||||
|
v5 = load.i128 v4
|
||||||
|
store.i128 v5, v4
|
||||||
|
return v5
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: mov x2, x0
|
||||||
|
; nextln: add x2, x2, x1, SXTW
|
||||||
|
; nextln: ldp x3, x2, [x2, #24]
|
||||||
|
; nextln: add x0, x0, x1, SXTW
|
||||||
|
; nextln: stp x3, x2, [x0, #24]
|
||||||
|
; nextln: mov x0, x3
|
||||||
|
; nextln: mov x1, x2
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|||||||
@@ -89,3 +89,48 @@ block0(v0: i64,v1: i64):
|
|||||||
; run: %i128_stack_store_load_big_offset(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == true
|
; run: %i128_stack_store_load_big_offset(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == true
|
||||||
; run: %i128_stack_store_load_big_offset(0x06060606_06060606, 0xA00A00A0_0A00A00A) == true
|
; run: %i128_stack_store_load_big_offset(0x06060606_06060606, 0xA00A00A0_0A00A00A) == true
|
||||||
; run: %i128_stack_store_load_big_offset(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true
|
; run: %i128_stack_store_load_big_offset(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
function %i128_store_load(i64, i64) -> b1 {
|
||||||
|
ss0 = explicit_slot 16
|
||||||
|
|
||||||
|
block0(v0: i64,v1: i64):
|
||||||
|
v2 = iconcat v0, v1
|
||||||
|
|
||||||
|
v3 = stack_addr.i64 ss0
|
||||||
|
store.i128 v2, v3
|
||||||
|
v4 = load.i128 v3
|
||||||
|
|
||||||
|
v5 = icmp.i128 eq v2, v4
|
||||||
|
return v5
|
||||||
|
}
|
||||||
|
; run: %i128_store_load(0, 0) == true
|
||||||
|
; run: %i128_store_load(-1, -1) == true
|
||||||
|
; run: %i128_store_load(-1, 0) == true
|
||||||
|
; run: %i128_store_load(0, -1) == true
|
||||||
|
; run: %i128_store_load(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == true
|
||||||
|
; run: %i128_store_load(0x06060606_06060606, 0xA00A00A0_0A00A00A) == true
|
||||||
|
; run: %i128_store_load(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true
|
||||||
|
|
||||||
|
|
||||||
|
function %i128_store_load_offset(i64, i64) -> b1 {
|
||||||
|
ss0 = explicit_slot 32
|
||||||
|
|
||||||
|
block0(v0: i64,v1: i64):
|
||||||
|
v2 = iconcat v0, v1
|
||||||
|
|
||||||
|
v3 = stack_addr.i64 ss0
|
||||||
|
store.i128 v2, v3+16
|
||||||
|
v4 = load.i128 v3+16
|
||||||
|
|
||||||
|
v5 = icmp.i128 eq v2, v4
|
||||||
|
return v5
|
||||||
|
}
|
||||||
|
; run: %i128_store_load_offset(0, 0) == true
|
||||||
|
; run: %i128_store_load_offset(-1, -1) == true
|
||||||
|
; run: %i128_store_load_offset(-1, 0) == true
|
||||||
|
; run: %i128_store_load_offset(0, -1) == true
|
||||||
|
; run: %i128_store_load_offset(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == true
|
||||||
|
; run: %i128_store_load_offset(0x06060606_06060606, 0xA00A00A0_0A00A00A) == true
|
||||||
|
; run: %i128_store_load_offset(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true
|
||||||
|
|||||||
Reference in New Issue
Block a user