aarch64: Add i128 load & store tests and refactor address calculation

The previous address calculation code had a bug where we tried to
add offsets into a temporary register before defining it, causing
the regalloc to complain.
This commit is contained in:
Afonso Bordado
2021-06-17 15:50:08 +01:00
parent 1c05e06bd5
commit c82764605f
3 changed files with 204 additions and 33 deletions

View File

@@ -386,3 +386,130 @@ block0(v0: i64, v1: i64, v2: i64):
; nextln: ldrsh x0, [x0]
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
; Round-trips an i128 through memory with no address offset. The value is
; loaded from the address in v0, stored back to that same address, and then
; returned. The expected code uses a single ldp for the load and a single stp
; for the store.
function %i128(i64) -> i128 {
block0(v0: i64):
v1 = load.i128 v0
store.i128 v1, v0
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov x1, x0
; nextln: ldp x2, x1, [x1]
; nextln: stp x2, x1, [x0]
; nextln: mov x0, x2
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
; Same round-trip as an offset-free i128 load and store, but both memory
; operations carry an immediate offset of 16 on the address in v0. The
; expected code keeps that offset as the #16 immediate of the ldp and stp
; instead of emitting a separate add.
function %i128_imm_offset(i64) -> i128 {
block0(v0: i64):
v1 = load.i128 v0+16
store.i128 v1, v0+16
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov x1, x0
; nextln: ldp x2, x1, [x1, #16]
; nextln: stp x2, x1, [x0, #16]
; nextln: mov x0, x2
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
; Loads and stores an i128 at offset 504 from the address in v0 and returns
; the loaded value. The expected code still encodes the offset directly as
; the #504 immediate of the ldp and stp.
; NOTE(review) 504 looks like the upper end of the signed, 8-byte-scaled
; 7-bit immediate that ldp and stp accept for 64-bit registers. Confirm
; against the Arm reference before relying on this.
function %i128_imm_offset_large(i64) -> i128 {
block0(v0: i64):
v1 = load.i128 v0+504
store.i128 v1, v0+504
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov x1, x0
; nextln: ldp x2, x1, [x1, #504]
; nextln: stp x2, x1, [x0, #504]
; nextln: mov x0, x2
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
; Loads and stores an i128 at offset -512 from the address in v0 and returns
; the loaded value. The expected code encodes the negative offset directly as
; the #-512 immediate of the ldp and stp.
; NOTE(review) -512 looks like the lower end of the signed, 8-byte-scaled
; 7-bit immediate that ldp and stp accept for 64-bit registers. Confirm
; against the Arm reference before relying on this.
function %i128_imm_offset_negative_large(i64) -> i128 {
block0(v0: i64):
v1 = load.i128 v0-512
store.i128 v1, v0-512
return v1
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov x1, x0
; nextln: ldp x2, x1, [x1, #-512]
; nextln: stp x2, x1, [x0, #-512]
; nextln: mov x0, x2
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
; The address is computed by an explicit iadd_imm of 32 rather than by an
; offset on the load and store themselves. The expected code contains no add
; instruction, the constant appears as the #32 immediate of the ldp and stp.
function %i128_add_offset(i64) -> i128 {
block0(v0: i64):
v1 = iadd_imm v0, 32
v2 = load.i128 v1
store.i128 v2, v1
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov x1, x0
; nextln: ldp x2, x1, [x1, #32]
; nextln: stp x2, x1, [x0, #32]
; nextln: mov x0, x2
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
; The address is a 32-bit argument sign-extended to 64 bits, used by both the
; i128 load and the i128 store. The expected code performs the extension with
; sxtw once before the ldp and a second time before the stp.
function %i128_32bit_sextend_simple(i32) -> i128 {
block0(v0: i32):
v1 = sextend.i64 v0
v2 = load.i128 v1
store.i128 v2, v1
return v2
}
; TODO: We should be able to deduplicate the sxtw instruction
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxtw x1, w0
; nextln: ldp x2, x1, [x1]
; nextln: sxtw x0, w0
; nextln: stp x2, x1, [x0]
; nextln: mov x0, x2
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
; The address is built from three parts, a 64-bit base in v0, a 32-bit index
; in v1 that is sign-extended, and a constant 24 added by iadd_imm. The
; expected code folds the extension into an add with the SXTW modifier and
; keeps 24 as the immediate of the ldp and stp. The extending add is emitted
; once for the load and once more for the store.
function %i128_32bit_sextend(i64, i32) -> i128 {
block0(v0: i64, v1: i32):
v2 = sextend.i64 v1
v3 = iadd.i64 v0, v2
v4 = iadd_imm.i64 v3, 24
v5 = load.i128 v4
store.i128 v5, v4
return v5
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov x2, x0
; nextln: add x2, x2, x1, SXTW
; nextln: ldp x3, x2, [x2, #24]
; nextln: add x0, x0, x1, SXTW
; nextln: stp x3, x2, [x0, #24]
; nextln: mov x0, x3
; nextln: mov x1, x2
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

View File

@@ -89,3 +89,48 @@ block0(v0: i64,v1: i64):
; run: %i128_stack_store_load_big_offset(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == true
; run: %i128_stack_store_load_big_offset(0x06060606_06060606, 0xA00A00A0_0A00A00A) == true
; run: %i128_stack_store_load_big_offset(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true
; Runtime round-trip of an i128 through memory using plain load and store on
; a stack address. The two i64 arguments are joined with iconcat, the result
; is stored at the address of ss0 (declared with size 16), loaded back, and
; compared for equality with the value that was stored. Every invocation
; listed below expects the comparison to yield true.
function %i128_store_load(i64, i64) -> b1 {
ss0 = explicit_slot 16
block0(v0: i64,v1: i64):
v2 = iconcat v0, v1
v3 = stack_addr.i64 ss0
store.i128 v2, v3
v4 = load.i128 v3
v5 = icmp.i128 eq v2, v4
return v5
}
; run: %i128_store_load(0, 0) == true
; run: %i128_store_load(-1, -1) == true
; run: %i128_store_load(-1, 0) == true
; run: %i128_store_load(0, -1) == true
; run: %i128_store_load(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == true
; run: %i128_store_load(0x06060606_06060606, 0xA00A00A0_0A00A00A) == true
; run: %i128_store_load(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true
; Runtime round-trip of an i128 through memory where both the store and the
; load carry an immediate offset of 16 on the stack address. The slot ss0 is
; declared with size 32, so the access at offset 16 covers its second half.
; The two i64 arguments are joined with iconcat, stored, loaded back, and
; compared for equality. Every invocation listed below expects true.
function %i128_store_load_offset(i64, i64) -> b1 {
ss0 = explicit_slot 32
block0(v0: i64,v1: i64):
v2 = iconcat v0, v1
v3 = stack_addr.i64 ss0
store.i128 v2, v3+16
v4 = load.i128 v3+16
v5 = icmp.i128 eq v2, v4
return v5
}
; run: %i128_store_load_offset(0, 0) == true
; run: %i128_store_load_offset(-1, -1) == true
; run: %i128_store_load_offset(-1, 0) == true
; run: %i128_store_load_offset(0, -1) == true
; run: %i128_store_load_offset(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == true
; run: %i128_store_load_offset(0x06060606_06060606, 0xA00A00A0_0A00A00A) == true
; run: %i128_store_load_offset(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true