When a load/store instruction needs an address of the form `v0 + uextend(v1)` or `v0 + sextend(v1)` (or a commuted form thereof), we currently generate a separate zero/sign-extend operation and then use a plain `[rA, rB]` addressing mode. This patch extends `lower_address()` so that, when an address has exactly two addends and a zero offset, it examines both addends, recognizes extension operations, and incorporates them directly into a `[rA, rB, UXTW]` or `[rA, rB, SXTW]` form. This should improve our performance on WebAssembly workloads in particular, because we often see a 64-bit linear memory base indexed by a 32-bit (Wasm) pointer value.
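To illustrate the shape of the pattern match (not the actual implementation in `lower_address()`), here is a self-contained Rust sketch. `ToyInst`, `AMode`, `ExtendOp`, and this toy `lower_address` signature are all simplified stand-ins invented for this sketch, not Cranelift's real data structures:

```rust
// Toy model of folding an extend into an AArch64 addressing mode.
// All types here are simplified stand-ins, not Cranelift's real API.

#[derive(Clone, Copy, Debug, PartialEq)]
enum ExtendOp {
    UXTW, // zero-extend a 32-bit index register
    SXTW, // sign-extend a 32-bit index register
}

#[derive(Clone, Copy, Debug)]
enum ToyInst {
    Reg(u8),     // a 64-bit value already in register rN
    UExtend(u8), // uextend.i64 of 32-bit register rN
    SExtend(u8), // sextend.i64 of 32-bit register rN
}

#[derive(Debug, PartialEq)]
enum AMode {
    RegReg(u8, u8),                // [rA, rB]
    RegExtended(u8, u8, ExtendOp), // [rA, rB, UXTW|SXTW]
}

/// Fold a zero/sign-extend on either addend of a two-addend,
/// zero-offset address into the addressing mode itself.
fn lower_address(a: ToyInst, b: ToyInst) -> AMode {
    // Try both operand orders so `v0 + uextend(v1)` and
    // `uextend(v1) + v0` lower identically.
    for (base, index) in [(a, b), (b, a)] {
        if let ToyInst::Reg(rn) = base {
            match index {
                ToyInst::UExtend(rm) => return AMode::RegExtended(rn, rm, ExtendOp::UXTW),
                ToyInst::SExtend(rm) => return AMode::RegExtended(rn, rm, ExtendOp::SXTW),
                ToyInst::Reg(_) => {}
            }
        }
    }
    // Fallback: no foldable extend. A real backend would materialize
    // any extends into registers first, then use the plain [rA, rB]
    // form; in this toy we just report the register numbers involved.
    let reg_of = |i: ToyInst| match i {
        ToyInst::Reg(r) | ToyInst::UExtend(r) | ToyInst::SExtend(r) => r,
    };
    AMode::RegReg(reg_of(a), reg_of(b))
}

fn main() {
    // v0 + uextend(v1)  =>  ldr w0, [x0, w1, UXTW]
    assert_eq!(
        lower_address(ToyInst::Reg(0), ToyInst::UExtend(1)),
        AMode::RegExtended(0, 1, ExtendOp::UXTW)
    );
    // Commuted: sextend(v1) + v0  =>  ldr w0, [x0, w1, SXTW]
    assert_eq!(
        lower_address(ToyInst::SExtend(1), ToyInst::Reg(0)),
        AMode::RegExtended(0, 1, ExtendOp::SXTW)
    );
}
```

The filetest below exercises all four combinations: each extend kind (`uextend`, `sextend`) with the extended value on either side of the add.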
```
test compile
target aarch64

function %f0(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
  v2 = uextend.i64 v1
  v3 = load_complex.i32 v0+v2
  return v3
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr w0, [x0, w1, UXTW]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f1(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
  v2 = uextend.i64 v1
  v3 = load_complex.i32 v2+v0
  return v3
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr w0, [x0, w1, UXTW]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f2(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
  v2 = sextend.i64 v1
  v3 = load_complex.i32 v0+v2
  return v3
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr w0, [x0, w1, SXTW]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f3(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
  v2 = sextend.i64 v1
  v3 = load_complex.i32 v2+v0
  return v3
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr w0, [x0, w1, SXTW]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
```