AArch64: make use of reg-reg-extend amode.
When a load/store instruction needs an address of the form `v0 + uextend(v1)` or `v0 + sextend(v1)` (or the commuted forms thereof), we currently generate a separate zero/sign-extend operation and then use a plain `[rA, rB]` addressing mode. This patch extends `lower_address()` to look at both addends of an address if it has two addends and a zero offset, recognize extension operations, and incorporate them directly into a `[rA, rB, UXTW]` or `[rA, rB, SXTW]` form. This should improve our performence on WebAssembly workloads, at least, because we often see a 64-bit linear memory base indexed by a 32-bit (Wasm) pointer value.
This commit is contained in:
58
cranelift/filetests/filetests/vcode/aarch64/amodes.clif
Normal file
58
cranelift/filetests/filetests/vcode/aarch64/amodes.clif
Normal file
@@ -0,0 +1,58 @@
|
||||
test compile
|
||||
target aarch64
|
||||
|
||||
function %f0(i64, i32) -> i32 {
|
||||
block0(v0: i64, v1: i32):
|
||||
v2 = uextend.i64 v1
|
||||
v3 = load_complex.i32 v0+v2
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: ldr w0, [x0, w1, UXTW]
|
||||
; nextln: mov sp, fp
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %f1(i64, i32) -> i32 {
|
||||
block0(v0: i64, v1: i32):
|
||||
v2 = uextend.i64 v1
|
||||
v3 = load_complex.i32 v2+v0
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: ldr w0, [x0, w1, UXTW]
|
||||
; nextln: mov sp, fp
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %f1(i64, i32) -> i32 {
|
||||
block0(v0: i64, v1: i32):
|
||||
v2 = sextend.i64 v1
|
||||
v3 = load_complex.i32 v0+v2
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: ldr w0, [x0, w1, SXTW]
|
||||
; nextln: mov sp, fp
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %f1(i64, i32) -> i32 {
|
||||
block0(v0: i64, v1: i32):
|
||||
v2 = sextend.i64 v1
|
||||
v3 = load_complex.i32 v2+v0
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: ldr w0, [x0, w1, SXTW]
|
||||
; nextln: mov sp, fp
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
Reference in New Issue
Block a user