Merge pull request #2061 from cfallin/aarch64-amode

Aarch64 codegen quality: support more general add+extend address computations.
Chris Fallin
2020-07-27 13:48:55 -07:00
committed by GitHub
5 changed files with 442 additions and 83 deletions
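The tests below exercise the new addressing-mode ("amode") lowering. For orientation, the sketch below models the three AArch64 address forms the checks rely on; the enum and helper are illustrative only, not Cranelift's actual types.

```rust
// Simplified model of the addressing modes exercised by these tests
// (illustrative names, not Cranelift's internal representation).
enum AMode {
    BaseImm { base: u8, imm: i32 },                    // [xN] / [xN, #imm]
    RegReg { base: u8, index: u8 },                    // [xN, xM]
    RegExtended { base: u8, index: u8, signed: bool }, // [xN, wM, SXTW|UXTW]
}

fn render(m: &AMode) -> String {
    match m {
        AMode::BaseImm { base, imm: 0 } => format!("[x{}]", base),
        AMode::BaseImm { base, imm } => format!("[x{}, #{}]", base, imm),
        AMode::RegReg { base, index } => format!("[x{}, x{}]", base, index),
        AMode::RegExtended { base, index, signed } => {
            format!("[x{}, w{}, {}]", base, index, if *signed { "SXTW" } else { "UXTW" })
        }
    }
}

fn main() {
    // The extended-register form checked in %f5 below:
    let m = AMode::RegExtended { base: 0, index: 1, signed: true };
    assert_eq!(render(&m), "[x0, w1, SXTW]");
    // The base+immediate and register-register forms from %f9 and %f10:
    assert_eq!(render(&AMode::BaseImm { base: 0, imm: 48 }), "[x0, #48]");
    assert_eq!(render(&AMode::RegReg { base: 1, index: 0 }), "[x1, x0]");
}
```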

View File

@@ -15,7 +15,7 @@ block0(v0: i64, v1: i32):
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
-function %f1(i64, i32) -> i32 {
+function %f2(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = uextend.i64 v1
v3 = load_complex.i32 v2+v0
@@ -29,7 +29,7 @@ block0(v0: i64, v1: i32):
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
-function %f1(i64, i32) -> i32 {
+function %f3(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = sextend.i64 v1
v3 = load_complex.i32 v0+v2
@@ -43,7 +43,7 @@ block0(v0: i64, v1: i32):
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
-function %f1(i64, i32) -> i32 {
+function %f4(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = sextend.i64 v1
v3 = load_complex.i32 v2+v0
@@ -56,3 +56,216 @@ block0(v0: i64, v1: i32):
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f5(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = sextend.i64 v1
v3 = iadd.i64 v0, v2
v4 = load.i32 v3
return v4
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr w0, [x0, w1, SXTW]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f6(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = sextend.i64 v1
v3 = iadd.i64 v2, v0
v4 = load.i32 v3
return v4
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr w0, [x0, w1, SXTW]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f7(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = uextend.i64 v0
v3 = uextend.i64 v1
v4 = iadd.i64 v2, v3
v5 = load.i32 v4
return v5
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov w0, w0
; nextln: ldr w0, [x0, w1, UXTW]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
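%f5 and %f6 check that a sign-extended 32-bit index folds into the load itself, whichever side of the `iadd` it appears on. %f7 probes the limit: an amode can absorb at most one extend, so with two extended operands one of them is materialized first; the `mov w0, w0` is a zero-extension, since writing a w register clears the upper 32 bits. A toy tally of that rule (an assumption read off these checks, not Cranelift's code):

```rust
// At most one extend folds into the amode (an assumption read off these
// checks); each further extended operand costs an explicit instruction.
fn explicit_extends_needed(lhs_is_extend: bool, rhs_is_extend: bool) -> u32 {
    (lhs_is_extend as u32 + rhs_is_extend as u32).saturating_sub(1)
}

fn main() {
    assert_eq!(explicit_extends_needed(true, false), 0); // %f5/%f6: extend is free
    assert_eq!(explicit_extends_needed(true, true), 1);  // %f7: one `mov w0, w0`
}
```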
function %f8(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = sextend.i64 v1
v3 = iconst.i64 32
v4 = iadd.i64 v2, v3
v5 = iadd.i64 v4, v0
v6 = iadd.i64 v5, v5
v7 = load.i32 v6+4
return v7
}
; v6+4 = 2*v5 + 4 = 2*v4 + 2*v0 + 4 = 2*v2 + 2*v3 + 2*v0 + 4
; = 2*sextend($x1) + 2*$x0 + 68
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add x2, x0, #68
; nextln: add x0, x2, x0
; nextln: add x0, x0, x1, SXTW
; nextln: ldr w0, [x0, w1, SXTW]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
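The comment above does the algebra by hand. Mechanically, a lowering pass can reach the same answer by flattening the `iadd` tree into register terms plus one accumulated constant; a sketch of that idea (hypothetical helper, not Cranelift's actual traversal):

```rust
// Toy stand-ins for CLIF `iadd`/`iconst`/value nodes.
enum Expr {
    Reg(&'static str),
    Const(i64),
    Add(Box<Expr>, Box<Expr>),
}

// Flatten a sum into register terms plus one accumulated constant offset.
fn flatten(e: &Expr, regs: &mut Vec<&'static str>, off: &mut i64) {
    match e {
        Expr::Reg(r) => regs.push(*r),
        Expr::Const(c) => *off += *c,
        Expr::Add(a, b) => {
            flatten(a, regs, off);
            flatten(b, regs, off);
        }
    }
}

fn main() {
    use Expr::*;
    // v5 = (sextend(x1) + 32) + x0; v6 = v5 + v5; the load adds 4 more.
    let v5 = || {
        Add(
            Box::new(Add(Box::new(Reg("sxtw(x1)")), Box::new(Const(32)))),
            Box::new(Reg("x0")),
        )
    };
    let v6 = Add(Box::new(v5()), Box::new(v5()));
    let mut regs = Vec::new();
    let mut off = 4i64; // the +4 displacement on the load itself
    flatten(&v6, &mut regs, &mut off);
    assert_eq!(off, 68); // 2*32 + 4, matching `add x2, x0, #68`
    assert_eq!(regs, ["sxtw(x1)", "x0", "sxtw(x1)", "x0"]);
}
```

Three of the four register terms are combined with explicit `add`s; the final extended w1 term rides along in the load's amode.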
function %f9(i64, i64, i64) -> i32 {
block0(v0: i64, v1: i64, v2: i64):
v3 = iconst.i64 48
v4 = iadd.i64 v0, v1
v5 = iadd.i64 v4, v2
v6 = iadd.i64 v5, v3
v7 = load.i32 v6
return v7
}
; v6 = $x0 + $x1 + $x2 + 48
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add x0, x0, x2
; nextln: add x0, x0, x1
; nextln: ldur w0, [x0, #48]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f10(i64, i64, i64) -> i32 {
block0(v0: i64, v1: i64, v2: i64):
v3 = iconst.i64 4100
v4 = iadd.i64 v0, v1
v5 = iadd.i64 v4, v2
v6 = iadd.i64 v5, v3
v7 = load.i32 v6
return v7
}
; v6 = $x0 + $x1 + $x2 + 4100
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x3, #4100
; nextln: add x1, x3, x1
; nextln: add x1, x1, x2
; nextln: ldr w0, [x1, x0]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
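%f9 and %f10 differ only in the constant. 48 fits the signed 9-bit unscaled-immediate range, so it folds directly into `ldur`; 4100 does not, and judging by these checks the lowering then materializes it with `movz` and uses a register-register amode (a scaled unsigned 12-bit encoding could in principle hold 4100 for a 4-byte load, but it is evidently not chosen on this path). The assumed range check:

```rust
// Does an offset fit AArch64's 9-bit signed unscaled-immediate form
// (`ldur`/`stur`)? A sketch of the encodability check assumed here.
fn fits_simm9(off: i64) -> bool {
    (-256..=255).contains(&off)
}

fn main() {
    assert!(fits_simm9(48));    // %f9: folds into `ldur w0, [x0, #48]`
    assert!(!fits_simm9(4100)); // %f10: materialized with `movz` instead
}
```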
function %f10_const() -> i32 {
block0:
v1 = iconst.i64 1234
v2 = load.i32 v1
return v2
}
; v1 = constant address 1234
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #1234
; nextln: ldr w0, [x0]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f11(i64) -> i32 {
block0(v0: i64):
v1 = iconst.i64 8388608 ; Imm12: 0x800 << 12
v2 = iadd.i64 v0, v1
v3 = load.i32 v2
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add x0, x0, #8388608
; nextln: ldr w0, [x0]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
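8388608 survives as a single `add` immediate because AArch64 arithmetic immediates are 12 bits with an optional left shift of 12, exactly what the `0x800 << 12` comment points out. A sketch of that encodability test; %f12 below shows the complementary case, where -4 fails it but its negation passes, yielding `sub x0, x0, #4`:

```rust
// Can a constant be encoded as an AArch64 arithmetic immediate: a 12-bit
// value, optionally shifted left by 12? A sketch of the check.
fn encodable_imm12(v: u64) -> bool {
    v < 0x1000 || (v & 0xfff == 0 && (v >> 12) < 0x1000)
}

fn main() {
    assert!(encodable_imm12(8_388_608));      // 0x800 << 12: %f11's single `add`
    assert!(!encodable_imm12(1_000_000_000)); // %f13 needs movz/movk instead
}
```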
function %f12(i64) -> i32 {
block0(v0: i64):
v1 = iconst.i64 -4
v2 = iadd.i64 v0, v1
v3 = load.i32 v2
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub x0, x0, #4
; nextln: ldr w0, [x0]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f13(i64) -> i32 {
block0(v0: i64):
v1 = iconst.i64 1000000000
v2 = iadd.i64 v0, v1
v3 = load.i32 v2
return v3
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x1, #51712
; nextln: movk x1, #15258, LSL #16
; nextln: add x0, x1, x0
; nextln: ldr w0, [x0]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
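%f13's immediates are just 1000000000 (0x3B9ACA00) split into 16-bit chunks: 0xCA00 = 51712 and 0x3B9A = 15258. A one-line sanity check of how `movz`/`movk` reassemble it:

```rust
fn main() {
    // movz x1, #51712 sets the low half; movk x1, #15258, LSL #16 patches the next.
    assert_eq!(51712u64 | (15258u64 << 16), 1_000_000_000);
}
```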
function %f14(i32) -> i32 {
block0(v0: i32):
v1 = sextend.i64 v0
v2 = load.i32 v1
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxtw x0, w0
; nextln: ldr w0, [x0]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f15(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = sextend.i64 v0
v3 = sextend.i64 v1
v4 = iadd.i64 v2, v3
v5 = load.i32 v4
return v5
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxtw x0, w0
; nextln: ldr w0, [x0, w1, SXTW]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
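%f14 is the degenerate case: with no base register to pair with, the bare `sextend` cannot become part of an amode, so it lowers to an explicit `sxtw` followed by a plain [x0] load. %f15 mirrors %f7 for the signed case: one of the two extends folds into the load, and only one explicit `sxtw` remains.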

View File

@@ -15,7 +15,7 @@ block0(v0: i64, v1: i32):
; check: Block 0:
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
-; nextln: ldur w2, [x0]
+; nextln: ldr w2, [x0]
; nextln: add w2, w2, #0
; nextln: subs wzr, w1, w2
; nextln: b.ls label1 ; b label2
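This hunk and the remaining ones below all flip a zero-offset `ldur`/`stur` to `ldr`/`str`. Both forms can encode offset zero; the updated lowering evidently prefers the scaled unsigned-immediate form when the offset is a non-negative multiple of the access size. A guess at that selection rule (names and rule are assumptions based on these checks, not Cranelift's code):

```rust
// Illustrative chooser between the scaled (`ldr`) and unscaled (`ldur`)
// immediate forms, for an access of `size` bytes.
fn mnemonic(off: i64, size: i64) -> &'static str {
    let scaled_ok = off % size == 0 && (0..=4095 * size).contains(&off);
    if scaled_ok { "ldr" } else { "ldur" }
}

fn main() {
    assert_eq!(mnemonic(0, 4), "ldr");   // the zero-offset cases changed here
    assert_eq!(mnemonic(-4, 4), "ldur"); // negative offsets still need ldur
}
```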

View File

@@ -92,7 +92,7 @@ block3(v7: r64, v8: r64):
; nextln: ldur x19, [sp, #32]
; nextln: ldur x20, [sp, #40]
; nextln: add x1, sp, #16
-; nextln: stur x19, [x1]
+; nextln: str x19, [x1]
; nextln: and w0, w0, #1
; nextln: cbz x0, label1 ; b label3
; check: Block 1:
@@ -108,7 +108,7 @@ block3(v7: r64, v8: r64):
; nextln: b label5
; check: Block 5:
; check: add x1, sp, #16
-; nextln: ldur x1, [x1]
+; nextln: ldr x1, [x1]
; nextln: mov x2, x1
; nextln: mov x1, x19
; nextln: ldp x19, x20, [sp], #16

View File

@@ -51,7 +51,7 @@ block0:
; nextln: mov fp, sp
; nextln: sub sp, sp, #16
; nextln: mov x0, sp
-; nextln: ldur x0, [x0]
+; nextln: ldr x0, [x0]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -71,7 +71,7 @@ block0:
; nextln: ldr x16, 8 ; b 12 ; data 100016
; nextln: sub sp, sp, x16, UXTX
; nextln: mov x0, sp
-; nextln: ldur x0, [x0]
+; nextln: ldr x0, [x0]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -89,7 +89,7 @@ block0(v0: i64):
; nextln: mov fp, sp
; nextln: sub sp, sp, #16
; nextln: mov x1, sp
-; nextln: stur x0, [x1]
+; nextln: str x0, [x1]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -109,7 +109,7 @@ block0(v0: i64):
; nextln: ldr x16, 8 ; b 12 ; data 100016
; nextln: sub sp, sp, x16, UXTX
; nextln: mov x1, sp
-; nextln: stur x0, [x1]
+; nextln: str x0, [x1]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret