Cranelift: Simplify leaf functions that do not use the stack (#2960)

* Cranelift AArch64: Simplify leaf functions that do not use the stack

Leaf functions that do not use the stack (e.g. ones that do not clobber
any callee-saved registers) do not need a frame record.

Copyright (c) 2021, Arm Limited.
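
For reference, the first test in the diff below shows the effect on generated code: before the change, even a trivial leaf function set up and tore down a frame record around its body; after it, the function compiles to its body alone.

    ; before (frame record around the body):
    stp fp, lr, [sp, #-16]!   ; push fp and lr, claiming 16 bytes of stack
    mov fp, sp                ; establish the new frame pointer
    ldr w0, [x0, w1, UXTW]    ; the actual work of the function
    ldp fp, lr, [sp], #16     ; pop fp and lr
    ret

    ; after (body only):
    ldr w0, [x0, w1, UXTW]
    ret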
Author:     Anton Kirilov
Date:       2021-08-27 11:12:37 +01:00
Committer:  GitHub
Commit:     7b98be1bee (parent 12515e6646)
34 changed files with 650 additions and 1385 deletions


@@ -9,10 +9,7 @@ block0(v0: i64, v1: i32):
     return v3
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldr w0, [x0, w1, UXTW]
-; nextln: ldp fp, lr, [sp], #16
+; check: ldr w0, [x0, w1, UXTW]
 ; nextln: ret
 
 function %f2(i64, i32) -> i32 {
@@ -22,10 +19,7 @@ block0(v0: i64, v1: i32):
     return v3
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldr w0, [x0, w1, UXTW]
-; nextln: ldp fp, lr, [sp], #16
+; check: ldr w0, [x0, w1, UXTW]
 ; nextln: ret
 
 function %f3(i64, i32) -> i32 {
@@ -35,10 +29,7 @@ block0(v0: i64, v1: i32):
     return v3
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldr w0, [x0, w1, SXTW]
-; nextln: ldp fp, lr, [sp], #16
+; check: ldr w0, [x0, w1, SXTW]
 ; nextln: ret
 
 function %f4(i64, i32) -> i32 {
@@ -48,10 +39,7 @@ block0(v0: i64, v1: i32):
     return v3
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldr w0, [x0, w1, SXTW]
-; nextln: ldp fp, lr, [sp], #16
+; check: ldr w0, [x0, w1, SXTW]
 ; nextln: ret
 
 function %f5(i64, i32) -> i32 {
@@ -62,10 +50,7 @@ block0(v0: i64, v1: i32):
     return v4
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldr w0, [x0, w1, SXTW]
-; nextln: ldp fp, lr, [sp], #16
+; check: ldr w0, [x0, w1, SXTW]
 ; nextln: ret
 
 function %f6(i64, i32) -> i32 {
@@ -76,10 +61,7 @@ block0(v0: i64, v1: i32):
     return v4
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldr w0, [x0, w1, SXTW]
-; nextln: ldp fp, lr, [sp], #16
+; check: ldr w0, [x0, w1, SXTW]
 ; nextln: ret
 
 function %f7(i32, i32) -> i32 {
@@ -91,11 +73,8 @@ block0(v0: i32, v1: i32):
     return v5
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: mov w0, w0
+; check: mov w0, w0
 ; nextln: ldr w0, [x0, w1, UXTW]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f8(i64, i32) -> i32 {
@@ -112,13 +91,10 @@ block0(v0: i64, v1: i32):
 ; v6+4 = 2*v5 = 2*v4 + 2*v0 + 4 = 2*v2 + 2*v3 + 2*v0 + 4
 ;      = 2*sextend($x1) + 2*$x0 + 68
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: add x2, x0, #68
+; check: add x2, x0, #68
 ; nextln: add x0, x2, x0
 ; nextln: add x0, x0, x1, SXTW
 ; nextln: ldr w0, [x0, w1, SXTW]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f9(i64, i64, i64) -> i32 {
@@ -133,12 +109,9 @@ block0(v0: i64, v1: i64, v2: i64):
 ; v6 = $x0 + $x1 + $x2 + 48
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: add x0, x0, x2
+; check: add x0, x0, x2
 ; nextln: add x0, x0, x1
 ; nextln: ldur w0, [x0, #48]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f10(i64, i64, i64) -> i32 {
@@ -153,13 +126,10 @@ block0(v0: i64, v1: i64, v2: i64):
 ; v6 = $x0 + $x1 + $x2 + 4100
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x3, #4100
+; check: movz x3, #4100
 ; nextln: add x1, x3, x1
 ; nextln: add x1, x1, x2
 ; nextln: ldr w0, [x1, x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f10() -> i32 {
@@ -171,11 +141,8 @@ block0:
 ; v6 = $x0 + $x1 + $x2 + 48
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x0, #1234
+; check: movz x0, #1234
 ; nextln: ldr w0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f11(i64) -> i32 {
@@ -186,11 +153,8 @@ block0(v0: i64):
     return v3
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: add x0, x0, #8388608
+; check: add x0, x0, #8388608
 ; nextln: ldr w0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f12(i64) -> i32 {
@@ -201,11 +165,8 @@ block0(v0: i64):
     return v3
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: sub x0, x0, #4
+; check: sub x0, x0, #4
 ; nextln: ldr w0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f13(i64) -> i32 {
@@ -216,13 +177,10 @@ block0(v0: i64):
     return v3
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz w1, #51712
+; check: movz w1, #51712
 ; nextln: movk w1, #15258, LSL #16
 ; nextln: add x0, x1, x0
 ; nextln: ldr w0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f14(i32) -> i32 {
@@ -232,11 +190,8 @@ block0(v0: i32):
     return v2
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: sxtw x0, w0
+; check: sxtw x0, w0
 ; nextln: ldr w0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f15(i32, i32) -> i32 {
@@ -248,11 +203,8 @@ block0(v0: i32, v1: i32):
     return v5
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: sxtw x0, w0
+; check: sxtw x0, w0
 ; nextln: ldr w0, [x0, w1, SXTW]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f16(i64) -> i32 {
@@ -263,10 +215,7 @@ block0(v0: i64):
     return v3
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldr w0, [x0]
-; nextln: ldp fp, lr, [sp], #16
+; check: ldr w0, [x0]
 ; nextln: ret
 
 function %f17(i64) -> i32 {
@@ -277,10 +226,7 @@ block0(v0: i64):
     return v3
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldur w0, [x0, #4]
-; nextln: ldp fp, lr, [sp], #16
+; check: ldur w0, [x0, #4]
 ; nextln: ret
 
 function %f18(i64, i32) -> i16x8 {
@@ -290,11 +236,8 @@ block0(v0: i64, v1: i32):
     return v3
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldr d0, [x0, w1, UXTW]
+; check: ldr d0, [x0, w1, UXTW]
 ; nextln: sxtl v0.8h, v0.8b
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f19(i64, i64) -> i32x4 {
@@ -303,12 +246,9 @@ block0(v0: i64, v1: i64):
     return v2
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: add x0, x0, x1
+; check: add x0, x0, x1
 ; nextln: ldr d0, [x0, #8]
 ; nextln: uxtl v0.4s, v0.4h
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f20(i64, i32) -> i64x2 {
@@ -318,11 +258,8 @@ block0(v0: i64, v1: i32):
     return v3
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldr d0, [x0, w1, SXTW]
+; check: ldr d0, [x0, w1, SXTW]
 ; nextln: uxtl v0.2d, v0.2s
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f18(i64, i64, i64) -> i32 {
@@ -333,11 +270,8 @@ block0(v0: i64, v1: i64, v2: i64):
     return v5
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movn w0, #4097
+; check: movn w0, #4097
 ; nextln: ldrsh x0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f19(i64, i64, i64) -> i32 {
@@ -348,11 +282,8 @@ block0(v0: i64, v1: i64, v2: i64):
     return v5
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x0, #4098
+; check: movz x0, #4098
 ; nextln: ldrsh x0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f20(i64, i64, i64) -> i32 {
@@ -363,12 +294,9 @@ block0(v0: i64, v1: i64, v2: i64):
     return v5
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movn w0, #4097
+; check: movn w0, #4097
 ; nextln: sxtw x0, w0
 ; nextln: ldrsh x0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f21(i64, i64, i64) -> i32 {
@@ -379,12 +307,9 @@ block0(v0: i64, v1: i64, v2: i64):
     return v5
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x0, #4098
+; check: movz x0, #4098
 ; nextln: sxtw x0, w0
 ; nextln: ldrsh x0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -395,13 +320,10 @@ block0(v0: i64):
     return v1
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: mov x1, x0
+; check: mov x1, x0
 ; nextln: ldp x2, x1, [x1]
 ; nextln: stp x2, x1, [x0]
 ; nextln: mov x0, x2
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -412,13 +334,10 @@ block0(v0: i64):
     return v1
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: mov x1, x0
+; check: mov x1, x0
 ; nextln: ldp x2, x1, [x1, #16]
 ; nextln: stp x2, x1, [x0, #16]
 ; nextln: mov x0, x2
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %i128_imm_offset_large(i64) -> i128 {
@@ -428,13 +347,10 @@ block0(v0: i64):
     return v1
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: mov x1, x0
+; check: mov x1, x0
 ; nextln: ldp x2, x1, [x1, #504]
 ; nextln: stp x2, x1, [x0, #504]
 ; nextln: mov x0, x2
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %i128_imm_offset_negative_large(i64) -> i128 {
@@ -444,13 +360,10 @@ block0(v0: i64):
     return v1
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: mov x1, x0
+; check: mov x1, x0
 ; nextln: ldp x2, x1, [x1, #-512]
 ; nextln: stp x2, x1, [x0, #-512]
 ; nextln: mov x0, x2
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -462,13 +375,10 @@ block0(v0: i64):
     return v2
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: mov x1, x0
+; check: mov x1, x0
 ; nextln: ldp x2, x1, [x1, #32]
 ; nextln: stp x2, x1, [x0, #32]
 ; nextln: mov x0, x2
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -481,14 +391,11 @@ block0(v0: i32):
 }
 ; TODO: We should be able to deduplicate the sxtw instruction
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: sxtw x1, w0
+; check: sxtw x1, w0
 ; nextln: ldp x2, x1, [x1]
 ; nextln: sxtw x0, w0
 ; nextln: stp x2, x1, [x0]
 ; nextln: mov x0, x2
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -502,14 +409,11 @@ block0(v0: i64, v1: i32):
     return v5
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: mov x2, x0
+; check: mov x2, x0
 ; nextln: add x2, x2, x1, SXTW
 ; nextln: ldp x3, x2, [x2, #24]
 ; nextln: add x0, x0, x1, SXTW
 ; nextln: stp x3, x2, [x0, #24]
 ; nextln: mov x0, x3
 ; nextln: mov x1, x2
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
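
Every hunk above follows the same pattern: the frame-record setup checks (stp fp, lr, [sp, #-16]! and mov fp, sp) and the teardown check (ldp fp, lr, [sp], #16) are removed, because none of these test functions calls anything or touches the stack. For contrast, an illustrative sketch (not taken from this diff; callee is a hypothetical symbol) of why a non-leaf function still needs the pair: a call overwrites lr, so fp and lr must be saved and restored around it.

    stp fp, lr, [sp, #-16]!   ; prologue: save the frame record
    mov fp, sp                ; establish the frame pointer
    bl  callee                ; bl overwrites lr with the return address
    ldp fp, lr, [sp], #16     ; epilogue: restore fp and lr before returning
    ret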