Fix AArch64 ABI to respect half-caller-save, half-callee-save vec regs.

This PR updates the AArch64 ABI implementation so that it properly
respects that v8-v15 inclusive have callee-save lower halves and
caller-save upper halves, by conservatively approximating (to full
registers) in the appropriate directions when generating prologue
callee-saves and when informing the regalloc of clobbered regs across
callsites.

In order to prevent saving all of these vector registers in the prologue
of every non-leaf function due to the above approximation, this also
makes use of a new regalloc.rs feature to exclude call instructions'
writes from the clobber set returned by register allocation. This is
safe whenever the caller and callee have the same ABI (because anything
the callee could clobber, the caller is allowed to clobber as well
without saving it in the prologue).

Fixes #2254.
This commit is contained in:
Chris Fallin
2020-10-05 18:43:26 -07:00
parent e22e2c3722
commit 71768bb6cf
29 changed files with 325 additions and 206 deletions

View File

@@ -151,34 +151,27 @@ block0:
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub sp, sp, #48
; nextln: str q8, [sp]
; nextln: str q9, [sp, #16]
; nextln: str q10, [sp, #32]
; nextln: virtual_sp_offset_adjust 48
; nextln: sub sp, sp, #32
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v8.16b, v0.16b
; nextln: str s0, [sp]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v9.16b, v0.16b
; nextln: str d0, [sp, #8]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v10.16b, v0.16b
; nextln: str d0, [sp, #16]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v0.16b, v8.16b
; nextln: ldr s0, [sp]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v0.16b, v9.16b
; nextln: ldr d0, [sp, #8]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v0.16b, v10.16b
; nextln: ldr d0, [sp, #16]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: ldr q8, [sp]
; nextln: ldr q9, [sp, #16]
; nextln: ldr q10, [sp, #32]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -202,33 +195,26 @@ block0:
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub sp, sp, #48
; nextln: str q8, [sp]
; nextln: str q9, [sp, #16]
; nextln: str q10, [sp, #32]
; nextln: virtual_sp_offset_adjust 48
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v8.16b, v0.16b
; nextln: str q0, [sp]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v9.16b, v0.16b
; nextln: str q0, [sp, #16]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v10.16b, v0.16b
; nextln: str q0, [sp, #32]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v0.16b, v8.16b
; nextln: ldr q0, [sp]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v0.16b, v9.16b
; nextln: ldr q0, [sp, #16]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v0.16b, v10.16b
; nextln: ldr q0, [sp, #32]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: ldr q8, [sp]
; nextln: ldr q9, [sp, #16]
; nextln: ldr q10, [sp, #32]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -255,34 +241,27 @@ block0:
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub sp, sp, #48
; nextln: str q8, [sp]
; nextln: str q9, [sp, #16]
; nextln: str q10, [sp, #32]
; nextln: virtual_sp_offset_adjust 48
; nextln: sub sp, sp, #32
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v8.16b, v0.16b
; nextln: str s0, [sp]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v9.16b, v0.16b
; nextln: str d0, [sp, #8]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v10.16b, v0.16b
; nextln: str q0, [sp, #16]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v0.16b, v8.16b
; nextln: ldr s0, [sp]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v0.16b, v9.16b
; nextln: ldr d0, [sp, #8]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v0.16b, v10.16b
; nextln: ldr q0, [sp, #16]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: ldr q8, [sp]
; nextln: ldr q9, [sp, #16]
; nextln: ldr q10, [sp, #32]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

View File

@@ -0,0 +1,99 @@
test compile
target aarch64
; Register-pressure test for the prologue/epilogue save area on aarch64.
;
; The single f64 argument is fanned out into 31 sums (v1..v31) and none of
; them is consumed until the pairwise reduction starts, so v0 plus v1..v31
; (32 f64 values) are live at the same time just before v32 is computed.
; Presumably this is meant to make use of the machine vector registers
; q8-q15 unavoidable (TODO confirm against the register allocator); the
; expectations after the function require exactly those eight registers to
; be stored on entry and reloaded before returning.
;
; The CLIF value names v8..v15 below are ordinary SSA values and are
; unrelated to the machine registers of the same name.
function %f(f64) -> f64 {
block0(v0: f64):
; Fan-out - 31 sums of v0 + v0, each kept alive until the reduction below.
v1 = fadd.f64 v0, v0
v2 = fadd.f64 v0, v0
v3 = fadd.f64 v0, v0
v4 = fadd.f64 v0, v0
v5 = fadd.f64 v0, v0
v6 = fadd.f64 v0, v0
v7 = fadd.f64 v0, v0
v8 = fadd.f64 v0, v0
v9 = fadd.f64 v0, v0
v10 = fadd.f64 v0, v0
v11 = fadd.f64 v0, v0
v12 = fadd.f64 v0, v0
v13 = fadd.f64 v0, v0
v14 = fadd.f64 v0, v0
v15 = fadd.f64 v0, v0
v16 = fadd.f64 v0, v0
v17 = fadd.f64 v0, v0
v18 = fadd.f64 v0, v0
v19 = fadd.f64 v0, v0
v20 = fadd.f64 v0, v0
v21 = fadd.f64 v0, v0
v22 = fadd.f64 v0, v0
v23 = fadd.f64 v0, v0
v24 = fadd.f64 v0, v0
v25 = fadd.f64 v0, v0
v26 = fadd.f64 v0, v0
v27 = fadd.f64 v0, v0
v28 = fadd.f64 v0, v0
v29 = fadd.f64 v0, v0
v30 = fadd.f64 v0, v0
v31 = fadd.f64 v0, v0
; Reduction level 1 - v0..v31 combined pairwise into v32..v47 (16 values).
v32 = fadd.f64 v0, v1
v33 = fadd.f64 v2, v3
v34 = fadd.f64 v4, v5
v35 = fadd.f64 v6, v7
v36 = fadd.f64 v8, v9
v37 = fadd.f64 v10, v11
v38 = fadd.f64 v12, v13
v39 = fadd.f64 v14, v15
v40 = fadd.f64 v16, v17
v41 = fadd.f64 v18, v19
v42 = fadd.f64 v20, v21
v43 = fadd.f64 v22, v23
v44 = fadd.f64 v24, v25
v45 = fadd.f64 v26, v27
v46 = fadd.f64 v28, v29
v47 = fadd.f64 v30, v31
; Reduction level 2 - v32..v47 combined pairwise into v48..v55 (8 values).
v48 = fadd.f64 v32, v33
v49 = fadd.f64 v34, v35
v50 = fadd.f64 v36, v37
v51 = fadd.f64 v38, v39
v52 = fadd.f64 v40, v41
v53 = fadd.f64 v42, v43
v54 = fadd.f64 v44, v45
v55 = fadd.f64 v46, v47
; Reduction level 3 - v48..v55 combined pairwise into v56..v59 (4 values).
v56 = fadd.f64 v48, v49
v57 = fadd.f64 v50, v51
v58 = fadd.f64 v52, v53
v59 = fadd.f64 v54, v55
; Reduction level 4 and final sum - every earlier value feeds the result, so
; none of the computations above is dead.
v60 = fadd.f64 v56, v57
v61 = fadd.f64 v58, v59
v62 = fadd.f64 v60, v61
return v62
}
; Expected prologue - frame setup, then 128 bytes reserved (8 registers at a
; 16-byte stride) and q8-q15 stored in ascending order. The q form of the
; store is expected even though the function only handles f64 values.
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub sp, sp, #128
; nextln: str q8, [sp]
; nextln: str q9, [sp, #16]
; nextln: str q10, [sp, #32]
; nextln: str q11, [sp, #48]
; nextln: str q12, [sp, #64]
; nextln: str q13, [sp, #80]
; nextln: str q14, [sp, #96]
; nextln: str q15, [sp, #112]
; Expected epilogue - the function body in between is left unconstrained;
; q8-q15 are reloaded from the same slots, then the frame is torn down.
; check: ldr q8, [sp]
; nextln: ldr q9, [sp, #16]
; nextln: ldr q10, [sp, #32]
; nextln: ldr q11, [sp, #48]
; nextln: ldr q12, [sp, #64]
; nextln: ldr q13, [sp, #80]
; nextln: ldr q14, [sp, #96]
; nextln: ldr q15, [sp, #112]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret