SIMD & FP registers are now saved and restored in pairs, similarly to general-purpose registers. Also, only the bottom 64 bits of the registers are saved and restored (in case of non-Baldrdash ABIs), which is the requirement from the Procedure Call Standard for the Arm 64-bit Architecture. As for the callee-saved general-purpose registers, if a procedure needs to save and restore an odd number of them, it no longer uses store and load pair instructions for the last register. Copyright (c) 2021, Arm Limited.
149 lines · 3.4 KiB · Plaintext
;; Compile-test for aarch64 prologue/epilogue generation.
;; unwind_info is disabled so the emitted code contains only the
;; save/restore sequences being checked below.
test compile
set unwind_info=false
target aarch64
|
;; Keeps 32 f64 values (v0..v31) live across the reduction tree below so
;; that register pressure forces the allocator to use every callee-saved
;; FP register (d8-d15). The checks verify that the prologue/epilogue
;; saves and restores them in pairs (stp/ldp), and that only the bottom
;; 64 bits of each SIMD&FP register are preserved, per the AAPCS64.
function %f(f64) -> f64 {
block0(v0: f64):
    v1 = fadd.f64 v0, v0
    v2 = fadd.f64 v0, v0
    v3 = fadd.f64 v0, v0
    v4 = fadd.f64 v0, v0
    v5 = fadd.f64 v0, v0
    v6 = fadd.f64 v0, v0
    v7 = fadd.f64 v0, v0
    v8 = fadd.f64 v0, v0
    v9 = fadd.f64 v0, v0
    v10 = fadd.f64 v0, v0
    v11 = fadd.f64 v0, v0
    v12 = fadd.f64 v0, v0
    v13 = fadd.f64 v0, v0
    v14 = fadd.f64 v0, v0
    v15 = fadd.f64 v0, v0
    v16 = fadd.f64 v0, v0
    v17 = fadd.f64 v0, v0
    v18 = fadd.f64 v0, v0
    v19 = fadd.f64 v0, v0
    v20 = fadd.f64 v0, v0
    v21 = fadd.f64 v0, v0
    v22 = fadd.f64 v0, v0
    v23 = fadd.f64 v0, v0
    v24 = fadd.f64 v0, v0
    v25 = fadd.f64 v0, v0
    v26 = fadd.f64 v0, v0
    v27 = fadd.f64 v0, v0
    v28 = fadd.f64 v0, v0
    v29 = fadd.f64 v0, v0
    v30 = fadd.f64 v0, v0
    v31 = fadd.f64 v0, v0

    v32 = fadd.f64 v0, v1
    v33 = fadd.f64 v2, v3
    v34 = fadd.f64 v4, v5
    v35 = fadd.f64 v6, v7
    v36 = fadd.f64 v8, v9
    v37 = fadd.f64 v10, v11
    v38 = fadd.f64 v12, v13
    v39 = fadd.f64 v14, v15
    v40 = fadd.f64 v16, v17
    v41 = fadd.f64 v18, v19
    v42 = fadd.f64 v20, v21
    v43 = fadd.f64 v22, v23
    v44 = fadd.f64 v24, v25
    v45 = fadd.f64 v26, v27
    v46 = fadd.f64 v28, v29
    v47 = fadd.f64 v30, v31

    v48 = fadd.f64 v32, v33
    v49 = fadd.f64 v34, v35
    v50 = fadd.f64 v36, v37
    v51 = fadd.f64 v38, v39
    v52 = fadd.f64 v40, v41
    v53 = fadd.f64 v42, v43
    v54 = fadd.f64 v44, v45
    v55 = fadd.f64 v46, v47

    v56 = fadd.f64 v48, v49
    v57 = fadd.f64 v50, v51
    v58 = fadd.f64 v52, v53
    v59 = fadd.f64 v54, v55

    v60 = fadd.f64 v56, v57
    v61 = fadd.f64 v58, v59

    v62 = fadd.f64 v60, v61

    return v62
}

;; Prologue: frame setup, then the callee-saved FP registers d8-d15
;; pushed in pairs with pre-indexed stores.
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: stp d14, d15, [sp, #-16]!
; nextln: stp d12, d13, [sp, #-16]!
; nextln: stp d10, d11, [sp, #-16]!
; nextln: stp d8, d9, [sp, #-16]!

;; Epilogue: the same pairs popped in reverse order with post-indexed
;; loads, then the frame is torn down.
; check: ldp d8, d9, [sp], #16
; nextln: ldp d10, d11, [sp], #16
; nextln: ldp d12, d13, [sp], #16
; nextln: ldp d14, d15, [sp], #16
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
|
|
|
|
;; Creates enough live i64 values to require an odd number of
;; callee-saved general-purpose registers. The checks verify that the
;; even pairs (x19/x20) still use stp/ldp, while the final odd register
;; (x22) falls back to a single str/ldr instead of a pair instruction.
function %f2(i64) -> i64 {
block0(v0: i64):
    v1 = iadd.i64 v0, v0
    v2 = iadd.i64 v0, v1
    v3 = iadd.i64 v0, v2
    v4 = iadd.i64 v0, v3
    v5 = iadd.i64 v0, v4
    v6 = iadd.i64 v0, v5
    v7 = iadd.i64 v0, v6
    v8 = iadd.i64 v0, v7
    v9 = iadd.i64 v0, v8
    v10 = iadd.i64 v0, v9
    v11 = iadd.i64 v0, v10
    v12 = iadd.i64 v0, v11
    v13 = iadd.i64 v0, v12
    v14 = iadd.i64 v0, v13
    v15 = iadd.i64 v0, v14
    v16 = iadd.i64 v0, v15
    v17 = iadd.i64 v0, v16
    v18 = iadd.i64 v0, v17

    v19 = iadd.i64 v0, v1
    v20 = iadd.i64 v2, v3
    v21 = iadd.i64 v4, v5
    v22 = iadd.i64 v6, v7
    v23 = iadd.i64 v8, v9
    v24 = iadd.i64 v10, v11
    v25 = iadd.i64 v12, v13
    v26 = iadd.i64 v14, v15
    v27 = iadd.i64 v16, v17

    v28 = iadd.i64 v18, v19
    v29 = iadd.i64 v20, v21
    v30 = iadd.i64 v22, v23
    v31 = iadd.i64 v24, v25
    v32 = iadd.i64 v26, v27

    v33 = iadd.i64 v28, v29
    v34 = iadd.i64 v30, v31

    v35 = iadd.i64 v32, v33
    v36 = iadd.i64 v34, v35

    return v36
}

;; Prologue: frame setup; the lone odd callee-saved register (x22) is
;; saved with a single str, the remaining pair (x19/x20) with stp.
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: str x22, [sp, #-16]!
; nextln: stp x19, x20, [sp, #-16]!
; nextln: add x1, x0, x0

;; Epilogue: restore in reverse order — ldp for the pair, ldr for the
;; odd register — then tear down the frame.
; check: add x0, x1, x0
; nextln: ldp x19, x20, [sp], #16
; nextln: ldr x22, [sp], #16
; nextln: ldp fp, lr, [sp], #16
; nextln: ret