Merge pull request #1734 from peterhuene/fix-saved-fprs
Cranelift: Fix FPR saving and shadow space allocation for Windows x64.
This commit is contained in:
@@ -8,29 +8,57 @@ function %one_arg(i64) windows_fastcall {
|
||||
block0(v0: i64):
|
||||
return
|
||||
}
|
||||
; check: function %one_arg(i64 [%rcx], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
|
||||
; nextln: ss0 = incoming_arg 16, offset -48
|
||||
; check: function %one_arg(i64 [%rcx], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
|
||||
; nextln: ss0 = incoming_arg 16, offset -16
|
||||
; check: block0(v0: i64 [%rcx], v1: i64 [%rbp]):
|
||||
; nextln: x86_push v1
|
||||
; nextln: copy_special %rsp -> %rbp
|
||||
; nextln: v2 = x86_pop.i64
|
||||
; nextln: return v2
|
||||
; nextln: }
|
||||
|
||||
; check if we still use registers for 4 arguments
|
||||
function %four_args(i64, i64, i64, i64) windows_fastcall {
|
||||
block0(v0: i64, v1: i64, v2: i64, v3: i64):
|
||||
return
|
||||
}
|
||||
; check: function %four_args(i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
|
||||
; check: function %four_args(i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
|
||||
; nextln: ss0 = incoming_arg 16, offset -16
|
||||
; check: block0(v0: i64 [%rcx], v1: i64 [%rdx], v2: i64 [%r8], v3: i64 [%r9], v4: i64 [%rbp]):
|
||||
; nextln: x86_push v4
|
||||
; nextln: copy_special %rsp -> %rbp
|
||||
; nextln: v5 = x86_pop.i64
|
||||
; nextln: return v5
|
||||
; nextln: }
|
||||
|
||||
; check if float arguments are passed through XMM registers
|
||||
function %four_float_args(f64, f64, f64, f64) windows_fastcall {
|
||||
block0(v0: f64, v1: f64, v2: f64, v3: f64):
|
||||
return
|
||||
}
|
||||
; check: function %four_float_args(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
|
||||
; check: function %four_float_args(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
|
||||
; nextln: ss0 = incoming_arg 16, offset -16
|
||||
; check: block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v4: i64 [%rbp]):
|
||||
; nextln: x86_push v4
|
||||
; nextln: copy_special %rsp -> %rbp
|
||||
; nextln: v5 = x86_pop.i64
|
||||
; nextln: return v5
|
||||
; nextln: }
|
||||
|
||||
; check if we use stack space for > 4 arguments
|
||||
function %five_args(i64, i64, i64, i64, i64) windows_fastcall {
|
||||
block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64):
|
||||
return
|
||||
}
|
||||
; check: function %five_args(i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9], i64 [32], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
|
||||
; check: function %five_args(i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9], i64 [32], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
|
||||
; nextln: ss0 = incoming_arg 8, offset 32
|
||||
; nextln: ss1 = incoming_arg 16, offset -16
|
||||
; check: block0(v0: i64 [%rcx], v1: i64 [%rdx], v2: i64 [%r8], v3: i64 [%r9], v4: i64 [ss0], v5: i64 [%rbp]):
|
||||
; nextln: x86_push v5
|
||||
; nextln: copy_special %rsp -> %rbp
|
||||
; nextln: v6 = x86_pop.i64
|
||||
; nextln: return v6
|
||||
; nextln: }
|
||||
|
||||
; check that we preserve xmm6 and above if we're using them locally
|
||||
function %float_callee_saves(f64, f64, f64, f64) windows_fastcall {
|
||||
@@ -40,38 +68,51 @@ block0(v0: f64, v1: f64, v2: f64, v3: f64):
|
||||
[-, %xmm7] v5 = fadd v0, v1
|
||||
return
|
||||
}
|
||||
; check: function %float_callee_sav(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7]) -> i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7] windows_fastcall {
|
||||
; nextln: ss0 = explicit_slot 32, offset -80
|
||||
; nextln: ss1 = incoming_arg 16, offset -48
|
||||
; check: block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v6: i64 [%rbp], v8: f64x2 [%xmm6], v9: f64x2 [%xmm7]):
|
||||
; nextln: x86_push v6
|
||||
; nextln: copy_special %rsp -> %rbp
|
||||
; nextln: adjust_sp_down_imm 64
|
||||
; nextln: v7 = stack_addr.i64 ss0
|
||||
; nextln: store notrap aligned v8, v7
|
||||
; nextln: store notrap aligned v9, v7+16
|
||||
; check: v10 = stack_addr.i64 ss0
|
||||
; nextln: v11 = load.f64x2 notrap aligned v10
|
||||
; nextln: v12 = load.f64x2 notrap aligned v10+16
|
||||
; nextln: adjust_sp_up_imm 64
|
||||
; nextln: v13 = x86_pop.i64
|
||||
; nextln: v13, v11, v12
|
||||
; check: function %float_callee_sav(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 csr [%rsp], i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7]) -> i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7] windows_fastcall {
|
||||
; nextln: ss0 = incoming_arg 48, offset -48
|
||||
; check: block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v6: i64 [%rsp], v7: i64 [%rbp], v8: f64x2 [%xmm6], v9: f64x2 [%xmm7]):
|
||||
; nextln: x86_push v7
|
||||
; nextln: copy_special %rsp -> %rbp
|
||||
; nextln: adjust_sp_down_imm 32
|
||||
; nextln: store notrap aligned v8, v6+16
|
||||
; nextln: store notrap aligned v9, v6
|
||||
; nextln: v11 = load.f64x2 notrap aligned v6+16
|
||||
; nextln: v12 = load.f64x2 notrap aligned v6
|
||||
; nextln: adjust_sp_up_imm 32
|
||||
; nextln: v10 = x86_pop.i64
|
||||
; nextln: return v10, v11, v12
|
||||
; nextln: }
|
||||
|
||||
function %mixed_int_float(i64, f64, i64, f32) windows_fastcall {
|
||||
block0(v0: i64, v1: f64, v2: i64, v3: f32):
|
||||
return
|
||||
}
|
||||
; check: function %mixed_int_float(i64 [%rcx], f64 [%xmm1], i64 [%r8], f32 [%xmm3], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
|
||||
; check: function %mixed_int_float(i64 [%rcx], f64 [%xmm1], i64 [%r8], f32 [%xmm3], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
|
||||
; nextln: ss0 = incoming_arg 16, offset -16
|
||||
; check: block0(v0: i64 [%rcx], v1: f64 [%xmm1], v2: i64 [%r8], v3: f32 [%xmm3], v4: i64 [%rbp]):
|
||||
; nextln: x86_push v4
|
||||
; nextln: copy_special %rsp -> %rbp
|
||||
; nextln: v5 = x86_pop.i64
|
||||
; nextln: return v5
|
||||
; nextln: }
|
||||
|
||||
function %ret_val_float(f32, f64, i64, i64) -> f64 windows_fastcall {
|
||||
block0(v0: f32, v1: f64, v2: i64, v3: i64):
|
||||
return v1
|
||||
}
|
||||
; check: function %ret_val_float(f32 [%xmm0], f64 [%xmm1], i64 [%r8], i64 [%r9], i64 fp [%rbp]) -> f64 [%xmm0], i64 fp [%rbp] windows_fastcall {
|
||||
; check: function %ret_val_float(f32 [%xmm0], f64 [%xmm1], i64 [%r8], i64 [%r9], i64 fp [%rbp]) -> f64 [%xmm0], i64 fp [%rbp] windows_fastcall {
|
||||
; nextln: ss0 = incoming_arg 16, offset -16
|
||||
; check: block0(v0: f32 [%xmm0], v1: f64 [%xmm1], v2: i64 [%r8], v3: i64 [%r9], v4: i64 [%rbp]):
|
||||
; nextln: x86_push v4
|
||||
; nextln: copy_special %rsp -> %rbp
|
||||
; nextln: regmove v1, %xmm1 -> %xmm0
|
||||
; nextln: v5 = x86_pop.i64
|
||||
; nextln: return v1, v5
|
||||
; nextln: }
|
||||
|
||||
function %internal_stack_arg_function_call(i64) -> i64 windows_fastcall {
|
||||
fn0 = %foo(i64, i64, i64, i64) -> i64
|
||||
fn1 = %foo2(i64, i64, i64, i64) -> i64
|
||||
fn0 = %foo(i64, i64, i64, i64) -> i64 windows_fastcall
|
||||
fn1 = %foo2(i64, i64, i64, i64) -> i64 windows_fastcall
|
||||
block0(v0: i64):
|
||||
v1 = load.i64 v0+0
|
||||
v2 = load.i64 v0+8
|
||||
@@ -94,3 +135,100 @@ block0(v0: i64):
|
||||
store.i64 v9, v0+72
|
||||
return v10
|
||||
}
|
||||
; check: function %internal_stack_a(i64 [%rcx], i64 fp [%rbp], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 [%rax], i64 fp [%rbp], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] windows_fastcall {
|
||||
; nextln: ss0 = spill_slot 8, offset -56
|
||||
; nextln: ss1 = spill_slot 8, offset -64
|
||||
; nextln: ss2 = spill_slot 8, offset -72
|
||||
; nextln: ss3 = spill_slot 8, offset -80
|
||||
; nextln: ss4 = spill_slot 8, offset -88
|
||||
; nextln: ss5 = spill_slot 8, offset -96
|
||||
; nextln: ss6 = spill_slot 8, offset -104
|
||||
; nextln: ss7 = spill_slot 8, offset -112
|
||||
; nextln: ss8 = spill_slot 8, offset -120
|
||||
; nextln: ss9 = spill_slot 8, offset -128
|
||||
; nextln: ss10 = incoming_arg 48, offset -48
|
||||
; nextln: ss11 = explicit_slot 32, offset -160
|
||||
; nextln: sig0 = (i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9]) -> i64 [%rax] windows_fastcall
|
||||
; nextln: sig1 = (i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9]) -> i64 [%rax] windows_fastcall
|
||||
; nextln: fn0 = %foo sig0
|
||||
; nextln: fn1 = %foo2 sig1
|
||||
; check: block0(v11: i64 [%rcx], v52: i64 [%rbp], v53: i64 [%r12], v54: i64 [%r13], v55: i64 [%r14], v56: i64 [%r15]):
|
||||
; nextln: x86_push v52
|
||||
; nextln: copy_special %rsp -> %rbp
|
||||
; nextln: x86_push v53
|
||||
; nextln: x86_push v54
|
||||
; nextln: x86_push v55
|
||||
; nextln: x86_push v56
|
||||
; nextln: adjust_sp_down_imm 112
|
||||
; nextln: v0 = spill v11
|
||||
; nextln: v12 = copy_to_ssa.i64 %rcx
|
||||
; nextln: v13 = load.i64 v12
|
||||
; nextln: v1 = spill v13
|
||||
; nextln: v14 = fill_nop v0
|
||||
; nextln: v15 = load.i64 v14+8
|
||||
; nextln: v2 = spill v15
|
||||
; nextln: v16 = fill_nop v0
|
||||
; nextln: v17 = load.i64 v16+16
|
||||
; nextln: v3 = spill v17
|
||||
; nextln: v18 = fill_nop v0
|
||||
; nextln: v19 = load.i64 v18+24
|
||||
; nextln: v4 = spill v19
|
||||
; nextln: v20 = fill_nop v0
|
||||
; nextln: v21 = load.i64 v20+32
|
||||
; nextln: v5 = spill v21
|
||||
; nextln: v22 = fill_nop v0
|
||||
; nextln: v23 = load.i64 v22+40
|
||||
; nextln: v6 = spill v23
|
||||
; nextln: v24 = fill_nop v0
|
||||
; nextln: v25 = load.i64 v24+48
|
||||
; nextln: v7 = spill v25
|
||||
; nextln: v26 = fill_nop v0
|
||||
; nextln: v27 = load.i64 v26+56
|
||||
; nextln: v8 = spill v27
|
||||
; nextln: v28 = fill_nop v0
|
||||
; nextln: v29 = load.i64 v28+64
|
||||
; nextln: v9 = spill v29
|
||||
; nextln: v30 = fill v1
|
||||
; nextln: v31 = fill v2
|
||||
; nextln: v32 = fill v3
|
||||
; nextln: v33 = fill v4
|
||||
; nextln: regmove v30, %r15 -> %rcx
|
||||
; nextln: regmove v31, %r14 -> %rdx
|
||||
; nextln: regmove v32, %r13 -> %r8
|
||||
; nextln: regmove v33, %r12 -> %r9
|
||||
; nextln: v10 = call fn0(v30, v31, v32, v33)
|
||||
; nextln: v34 = fill v1
|
||||
; nextln: v35 = fill v0
|
||||
; nextln: store v34, v35+8
|
||||
; nextln: v36 = fill v2
|
||||
; nextln: v37 = fill_nop v0
|
||||
; nextln: store v36, v37+16
|
||||
; nextln: v38 = fill v3
|
||||
; nextln: v39 = fill_nop v0
|
||||
; nextln: store v38, v39+24
|
||||
; nextln: v40 = fill v4
|
||||
; nextln: v41 = fill_nop v0
|
||||
; nextln: store v40, v41+32
|
||||
; nextln: v42 = fill v5
|
||||
; nextln: v43 = fill_nop v0
|
||||
; nextln: store v42, v43+40
|
||||
; nextln: v44 = fill v6
|
||||
; nextln: v45 = fill_nop v0
|
||||
; nextln: store v44, v45+48
|
||||
; nextln: v46 = fill v7
|
||||
; nextln: v47 = fill_nop v0
|
||||
; nextln: store v46, v47+56
|
||||
; nextln: v48 = fill v8
|
||||
; nextln: v49 = fill_nop v0
|
||||
; nextln: store v48, v49+64
|
||||
; nextln: v50 = fill v9
|
||||
; nextln: v51 = fill_nop v0
|
||||
; nextln: store v50, v51+72
|
||||
; nextln: adjust_sp_up_imm 112
|
||||
; nextln: v61 = x86_pop.i64
|
||||
; nextln: v60 = x86_pop.i64
|
||||
; nextln: v59 = x86_pop.i64
|
||||
; nextln: v58 = x86_pop.i64
|
||||
; nextln: v57 = x86_pop.i64
|
||||
; nextln: return v10, v57, v58, v59, v60, v61
|
||||
; nextln: }
|
||||
@@ -3,13 +3,35 @@ set opt_level=speed_and_size
|
||||
set is_pic
|
||||
target x86_64 haswell
|
||||
|
||||
; check the unwind information with a function with no args
|
||||
function %no_args() windows_fastcall {
|
||||
; check the unwind information with a leaf function with no args
|
||||
function %no_args_leaf() windows_fastcall {
|
||||
block0:
|
||||
return
|
||||
}
|
||||
; sameln: version: 1
|
||||
; nextln: flags: 0
|
||||
; nextln: prologue size: 4
|
||||
; nextln: frame register: 5
|
||||
; nextln: frame register offset: 0
|
||||
; nextln: unwind codes: 2
|
||||
; nextln:
|
||||
; nextln: offset: 1
|
||||
; nextln: op: PushNonvolatileRegister
|
||||
; nextln: info: 5
|
||||
; nextln:
|
||||
; nextln: offset: 4
|
||||
; nextln: op: SetFramePointer
|
||||
; nextln: info: 0
|
||||
|
||||
; check the unwind information with a non-leaf function with no args
|
||||
function %no_args() windows_fastcall {
|
||||
fn0 = %foo()
|
||||
block0:
|
||||
call fn0()
|
||||
return
|
||||
}
|
||||
; sameln: version: 1
|
||||
; nextln: flags: 0
|
||||
; nextln: prologue size: 8
|
||||
; nextln: frame register: 5
|
||||
; nextln: frame register offset: 0
|
||||
@@ -51,7 +73,7 @@ block0:
|
||||
; nextln: offset: 17
|
||||
; nextln: op: LargeStackAlloc
|
||||
; nextln: info: 0
|
||||
; nextln: value: 12504 (u16)
|
||||
; nextln: value: 12500 (u16)
|
||||
|
||||
; check a function with large-sized stack alloc
|
||||
function %large_stack() windows_fastcall {
|
||||
@@ -77,7 +99,7 @@ block0:
|
||||
; nextln: offset: 17
|
||||
; nextln: op: LargeStackAlloc
|
||||
; nextln: info: 1
|
||||
; nextln: value: 524320 (u32)
|
||||
; nextln: value: 524288 (u32)
|
||||
|
||||
function %fpr_with_function_call(i64, i64) windows_fastcall {
|
||||
fn0 = %foo(f64, f64, i64, i64, i64) windows_fastcall;
|
||||
@@ -113,9 +135,9 @@ block0(v0: i64, v1: i64):
|
||||
;
|
||||
; sameln: version: 1
|
||||
; nextln: flags: 0
|
||||
; nextln: prologue size: 25
|
||||
; nextln: prologue size: 22
|
||||
; nextln: frame register: 5
|
||||
; nextln: frame register offset: 12
|
||||
; nextln: frame register offset: 2
|
||||
; nextln: unwind codes: 5
|
||||
; nextln:
|
||||
; nextln: offset: 1
|
||||
@@ -135,10 +157,10 @@ block0(v0: i64, v1: i64):
|
||||
; nextln: info: 0
|
||||
; nextln: value: 23 (u16)
|
||||
; nextln:
|
||||
; nextln: offset: 25
|
||||
; nextln: offset: 22
|
||||
; nextln: op: SaveXmm128
|
||||
; nextln: info: 15
|
||||
; nextln: value: 3 (u16)
|
||||
; nextln: value: 0 (u16)
|
||||
|
||||
; check a function that has CSRs
|
||||
function %lots_of_registers(i64, i64) windows_fastcall {
|
||||
@@ -191,9 +213,9 @@ block0(v0: i64, v1: i64):
|
||||
}
|
||||
; sameln: version: 1
|
||||
; nextln: flags: 0
|
||||
; nextln: prologue size: 41
|
||||
; nextln: prologue size: 35
|
||||
; nextln: frame register: 5
|
||||
; nextln: frame register offset: 10
|
||||
; nextln: frame register offset: 7
|
||||
; nextln: unwind codes: 13
|
||||
; nextln:
|
||||
; nextln: offset: 1
|
||||
@@ -234,19 +256,19 @@ block0(v0: i64, v1: i64):
|
||||
; nextln:
|
||||
; nextln: offset: 19
|
||||
; nextln: op: SmallStackAlloc
|
||||
; nextln: info: 12
|
||||
; nextln: info: 8
|
||||
; nextln:
|
||||
; nextln: offset: 31
|
||||
; nextln: offset: 24
|
||||
; nextln: op: SaveXmm128
|
||||
; nextln: info: 6
|
||||
; nextln: value: 0 (u16)
|
||||
; nextln: value: 2 (u16)
|
||||
; nextln:
|
||||
; nextln: offset: 36
|
||||
; nextln: offset: 29
|
||||
; nextln: op: SaveXmm128
|
||||
; nextln: info: 7
|
||||
; nextln: value: 1 (u16)
|
||||
; nextln:
|
||||
; nextln: offset: 41
|
||||
; nextln: offset: 35
|
||||
; nextln: op: SaveXmm128
|
||||
; nextln: info: 8
|
||||
; nextln: value: 2 (u16)
|
||||
; nextln: value: 0 (u16)
|
||||
|
||||
Reference in New Issue
Block a user