test compile
set opt_level=speed_and_size
set is_pic
target x86_64 haswell

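; Notes on the Windows x64 (fastcall) convention exercised below: the first
; four integer/pointer arguments are passed in %rcx, %rdx, %r8 and %r9, the
; first four float arguments in %xmm0-%xmm3 (registers are assigned by
; argument position, not per class), further arguments go on the stack above
; a 32-byte shadow space, and %xmm6-%xmm15 are callee-saved.
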
; check if for one arg we use the right register
function %one_arg(i64) windows_fastcall {
block0(v0: i64):
    return
}
; check: function %one_arg(i64 [%rcx], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
; nextln: ss0 = incoming_arg 16, offset -16
; check: block0(v0: i64 [%rcx], v1: i64 [%rbp]):
; nextln: x86_push v1
; nextln: copy_special %rsp -> %rbp
; nextln: v2 = x86_pop.i64
; nextln: return v2
; nextln: }

; check if we still use registers for 4 arguments
function %four_args(i64, i64, i64, i64) windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64):
    return
}
; check: function %four_args(i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
; nextln: ss0 = incoming_arg 16, offset -16
; check: block0(v0: i64 [%rcx], v1: i64 [%rdx], v2: i64 [%r8], v3: i64 [%r9], v4: i64 [%rbp]):
; nextln: x86_push v4
; nextln: copy_special %rsp -> %rbp
; nextln: v5 = x86_pop.i64
; nextln: return v5
; nextln: }

; check if float arguments are passed through XMM registers
function %four_float_args(f64, f64, f64, f64) windows_fastcall {
block0(v0: f64, v1: f64, v2: f64, v3: f64):
    return
}
; check: function %four_float_args(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
; nextln: ss0 = incoming_arg 16, offset -16
; check: block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v4: i64 [%rbp]):
; nextln: x86_push v4
; nextln: copy_special %rsp -> %rbp
; nextln: v5 = x86_pop.i64
; nextln: return v5
; nextln: }

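; The fifth argument lands at incoming_arg offset 32 because the 32-byte
; shadow space the caller reserves for the four register arguments occupies
; the bottom of the stack-argument area.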
; check if we use stack space for > 4 arguments
function %five_args(i64, i64, i64, i64, i64) windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64):
    return
}
; check: function %five_args(i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9], i64 [32], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
; nextln: ss0 = incoming_arg 8, offset 32
; nextln: ss1 = incoming_arg 16, offset -16
; check: block0(v0: i64 [%rcx], v1: i64 [%rdx], v2: i64 [%r8], v3: i64 [%r9], v4: i64 [ss0], v5: i64 [%rbp]):
; nextln: x86_push v5
; nextln: copy_special %rsp -> %rbp
; nextln: v6 = x86_pop.i64
; nextln: return v6
; nextln: }

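; Under fastcall %xmm6-%xmm15 are callee-saved, so clobbering %xmm6/%xmm7
; should make the prologue spill them to the csr area and the epilogue
; reload them (the store/load pairs in the expected output below).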
; check that we preserve xmm6 and above if we're using them locally
function %float_callee_saves(f64, f64, f64, f64) windows_fastcall {
block0(v0: f64, v1: f64, v2: f64, v3: f64):
    ; explicitly use a callee-save register
    [-, %xmm6] v4 = fadd v0, v1
    [-, %xmm7] v5 = fadd v0, v1
    return
}
; check: function %float_callee_sav(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 csr [%rsp], i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7]) -> i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7] windows_fastcall {
; nextln: ss0 = incoming_arg 48, offset -48
; check: block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v6: i64 [%rsp], v7: i64 [%rbp], v8: f64x2 [%xmm6], v9: f64x2 [%xmm7]):
; nextln: x86_push v7
; nextln: copy_special %rsp -> %rbp
; nextln: adjust_sp_down_imm 32
; nextln: store notrap aligned v8, v6+16
; nextln: store notrap aligned v9, v6
; nextln: v11 = load.f64x2 notrap aligned v6+16
; nextln: v12 = load.f64x2 notrap aligned v6
; nextln: adjust_sp_up_imm 32
; nextln: v10 = x86_pop.i64
; nextln: return v10, v11, v12
; nextln: }

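; Register assignment is positional: the f64 in slot 1 takes %xmm1 and the
; f32 in slot 3 takes %xmm3, leaving %xmm0/%xmm2 (and %rdx/%r9) unused.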
function %mixed_int_float(i64, f64, i64, f32) windows_fastcall {
block0(v0: i64, v1: f64, v2: i64, v3: f32):
    return
}
; check: function %mixed_int_float(i64 [%rcx], f64 [%xmm1], i64 [%r8], f32 [%xmm3], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
; nextln: ss0 = incoming_arg 16, offset -16
; check: block0(v0: i64 [%rcx], v1: f64 [%xmm1], v2: i64 [%r8], v3: f32 [%xmm3], v4: i64 [%rbp]):
; nextln: x86_push v4
; nextln: copy_special %rsp -> %rbp
; nextln: v5 = x86_pop.i64
; nextln: return v5
; nextln: }

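; Float results are returned in %xmm0, so returning v1 (which arrives in
; %xmm1) should require a regmove into %xmm0 before the return.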
function %ret_val_float(f32, f64, i64, i64) -> f64 windows_fastcall {
block0(v0: f32, v1: f64, v2: i64, v3: i64):
    return v1
}
; check: function %ret_val_float(f32 [%xmm0], f64 [%xmm1], i64 [%r8], i64 [%r9], i64 fp [%rbp]) -> f64 [%xmm0], i64 fp [%rbp] windows_fastcall {
; nextln: ss0 = incoming_arg 16, offset -16
; check: block0(v0: f32 [%xmm0], v1: f64 [%xmm1], v2: i64 [%r8], v3: i64 [%r9], v4: i64 [%rbp]):
; nextln: x86_push v4
; nextln: copy_special %rsp -> %rbp
; nextln: regmove v1, %xmm1 -> %xmm0
; nextln: v5 = x86_pop.i64
; nextln: return v1, v5
; nextln: }

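; An i128 result does not fit the return register, so it is returned through
; a caller-provided sret buffer: the hidden pointer arrives in %rcx (shifting
; the i64 arguments to %rdx/%r8) and is passed back in %rax.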
function %ret_val_i128(i64, i64) -> i128 windows_fastcall {
block0(v0: i64, v1: i64):
    v2 = iconcat v0, v1
    return v2
}
; check: function %ret_val_i128(i64 [%rdx], i64 [%r8], i64 sret [%rcx], i64 fp [%rbp]) -> i64 sret [%rax], i64 fp [%rbp] windows_fastcall {

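; Arguments wider than 64 bits (i128 here, vectors below) are not passed in
; registers under fastcall; the caller passes a pointer instead, hence the
; i64 ptr [%rcx] parameter in the expected signatures.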
; check if i128 is passed by reference
function %i128_arg(i128) windows_fastcall {
block0(v0: i128):
    return
}
; check: function %i128_arg(i64 ptr [%rcx], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {

; check if vector types are passed by reference
function %i32x4_arg(i32x4) windows_fastcall {
block0(v0: i32x4):
    return
}
; check: function %i32x4_arg(i64 ptr [%rcx], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {

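; This function keeps more values live than there are volatile registers, so
; the expected output allocates spill slots and saves/restores the callee-saved
; %r12-%r15. The 32-byte explicit_slot is presumably the shadow space the call
; to fn0 requires the caller to reserve.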
function %internal_stack_arg_function_call(i64) -> i64 windows_fastcall {
    fn0 = %foo(i64, i64, i64, i64) -> i64 windows_fastcall
    fn1 = %foo2(i64, i64, i64, i64) -> i64 windows_fastcall
block0(v0: i64):
    v1 = load.i64 v0+0
    v2 = load.i64 v0+8
    v3 = load.i64 v0+16
    v4 = load.i64 v0+24
    v5 = load.i64 v0+32
    v6 = load.i64 v0+40
    v7 = load.i64 v0+48
    v8 = load.i64 v0+56
    v9 = load.i64 v0+64
    v10 = call fn0(v1, v2, v3, v4)
    store.i64 v1, v0+8
    store.i64 v2, v0+16
    store.i64 v3, v0+24
    store.i64 v4, v0+32
    store.i64 v5, v0+40
    store.i64 v6, v0+48
    store.i64 v7, v0+56
    store.i64 v8, v0+64
    store.i64 v9, v0+72
    return v10
}
; check: function %internal_stack_a(i64 [%rcx], i64 fp [%rbp], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 [%rax], i64 fp [%rbp], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] windows_fastcall {
; nextln: ss0 = spill_slot 8, offset -56
; nextln: ss1 = spill_slot 8, offset -64
; nextln: ss2 = spill_slot 8, offset -72
; nextln: ss3 = spill_slot 8, offset -80
; nextln: ss4 = spill_slot 8, offset -88
; nextln: ss5 = spill_slot 8, offset -96
; nextln: ss6 = spill_slot 8, offset -104
; nextln: ss7 = spill_slot 8, offset -112
; nextln: ss8 = spill_slot 8, offset -120
; nextln: ss9 = spill_slot 8, offset -128
; nextln: ss10 = incoming_arg 48, offset -48
; nextln: ss11 = explicit_slot 32, offset -160
; nextln: sig0 = (i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9]) -> i64 [%rax] windows_fastcall
; nextln: sig1 = (i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9]) -> i64 [%rax] windows_fastcall
; nextln: fn0 = %foo sig0
; nextln: fn1 = %foo2 sig1
; check: block0(v11: i64 [%rcx], v52: i64 [%rbp], v53: i64 [%r12], v54: i64 [%r13], v55: i64 [%r14], v56: i64 [%r15]):
; nextln: x86_push v52
; nextln: copy_special %rsp -> %rbp
; nextln: x86_push v53
; nextln: x86_push v54
; nextln: x86_push v55
; nextln: x86_push v56
; nextln: adjust_sp_down_imm 112
; nextln: v0 = spill v11
; nextln: v12 = copy_to_ssa.i64 %rcx
; nextln: v13 = load.i64 v12
; nextln: v1 = spill v13
; nextln: v14 = fill_nop v0
; nextln: v15 = load.i64 v14+8
; nextln: v2 = spill v15
; nextln: v16 = fill_nop v0
; nextln: v17 = load.i64 v16+16
; nextln: v3 = spill v17
; nextln: v18 = fill_nop v0
; nextln: v19 = load.i64 v18+24
; nextln: v4 = spill v19
; nextln: v20 = fill_nop v0
; nextln: v21 = load.i64 v20+32
; nextln: v5 = spill v21
; nextln: v22 = fill_nop v0
; nextln: v23 = load.i64 v22+40
; nextln: v6 = spill v23
; nextln: v24 = fill_nop v0
; nextln: v25 = load.i64 v24+48
; nextln: v7 = spill v25
; nextln: v26 = fill_nop v0
; nextln: v27 = load.i64 v26+56
; nextln: v8 = spill v27
; nextln: v28 = fill_nop v0
; nextln: v29 = load.i64 v28+64
; nextln: v9 = spill v29
; nextln: v30 = fill v1
; nextln: v31 = fill v2
; nextln: v32 = fill v3
; nextln: v33 = fill v4
; nextln: regmove v30, %r15 -> %rcx
; nextln: regmove v31, %r14 -> %rdx
; nextln: regmove v32, %r13 -> %r8
; nextln: regmove v33, %r12 -> %r9
; nextln: v10 = call fn0(v30, v31, v32, v33)
; nextln: v34 = fill v1
; nextln: v35 = fill v0
; nextln: store v34, v35+8
; nextln: v36 = fill v2
; nextln: v37 = fill_nop v0
; nextln: store v36, v37+16
; nextln: v38 = fill v3
; nextln: v39 = fill_nop v0
; nextln: store v38, v39+24
; nextln: v40 = fill v4
; nextln: v41 = fill_nop v0
; nextln: store v40, v41+32
; nextln: v42 = fill v5
; nextln: v43 = fill_nop v0
; nextln: store v42, v43+40
; nextln: v44 = fill v6
; nextln: v45 = fill_nop v0
; nextln: store v44, v45+48
; nextln: v46 = fill v7
; nextln: v47 = fill_nop v0
; nextln: store v46, v47+56
; nextln: v48 = fill v8
; nextln: v49 = fill_nop v0
; nextln: store v48, v49+64
; nextln: v50 = fill v9
; nextln: v51 = fill_nop v0
; nextln: store v50, v51+72
; nextln: adjust_sp_up_imm 112
; nextln: v61 = x86_pop.i64
; nextln: v60 = x86_pop.i64
; nextln: v59 = x86_pop.i64
; nextln: v58 = x86_pop.i64
; nextln: v57 = x86_pop.i64
; nextln: return v10, v57, v58, v59, v60, v61
; nextln: }