test compile
set opt_level=speed_and_size
set is_pic
target x86_64 haswell

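; Notes on the Windows x64 (fastcall) convention exercised below: the first
; four integer/pointer arguments are passed in %rcx, %rdx, %r8 and %r9, the
; first four float arguments in %xmm0-%xmm3 (registers are assigned by
; argument position, not per class), further arguments go on the stack above
; a 32-byte shadow space, and %xmm6-%xmm15 are callee-saved.
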
; check if for one arg we use the right register
function %one_arg(i64) windows_fastcall {
block0(v0: i64):
    return
}
; check: function %one_arg(i64 [%rcx], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
; nextln: ss0 = incoming_arg 16, offset -16
; check: block0(v0: i64 [%rcx], v1: i64 [%rbp]):
; nextln: x86_push v1
; nextln: copy_special %rsp -> %rbp
; nextln: v2 = x86_pop.i64
; nextln: return v2
; nextln: }

; check if we still use registers for 4 arguments
function %four_args(i64, i64, i64, i64) windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64):
    return
}
; check: function %four_args(i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
; nextln: ss0 = incoming_arg 16, offset -16
; check: block0(v0: i64 [%rcx], v1: i64 [%rdx], v2: i64 [%r8], v3: i64 [%r9], v4: i64 [%rbp]):
; nextln: x86_push v4
; nextln: copy_special %rsp -> %rbp
; nextln: v5 = x86_pop.i64
; nextln: return v5
; nextln: }

; check if float arguments are passed through XMM registers
function %four_float_args(f64, f64, f64, f64) windows_fastcall {
block0(v0: f64, v1: f64, v2: f64, v3: f64):
    return
}
; check: function %four_float_args(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
; nextln: ss0 = incoming_arg 16, offset -16
; check: block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v4: i64 [%rbp]):
; nextln: x86_push v4
; nextln: copy_special %rsp -> %rbp
; nextln: v5 = x86_pop.i64
; nextln: return v5
; nextln: }

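; The fifth argument lands at incoming_arg offset 32 because the 32-byte
; shadow space the caller reserves for the four register arguments occupies
; the bottom of the stack-argument area.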
; check if we use stack space for > 4 arguments
function %five_args(i64, i64, i64, i64, i64) windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64):
    return
}
; check: function %five_args(i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9], i64 [32], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
; nextln: ss0 = incoming_arg 8, offset 32
; nextln: ss1 = incoming_arg 16, offset -16
; check: block0(v0: i64 [%rcx], v1: i64 [%rdx], v2: i64 [%r8], v3: i64 [%r9], v4: i64 [ss0], v5: i64 [%rbp]):
; nextln: x86_push v5
; nextln: copy_special %rsp -> %rbp
; nextln: v6 = x86_pop.i64
; nextln: return v6
; nextln: }

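; Under fastcall %xmm6-%xmm15 are callee-saved, so clobbering %xmm6/%xmm7
; should make the prologue spill them to the csr area and the epilogue
; reload them (the store/load pairs in the expected output below).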
; check that we preserve xmm6 and above if we're using them locally
function %float_callee_saves(f64, f64, f64, f64) windows_fastcall {
block0(v0: f64, v1: f64, v2: f64, v3: f64):
    ; explicitly use a callee-save register
    [-, %xmm6] v4 = fadd v0, v1
    [-, %xmm7] v5 = fadd v0, v1
    return
}
; check: function %float_callee_sav(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 csr [%rsp], i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7]) -> i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7] windows_fastcall {
; nextln: ss0 = incoming_arg 48, offset -48
; check: block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v6: i64 [%rsp], v7: i64 [%rbp], v8: f64x2 [%xmm6], v9: f64x2 [%xmm7]):
; nextln: x86_push v7
; nextln: copy_special %rsp -> %rbp
; nextln: adjust_sp_down_imm 32
; nextln: store notrap aligned v8, v6+16
; nextln: store notrap aligned v9, v6
; nextln: v11 = load.f64x2 notrap aligned v6+16
; nextln: v12 = load.f64x2 notrap aligned v6
; nextln: adjust_sp_up_imm 32
; nextln: v10 = x86_pop.i64
; nextln: return v10, v11, v12
; nextln: }

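; Register assignment is positional: the f64 in slot 1 takes %xmm1 and the
; f32 in slot 3 takes %xmm3, leaving %xmm0/%xmm2 (and %rdx/%r9) unused.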
function %mixed_int_float(i64, f64, i64, f32) windows_fastcall {
block0(v0: i64, v1: f64, v2: i64, v3: f32):
    return
}
; check: function %mixed_int_float(i64 [%rcx], f64 [%xmm1], i64 [%r8], f32 [%xmm3], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
; nextln: ss0 = incoming_arg 16, offset -16
; check: block0(v0: i64 [%rcx], v1: f64 [%xmm1], v2: i64 [%r8], v3: f32 [%xmm3], v4: i64 [%rbp]):
; nextln: x86_push v4
; nextln: copy_special %rsp -> %rbp
; nextln: v5 = x86_pop.i64
; nextln: return v5
; nextln: }

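; Float results are returned in %xmm0, so returning v1 (which arrives in
; %xmm1) should require a regmove into %xmm0 before the return.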
function %ret_val_float(f32, f64, i64, i64) -> f64 windows_fastcall {
block0(v0: f32, v1: f64, v2: i64, v3: i64):
    return v1
}
; check: function %ret_val_float(f32 [%xmm0], f64 [%xmm1], i64 [%r8], i64 [%r9], i64 fp [%rbp]) -> f64 [%xmm0], i64 fp [%rbp] windows_fastcall {
; nextln: ss0 = incoming_arg 16, offset -16
; check: block0(v0: f32 [%xmm0], v1: f64 [%xmm1], v2: i64 [%r8], v3: i64 [%r9], v4: i64 [%rbp]):
; nextln: x86_push v4
; nextln: copy_special %rsp -> %rbp
; nextln: regmove v1, %xmm1 -> %xmm0
; nextln: v5 = x86_pop.i64
; nextln: return v1, v5
; nextln: }

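; An i128 result does not fit the return register, so it is returned through
; a caller-provided sret buffer: the hidden pointer arrives in %rcx (shifting
; the i64 arguments to %rdx/%r8) and is passed back in %rax.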
function %ret_val_i128(i64, i64) -> i128 windows_fastcall {
block0(v0: i64, v1: i64):
    v2 = iconcat v0, v1
    return v2
}
; check: function %ret_val_i128(i64 [%rdx], i64 [%r8], i64 sret [%rcx], i64 fp [%rbp]) -> i64 sret [%rax], i64 fp [%rbp] windows_fastcall {

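; Arguments wider than 64 bits (i128 here, vectors below) are not passed in
; registers under fastcall; the caller passes a pointer instead, hence the
; i64 ptr [%rcx] parameter in the expected signatures.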
; check if i128 is passed by reference
function %i128_arg(i128) windows_fastcall {
block0(v0: i128):
    return
}
; check: function %i128_arg(i64 ptr [%rcx], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {

; check if vector types are passed by reference
function %i32x4_arg(i32x4) windows_fastcall {
block0(v0: i32x4):
    return
}
; check: function %i32x4_arg(i64 ptr [%rcx], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {

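; This function keeps more values live than there are volatile registers, so
; the expected output allocates spill slots and saves/restores the callee-saved
; %r12-%r15. The 32-byte explicit_slot is presumably the shadow space the call
; to fn0 requires the caller to reserve.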
function %internal_stack_arg_function_call(i64) -> i64 windows_fastcall {
    fn0 = %foo(i64, i64, i64, i64) -> i64 windows_fastcall
    fn1 = %foo2(i64, i64, i64, i64) -> i64 windows_fastcall
block0(v0: i64):
    v1 = load.i64 v0+0
    v2 = load.i64 v0+8
    v3 = load.i64 v0+16
    v4 = load.i64 v0+24
    v5 = load.i64 v0+32
    v6 = load.i64 v0+40
    v7 = load.i64 v0+48
    v8 = load.i64 v0+56
    v9 = load.i64 v0+64
    v10 = call fn0(v1, v2, v3, v4)
    store.i64 v1, v0+8
    store.i64 v2, v0+16
    store.i64 v3, v0+24
    store.i64 v4, v0+32
    store.i64 v5, v0+40
    store.i64 v6, v0+48
    store.i64 v7, v0+56
    store.i64 v8, v0+64
    store.i64 v9, v0+72
    return v10
}
; check: function %internal_stack_a(i64 [%rcx], i64 fp [%rbp], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 [%rax], i64 fp [%rbp], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] windows_fastcall {
; nextln: ss0 = spill_slot 8, offset -56
; nextln: ss1 = spill_slot 8, offset -64
; nextln: ss2 = spill_slot 8, offset -72
; nextln: ss3 = spill_slot 8, offset -80
; nextln: ss4 = spill_slot 8, offset -88
; nextln: ss5 = spill_slot 8, offset -96
; nextln: ss6 = spill_slot 8, offset -104
; nextln: ss7 = spill_slot 8, offset -112
; nextln: ss8 = spill_slot 8, offset -120
; nextln: ss9 = spill_slot 8, offset -128
; nextln: ss10 = incoming_arg 48, offset -48
; nextln: ss11 = explicit_slot 32, offset -160
; nextln: sig0 = (i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9]) -> i64 [%rax] windows_fastcall
; nextln: sig1 = (i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9]) -> i64 [%rax] windows_fastcall
; nextln: fn0 = %foo sig0
; nextln: fn1 = %foo2 sig1
; check: block0(v11: i64 [%rcx], v52: i64 [%rbp], v53: i64 [%r12], v54: i64 [%r13], v55: i64 [%r14], v56: i64 [%r15]):
; nextln: x86_push v52
; nextln: copy_special %rsp -> %rbp
; nextln: x86_push v53
; nextln: x86_push v54
; nextln: x86_push v55
; nextln: x86_push v56
; nextln: adjust_sp_down_imm 112
; nextln: v0 = spill v11
; nextln: v12 = copy_to_ssa.i64 %rcx
; nextln: v13 = load.i64 v12
; nextln: v1 = spill v13
; nextln: v14 = fill_nop v0
; nextln: v15 = load.i64 v14+8
; nextln: v2 = spill v15
; nextln: v16 = fill_nop v0
; nextln: v17 = load.i64 v16+16
; nextln: v3 = spill v17
; nextln: v18 = fill_nop v0
; nextln: v19 = load.i64 v18+24
; nextln: v4 = spill v19
; nextln: v20 = fill_nop v0
; nextln: v21 = load.i64 v20+32
; nextln: v5 = spill v21
; nextln: v22 = fill_nop v0
; nextln: v23 = load.i64 v22+40
; nextln: v6 = spill v23
; nextln: v24 = fill_nop v0
; nextln: v25 = load.i64 v24+48
; nextln: v7 = spill v25
; nextln: v26 = fill_nop v0
; nextln: v27 = load.i64 v26+56
; nextln: v8 = spill v27
; nextln: v28 = fill_nop v0
; nextln: v29 = load.i64 v28+64
; nextln: v9 = spill v29
; nextln: v30 = fill v1
; nextln: v31 = fill v2
; nextln: v32 = fill v3
; nextln: v33 = fill v4
; nextln: regmove v30, %r15 -> %rcx
; nextln: regmove v31, %r14 -> %rdx
; nextln: regmove v32, %r13 -> %r8
; nextln: regmove v33, %r12 -> %r9
; nextln: v10 = call fn0(v30, v31, v32, v33)
; nextln: v34 = fill v1
; nextln: v35 = fill v0
; nextln: store v34, v35+8
; nextln: v36 = fill v2
; nextln: v37 = fill_nop v0
; nextln: store v36, v37+16
; nextln: v38 = fill v3
; nextln: v39 = fill_nop v0
; nextln: store v38, v39+24
; nextln: v40 = fill v4
; nextln: v41 = fill_nop v0
; nextln: store v40, v41+32
; nextln: v42 = fill v5
; nextln: v43 = fill_nop v0
; nextln: store v42, v43+40
; nextln: v44 = fill v6
; nextln: v45 = fill_nop v0
; nextln: store v44, v45+48
; nextln: v46 = fill v7
; nextln: v47 = fill_nop v0
; nextln: store v46, v47+56
; nextln: v48 = fill v8
; nextln: v49 = fill_nop v0
; nextln: store v48, v49+64
; nextln: v50 = fill v9
; nextln: v51 = fill_nop v0
; nextln: store v50, v51+72
; nextln: adjust_sp_up_imm 112
; nextln: v61 = x86_pop.i64
; nextln: v60 = x86_pop.i64
; nextln: v59 = x86_pop.i64
; nextln: v58 = x86_pop.i64
; nextln: v57 = x86_pop.i64
; nextln: return v10, v57, v58, v59, v60, v61
; nextln: }