;; wasmtime/cranelift/filetests/filetests/isa/x64/fastcall.clif

test compile precise-output
set enable_llvm_abi_extensions=true
set unwind_info=true
target x86_64
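;; These tests exercise the Windows fastcall (x64) calling convention.
;; `enable_llvm_abi_extensions` permits i128 arguments and returns (used by
;; %f7 below), and `unwind_info=true` is why the expected output contains
;; `unwind` pseudo-instructions describing the frame layout.
;;
;; %f0-%f3 return each of the four register-passed integer arguments in
;; turn: fastcall passes the first four arguments in rcx, rdx, r8, and r9,
;; so each body should reduce to a single move into rax.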
function %f0(i64, i64, i64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64):
return v0
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 8)
; Inst 0: pushq %rbp
; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
; Inst 2: movq %rsp, %rbp
; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
; Inst 4: movq %rcx, %rax
; Inst 5: movq %rbp, %rsp
; Inst 6: popq %rbp
; Inst 7: ret
; }}
function %f1(i64, i64, i64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64):
return v1
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 8)
; Inst 0: pushq %rbp
; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
; Inst 2: movq %rsp, %rbp
; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
; Inst 4: movq %rdx, %rax
; Inst 5: movq %rbp, %rsp
; Inst 6: popq %rbp
; Inst 7: ret
; }}
function %f2(i64, i64, i64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64):
return v2
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 8)
; Inst 0: pushq %rbp
; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
; Inst 2: movq %rsp, %rbp
; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
; Inst 4: movq %r8, %rax
; Inst 5: movq %rbp, %rsp
; Inst 6: popq %rbp
; Inst 7: ret
; }}
function %f3(i64, i64, i64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64):
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 8)
; Inst 0: pushq %rbp
; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
; Inst 2: movq %rsp, %rbp
; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
; Inst 4: movq %r9, %rax
; Inst 5: movq %rbp, %rsp
; Inst 6: popq %rbp
; Inst 7: ret
; }}
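;; %f4 and %f5 check that argument slots are assigned positionally across
;; the integer and float register sets: unlike SysV, fastcall dedicates one
;; slot per argument, so the f64 in slot 2 arrives in xmm2 and the i64 in
;; slot 3 still lands in r9 (not rdx or r8).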
function %f4(i64, i64, f64, i64) -> f64 windows_fastcall {
block0(v0: i64, v1: i64, v2: f64, v3: i64):
return v2
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 8)
; Inst 0: pushq %rbp
; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
; Inst 2: movq %rsp, %rbp
; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
; Inst 4: movaps %xmm2, %xmm0
; Inst 5: movq %rbp, %rsp
; Inst 6: popq %rbp
; Inst 7: ret
; }}
function %f5(i64, i64, f64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: f64, v3: i64):
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 8)
; Inst 0: pushq %rbp
; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
; Inst 2: movq %rsp, %rbp
; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
; Inst 4: movq %r9, %rax
; Inst 5: movq %rbp, %rsp
; Inst 6: popq %rbp
; Inst 7: ret
; }}
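;; %f6 returns the sixth argument, which is passed on the stack. With the
;; frame established, [rbp+0] holds the saved rbp and [rbp+8] the return
;; address; the caller-allocated 32-byte shadow space spans [rbp+16] through
;; [rbp+47], so the fifth argument sits at [rbp+48] and the sixth at
;; [rbp+56], matching the two loads in the output below.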
function %f6(i64, i64, i64, i64, i64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64):
return v5
;; This is truly odd (because of the regalloc ordering), but it works. Note
;; that we spill and use rsi, a callee-saved register in fastcall, because
;; the regalloc order is optimized for SysV. Also note that because we copy
;; args out of their input locations into separate vregs, we end up with a
;; spurious load from [rbp+48]. Ordinarily these moves are coalesced because
;; the destination vreg is allocated to a caller-saved (volatile) register,
;; but here again we allocate rsi first and so have to spill it (and
;; consequently don't coalesce).
;;
;; TODO(#2704): fix regalloc's register priority ordering!
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 15)
; Inst 0: pushq %rbp
; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
; Inst 2: movq %rsp, %rbp
; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 16 }
; Inst 4: subq $16, %rsp
; Inst 5: movq %rsi, 0(%rsp)
; Inst 6: unwind SaveReg { clobber_offset: 0, reg: r16J }
; Inst 7: movq 48(%rbp), %rsi
; Inst 8: movq 56(%rbp), %rsi
; Inst 9: movq %rsi, %rax
; Inst 10: movq 0(%rsp), %rsi
; Inst 11: addq $16, %rsp
; Inst 12: movq %rbp, %rsp
; Inst 13: popq %rbp
; Inst 14: ret
; }}
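;; %f7 relies on the LLVM ABI extensions enabled above: each i128 occupies
;; two 64-bit slots, and the i128 return comes back in the rax:rdx pair
;; with rax holding the low half. Judging from the offsets, v3's halves
;; live at [rbp+56] and [rbp+64]; the dead load from 48(%rbp) is another
;; spurious copy of the kind described in %f6.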
function %f7(i128, i64, i128, i128) -> i128 windows_fastcall {
block0(v0: i128, v1: i64, v2: i128, v3: i128):
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 20)
; Inst 0: pushq %rbp
; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
; Inst 2: movq %rsp, %rbp
; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 16 }
; Inst 4: subq $16, %rsp
; Inst 5: movq %rsi, 0(%rsp)
; Inst 6: unwind SaveReg { clobber_offset: 0, reg: r16J }
; Inst 7: movq %rdi, 8(%rsp)
; Inst 8: unwind SaveReg { clobber_offset: 8, reg: r17J }
; Inst 9: movq 48(%rbp), %rsi
; Inst 10: movq 56(%rbp), %rsi
; Inst 11: movq 64(%rbp), %rdi
; Inst 12: movq %rsi, %rax
; Inst 13: movq %rdi, %rdx
; Inst 14: movq 0(%rsp), %rsi
; Inst 15: movq 8(%rsp), %rdi
; Inst 16: addq $16, %rsp
; Inst 17: movq %rbp, %rsp
; Inst 18: popq %rbp
; Inst 19: ret
; }}
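;; %f8 checks the caller side of a fastcall call: the `subq $48, %rsp`
;; reserves the callee's 32-byte shadow space plus 16 bytes for the fifth
;; and sixth arguments at 32(%rsp) and 40(%rsp), and the two f64 arguments
;; go in xmm2 and xmm3 to match their positional slots.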
function %f8(i64) -> i64 windows_fastcall {
sig0 = (i64, i64, f64, f64, i64, i64) -> i64 windows_fastcall
fn0 = %g sig0
block0(v0: i64):
v1 = fcvt_from_sint.f64 v0
v2 = call fn0(v0, v0, v1, v1, v0, v0)
return v2
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 25)
; Inst 0: pushq %rbp
; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
; Inst 2: movq %rsp, %rbp
; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 16 }
; Inst 4: subq $16, %rsp
; Inst 5: movq %rsi, 0(%rsp)
; Inst 6: unwind SaveReg { clobber_offset: 0, reg: r16J }
; Inst 7: movq %rcx, %rsi
; Inst 8: cvtsi2sd %rsi, %xmm3
; Inst 9: subq $48, %rsp
; Inst 10: virtual_sp_offset_adjust 48
; Inst 11: movq %rsi, %rcx
; Inst 12: movq %rsi, %rdx
; Inst 13: movaps %xmm3, %xmm2
; Inst 14: movq %rsi, 32(%rsp)
; Inst 15: movq %rsi, 40(%rsp)
; Inst 16: load_ext_name %g+0, %rsi
; Inst 17: call *%rsi
; Inst 18: addq $48, %rsp
; Inst 19: virtual_sp_offset_adjust -48
; Inst 20: movq 0(%rsp), %rsi
; Inst 21: addq $16, %rsp
; Inst 22: movq %rbp, %rsp
; Inst 23: popq %rbp
; Inst 24: ret
; }}
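;; %f9 forces enough live f64 values to touch xmm6-xmm15, which are
;; callee-saved under fastcall (unlike SysV, where every xmm register is
;; volatile). The prologue and epilogue below should therefore save and
;; restore xmm6-xmm15 with paired movdqu stores and loads around the adds.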
function %f9(i64) -> f64 windows_fastcall {
block0(v0: i64):
v1 = load.f64 v0+0
v2 = load.f64 v0+8
v3 = load.f64 v0+16
v4 = load.f64 v0+24
v5 = load.f64 v0+32
v6 = load.f64 v0+40
v7 = load.f64 v0+48
v8 = load.f64 v0+56
v9 = load.f64 v0+64
v10 = load.f64 v0+72
v11 = load.f64 v0+80
v12 = load.f64 v0+88
v13 = load.f64 v0+96
v14 = load.f64 v0+104
v15 = load.f64 v0+112
v16 = load.f64 v0+120
v17 = load.f64 v0+128
v18 = load.f64 v0+136
v19 = load.f64 v0+144
v20 = load.f64 v0+152
v21 = fadd.f64 v1, v2
v22 = fadd.f64 v3, v4
v23 = fadd.f64 v5, v6
v24 = fadd.f64 v7, v8
v25 = fadd.f64 v9, v10
v26 = fadd.f64 v11, v12
v27 = fadd.f64 v13, v14
v28 = fadd.f64 v15, v16
v29 = fadd.f64 v17, v18
v30 = fadd.f64 v19, v20
v31 = fadd.f64 v21, v22
v32 = fadd.f64 v23, v24
v33 = fadd.f64 v25, v26
v34 = fadd.f64 v27, v28
v35 = fadd.f64 v29, v30
v36 = fadd.f64 v31, v32
v37 = fadd.f64 v33, v34
v38 = fadd.f64 v36, v37
v39 = fadd.f64 v38, v35
return v39
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 85)
; Inst 0: pushq %rbp
; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
; Inst 2: movq %rsp, %rbp
; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 160 }
; Inst 4: subq $224, %rsp
; Inst 5: movdqu %xmm6, 64(%rsp)
; Inst 6: unwind SaveReg { clobber_offset: 0, reg: r6V }
; Inst 7: movdqu %xmm7, 80(%rsp)
; Inst 8: unwind SaveReg { clobber_offset: 16, reg: r7V }
; Inst 9: movdqu %xmm8, 96(%rsp)
; Inst 10: unwind SaveReg { clobber_offset: 32, reg: r8V }
; Inst 11: movdqu %xmm9, 112(%rsp)
; Inst 12: unwind SaveReg { clobber_offset: 48, reg: r9V }
; Inst 13: movdqu %xmm10, 128(%rsp)
; Inst 14: unwind SaveReg { clobber_offset: 64, reg: r10V }
; Inst 15: movdqu %xmm11, 144(%rsp)
; Inst 16: unwind SaveReg { clobber_offset: 80, reg: r11V }
; Inst 17: movdqu %xmm12, 160(%rsp)
; Inst 18: unwind SaveReg { clobber_offset: 96, reg: r12V }
; Inst 19: movdqu %xmm13, 176(%rsp)
; Inst 20: unwind SaveReg { clobber_offset: 112, reg: r13V }
; Inst 21: movdqu %xmm14, 192(%rsp)
; Inst 22: unwind SaveReg { clobber_offset: 128, reg: r14V }
; Inst 23: movdqu %xmm15, 208(%rsp)
; Inst 24: unwind SaveReg { clobber_offset: 144, reg: r15V }
; Inst 25: movsd 0(%rcx), %xmm0
; Inst 26: movdqu %xmm0, rsp(48 + virtual offset)
; Inst 27: movsd 8(%rcx), %xmm1
; Inst 28: movsd 16(%rcx), %xmm0
; Inst 29: movdqu %xmm0, rsp(32 + virtual offset)
; Inst 30: movsd 24(%rcx), %xmm3
; Inst 31: movsd 32(%rcx), %xmm4
; Inst 32: movsd 40(%rcx), %xmm5
; Inst 33: movsd 48(%rcx), %xmm6
; Inst 34: movsd 56(%rcx), %xmm7
; Inst 35: movsd 64(%rcx), %xmm8
; Inst 36: movsd 72(%rcx), %xmm9
; Inst 37: movsd 80(%rcx), %xmm10
; Inst 38: movsd 88(%rcx), %xmm11
; Inst 39: movsd 96(%rcx), %xmm12
; Inst 40: movsd 104(%rcx), %xmm13
; Inst 41: movsd 112(%rcx), %xmm14
; Inst 42: movsd 120(%rcx), %xmm15
; Inst 43: movsd 128(%rcx), %xmm2
; Inst 44: movdqu %xmm2, rsp(0 + virtual offset)
; Inst 45: movsd 136(%rcx), %xmm2
; Inst 46: movsd 144(%rcx), %xmm0
; Inst 47: movdqu %xmm0, rsp(16 + virtual offset)
; Inst 48: movdqu rsp(48 + virtual offset), %xmm0
; Inst 49: addsd %xmm1, %xmm0
; Inst 50: movdqu rsp(32 + virtual offset), %xmm1
; Inst 51: addsd %xmm3, %xmm1
; Inst 52: addsd %xmm5, %xmm4
; Inst 53: addsd %xmm7, %xmm6
; Inst 54: addsd %xmm9, %xmm8
; Inst 55: addsd %xmm11, %xmm10
; Inst 56: addsd %xmm13, %xmm12
; Inst 57: addsd %xmm15, %xmm14
; Inst 58: movdqu rsp(0 + virtual offset), %xmm3
; Inst 59: addsd %xmm2, %xmm3
; Inst 60: movdqu rsp(16 + virtual offset), %xmm2
; Inst 61: addsd 152(%rcx), %xmm2
; Inst 62: addsd %xmm1, %xmm0
; Inst 63: addsd %xmm6, %xmm4
; Inst 64: addsd %xmm10, %xmm8
; Inst 65: addsd %xmm14, %xmm12
; Inst 66: addsd %xmm2, %xmm3
; Inst 67: addsd %xmm4, %xmm0
; Inst 68: addsd %xmm12, %xmm8
; Inst 69: addsd %xmm8, %xmm0
; Inst 70: addsd %xmm3, %xmm0
; Inst 71: movdqu 64(%rsp), %xmm6
; Inst 72: movdqu 80(%rsp), %xmm7
; Inst 73: movdqu 96(%rsp), %xmm8
; Inst 74: movdqu 112(%rsp), %xmm9
; Inst 75: movdqu 128(%rsp), %xmm10
; Inst 76: movdqu 144(%rsp), %xmm11
; Inst 77: movdqu 160(%rsp), %xmm12
; Inst 78: movdqu 176(%rsp), %xmm13
; Inst 79: movdqu 192(%rsp), %xmm14
; Inst 80: movdqu 208(%rsp), %xmm15
; Inst 81: addq $224, %rsp
; Inst 82: movq %rbp, %rsp
; Inst 83: popq %rbp
; Inst 84: ret
; }}