Merge pull request #2678 from cfallin/x64-fastcall
x86-64 Windows fastcall ABI support.
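For context: under the Windows x64 fastcall convention, the first four integer arguments travel in %rcx, %rdx, %r8, and %r9, floating-point arguments use %xmm0-%xmm3 by position, the caller reserves a 32-byte shadow area above the return address, and %rsi, %rdi, %rbx, %r12-%r15, and %xmm6-%xmm15 are callee-saved. A minimal CLIF sketch of a function opting into the convention (the function name and body are illustrative only, not taken from this patch):

function %example(i64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64):
;; v0 arrives in %rcx and v1 in %rdx; the i64 result is returned in %rax.
v2 = iadd v0, v1
return v2
}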
cranelift/filetests/filetests/isa/x64/fastcall.clif (new file, 299 lines)
@@ -0,0 +1,299 @@
test compile
set enable_llvm_abi_extensions=true
target x86_64
feature "experimental_x64"

function %f0(i64, i64, i64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64):
return v0
}

; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movq %rcx, %rax
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret

function %f1(i64, i64, i64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64):
return v1
}

; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movq %rdx, %rax
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret

function %f2(i64, i64, i64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64):
return v2
}

; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movq %r8, %rax
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret

function %f3(i64, i64, i64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64):
return v3
}

; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movq %r9, %rax
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret

function %f4(i64, i64, f64, i64) -> f64 windows_fastcall {
block0(v0: i64, v1: i64, v2: f64, v3: i64):
return v2
}

; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movaps %xmm2, %xmm0
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret

function %f5(i64, i64, f64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: f64, v3: i64):
return v3
}

; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movq %r9, %rax
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret

function %f6(i64, i64, i64, i64, i64, i64) -> i64 windows_fastcall {
block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64):
return v5
}

;; This is truly odd (because of the regalloc ordering), but it works. Note
;; that we're spilling and using rsi, which is a callee-save in fastcall, because
;; the regalloc order is optimized for SysV. Also note that because we copy args
;; out of their input locations to separate vregs, we have a spurious load
;; from [rbp+48]. Ordinarily these moves are coalesced because the dest vreg
;; is allocated as a caller-save (volatile), but here again we allocate rsi
;; first and so have to spill it (and consequently don't coalesce).
;;
;; TODO(#2704): fix regalloc's register priority ordering!
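;; (With the frame set up, [rbp+0] holds the saved %rbp and [rbp+8] the return
;; address, and fastcall's 32-byte shadow area occupies [rbp+16]..[rbp+47], so
;; the fifth and sixth arguments live at [rbp+48] and [rbp+56]; the returned
;; value v5 is the one loaded from [rbp+56] below.)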

; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: subq $$16, %rsp
; nextln: movq %rsi, 0(%rsp)
; nextln: virtual_sp_offset_adjust 16
; nextln: movq 48(%rbp), %rsi
; nextln: movq 56(%rbp), %rsi
; nextln: movq %rsi, %rax
; nextln: movq 0(%rsp), %rsi
; nextln: addq $$16, %rsp
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret

function %f7(i128, i64, i128, i128) -> i128 windows_fastcall {
block0(v0: i128, v1: i64, v2: i128, v3: i128):
return v3
}

;; Again, terrible regalloc behavior. The important part is that `v3` comes
;; from [rbp+56] and [rbp+64], i.e., the second and third non-shadow
;; stack slot.
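;; (As in %f6, the first non-shadow stack slot is [rbp+48]. The checks also
;; show the i128 result being assembled in the %rax/%rdx pair, low half in
;; %rax and high half in %rdx, which is how 128-bit values are returned when
;; enable_llvm_abi_extensions is set.)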

; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: subq $$16, %rsp
; nextln: movq %rsi, 0(%rsp)
; nextln: movq %rdi, 8(%rsp)
; nextln: virtual_sp_offset_adjust 16
; nextln: movq 48(%rbp), %rsi
; nextln: movq 56(%rbp), %rsi
; nextln: movq 64(%rbp), %rdi
; nextln: movq %rsi, %rax
; nextln: movq %rdi, %rdx
; nextln: movq 0(%rsp), %rsi
; nextln: movq 8(%rsp), %rdi
; nextln: addq $$16, %rsp
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret

function %f8(i64) -> i64 windows_fastcall {
sig0 = (i64, i64, f64, f64, i64, i64) -> i64 windows_fastcall
fn0 = %g sig0

block0(v0: i64):
v1 = fcvt_from_sint.f64 v0
v2 = call fn0(v0, v0, v1, v1, v0, v0)
return v2
}
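
;; The call to %g needs a 48-byte outgoing area: the 32-byte shadow space that
;; fastcall requires the caller to reserve, plus two 8-byte slots for the fifth
;; and sixth arguments at 32(%rsp) and 40(%rsp). The third and fourth (f64)
;; arguments land in %xmm2 and %xmm3 because fastcall assigns argument
;; registers by position across both register classes.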

; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: subq $$16, %rsp
; nextln: movq %rsi, 0(%rsp)
; nextln: virtual_sp_offset_adjust 16
; nextln: movq %rcx, %rsi
; nextln: cvtsi2sd %rsi, %xmm3
; nextln: subq $$48, %rsp
; nextln: virtual_sp_offset_adjust 48
; nextln: movq %rsi, %rcx
; nextln: movq %rsi, %rdx
; nextln: movaps %xmm3, %xmm2
; nextln: movq %rsi, 32(%rsp)
; nextln: movq %rsi, 40(%rsp)
; nextln: load_ext_name %g+0, %rsi
; nextln: call *%rsi
; nextln: addq $$48, %rsp
; nextln: virtual_sp_offset_adjust -48
; nextln: movq 0(%rsp), %rsi
; nextln: addq $$16, %rsp
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret

function %f9(i64) -> f64 windows_fastcall {
block0(v0: i64):
v1 = load.f64 v0+0
v2 = load.f64 v0+8
v3 = load.f64 v0+16
v4 = load.f64 v0+24
v5 = load.f64 v0+32
v6 = load.f64 v0+40
v7 = load.f64 v0+48
v8 = load.f64 v0+56
v9 = load.f64 v0+64
v10 = load.f64 v0+72
v11 = load.f64 v0+80
v12 = load.f64 v0+88
v13 = load.f64 v0+96
v14 = load.f64 v0+104
v15 = load.f64 v0+112
v16 = load.f64 v0+120
v17 = load.f64 v0+128
v18 = load.f64 v0+136
v19 = load.f64 v0+144
v20 = load.f64 v0+152

v21 = fadd.f64 v1, v2
v22 = fadd.f64 v3, v4
v23 = fadd.f64 v5, v6
v24 = fadd.f64 v7, v8
v25 = fadd.f64 v9, v10
v26 = fadd.f64 v11, v12
v27 = fadd.f64 v13, v14
v28 = fadd.f64 v15, v16
v29 = fadd.f64 v17, v18
v30 = fadd.f64 v19, v20

v31 = fadd.f64 v21, v22
v32 = fadd.f64 v23, v24
v33 = fadd.f64 v25, v26
v34 = fadd.f64 v27, v28
v35 = fadd.f64 v29, v30

v36 = fadd.f64 v31, v32
v37 = fadd.f64 v33, v34

v38 = fadd.f64 v36, v37

v39 = fadd.f64 v38, v35

return v39
}
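
;; fastcall treats %xmm6-%xmm15 as callee-saved, so this register-hungry
;; function saves all ten of them (10 x 16 = 160 bytes of the 208-byte frame)
;; with movdqu in the prologue and restores them before returning; the rest of
;; the frame holds spill slots.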

; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: subq $$208, %rsp
; nextln: movdqu %xmm6, 0(%rsp)
; nextln: movdqu %xmm7, 16(%rsp)
; nextln: movdqu %xmm8, 32(%rsp)
; nextln: movdqu %xmm9, 48(%rsp)
; nextln: movdqu %xmm10, 64(%rsp)
; nextln: movdqu %xmm11, 80(%rsp)
; nextln: movdqu %xmm12, 96(%rsp)
; nextln: movdqu %xmm13, 112(%rsp)
; nextln: movdqu %xmm14, 128(%rsp)
; nextln: movdqu %xmm15, 144(%rsp)
; nextln: virtual_sp_offset_adjust 160
; nextln: movsd 0(%rcx), %xmm0
; nextln: movsd %xmm0, rsp(16 + virtual offset)
; nextln: movsd 8(%rcx), %xmm1
; nextln: movsd 16(%rcx), %xmm0
; nextln: movsd %xmm0, rsp(24 + virtual offset)
; nextln: movsd 24(%rcx), %xmm3
; nextln: movsd 32(%rcx), %xmm0
; nextln: movsd %xmm0, rsp(32 + virtual offset)
; nextln: movsd 40(%rcx), %xmm5
; nextln: movsd 48(%rcx), %xmm6
; nextln: movsd 56(%rcx), %xmm7
; nextln: movsd 64(%rcx), %xmm8
; nextln: movsd 72(%rcx), %xmm9
; nextln: movsd 80(%rcx), %xmm10
; nextln: movsd 88(%rcx), %xmm11
; nextln: movsd 96(%rcx), %xmm12
; nextln: movsd 104(%rcx), %xmm13
; nextln: movsd 112(%rcx), %xmm14
; nextln: movsd 120(%rcx), %xmm15
; nextln: movsd 128(%rcx), %xmm0
; nextln: movsd %xmm0, rsp(0 + virtual offset)
; nextln: movsd 136(%rcx), %xmm0
; nextln: movsd 144(%rcx), %xmm2
; nextln: movsd %xmm2, rsp(8 + virtual offset)
; nextln: movsd 152(%rcx), %xmm2
; nextln: nop len=0
; nextln: movsd rsp(16 + virtual offset), %xmm4
; nextln: addsd %xmm1, %xmm4
; nextln: movsd %xmm4, rsp(16 + virtual offset)
; nextln: movsd rsp(24 + virtual offset), %xmm1
; nextln: addsd %xmm3, %xmm1
; nextln: movsd rsp(32 + virtual offset), %xmm4
; nextln: addsd %xmm5, %xmm4
; nextln: addsd %xmm7, %xmm6
; nextln: addsd %xmm9, %xmm8
; nextln: addsd %xmm11, %xmm10
; nextln: addsd %xmm13, %xmm12
; nextln: addsd %xmm15, %xmm14
; nextln: movsd rsp(0 + virtual offset), %xmm3
; nextln: addsd %xmm0, %xmm3
; nextln: movsd rsp(8 + virtual offset), %xmm0
; nextln: addsd %xmm2, %xmm0
; nextln: movsd rsp(16 + virtual offset), %xmm2
; nextln: addsd %xmm1, %xmm2
; nextln: addsd %xmm6, %xmm4
; nextln: addsd %xmm10, %xmm8
; nextln: addsd %xmm14, %xmm12
; nextln: addsd %xmm0, %xmm3
; nextln: addsd %xmm4, %xmm2
; nextln: addsd %xmm12, %xmm8
; nextln: addsd %xmm8, %xmm2
; nextln: addsd %xmm3, %xmm2
; nextln: movaps %xmm2, %xmm0
; nextln: movdqu 0(%rsp), %xmm6
; nextln: movdqu 16(%rsp), %xmm7
; nextln: movdqu 32(%rsp), %xmm8
; nextln: movdqu 48(%rsp), %xmm9
; nextln: movdqu 64(%rsp), %xmm10
; nextln: movdqu 80(%rsp), %xmm11
; nextln: movdqu 96(%rsp), %xmm12
; nextln: movdqu 112(%rsp), %xmm13
; nextln: movdqu 128(%rsp), %xmm14
; nextln: movdqu 144(%rsp), %xmm15
; nextln: addq $$160, %rsp
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
@@ -1,4 +1,5 @@
test compile
set enable_llvm_abi_extensions=true
target x86_64
feature "experimental_x64"

@@ -738,17 +739,17 @@ block0(v0: i128, v1: i128, v2: i64, v3: i128, v4: i128, v5: i128):
v11 = iadd.i128 v9, v10
return v11

; check: movq %rsp, %rbp
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: subq $$16, %rsp
; nextln: movq %r12, 0(%rsp)
; nextln: movq %r13, 8(%rsp)
; nextln: virtual_sp_offset_adjust 16
; nextln: movq 16(%rbp), %r9
; nextln: movq 24(%rbp), %r10
; nextln: movq 32(%rbp), %r12
; nextln: movq 40(%rbp), %r11
; nextln: movq 48(%rbp), %rax
; nextln: movq 56(%rbp), %r13
; nextln: movq 16(%rbp), %r10
; nextln: movq 24(%rbp), %r12
; nextln: movq 32(%rbp), %r11
; nextln: movq 40(%rbp), %rax
; nextln: movq 48(%rbp), %r13
; nextln: addq %rdx, %rdi
; nextln: adcq %rcx, %rsi
; nextln: xorq %rcx, %rcx
@@ -786,10 +787,10 @@ block0(v0: i128):
; nextln: movq %r10, 16(%rsi)
; nextln: movq %r11, 24(%rsi)
; nextln: movq %r12, 32(%rsi)
; nextln: movq %r13, 48(%rsi)
; nextln: movq %r14, 56(%rsi)
; nextln: movq %rdi, 64(%rsi)
; nextln: movq %rbx, 72(%rsi)
; nextln: movq %r13, 40(%rsi)
; nextln: movq %r14, 48(%rsi)
; nextln: movq %rdi, 56(%rsi)
; nextln: movq %rbx, 64(%rsi)

}

@@ -1,4 +1,5 @@
test compile
set enable_llvm_abi_extensions=true
target x86_64
feature "experimental_x64"