* ABI: implement register arguments with constraints.

  Currently, Cranelift's ABI code emits a sequence of moves from physical registers into vregs at the top of the function body, one for every register-carried argument. For a number of reasons, we want to move to operand constraints instead, and remove the use of explicitly named "pinned vregs"; in theory this allows better regalloc, because it removes the need to "reverse-engineer" the sequence of moves.

  This PR alters the ABI code so that it generates a single "args" pseudo-instruction as the first instruction in the function body. This pseudo-inst defs all register arguments and constrains each one to the appropriate register at its def-point. From there, the regalloc can move them wherever it needs to.

  Some care was taken to keep this pseudo-inst out of post-regalloc disassemblies, but the change did cause a general regalloc "shift" in many tests, so the precise-output updates are a bit noisy. Sorry about that!

  A subsequent PR will handle the other half of the ABI code, namely the callsite case, with a similar preg-to-constraint conversion.

* Update based on review feedback.

* Review feedback.
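To make the mechanism concrete, here is a minimal standalone sketch, not Cranelift's actual code: every name in it (`PReg`, `VReg`, `ArgPair`, `Inst::Args`, `gen_arg_setup`) is a hypothetical stand-in. The point is the shape of the data: one pseudo-instruction whose defs each carry a fixed-register constraint that applies only at the def-point.

```rust
/// A physical register (e.g. %rdi, %rsi on x86-64). Hypothetical stand-in type.
#[derive(Clone, Copy, Debug)]
struct PReg(u8);

/// A virtual register, free for the allocator to place anywhere.
#[derive(Clone, Copy, Debug)]
struct VReg(u32);

/// One register-carried argument: the vreg it defines, and the fixed
/// physical register the ABI says it arrives in.
#[derive(Debug)]
struct ArgPair {
    vreg: VReg,
    preg: PReg,
}

/// The pseudo-instruction emits no machine code; its operands tell the
/// register allocator "these vregs are defined here, each pinned to its
/// ABI register at this one program point".
#[derive(Debug)]
enum Inst {
    /// Defs all register arguments at the top of the function body.
    Args { args: Vec<ArgPair> },
}

/// Build the single `args` pseudo-inst from the ABI's (vreg, preg) pairs,
/// replacing the old one-move-per-argument prologue.
fn gen_arg_setup(abi_regs: &[(VReg, PReg)]) -> Inst {
    Inst::Args {
        args: abi_regs
            .iter()
            .map(|&(vreg, preg)| ArgPair { vreg, preg })
            .collect(),
    }
}

fn main() {
    // e.g. the two arguments of %add_from_mem_u32_1 below:
    let args = gen_arg_setup(&[
        (VReg(0), PReg(7)), // v0 arrives in %rdi
        (VReg(1), PReg(6)), // v1 arrives in %rsi
    ]);
    println!("{:?}", args);
}
```

Because the constraint is attached to the operand rather than to a pinned vreg, the allocator sees ordinary defs and is free to move or spill each value immediately afterward, instead of having to reverse-engineer a prologue of moves.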
test compile precise-output
target x86_64

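;; 32-bit adds: the single-use load should merge into the add's memory
;; operand (`addl ... 0(%rdi) ...`), whichever operand order the
;; commutative `iadd` uses.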
function %add_from_mem_u32_1(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
  v2 = load.i32 v0
  v3 = iadd.i32 v2, v1
  return v3
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; addl %esi, 0(%rdi), %esi
; movq %rsi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

function %add_from_mem_u32_2(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
  v2 = load.i32 v0
  v3 = iadd.i32 v1, v2
  return v3
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; addl %esi, 0(%rdi), %esi
; movq %rsi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

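;; The same merge at 64 bits: the load folds into an `addq` memory operand.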
function %add_from_mem_u64_1(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
  v2 = load.i64 v0
  v3 = iadd.i64 v2, v1
  return v3
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; addq %rsi, 0(%rdi), %rsi
; movq %rsi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

function %add_from_mem_u64_2(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
  v2 = load.i64 v0
  v3 = iadd.i64 v1, v2
  return v3
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; addq %rsi, 0(%rdi), %rsi
; movq %rsi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

;; test narrow loads: 8-bit load should not merge because the `addl` is 32 bits
;; and would load 32 bits from memory, which may go beyond the end of the heap.
function %add_from_mem_not_narrow(i64, i8) -> i8 {
block0(v0: i64, v1: i8):
  v2 = load.i8 v0
  v3 = iadd.i8 v2, v1
  return v3
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; movzbq 0(%rdi), %rax
; addl %eax, %esi, %eax
; movq %rbp, %rsp
; popq %rbp
; ret

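;; Here the loaded value (v2) is needed again later: the add folds into the
;; final load's address mode (`0(%r9,%rdi,1)`), which reuses v2. With that
;; second use, the first load cannot merge into the add and must remain a
;; real instruction.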
function %no_merge_if_lookback_use(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
  v2 = load.i64 v0
  v3 = iadd.i64 v2, v0
  store.i64 v3, v1
  v4 = load.i64 v3
  return v4
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq 0(%rdi), %r9
; movq %r9, %r10
; addq %r10, %rdi, %r10
; movq %r10, 0(%rsi)
; movq 0(%r9,%rdi,1), %rax
; movq %rbp, %rsp
; popq %rbp
; ret

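;; The load merges into `scalar_to_vector`, producing a single `movss` from
;; memory, even though its result stays live across a block boundary.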
function %merge_scalar_to_vector(i64) -> i32x4 {
block0(v0: i64):
  v1 = load.i32 v0
  v2 = scalar_to_vector.i32x4 v1
  jump block1

block1:
  return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; movss 0(%rdi), %xmm0
; jmp label1
; block1:
; movq %rbp, %rsp
; popq %rbp
; ret

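;; The load also merges into a compare: the `icmp` becomes a single `cmpq`
;; with a memory operand.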
function %cmp_mem(i64) -> i64 {
block0(v0: i64):
  v1 = load.i64 v0
  v2 = icmp eq v0, v1
  v3 = bint.i64 v2
  return v3
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; cmpq 0(%rdi), %rdi
; setz %al
; andq %rax, $1, %rax
; movq %rbp, %rsp
; popq %rbp
; ret