wasmtime/cranelift/filetests/filetests/isa/x64/load-op.clif
Commit 2986f6b0ff by Chris Fallin: ABI: implement register arguments with constraints. (#4858)
* ABI: implement register arguments with constraints.

Currently, Cranelift's ABI code emits a sequence of moves from physical
registers into vregs at the top of the function body, one for every
register-carried argument.

For a number of reasons, we want to move to operand constraints instead,
and remove the use of explicitly-named "pinned vregs"; this allows for
better regalloc in theory, as it removes the need to "reverse-engineer"
the sequence of moves.

This PR alters the ABI code so that it generates a single "args"
pseudo-instruction as the first instruction in the function body. This
pseudo-inst defs all register arguments, and constrains them to the
appropriate registers at the def-point. Subsequently the regalloc can
move them wherever it needs to.

Some care was taken not to have this pseudo-inst show up in
post-regalloc disassemblies, but the change did cause a general regalloc
"shift" in many tests, so the precise-output updates are a bit noisy.
Sorry about that!

A subsequent PR will handle the other half of the ABI code, namely, the
callsite case, with a similar preg-to-constraint conversion.

* Update based on review feedback.

* Review feedback.
2022-09-08 18:03:14 -07:00
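
As a rough illustration of the scheme described above, here is a minimal, self-contained Rust sketch. The names in it (ArgsPseudoInst, ArgDef, VReg, PReg, for_register_args) are hypothetical and are not Cranelift's actual types or API; the real code expresses the same idea by giving the register allocator (regalloc2) fixed-register constraints on the defs of the single "args" pseudo-instruction.

    // A minimal, hypothetical sketch of the "args" pseudo-instruction idea.
    // These are NOT Cranelift's real types; they only show the shape of the data.

    #[derive(Clone, Copy, Debug)]
    struct PReg(u8); // a physical register, identified here by an arbitrary index

    #[derive(Clone, Copy, Debug)]
    struct VReg(u32); // a virtual register chosen during lowering

    // One operand of the pseudo-inst: it *defines* `vreg` and constrains that
    // def to the fixed physical register `preg`, instead of emitting a move.
    #[derive(Clone, Copy, Debug)]
    struct ArgDef {
        vreg: VReg,
        preg: PReg,
    }

    // The single pseudo-instruction placed at the top of the function body.
    // It has no machine encoding: it only carries def constraints to the
    // register allocator and does not appear in post-regalloc disassembly.
    #[derive(Debug)]
    struct ArgsPseudoInst {
        defs: Vec<ArgDef>,
    }

    impl ArgsPseudoInst {
        // Build one `args` inst covering every register-carried argument,
        // replacing the old one-move-per-argument prologue sequence.
        fn for_register_args(args: &[(VReg, PReg)]) -> Self {
            ArgsPseudoInst {
                defs: args.iter().map(|&(vreg, preg)| ArgDef { vreg, preg }).collect(),
            }
        }
    }

    fn main() {
        // Two integer arguments lowered to v0 and v1; pretend index 0 names rdi
        // and index 1 names rsi for the purposes of this sketch.
        let args = ArgsPseudoInst::for_register_args(&[(VReg(0), PReg(0)), (VReg(1), PReg(1))]);
        println!("{:?}", args);
    }

The point of this shape is that the allocator sees one def per argument, each with a fixed-register constraint at the def point, and is then free to place the value wherever its uses want it, rather than having to reverse-engineer a prologue full of moves out of pinned vregs.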

test compile precise-output
target x86_64
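
;; Tests for merging loads into the instructions that use them (as x64 memory
;; operands), and for cases where that merge must not happen.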
function %add_from_mem_u32_1(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = load.i32 v0
v3 = iadd.i32 v2, v1
return v3
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; addl %esi, 0(%rdi), %esi
; movq %rsi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

function %add_from_mem_u32_2(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = load.i32 v0
v3 = iadd.i32 v1, v2
return v3
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; addl %esi, 0(%rdi), %esi
; movq %rsi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
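
;; Same as the 32-bit cases above, but with i64 loads merging into `addq`.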
function %add_from_mem_u64_1(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = load.i64 v0
v3 = iadd.i64 v2, v1
return v3
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; addq %rsi, 0(%rdi), %rsi
; movq %rsi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

function %add_from_mem_u64_2(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = load.i64 v0
v3 = iadd.i64 v1, v2
return v3
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; addq %rsi, 0(%rdi), %rsi
; movq %rsi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

;; test narrow loads: 8-bit load should not merge because the `addl` is 32 bits
;; and would load 32 bits from memory, which may go beyond the end of the heap.
function %add_from_mem_not_narrow(i64, i8) -> i8 {
block0(v0: i64, v1: i8):
v2 = load.i8 v0
v3 = iadd.i8 v2, v1
return v3
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movzbq 0(%rdi), %rax
; addl %eax, %esi, %eax
; movq %rbp, %rsp
; popq %rbp
; ret

function %no_merge_if_lookback_use(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = load.i64 v0
v3 = iadd.i64 v2, v0
store.i64 v3, v1
v4 = load.i64 v3
return v4
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq 0(%rdi), %r9
; movq %r9, %r10
; addq %r10, %rdi, %r10
; movq %r10, 0(%rsi)
; movq 0(%r9,%rdi,1), %rax
; movq %rbp, %rsp
; popq %rbp
; ret
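
;; The 32-bit load merges into `scalar_to_vector`, giving a single `movss`
;; from memory.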
function %merge_scalar_to_vector(i64) -> i32x4 {
block0(v0: i64):
v1 = load.i32 v0
v2 = scalar_to_vector.i32x4 v1
jump block1
block1:
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movss 0(%rdi), %xmm0
; jmp label1
; block1:
; movq %rbp, %rsp
; popq %rbp
; ret
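
;; The load merges into the `icmp` as the memory operand of `cmpq`.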
function %cmp_mem(i64) -> i64 {
block0(v0: i64):
v1 = load.i64 v0
v2 = icmp eq v0, v1
v3 = bint.i64 v2
return v3
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; cmpq 0(%rdi), %rdi
; setz %al
; andq %rax, $1, %rax
; movq %rbp, %rsp
; popq %rbp
; ret