* ABI: implement register arguments with constraints.

  Currently, Cranelift's ABI code emits a sequence of moves from physical registers into vregs at the top of the function body, one for every register-carried argument. For a number of reasons, we want to move to operand constraints instead, and remove the use of explicitly-named "pinned vregs"; in theory this allows for better regalloc, as it removes the need to "reverse-engineer" the sequence of moves.

  This PR alters the ABI code so that it generates a single "args" pseudo-instruction as the first instruction in the function body. This pseudo-inst defs all register arguments and constrains them to the appropriate registers at the def-point. Subsequently, the regalloc can move them wherever it needs to. Some care was taken not to have this pseudo-inst show up in post-regalloc disassemblies, but the change did cause a general regalloc "shift" in many tests, so the precise-output updates are a bit noisy. Sorry about that!

  A subsequent PR will handle the other half of the ABI code, namely the callsite case, with a similar preg-to-constraint conversion.

* Update based on review feedback.

* Review feedback.
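To make the two lowering styles concrete, here is a minimal, self-contained sketch. Every name in it (`PReg`, `VReg`, `Inst`, `OperandConstraint`, the lowering helpers) is a hypothetical stand-in for illustration, not Cranelift's or regalloc2's actual types:

```rust
// Toy model of the two ways to hand register-carried arguments to regalloc.

#[derive(Debug, Clone, Copy)]
struct PReg(u8); // a physical register, e.g. x0/x1 on aarch64
#[derive(Debug, Clone, Copy)]
struct VReg(u32); // a virtual register

#[derive(Debug)]
enum OperandConstraint {
    FixedReg(PReg), // the value must live in this physical register
    AnyReg,         // regalloc may choose any register
}

#[derive(Debug)]
enum Inst {
    // Old style: one explicit move per argument, reading a "pinned vreg"
    // naming the physical register; the allocator must reverse-engineer
    // this sequence of moves.
    MovFromPreg { dst: VReg, src: PReg },
    // New style: a single pseudo-inst at the top of the function body that
    // defs every register argument, each constrained to its ABI register
    // at the def-point; it emits no machine code after regalloc.
    Args { defs: Vec<(VReg, OperandConstraint)> },
}

/// Old lowering: a prologue of N moves, one per register argument.
fn lower_args_with_moves(abi_regs: &[PReg]) -> Vec<Inst> {
    abi_regs
        .iter()
        .enumerate()
        .map(|(i, &preg)| Inst::MovFromPreg { dst: VReg(i as u32), src: preg })
        .collect()
}

/// New lowering: one "args" pseudo-inst carrying def-point constraints;
/// the allocator is then free to place or move the values as it likes.
fn lower_args_with_constraints(abi_regs: &[PReg]) -> Vec<Inst> {
    let defs = abi_regs
        .iter()
        .enumerate()
        .map(|(i, &preg)| (VReg(i as u32), OperandConstraint::FixedReg(preg)))
        .collect();
    vec![Inst::Args { defs }]
}

fn main() {
    // Two integer arguments arriving in the first two argument registers.
    let abi_regs = [PReg(0), PReg(1)];
    println!("old: {:?}", lower_args_with_moves(&abi_regs));
    println!("new: {:?}", lower_args_with_constraints(&abi_regs));
}
```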
test compile precise-output
set unwind_info=false
target aarch64

function %average_rounding_i8x8(i8x8, i8x8) -> i8x8 {
block0(v0: i8x8, v1: i8x8):
  v2 = avg_round v0, v1
  return v2
}

; block0:
;   urhadd v0.8b, v0.8b, v1.8b
;   ret

function %average_rounding_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
  v2 = avg_round v0, v1
  return v2
}

; block0:
;   urhadd v0.16b, v0.16b, v1.16b
;   ret

function %average_rounding_i16x4(i16x4, i16x4) -> i16x4 {
block0(v0: i16x4, v1: i16x4):
  v2 = avg_round v0, v1
  return v2
}

; block0:
;   urhadd v0.4h, v0.4h, v1.4h
;   ret

function %average_rounding_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
  v2 = avg_round v0, v1
  return v2
}

; block0:
;   urhadd v0.8h, v0.8h, v1.8h
;   ret

function %average_rounding_i32x2(i32x2, i32x2) -> i32x2 {
block0(v0: i32x2, v1: i32x2):
  v2 = avg_round v0, v1
  return v2
}

; block0:
;   urhadd v0.2s, v0.2s, v1.2s
;   ret

function %average_rounding_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
  v2 = avg_round v0, v1
  return v2
}

; block0:
;   urhadd v0.4s, v0.4s, v1.4s
;   ret

function %average_rounding_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
  v2 = avg_round v0, v1
  return v2
}

; block0:
;   movz x5, #1
;   dup v5.2d, x5
;   orr v16.16b, v0.16b, v1.16b
;   and v18.16b, v16.16b, v5.16b
;   ushr v20.2d, v0.2d, #1
;   ushr v22.2d, v1.2d, #1
;   add v24.2d, v20.2d, v22.2d
;   add v0.2d, v18.2d, v24.2d
;   ret
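A side note on the last expected output above: AArch64's `urhadd` supports 8-, 16-, and 32-bit lanes but not 64-bit ones, so for `i64x2` the backend expands `avg_round` per lane using the identity `(a + b + 1) >> 1 == ((a | b) & 1) + (a >> 1) + (b >> 1)`, which avoids a 65-bit intermediate sum: the `movz`/`dup` pair splats the constant 1, `orr`+`and` compute `(a | b) & 1`, the two `ushr`s compute the shifted halves, and the `add`s combine them. A quick plain-Rust check of the identity (illustrative only, not part of the PR):

```rust
// Rounding average of two u64s without overflowing 64 bits:
//   (a + b + 1) >> 1  ==  ((a | b) & 1) + (a >> 1) + (b >> 1)
fn avg_round_u64(a: u64, b: u64) -> u64 {
    ((a | b) & 1) + (a >> 1) + (b >> 1)
}

fn main() {
    for &(a, b) in &[(0u64, 0u64), (1, 2), (7, 8), (u64::MAX, 0), (u64::MAX, u64::MAX)] {
        // Reference result computed with 128-bit arithmetic.
        let expect = ((a as u128 + b as u128 + 1) >> 1) as u64;
        assert_eq!(avg_round_u64(a, b), expect);
    }
    println!("identity holds on all sampled inputs");
}
```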