* ABI: implement register arguments with constraints.

  Currently, Cranelift's ABI code emits a sequence of moves from physical registers into vregs at the top of the function body, one for every register-carried argument. For a number of reasons, we want to move to operand constraints instead, and remove the use of explicitly-named "pinned vregs"; this allows for better regalloc in theory, as it removes the need to "reverse-engineer" the sequence of moves.

  This PR alters the ABI code so that it generates a single "args" pseudo-instruction as the first instruction in the function body. This pseudo-inst defs all register arguments, and constrains them to the appropriate registers at the def-point. Subsequently the regalloc can move them wherever it needs to.

  Some care was taken not to have this pseudo-inst show up in post-regalloc disassemblies, but the change did cause a general regalloc "shift" in many tests, so the precise-output updates are a bit noisy. Sorry about that!

  A subsequent PR will handle the other half of the ABI code, namely the callsite case, with a similar preg-to-constraint conversion.

* Update based on review feedback.

* Review feedback.
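To illustrate the shape of the change, here is a minimal, self-contained sketch of the two lowering strategies: per-argument moves from pinned vregs versus a single `args` pseudo-instruction whose defs carry fixed-register constraints. All types and names below are hypothetical stand-ins for illustration, not the actual Cranelift or regalloc2 API.

```rust
// Sketch only: hypothetical types, not the real Cranelift/regalloc2 API.

#[derive(Clone, Copy, Debug)]
struct VReg(u32); // virtual register
#[derive(Clone, Copy, Debug)]
struct PReg(u8); // physical (machine) register

/// A def-side operand constraint as a register allocator would see it.
#[derive(Clone, Copy, Debug)]
enum Constraint {
    /// The value must live in this exact physical register at the def point.
    FixedReg(PReg),
}

/// Old style: one explicit move per register-carried argument, reading from
/// an explicitly-named pinned vreg for each physical argument register.
fn lower_args_as_moves(args: &[(VReg, PReg)]) -> Vec<String> {
    args.iter()
        .map(|&(vreg, preg)| format!("mov v{} <- p{}", vreg.0, preg.0))
        .collect()
}

/// New style: a single `args` pseudo-inst at the top of the function body.
/// It defs every argument vreg and constrains each def to its ABI register,
/// so the allocator sees the binding directly instead of having to
/// reverse-engineer a move sequence.
fn lower_args_as_pseudo_inst(args: &[(VReg, PReg)]) -> Vec<(VReg, Constraint)> {
    args.iter()
        .map(|&(vreg, preg)| (vreg, Constraint::FixedReg(preg)))
        .collect()
}

fn main() {
    // e.g. an AArch64 function taking two i64 arguments in x0 and x1.
    let args = [(VReg(0), PReg(0)), (VReg(1), PReg(1))];
    println!("moves:     {:?}", lower_args_as_moves(&args));
    println!("args defs: {:?}", lower_args_as_pseudo_inst(&args));
}
```

Note that in the post-regalloc disassemblies below the pseudo-inst does not appear: it emits no machine code and only tells the allocator where each argument lives on entry.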
test compile precise-output
set unwind_info=false
target aarch64

function %f1(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
  v2 = iadd.i64 v0, v1
  return v2
}

; block0:
; add x0, x0, x1
; ret

function %f2(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
  v2 = isub.i64 v0, v1
  return v2
}

; block0:
; sub x0, x0, x1
; ret

function %f3(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
  v2 = imul.i64 v0, v1
  return v2
}

; block0:
; madd x0, x0, x1, xzr
; ret

function %f4(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
  v2 = umulhi.i64 v0, v1
  return v2
}

; block0:
; umulh x0, x0, x1
; ret

function %f5(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
  v2 = smulhi.i64 v0, v1
  return v2
}

; block0:
; smulh x0, x0, x1
; ret

function %f6(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
  v2 = sdiv.i64 v0, v1
  return v2
}

; block0:
; cbnz x1, 8 ; udf
; adds xzr, x1, #1
; ccmp x0, #1, #nzcv, eq
; b.vc 8 ; udf
; sdiv x0, x0, x1
; ret

function %f7(i64) -> i64 {
block0(v0: i64):
  v1 = iconst.i64 2
  v2 = sdiv.i64 v0, v1
  return v2
}

; block0:
; movz w3, #2
; sdiv x0, x0, x3
; ret

function %f8(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
  v2 = udiv.i64 v0, v1
  return v2
}

; block0:
; cbnz x1, 8 ; udf
; udiv x0, x0, x1
; ret

function %f9(i64) -> i64 {
block0(v0: i64):
  v1 = iconst.i64 2
  v2 = udiv.i64 v0, v1
  return v2
}

; block0:
; orr x3, xzr, #2
; udiv x0, x0, x3
; ret

function %f10(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
  v2 = srem.i64 v0, v1
  return v2
}

; block0:
; cbnz x1, 8 ; udf
; sdiv x5, x0, x1
; msub x0, x5, x1, x0
; ret

function %f11(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
  v2 = urem.i64 v0, v1
  return v2
}

; block0:
; cbnz x1, 8 ; udf
; udiv x5, x0, x1
; msub x0, x5, x1, x0
; ret

function %f12(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
  v2 = sdiv.i32 v0, v1
  return v2
}

; block0:
; sxtw x4, w0
; sxtw x6, w1
; cbnz x6, 8 ; udf
; adds wzr, w6, #1
; ccmp w4, #1, #nzcv, eq
; b.vc 8 ; udf
; sdiv x0, x4, x6
; ret

function %f13(i32) -> i32 {
block0(v0: i32):
  v1 = iconst.i32 2
  v2 = sdiv.i32 v0, v1
  return v2
}

; block0:
; sxtw x3, w0
; movz w5, #2
; sdiv x0, x3, x5
; ret

function %f14(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
  v2 = udiv.i32 v0, v1
  return v2
}

; block0:
; mov w4, w0
; mov w6, w1
; cbnz x6, 8 ; udf
; udiv x0, x4, x6
; ret

function %f15(i32) -> i32 {
block0(v0: i32):
  v1 = iconst.i32 2
  v2 = udiv.i32 v0, v1
  return v2
}

; block0:
; mov w3, w0
; orr w5, wzr, #2
; udiv x0, x3, x5
; ret

function %f16(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
  v2 = srem.i32 v0, v1
  return v2
}

; block0:
; sxtw x4, w0
; sxtw x6, w1
; cbnz x6, 8 ; udf
; sdiv x9, x4, x6
; msub x0, x9, x6, x4
; ret

function %f17(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
  v2 = urem.i32 v0, v1
  return v2
}

; block0:
; mov w4, w0
; mov w6, w1
; cbnz x6, 8 ; udf
; udiv x9, x4, x6
; msub x0, x9, x6, x4
; ret

function %f18(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
  v2 = band.i64 v0, v1
  return v2
}

; block0:
; and x0, x0, x1
; ret

function %f19(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
  v2 = bor.i64 v0, v1
  return v2
}

; block0:
; orr x0, x0, x1
; ret

function %f20(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
  v2 = bxor.i64 v0, v1
  return v2
}

; block0:
; eor x0, x0, x1
; ret

function %f21(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
  v2 = band_not.i64 v0, v1
  return v2
}

; block0:
; bic x0, x0, x1
; ret

function %f22(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
  v2 = bor_not.i64 v0, v1
  return v2
}

; block0:
; orn x0, x0, x1
; ret

function %f23(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
  v2 = bxor_not.i64 v0, v1
  return v2
}

; block0:
; eon x0, x0, x1
; ret

function %f24(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
  v2 = bnot.i64 v0
  return v2
}

; block0:
; orn x0, xzr, x0
; ret

function %f25(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
  v2 = iconst.i32 53
  v3 = ishl.i32 v0, v2
  v4 = isub.i32 v1, v3
  return v4
}

; block0:
; sub w0, w1, w0, LSL 21
; ret

function %f26(i32) -> i32 {
block0(v0: i32):
  v1 = iconst.i32 -1
  v2 = iadd.i32 v0, v1
  return v2
}

; block0:
; sub w0, w0, #1
; ret

function %f27(i32) -> i32 {
block0(v0: i32):
  v1 = iconst.i32 -1
  v2 = isub.i32 v0, v1
  return v2
}

; block0:
; add w0, w0, #1
; ret

function %f28(i64) -> i64 {
block0(v0: i64):
  v1 = iconst.i64 -1
  v2 = isub.i64 v0, v1
  return v2
}

; block0:
; add x0, x0, #1
; ret

function %f29(i64) -> i64 {
block0(v0: i64):
  v1 = iconst.i64 1
  v2 = ineg v1
  return v2
}

; block0:
; movz x3, #1
; sub x0, xzr, x3
; ret

function %f30(i8x16) -> i8x16 {
block0(v0: i8x16):
  v1 = iconst.i64 1
  v2 = ushr.i8x16 v0, v1
  return v2
}

; block0:
; movz x3, #1
; and w5, w3, #7
; sub x7, xzr, x5
; dup v17.16b, w7
; ushl v0.16b, v0.16b, v17.16b
; ret

function %add_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
  v2 = iadd v0, v1
  return v2
}

; block0:
; adds x0, x0, x2
; adc x1, x1, x3
; ret

function %sub_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
  v2 = isub v0, v1
  return v2
}

; block0:
; subs x0, x0, x2
; sbc x1, x1, x3
; ret

function %mul_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
  v2 = imul v0, v1
  return v2
}

; block0:
; umulh x7, x0, x2
; madd x9, x0, x3, x7
; madd x1, x1, x2, x9
; madd x0, x0, x2, xzr
; ret

function %add_mul_1(i32, i32, i32) -> i32 {
block0(v0: i32, v1: i32, v2: i32):
  v3 = imul v1, v2
  v4 = iadd v0, v3
  return v4
}

; block0:
; madd w0, w1, w2, w0
; ret

function %add_mul_2(i32, i32, i32) -> i32 {
block0(v0: i32, v1: i32, v2: i32):
  v3 = imul v1, v2
  v4 = iadd v3, v0
  return v4
}

; block0:
; madd w0, w1, w2, w0
; ret

function %msub_i32(i32, i32, i32) -> i32 {
block0(v0: i32, v1: i32, v2: i32):
  v3 = imul v1, v2
  v4 = isub v0, v3
  return v4
}

; block0:
; msub w0, w1, w2, w0
; ret

function %msub_i64(i64, i64, i64) -> i64 {
block0(v0: i64, v1: i64, v2: i64):
  v3 = imul v1, v2
  v4 = isub v0, v3
  return v4
}

; block0:
; msub x0, x1, x2, x0
; ret

function %imul_sub_i32(i32, i32, i32) -> i32 {
block0(v0: i32, v1: i32, v2: i32):
  v3 = imul v1, v2
  v4 = isub v3, v0
  return v4
}

; block0:
; madd w6, w1, w2, wzr
; sub w0, w6, w0
; ret

function %imul_sub_i64(i64, i64, i64) -> i64 {
block0(v0: i64, v1: i64, v2: i64):
  v3 = imul v1, v2
  v4 = isub v3, v0
  return v4
}

; block0:
; madd x6, x1, x2, xzr
; sub x0, x6, x0
; ret

function %srem_const (i64) -> i64 {
block0(v0: i64):
  v1 = iconst.i64 2
  v2 = srem.i64 v0, v1
  return v2
}

; block0:
; movz w3, #2
; sdiv x5, x0, x3
; msub x0, x5, x3, x0
; ret

function %urem_const (i64) -> i64 {
block0(v0: i64):
  v1 = iconst.i64 2
  v2 = urem.i64 v0, v1
  return v2
}

; block0:
; orr x3, xzr, #2
; udiv x5, x0, x3
; msub x0, x5, x3, x0
; ret

function %sdiv_minus_one(i64) -> i64 {
block0(v0: i64):
  v1 = iconst.i64 -1
  v2 = sdiv.i64 v0, v1
  return v2
}

; block0:
; movn x3, #0
; adds xzr, x3, #1
; ccmp x0, #1, #nzcv, eq
; b.vc 8 ; udf
; sdiv x0, x0, x3
; ret