wasmtime/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif
Chris Fallin, commit 2986f6b0ff (2022-09-08 18:03:14 -07:00): ABI: implement register arguments with constraints. (#4858)
* ABI: implement register arguments with constraints.

Currently, Cranelift's ABI code emits a sequence of moves from physical
registers into vregs at the top of the function body, one for every
register-carried argument.

We want to move to operand constraints instead and remove the use of
explicitly named "pinned vregs": this allows for better regalloc in
theory, because the allocator no longer has to "reverse-engineer" the
sequence of argument moves.

This PR alters the ABI code so that it generates a single "args"
pseudo-instruction as the first instruction in the function body. This
pseudo-inst defs all register arguments, and constrains them to the
appropriate registers at the def-point. Subsequently the regalloc can
move them wherever it needs to.
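
As a rough illustration of the new shape, here is a minimal Rust
sketch. The names (ArgPair, ArgsPseudoInst, OperandCollector,
def_fixed) are hypothetical stand-ins rather than Cranelift's actual
types; the point is only that a single pseudo-inst can def every
register-carried argument and pin each def to its ABI register through
a constraint, instead of emitting one preg-to-vreg move per argument.

// Hypothetical, simplified sketch -- not Cranelift's actual types or API.

/// One register-carried argument: the vreg the function body will use,
/// and the physical register the ABI places the value in on entry.
struct ArgPair {
    vreg: u32, // virtual register number
    preg: u8,  // physical register number (e.g. x0..x7 on aarch64)
}

/// Pseudo-instruction emitted as the first instruction of the body.
/// It produces no machine code; it exists only to carry def
/// constraints to the register allocator.
struct ArgsPseudoInst {
    args: Vec<ArgPair>,
}

/// Stand-in for the allocator's operand-collection interface.
trait OperandCollector {
    /// Record that `vreg` is defined here and must be in `preg` at
    /// this def-point (a fixed-register def constraint).
    fn def_fixed(&mut self, vreg: u32, preg: u8);
}

impl ArgsPseudoInst {
    /// Instead of emitting one `mov vreg, preg` per argument, report
    /// each argument as a constrained def; past this point the
    /// allocator may place the value wherever it likes.
    fn collect_operands(&self, collector: &mut dyn OperandCollector) {
        for arg in &self.args {
            collector.def_fixed(arg.vreg, arg.preg);
        }
    }
}

/// Trivial collector that just prints the constraints, for demonstration.
struct PrintCollector;

impl OperandCollector for PrintCollector {
    fn def_fixed(&mut self, vreg: u32, preg: u8) {
        println!("def v{vreg} constrained to physical reg {preg}");
    }
}

fn main() {
    // E.g. a function with two i64 arguments passed in x0 and x1.
    let args = ArgsPseudoInst {
        args: vec![ArgPair { vreg: 0, preg: 0 }, ArgPair { vreg: 1, preg: 1 }],
    };
    args.collect_operands(&mut PrintCollector);
}

With the arguments expressed as constrained defs, the allocator is free
to leave a value in its incoming register (as in most of the
expectations below, where arguments stay in x0/x1/...) or move it, with
no leftover move sequence to clean up.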

Some care was taken not to have this pseudo-inst show up in
post-regalloc disassemblies, but the change did cause a general regalloc
"shift" in many tests, so the precise-output updates are a bit noisy.
Sorry about that!

A subsequent PR will handle the other half of the ABI code, namely, the
callsite case, with a similar preg-to-constraint conversion.

* Update based on review feedback.

* Review feedback.

test compile precise-output
set unwind_info=false
target aarch64
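; `test compile precise-output` compiles every function below and checks
; the generated code against the `;` expectation comments exactly;
; `set unwind_info=false` suppresses unwind-info generation so it does
; not appear in that output, and `target aarch64` selects the ISA under
; test.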
function %f1(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iadd.i64 v0, v1
return v2
}
; block0:
; add x0, x0, x1
; ret
function %f2(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = isub.i64 v0, v1
return v2
}
; block0:
; sub x0, x0, x1
; ret
function %f3(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = imul.i64 v0, v1
return v2
}
; block0:
; madd x0, x0, x1, xzr
; ret
function %f4(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = umulhi.i64 v0, v1
return v2
}
; block0:
; umulh x0, x0, x1
; ret
function %f5(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = smulhi.i64 v0, v1
return v2
}
; block0:
; smulh x0, x0, x1
; ret
function %f6(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = sdiv.i64 v0, v1
return v2
}
; block0:
; cbnz x1, 8 ; udf
; adds xzr, x1, #1
; ccmp x0, #1, #nzcv, eq
; b.vc 8 ; udf
; sdiv x0, x0, x1
; ret
function %f7(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 2
v2 = sdiv.i64 v0, v1
return v2
}
; block0:
; movz w3, #2
; sdiv x0, x0, x3
; ret
function %f8(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = udiv.i64 v0, v1
return v2
}
; block0:
; cbnz x1, 8 ; udf
; udiv x0, x0, x1
; ret
function %f9(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 2
v2 = udiv.i64 v0, v1
return v2
}
; block0:
; orr x3, xzr, #2
; udiv x0, x0, x3
; ret
function %f10(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = srem.i64 v0, v1
return v2
}
; block0:
; cbnz x1, 8 ; udf
; sdiv x5, x0, x1
; msub x0, x5, x1, x0
; ret
function %f11(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = urem.i64 v0, v1
return v2
}
; block0:
; cbnz x1, 8 ; udf
; udiv x5, x0, x1
; msub x0, x5, x1, x0
; ret
function %f12(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = sdiv.i32 v0, v1
return v2
}
; block0:
; sxtw x4, w0
; sxtw x6, w1
; cbnz x6, 8 ; udf
; adds wzr, w6, #1
; ccmp w4, #1, #nzcv, eq
; b.vc 8 ; udf
; sdiv x0, x4, x6
; ret
function %f13(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 2
v2 = sdiv.i32 v0, v1
return v2
}
; block0:
; sxtw x3, w0
; movz w5, #2
; sdiv x0, x3, x5
; ret
function %f14(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = udiv.i32 v0, v1
return v2
}
; block0:
; mov w4, w0
; mov w6, w1
; cbnz x6, 8 ; udf
; udiv x0, x4, x6
; ret
function %f15(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 2
v2 = udiv.i32 v0, v1
return v2
}
; block0:
; mov w3, w0
; orr w5, wzr, #2
; udiv x0, x3, x5
; ret
function %f16(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = srem.i32 v0, v1
return v2
}
; block0:
; sxtw x4, w0
; sxtw x6, w1
; cbnz x6, 8 ; udf
; sdiv x9, x4, x6
; msub x0, x9, x6, x4
; ret
function %f17(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = urem.i32 v0, v1
return v2
}
; block0:
; mov w4, w0
; mov w6, w1
; cbnz x6, 8 ; udf
; udiv x9, x4, x6
; msub x0, x9, x6, x4
; ret
function %f18(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = band.i64 v0, v1
return v2
}
; block0:
; and x0, x0, x1
; ret
function %f19(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bor.i64 v0, v1
return v2
}
; block0:
; orr x0, x0, x1
; ret
function %f20(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bxor.i64 v0, v1
return v2
}
; block0:
; eor x0, x0, x1
; ret
function %f21(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = band_not.i64 v0, v1
return v2
}
; block0:
; bic x0, x0, x1
; ret
function %f22(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bor_not.i64 v0, v1
return v2
}
; block0:
; orn x0, x0, x1
; ret
function %f23(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bxor_not.i64 v0, v1
return v2
}
; block0:
; eon x0, x0, x1
; ret
function %f24(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bnot.i64 v0
return v2
}
; block0:
; orn x0, xzr, x0
; ret
function %f25(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = iconst.i32 53
v3 = ishl.i32 v0, v2
v4 = isub.i32 v1, v3
return v4
}
; block0:
; sub w0, w1, w0, LSL 21
; ret
function %f26(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 -1
v2 = iadd.i32 v0, v1
return v2
}
; block0:
; sub w0, w0, #1
; ret
function %f27(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 -1
v2 = isub.i32 v0, v1
return v2
}
; block0:
; add w0, w0, #1
; ret
function %f28(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 -1
v2 = isub.i64 v0, v1
return v2
}
; block0:
; add x0, x0, #1
; ret
function %f29(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 1
v2 = ineg v1
return v2
}
; block0:
; movz x3, #1
; sub x0, xzr, x3
; ret
function %f30(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iconst.i64 1
v2 = ushr.i8x16 v0, v1
return v2
}
; block0:
; movz x3, #1
; and w5, w3, #7
; sub x7, xzr, x5
; dup v17.16b, w7
; ushl v0.16b, v0.16b, v17.16b
; ret
function %add_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = iadd v0, v1
return v2
}
; block0:
; adds x0, x0, x2
; adc x1, x1, x3
; ret
function %sub_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = isub v0, v1
return v2
}
; block0:
; subs x0, x0, x2
; sbc x1, x1, x3
; ret
function %mul_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = imul v0, v1
return v2
}
; block0:
; umulh x7, x0, x2
; madd x9, x0, x3, x7
; madd x1, x1, x2, x9
; madd x0, x0, x2, xzr
; ret
function %add_mul_1(i32, i32, i32) -> i32 {
block0(v0: i32, v1: i32, v2: i32):
v3 = imul v1, v2
v4 = iadd v0, v3
return v4
}
; block0:
; madd w0, w1, w2, w0
; ret
function %add_mul_2(i32, i32, i32) -> i32 {
block0(v0: i32, v1: i32, v2: i32):
v3 = imul v1, v2
v4 = iadd v3, v0
return v4
}
; block0:
; madd w0, w1, w2, w0
; ret
function %msub_i32(i32, i32, i32) -> i32 {
block0(v0: i32, v1: i32, v2: i32):
v3 = imul v1, v2
v4 = isub v0, v3
return v4
}
; block0:
; msub w0, w1, w2, w0
; ret
function %msub_i64(i64, i64, i64) -> i64 {
block0(v0: i64, v1: i64, v2: i64):
v3 = imul v1, v2
v4 = isub v0, v3
return v4
}
; block0:
; msub x0, x1, x2, x0
; ret
function %imul_sub_i32(i32, i32, i32) -> i32 {
block0(v0: i32, v1: i32, v2: i32):
v3 = imul v1, v2
v4 = isub v3, v0
return v4
}
; block0:
; madd w6, w1, w2, wzr
; sub w0, w6, w0
; ret
function %imul_sub_i64(i64, i64, i64) -> i64 {
block0(v0: i64, v1: i64, v2: i64):
v3 = imul v1, v2
v4 = isub v3, v0
return v4
}
; block0:
; madd x6, x1, x2, xzr
; sub x0, x6, x0
; ret
function %srem_const (i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 2
v2 = srem.i64 v0, v1
return v2
}
; block0:
; movz w3, #2
; sdiv x5, x0, x3
; msub x0, x5, x3, x0
; ret
function %urem_const (i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 2
v2 = urem.i64 v0, v1
return v2
}
; block0:
; orr x3, xzr, #2
; udiv x5, x0, x3
; msub x0, x5, x3, x0
; ret
function %sdiv_minus_one(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 -1
v2 = sdiv.i64 v0, v1
return v2
}
; block0:
; movn x3, #0
; adds xzr, x3, #1
; ccmp x0, #1, #nzcv, eq
; b.vc 8 ; udf
; sdiv x0, x0, x3
; ret