wasmtime/cranelift/filetests/filetests/isa/aarch64/floating-point.clif

Commit 2986f6b0ff by Chris Fallin, 2022-09-08: ABI: implement register arguments with constraints. (#4858)

Currently, Cranelift's ABI code emits a sequence of moves from physical
registers into virtual registers (vregs) at the top of the function body,
one for every register-carried argument.

We want to move to operand constraints instead, and remove the use of
explicitly named "pinned vregs". In theory this allows better regalloc,
because the allocator no longer has to "reverse-engineer" the sequence of
moves.
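
To make the contrast concrete, here is a minimal Rust sketch of the move-based strategy. The types and names (VReg, Inst::Mov, lower_args_with_moves) are hypothetical stand-ins for illustration, not Cranelift's actual internals.

// A minimal sketch, with hypothetical types, of the move-based strategy
// described above: one explicit move per register-carried argument,
// emitted at the top of the function body.

#[derive(Clone, Copy, Debug)]
struct VReg(u32); // a regalloc virtual register

#[derive(Debug)]
enum Inst {
    // A move from a "pinned" vreg (permanently identified with one
    // physical register) into the argument's ordinary vreg.
    Mov { dst: VReg, src: VReg },
}

// Takes one (ordinary vreg, pinned vreg) pair per register-carried argument.
fn lower_args_with_moves(args: &[(VReg, VReg)]) -> Vec<Inst> {
    args.iter()
        .map(|&(dst, pinned)| Inst::Mov { dst, src: pinned })
        .collect()
}

fn main() {
    // Two register arguments: the allocator must later recognize these
    // moves and coalesce them away, i.e. "reverse-engineer" the sequence.
    let prologue = lower_args_with_moves(&[(VReg(0), VReg(100)), (VReg(1), VReg(101))]);
    println!("{prologue:?}");
}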

This PR alters the ABI code so that it generates a single "args"
pseudo-instruction as the first instruction in the function body. This
pseudo-inst defs all register arguments, and constrains them to the
appropriate registers at the def-point. Subsequently the regalloc can
move them wherever it needs to.
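
As a companion to the sketch above, here is the same situation expressed with the constraint-based strategy; again, ArgPair, Inst::Args, and gen_args_pseudo_inst are hypothetical illustrations of the shape of the change, not Cranelift's or regalloc2's real API.

// A minimal sketch, with hypothetical types, of the constraint-based
// strategy: a single "args" pseudo-inst defs every register argument,
// each def fixed to its ABI-assigned physical register.

#[derive(Clone, Copy, Debug)]
struct PReg(u8); // a physical register, e.g. s0/s1 for float args on aarch64

#[derive(Clone, Copy, Debug)]
struct VReg(u32); // a regalloc virtual register

// One register argument: the vreg the function body will use, constrained
// to the ABI-assigned preg at the def-point of the pseudo-inst.
#[derive(Clone, Copy, Debug)]
struct ArgPair {
    vreg: VReg,
    preg: PReg,
}

#[derive(Debug)]
enum Inst {
    // Pseudo-inst: pure regalloc metadata that emits no machine code, so
    // it never shows up in a post-regalloc disassembly.
    Args(Vec<ArgPair>),
}

fn gen_args_pseudo_inst(abi_args: &[(VReg, PReg)]) -> Inst {
    Inst::Args(
        abi_args
            .iter()
            .map(|&(vreg, preg)| ArgPair { vreg, preg })
            .collect(),
    )
}

fn main() {
    // Two float arguments fixed to pregs 0 and 1 at the def-point; after
    // that point the allocator may place the values wherever it likes.
    let args_inst = gen_args_pseudo_inst(&[(VReg(0), PReg(0)), (VReg(1), PReg(1))]);
    println!("{args_inst:?}");
}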

Some care was taken not to have this pseudo-inst show up in
post-regalloc disassemblies, but the change did cause a general regalloc
"shift" in many tests, so the precise-output updates are a bit noisy.
Sorry about that!

A subsequent PR will handle the other half of the ABI code, namely, the
callsite case, with a similar preg-to-constraint conversion.

test compile precise-output
set unwind_info=false
target aarch64

function %f1(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fadd v0, v1
return v2
}
; block0:
; fadd s0, s0, s1
; ret

function %f2(f64, f64) -> f64 {
block0(v0: f64, v1: f64):
v2 = fadd v0, v1
return v2
}
; block0:
; fadd d0, d0, d1
; ret

function %f3(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fsub v0, v1
return v2
}
; block0:
; fsub s0, s0, s1
; ret

function %f4(f64, f64) -> f64 {
block0(v0: f64, v1: f64):
v2 = fsub v0, v1
return v2
}
; block0:
; fsub d0, d0, d1
; ret

function %f5(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fmul v0, v1
return v2
}
; block0:
; fmul s0, s0, s1
; ret

function %f6(f64, f64) -> f64 {
block0(v0: f64, v1: f64):
v2 = fmul v0, v1
return v2
}
; block0:
; fmul d0, d0, d1
; ret

function %f7(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fdiv v0, v1
return v2
}
; block0:
; fdiv s0, s0, s1
; ret

function %f8(f64, f64) -> f64 {
block0(v0: f64, v1: f64):
v2 = fdiv v0, v1
return v2
}
; block0:
; fdiv d0, d0, d1
; ret

function %f9(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fmin v0, v1
return v2
}
; block0:
; fmin s0, s0, s1
; ret

function %f10(f64, f64) -> f64 {
block0(v0: f64, v1: f64):
v2 = fmin v0, v1
return v2
}
; block0:
; fmin d0, d0, d1
; ret

function %f11(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fmax v0, v1
return v2
}
; block0:
; fmax s0, s0, s1
; ret

function %f12(f64, f64) -> f64 {
block0(v0: f64, v1: f64):
v2 = fmax v0, v1
return v2
}
; block0:
; fmax d0, d0, d1
; ret

function %f13(f32) -> f32 {
block0(v0: f32):
v1 = sqrt v0
return v1
}
; block0:
; fsqrt s0, s0
; ret

function %f15(f64) -> f64 {
block0(v0: f64):
v1 = sqrt v0
return v1
}
; block0:
; fsqrt d0, d0
; ret

function %f16(f32) -> f32 {
block0(v0: f32):
v1 = fabs v0
return v1
}
; block0:
; fabs s0, s0
; ret

function %f17(f64) -> f64 {
block0(v0: f64):
v1 = fabs v0
return v1
}
; block0:
; fabs d0, d0
; ret

function %f18(f32) -> f32 {
block0(v0: f32):
v1 = fneg v0
return v1
}
; block0:
; fneg s0, s0
; ret

function %f19(f64) -> f64 {
block0(v0: f64):
v1 = fneg v0
return v1
}
; block0:
; fneg d0, d0
; ret

function %f20(f32) -> f64 {
block0(v0: f32):
v1 = fpromote.f64 v0
return v1
}
; block0:
; fcvt d0, s0
; ret

function %f21(f64) -> f32 {
block0(v0: f64):
v1 = fdemote.f32 v0
return v1
}
; block0:
; fcvt s0, d0
; ret

function %f22(f32) -> f32 {
block0(v0: f32):
v1 = ceil v0
return v1
}
; block0:
; frintp s0, s0
; ret

function %f22(f64) -> f64 {
block0(v0: f64):
v1 = ceil v0
return v1
}
; block0:
; frintp d0, d0
; ret

function %f23(f32) -> f32 {
block0(v0: f32):
v1 = floor v0
return v1
}
; block0:
; frintm s0, s0
; ret

function %f24(f64) -> f64 {
block0(v0: f64):
v1 = floor v0
return v1
}
; block0:
; frintm d0, d0
; ret

function %f25(f32) -> f32 {
block0(v0: f32):
v1 = trunc v0
return v1
}
; block0:
; frintz s0, s0
; ret

function %f26(f64) -> f64 {
block0(v0: f64):
v1 = trunc v0
return v1
}
; block0:
; frintz d0, d0
; ret

function %f27(f32) -> f32 {
block0(v0: f32):
v1 = nearest v0
return v1
}
; block0:
; frintn s0, s0
; ret

function %f28(f64) -> f64 {
block0(v0: f64):
v1 = nearest v0
return v1
}
; block0:
; frintn d0, d0
; ret

function %f29(f32, f32, f32) -> f32 {
block0(v0: f32, v1: f32, v2: f32):
v3 = fma v0, v1, v2
return v3
}
; block0:
; fmadd s0, s0, s1, s2
; ret

function %f30(f64, f64, f64) -> f64 {
block0(v0: f64, v1: f64, v2: f64):
v3 = fma v0, v1, v2
return v3
}
; block0:
; fmadd d0, d0, d1, d2
; ret

function %f31(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fcopysign v0, v1
return v2
}
; block0:
; ushr v5.2s, v1.2s, #31
; sli v0.2s, v0.2s, v5.2s, #31
; ret

function %f32(f64, f64) -> f64 {
block0(v0: f64, v1: f64):
v2 = fcopysign v0, v1
return v2
}
; block0:
; ushr d5, d1, #63
; sli d0, d0, d5, #63
; ret

function %f33(f32) -> i32 {
block0(v0: f32):
v1 = fcvt_to_uint.i32 v0
return v1
}
; block0:
; fcmp s0, s0
; b.vc 8 ; udf
; fmov s5, #-1
; fcmp s0, s5
; b.gt 8 ; udf
; movz x10, #20352, LSL #16
; fmov s18, w10
; fcmp s0, s18
; b.lt 8 ; udf
; fcvtzu w0, s0
; ret

function %f34(f32) -> i32 {
block0(v0: f32):
v1 = fcvt_to_sint.i32 v0
return v1
}
; block0:
; fcmp s0, s0
; b.vc 8 ; udf
; movz x6, #52992, LSL #16
; fmov s6, w6
; fcmp s0, s6
; b.ge 8 ; udf
; movz x12, #20224, LSL #16
; fmov s20, w12
; fcmp s0, s20
; b.lt 8 ; udf
; fcvtzs w0, s0
; ret

function %f35(f32) -> i64 {
block0(v0: f32):
v1 = fcvt_to_uint.i64 v0
return v1
}
; block0:
; fcmp s0, s0
; b.vc 8 ; udf
; fmov s5, #-1
; fcmp s0, s5
; b.gt 8 ; udf
; movz x10, #24448, LSL #16
; fmov s18, w10
; fcmp s0, s18
; b.lt 8 ; udf
; fcvtzu x0, s0
; ret

function %f36(f32) -> i64 {
block0(v0: f32):
v1 = fcvt_to_sint.i64 v0
return v1
}
; block0:
; fcmp s0, s0
; b.vc 8 ; udf
; movz x6, #57088, LSL #16
; fmov s6, w6
; fcmp s0, s6
; b.ge 8 ; udf
; movz x12, #24320, LSL #16
; fmov s20, w12
; fcmp s0, s20
; b.lt 8 ; udf
; fcvtzs x0, s0
; ret

function %f37(f64) -> i32 {
block0(v0: f64):
v1 = fcvt_to_uint.i32 v0
return v1
}
; block0:
; fcmp d0, d0
; b.vc 8 ; udf
; fmov d5, #-1
; fcmp d0, d5
; b.gt 8 ; udf
; movz x10, #16880, LSL #48
; fmov d18, x10
; fcmp d0, d18
; b.lt 8 ; udf
; fcvtzu w0, d0
; ret

function %f38(f64) -> i32 {
block0(v0: f64):
v1 = fcvt_to_sint.i32 v0
return v1
}
; block0:
; fcmp d0, d0
; b.vc 8 ; udf
; ldr d5, pc+8 ; b 12 ; data.f64 -2147483649
; fcmp d0, d5
; b.gt 8 ; udf
; movz x10, #16864, LSL #48
; fmov d18, x10
; fcmp d0, d18
; b.lt 8 ; udf
; fcvtzs w0, d0
; ret

function %f39(f64) -> i64 {
block0(v0: f64):
v1 = fcvt_to_uint.i64 v0
return v1
}
; block0:
; fcmp d0, d0
; b.vc 8 ; udf
; fmov d5, #-1
; fcmp d0, d5
; b.gt 8 ; udf
; movz x10, #17392, LSL #48
; fmov d18, x10
; fcmp d0, d18
; b.lt 8 ; udf
; fcvtzu x0, d0
; ret

function %f40(f64) -> i64 {
block0(v0: f64):
v1 = fcvt_to_sint.i64 v0
return v1
}
; block0:
; fcmp d0, d0
; b.vc 8 ; udf
; movz x6, #50144, LSL #48
; fmov d6, x6
; fcmp d0, d6
; b.ge 8 ; udf
; movz x12, #17376, LSL #48
; fmov d20, x12
; fcmp d0, d20
; b.lt 8 ; udf
; fcvtzs x0, d0
; ret

function %f41(i32) -> f32 {
block0(v0: i32):
v1 = fcvt_from_uint.f32 v0
return v1
}
; block0:
; ucvtf s0, w0
; ret

function %f42(i32) -> f32 {
block0(v0: i32):
v1 = fcvt_from_sint.f32 v0
return v1
}
; block0:
; scvtf s0, w0
; ret

function %f43(i64) -> f32 {
block0(v0: i64):
v1 = fcvt_from_uint.f32 v0
return v1
}
; block0:
; ucvtf s0, x0
; ret

function %f44(i64) -> f32 {
block0(v0: i64):
v1 = fcvt_from_sint.f32 v0
return v1
}
; block0:
; scvtf s0, x0
; ret

function %f45(i32) -> f64 {
block0(v0: i32):
v1 = fcvt_from_uint.f64 v0
return v1
}
; block0:
; ucvtf d0, w0
; ret

function %f46(i32) -> f64 {
block0(v0: i32):
v1 = fcvt_from_sint.f64 v0
return v1
}
; block0:
; scvtf d0, w0
; ret

function %f47(i64) -> f64 {
block0(v0: i64):
v1 = fcvt_from_uint.f64 v0
return v1
}
; block0:
; ucvtf d0, x0
; ret

function %f48(i64) -> f64 {
block0(v0: i64):
v1 = fcvt_from_sint.f64 v0
return v1
}
; block0:
; scvtf d0, x0
; ret

function %f49(f32) -> i32 {
block0(v0: f32):
v1 = fcvt_to_uint_sat.i32 v0
return v1
}
; block0:
; movz x4, #20352, LSL #16
; fmov s4, w4
; fmin s7, s0, s4
; movi v17.2s, #0
; fmax s19, s7, s17
; fcmp s0, s0
; fcsel s22, s17, s19, ne
; fcvtzu w0, s22
; ret

function %f50(f32) -> i32 {
block0(v0: f32):
v1 = fcvt_to_sint_sat.i32 v0
return v1
}
; block0:
; movz x4, #20224, LSL #16
; fmov s4, w4
; fmin s7, s0, s4
; movz x10, #52992, LSL #16
; fmov s18, w10
; fmax s21, s7, s18
; movi v23.16b, #0
; fcmp s0, s0
; fcsel s26, s23, s21, ne
; fcvtzs w0, s26
; ret

function %f51(f32) -> i64 {
block0(v0: f32):
v1 = fcvt_to_uint_sat.i64 v0
return v1
}
; block0:
; movz x4, #24448, LSL #16
; fmov s4, w4
; fmin s7, s0, s4
; movi v17.2s, #0
; fmax s19, s7, s17
; fcmp s0, s0
; fcsel s22, s17, s19, ne
; fcvtzu x0, s22
; ret

function %f52(f32) -> i64 {
block0(v0: f32):
v1 = fcvt_to_sint_sat.i64 v0
return v1
}
; block0:
; movz x4, #24320, LSL #16
; fmov s4, w4
; fmin s7, s0, s4
; movz x10, #57088, LSL #16
; fmov s18, w10
; fmax s21, s7, s18
; movi v23.16b, #0
; fcmp s0, s0
; fcsel s26, s23, s21, ne
; fcvtzs x0, s26
; ret

function %f53(f64) -> i32 {
block0(v0: f64):
v1 = fcvt_to_uint_sat.i32 v0
return v1
}
; block0:
; ldr d3, pc+8 ; b 12 ; data.f64 4294967295
; fmin d5, d0, d3
; movi v7.2s, #0
; fmax d17, d5, d7
; fcmp d0, d0
; fcsel d20, d7, d17, ne
; fcvtzu w0, d20
; ret

function %f54(f64) -> i32 {
block0(v0: f64):
v1 = fcvt_to_sint_sat.i32 v0
return v1
}
; block0:
; ldr d3, pc+8 ; b 12 ; data.f64 2147483647
; fmin d5, d0, d3
; movz x8, #49632, LSL #48
; fmov d16, x8
; fmax d19, d5, d16
; movi v21.16b, #0
; fcmp d0, d0
; fcsel d24, d21, d19, ne
; fcvtzs w0, d24
; ret

function %f55(f64) -> i64 {
block0(v0: f64):
v1 = fcvt_to_uint_sat.i64 v0
return v1
}
; block0:
; movz x4, #17392, LSL #48
; fmov d4, x4
; fmin d7, d0, d4
; movi v17.2s, #0
; fmax d19, d7, d17
; fcmp d0, d0
; fcsel d22, d17, d19, ne
; fcvtzu x0, d22
; ret

function %f56(f64) -> i64 {
block0(v0: f64):
v1 = fcvt_to_sint_sat.i64 v0
return v1
}
; block0:
; movz x4, #17376, LSL #48
; fmov d4, x4
; fmin d7, d0, d4
; movz x10, #50144, LSL #48
; fmov d18, x10
; fmax d21, d7, d18
; movi v23.16b, #0
; fcmp d0, d0
; fcsel d26, d23, d21, ne
; fcvtzs x0, d26
; ret

function %f57(f32x2) -> f32x2 {
block0(v0: f32x2):
v1 = sqrt v0
return v1
}
; block0:
; fsqrt v0.2s, v0.2s
; ret

function %f58(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = sqrt v0
return v1
}
; block0:
; fsqrt v0.4s, v0.4s
; ret

function %f59(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = sqrt v0
return v1
}
; block0:
; fsqrt v0.2d, v0.2d
; ret

function %f60(f32x2) -> f32x2 {
block0(v0: f32x2):
v1 = fneg v0
return v1
}
; block0:
; fneg v0.2s, v0.2s
; ret

function %f61(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = fneg v0
return v1
}
; block0:
; fneg v0.4s, v0.4s
; ret

function %f62(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = fneg v0
return v1
}
; block0:
; fneg v0.2d, v0.2d
; ret

function %f63(f32x2) -> f32x2 {
block0(v0: f32x2):
v1 = fabs v0
return v1
}
; block0:
; fabs v0.2s, v0.2s
; ret

function %f64(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = fabs v0
return v1
}
; block0:
; fabs v0.4s, v0.4s
; ret

function %f65(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = fabs v0
return v1
}
; block0:
; fabs v0.2d, v0.2d
; ret

function %f66(f32x2) -> f32x2 {
block0(v0: f32x2):
v1 = ceil v0
return v1
}
; block0:
; frintp v0.2s, v0.2s
; ret

function %f67(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = ceil v0
return v1
}
; block0:
; frintp v0.4s, v0.4s
; ret

function %f68(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = ceil v0
return v1
}
; block0:
; frintp v0.2d, v0.2d
; ret

function %f69(f32x2) -> f32x2 {
block0(v0: f32x2):
v1 = floor v0
return v1
}
; block0:
; frintm v0.2s, v0.2s
; ret

function %f70(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = floor v0
return v1
}
; block0:
; frintm v0.4s, v0.4s
; ret

function %f71(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = floor v0
return v1
}
; block0:
; frintm v0.2d, v0.2d
; ret

function %f72(f32x2) -> f32x2 {
block0(v0: f32x2):
v1 = trunc v0
return v1
}
; block0:
; frintz v0.2s, v0.2s
; ret

function %f73(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = trunc v0
return v1
}
; block0:
; frintz v0.4s, v0.4s
; ret

function %f74(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = trunc v0
return v1
}
; block0:
; frintz v0.2d, v0.2d
; ret

function %f75(f32x2) -> f32x2 {
block0(v0: f32x2):
v1 = nearest v0
return v1
}
; block0:
; frintn v0.2s, v0.2s
; ret

function %f76(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = nearest v0
return v1
}
; block0:
; frintn v0.4s, v0.4s
; ret

function %f77(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = nearest v0
return v1
}
; block0:
; frintn v0.2d, v0.2d
; ret

function %f78(f32x4, f32x4, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32x4, v2: f32x4):
v3 = fma v0, v1, v2
return v3
}
; block0:
; fmla v2.4s, v2.4s, v0.4s, v1.4s
; mov v0.16b, v2.16b
; ret

function %f79(f32x2, f32x2, f32x2) -> f32x2 {
block0(v0: f32x2, v1: f32x2, v2: f32x2):
v3 = fma v0, v1, v2
return v3
}
; block0:
; fmla v2.2s, v2.2s, v0.2s, v1.2s
; mov v0.16b, v2.16b
; ret

function %f80(f64x2, f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2, v2: f64x2):
v3 = fma v0, v1, v2
return v3
}
; block0:
; fmla v2.2d, v2.2d, v0.2d, v1.2d
; mov v0.16b, v2.16b
; ret

function %f81(f32x2, f32x2) -> f32x2 {
block0(v0: f32x2, v1: f32x2):
v2 = fcopysign v0, v1
return v2
}
; block0:
; ushr v5.2s, v1.2s, #31
; sli v0.2s, v0.2s, v5.2s, #31
; ret

function %f82(f32x4, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32x4):
v2 = fcopysign v0, v1
return v2
}
; block0:
; ushr v5.4s, v1.4s, #31
; sli v0.4s, v0.4s, v5.4s, #31
; ret

function %f83(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fcopysign v0, v1
return v2
}
; block0:
; ushr v5.2d, v1.2d, #63
; sli v0.2d, v0.2d, v5.2d, #63
; ret