This PR removes all uses of modify-operands in the aarch64 backend, replacing them with reused-input operands. This has the nice effect of removing a bunch of move instructions and of representing inputs and outputs more clearly. It also removes the explicit use of pinned vregs in the aarch64 backend, using fixed-register constraints on operands instead when an instruction or pseudo-instruction sequence requires specific registers. This is the second PR in the regalloc-semantics cleanup series; once the remaining backend (s390x) and the ABI code are cleaned up as well, we'll be able to simplify the regalloc2 frontend.
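For context on the two operand styles, here is a minimal sketch (not code from this PR; the helper names and operand layouts are illustrative, and it assumes regalloc2's public `Operand` constructors): an accumulating destination is expressed as a def that reuses an input's register rather than as a modify operand, and a required physical register is expressed as a fixed-register constraint rather than a pinned vreg.

```rust
// Illustrative sketch only: shows reused-input defs and fixed-register
// constraints in regalloc2's operand model, not the backend's actual lowering.
use regalloc2::{Operand, PReg, RegClass, VReg};

// A narrowing instruction like sqxtn2 writes the high half of `dst` while
// keeping the low half that `acc` already holds. Instead of a single
// modify-operand on `acc`, the destination is a def constrained to reuse the
// register allocated to operand index 1 (`acc`).
fn sqxtn2_operands(dst: VReg, acc: VReg, src: VReg) -> Vec<Operand> {
    vec![
        Operand::reg_reuse_def(dst, 1),
        Operand::reg_use(acc),
        Operand::reg_use(src),
    ]
}

// A pseudo-inst that must consume `val` in a particular register states that
// as a fixed-register constraint instead of copying into a pinned vreg.
// (PReg index 0 in the Int class is assumed to correspond to x0 here.)
fn fixed_x0_use(val: VReg) -> Operand {
    Operand::reg_fixed_use(val, PReg::new(0, RegClass::Int))
}
```

Because these constraints are visible to the allocator, it can place values in the required registers up front, rather than the backend emitting explicit moves around the instruction.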
test compile precise-output
target aarch64

function %snarrow_i16x4(i16) -> i8x8 {
gv0 = dyn_scale_target_const.i16x4
gv1 = dyn_scale_target_const.i8x8
dt0 = i16x4*gv0
dt1 = i8x8*gv0

block0(v0: i16):
v1 = splat.dt0 v0
v2 = snarrow.dt0 v1, v1
v3 = extract_vector v2, 0
return v3
}

; block0:
; dup v4.4h, w0
; mov v4.d[1], v4.d[1], v4.d[0]
; sqxtn v0.8b, v4.8h
; ret

function %snarrow_i16x8(i16) -> i8x16 {
gv0 = dyn_scale_target_const.i16x8
gv1 = dyn_scale_target_const.i8x16
dt0 = i16x8*gv0
dt1 = i8x16*gv0

block0(v0: i16):
v1 = splat.dt0 v0
v2 = snarrow.dt0 v1, v1
v3 = extract_vector v2, 0
return v3
}

; block0:
; dup v6.8h, w0
; sqxtn v0.8b, v6.8h
; sqxtn2 v0.16b, v0.16b, v6.8h
; ret

function %snarrow_i32x2(i32) -> i16x4 {
gv0 = dyn_scale_target_const.i32x2
gv1 = dyn_scale_target_const.i16x4
dt0 = i32x2*gv0
dt1 = i16x4*gv0

block0(v0: i32):
v1 = splat.dt0 v0
v2 = snarrow.dt0 v1, v1
v3 = extract_vector v2, 0
return v3
}

; block0:
; dup v4.2s, w0
; mov v4.d[1], v4.d[1], v4.d[0]
; sqxtn v0.4h, v4.4s
; ret

function %snarrow_i32x4(i32) -> i16x8 {
gv0 = dyn_scale_target_const.i32x4
gv1 = dyn_scale_target_const.i16x8
dt0 = i32x4*gv0
dt1 = i16x8*gv0

block0(v0: i32):
v1 = splat.dt0 v0
v2 = snarrow.dt0 v1, v1
v3 = extract_vector v2, 0
return v3
}

; block0:
; dup v6.4s, w0
; sqxtn v0.4h, v6.4s
; sqxtn2 v0.8h, v0.8h, v6.4s
; ret

function %snarrow_i64x2(i64) -> i32x4 {
gv0 = dyn_scale_target_const.i64x2
gv1 = dyn_scale_target_const.i32x4
dt0 = i64x2*gv0
dt1 = i32x4*gv0

block0(v0: i64):
v1 = splat.dt0 v0
v2 = snarrow.dt0 v1, v1
v3 = extract_vector v2, 0
return v3
}

; block0:
; dup v6.2d, x0
; sqxtn v0.2s, v6.2d
; sqxtn2 v0.4s, v0.4s, v6.2d
; ret

function %unarrow_i16x4(i16) -> i8x8 {
gv0 = dyn_scale_target_const.i16x4
gv1 = dyn_scale_target_const.i8x8
dt0 = i16x4*gv0
dt1 = i8x8*gv0

block0(v0: i16):
v1 = splat.dt0 v0
v2 = unarrow.dt0 v1, v1
v3 = extract_vector v2, 0
return v3
}

; block0:
; dup v4.4h, w0
; mov v4.d[1], v4.d[1], v4.d[0]
; sqxtun v0.8b, v4.8h
; ret

function %unarrow_i16x8(i16) -> i8x16 {
gv0 = dyn_scale_target_const.i16x8
gv1 = dyn_scale_target_const.i8x16
dt0 = i16x8*gv0
dt1 = i8x16*gv0

block0(v0: i16):
v1 = splat.dt0 v0
v2 = unarrow.dt0 v1, v1
v3 = extract_vector v2, 0
return v3
}

; block0:
; dup v6.8h, w0
; sqxtun v0.8b, v6.8h
; sqxtun2 v0.16b, v0.16b, v6.8h
; ret

function %unarrow_i32x2(i32) -> i16x4 {
gv0 = dyn_scale_target_const.i32x2
gv1 = dyn_scale_target_const.i16x4
dt0 = i32x2*gv0
dt1 = i16x4*gv0

block0(v0: i32):
v1 = splat.dt0 v0
v2 = unarrow.dt0 v1, v1
v3 = extract_vector v2, 0
return v3
}

; block0:
; dup v4.2s, w0
; mov v4.d[1], v4.d[1], v4.d[0]
; sqxtun v0.4h, v4.4s
; ret

function %unarrow_i32x4(i32) -> i16x8 {
gv0 = dyn_scale_target_const.i32x4
gv1 = dyn_scale_target_const.i16x8
dt0 = i32x4*gv0
dt1 = i16x8*gv0

block0(v0: i32):
v1 = splat.dt0 v0
v2 = unarrow.dt0 v1, v1
v3 = extract_vector v2, 0
return v3
}

; block0:
; dup v6.4s, w0
; sqxtun v0.4h, v6.4s
; sqxtun2 v0.8h, v0.8h, v6.4s
; ret

function %unarrow_i64x2(i64) -> i32x4 {
gv0 = dyn_scale_target_const.i64x2
gv1 = dyn_scale_target_const.i32x4
dt0 = i64x2*gv0
dt1 = i32x4*gv0

block0(v0: i64):
v1 = splat.dt0 v0
v2 = unarrow.dt0 v1, v1
v3 = extract_vector v2, 0
return v3
}

; block0:
; dup v6.2d, x0
; sqxtun v0.2s, v6.2d
; sqxtun2 v0.4s, v0.4s, v6.2d
; ret

function %uunarrow_i16x4(i16) -> i8x8 {
gv0 = dyn_scale_target_const.i16x4
gv1 = dyn_scale_target_const.i8x8
dt0 = i16x4*gv0
dt1 = i8x8*gv0

block0(v0: i16):
v1 = splat.dt0 v0
v2 = uunarrow.dt0 v1, v1
v3 = extract_vector v2, 0
return v3
}

; block0:
; dup v4.4h, w0
; mov v4.d[1], v4.d[1], v4.d[0]
; uqxtn v0.8b, v4.8h
; ret

function %uunarrow_i16x8(i16) -> i8x16 {
gv0 = dyn_scale_target_const.i16x8
gv1 = dyn_scale_target_const.i8x16
dt0 = i16x8*gv0
dt1 = i8x16*gv0

block0(v0: i16):
v1 = splat.dt0 v0
v2 = uunarrow.dt0 v1, v1
v3 = extract_vector v2, 0
return v3
}

; block0:
; dup v6.8h, w0
; uqxtn v0.8b, v6.8h
; uqxtn2 v0.16b, v0.16b, v6.8h
; ret

function %uunarrow_i32x2(i32) -> i16x4 {
gv0 = dyn_scale_target_const.i32x2
gv1 = dyn_scale_target_const.i16x4
dt0 = i32x2*gv0
dt1 = i16x4*gv0

block0(v0: i32):
v1 = splat.dt0 v0
v2 = uunarrow.dt0 v1, v1
v3 = extract_vector v2, 0
return v3
}

; block0:
; dup v4.2s, w0
; mov v4.d[1], v4.d[1], v4.d[0]
; uqxtn v0.4h, v4.4s
; ret

function %uunarrow_i32x4(i32) -> i16x8 {
gv0 = dyn_scale_target_const.i32x4
gv1 = dyn_scale_target_const.i16x8
dt0 = i32x4*gv0
dt1 = i16x8*gv0

block0(v0: i32):
v1 = splat.dt0 v0
v2 = uunarrow.dt0 v1, v1
v3 = extract_vector v2, 0
return v3
}

; block0:
; dup v6.4s, w0
; uqxtn v0.4h, v6.4s
; uqxtn2 v0.8h, v0.8h, v6.4s
; ret

function %uunarrow_i64x2(i64) -> i32x4 {
gv0 = dyn_scale_target_const.i64x2
gv1 = dyn_scale_target_const.i32x4
dt0 = i64x2*gv0
dt1 = i32x4*gv0

block0(v0: i64):
v1 = splat.dt0 v0
v2 = uunarrow.dt0 v1, v1
v3 = extract_vector v2, 0
return v3
}

; block0:
; dup v6.2d, x0
; uqxtn v0.2s, v6.2d
; uqxtn2 v0.4s, v0.4s, v6.2d
; ret