This PR removes all uses of modify-operands in the aarch64 backend, replacing them with reused-input operands. This has the nice effect of eliminating a number of move instructions and representing inputs and outputs more clearly. This PR also removes the explicit use of pinned vregs in the aarch64 backend, using fixed-register constraints on the operands instead when instructions or pseudo-instruction sequences require specific registers. This is the second PR in the regalloc-semantics cleanup series; once the remaining backend (s390x) and the ABI code are cleaned up as well, we'll be able to simplify the regalloc2 frontend.
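For background on what changes at the operand level, here is a minimal, self-contained sketch of the two constraint styles mentioned above, written against the regalloc2 crate's `Operand` constructors (constructor names as I understand that API; the vreg/preg indices and the `sqxtn2`-style framing are purely illustrative and not taken from this PR):

```rust
// Sketch only: assumes `regalloc2` as a dependency and uses its `Operand`
// constructors (`reg_use`, `reg_reuse_def`, `reg_fixed_use`); exact
// signatures should be checked against the crate docs.
use regalloc2::{Operand, PReg, RegClass, VReg};

fn main() {
    // Illustrative virtual registers for a two-input narrowing instruction
    // (think `sqxtn2`): a low-half input, a high-half input, and the result.
    let low = VReg::new(0, RegClass::Float);
    let high = VReg::new(1, RegClass::Float);
    let dst = VReg::new(2, RegClass::Float);

    // Old style (what this PR removes): a single "modify" operand that is
    // both read and written, which forces the lowering to copy the input
    // into a fresh vreg first whenever the input is still live elsewhere.
    //
    // New style: `dst` is a separate def constrained to reuse the register
    // of operand 1 (`low`); the allocator inserts a copy only if it must.
    let operands = vec![
        Operand::reg_reuse_def(dst, 1), // def, reuses operand 1's register
        Operand::reg_use(low),          // use: the half that is overwritten in place
        Operand::reg_use(high),         // use: the other input
    ];

    // Fixed-register constraints stand in for pinned vregs: tie an operand
    // directly to a physical register (hw encoding 0 here, purely as an example).
    let fixed = Operand::reg_fixed_use(high, PReg::new(0, RegClass::Float));

    println!("{:?}", operands);
    println!("{:?}", fixed);
}
```

The precise-output test below captures the resulting aarch64 lowerings for `snarrow`, `unarrow`, and `uunarrow`.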
test compile precise-output
set unwind_info=false
target aarch64

function %snarrow_i16x4(i16x4, i16x4) -> i8x8 {
block0(v0: i16x4, v1: i16x4):
v2 = snarrow v0, v1
return v2
}

; block0:
; mov v0.d[1], v0.d[1], v1.d[0]
; sqxtn v0.8b, v0.8h
; ret

function %snarrow_i16x8(i16x8, i16x8) -> i8x16 {
block0(v0: i16x8, v1: i16x8):
v2 = snarrow v0, v1
return v2
}

; block0:
; sqxtn v0.8b, v0.8h
; sqxtn2 v0.16b, v0.16b, v1.8h
; ret

function %snarrow_i32x2(i32x2, i32x2) -> i16x4 {
block0(v0: i32x2, v1: i32x2):
v2 = snarrow v0, v1
return v2
}

; block0:
; mov v0.d[1], v0.d[1], v1.d[0]
; sqxtn v0.4h, v0.4s
; ret

function %snarrow_i32x4(i32x4, i32x4) -> i16x8 {
block0(v0: i32x4, v1: i32x4):
v2 = snarrow v0, v1
return v2
}

; block0:
; sqxtn v0.4h, v0.4s
; sqxtn2 v0.8h, v0.8h, v1.4s
; ret

function %snarrow_i64x2(i64x2, i64x2) -> i32x4 {
block0(v0: i64x2, v1: i64x2):
v2 = snarrow v0, v1
return v2
}

; block0:
; sqxtn v0.2s, v0.2d
; sqxtn2 v0.4s, v0.4s, v1.2d
; ret

function %unarrow_i16x4(i16x4, i16x4) -> i8x8 {
block0(v0: i16x4, v1: i16x4):
v2 = unarrow v0, v1
return v2
}

; block0:
; mov v0.d[1], v0.d[1], v1.d[0]
; sqxtun v0.8b, v0.8h
; ret

function %unarrow_i16x8(i16x8, i16x8) -> i8x16 {
block0(v0: i16x8, v1: i16x8):
v2 = unarrow v0, v1
return v2
}

; block0:
; sqxtun v0.8b, v0.8h
; sqxtun2 v0.16b, v0.16b, v1.8h
; ret

function %unarrow_i32x2(i32x2, i32x2) -> i16x4 {
block0(v0: i32x2, v1: i32x2):
v2 = unarrow v0, v1
return v2
}

; block0:
; mov v0.d[1], v0.d[1], v1.d[0]
; sqxtun v0.4h, v0.4s
; ret

function %unarrow_i32x4(i32x4, i32x4) -> i16x8 {
block0(v0: i32x4, v1: i32x4):
v2 = unarrow v0, v1
return v2
}

; block0:
; sqxtun v0.4h, v0.4s
; sqxtun2 v0.8h, v0.8h, v1.4s
; ret

function %unarrow_i64x2(i64x2, i64x2) -> i32x4 {
block0(v0: i64x2, v1: i64x2):
v2 = unarrow v0, v1
return v2
}

; block0:
; sqxtun v0.2s, v0.2d
; sqxtun2 v0.4s, v0.4s, v1.2d
; ret

function %uunarrow_i16x4(i16x4, i16x4) -> i8x8 {
block0(v0: i16x4, v1: i16x4):
v2 = uunarrow v0, v1
return v2
}

; block0:
; mov v0.d[1], v0.d[1], v1.d[0]
; uqxtn v0.8b, v0.8h
; ret

function %uunarrow_i16x8(i16x8, i16x8) -> i8x16 {
block0(v0: i16x8, v1: i16x8):
v2 = uunarrow v0, v1
return v2
}

; block0:
; uqxtn v0.8b, v0.8h
; uqxtn2 v0.16b, v0.16b, v1.8h
; ret

function %uunarrow_i32x2(i32x2, i32x2) -> i16x4 {
block0(v0: i32x2, v1: i32x2):
v2 = uunarrow v0, v1
return v2
}

; block0:
; mov v0.d[1], v0.d[1], v1.d[0]
; uqxtn v0.4h, v0.4s
; ret

function %uunarrow_i32x4(i32x4, i32x4) -> i16x8 {
block0(v0: i32x4, v1: i32x4):
v2 = uunarrow v0, v1
return v2
}

; block0:
; uqxtn v0.4h, v0.4s
; uqxtn2 v0.8h, v0.8h, v1.4s
; ret

function %uunarrow_i64x2(i64x2, i64x2) -> i32x4 {
block0(v0: i64x2, v1: i64x2):
v2 = uunarrow v0, v1
return v2
}

; block0:
; uqxtn v0.2s, v0.2d
; uqxtn2 v0.4s, v0.4s, v1.2d
; ret

function %snarrow_i16x8_zero(i16x8) -> i8x16 {
block0(v0: i16x8):
v1 = vconst.i16x8 0x00
v2 = snarrow v0, v1
return v2
}

; block0:
; sqxtn v0.8b, v0.8h
; ret

function %snarrow_i32x4_zero(i32x4) -> i16x8 {
block0(v0: i32x4):
v1 = vconst.i32x4 0x00
v2 = snarrow v0, v1
return v2
}

; block0:
; sqxtn v0.4h, v0.4s
; ret

function %snarrow_i64x2_zero(i64x2) -> i32x4 {
block0(v0: i64x2):
v1 = vconst.i64x2 0x00
v2 = snarrow v0, v1
return v2
}

; block0:
; sqxtn v0.2s, v0.2d
; ret

function %unarrow_i16x8_zero(i16x8) -> i8x16 {
block0(v0: i16x8):
v1 = vconst.i16x8 0x00
v2 = unarrow v0, v1
return v2
}

; block0:
; sqxtun v0.8b, v0.8h
; ret

function %unarrow_i32x4_zero(i32x4) -> i16x8 {
block0(v0: i32x4):
v1 = vconst.i32x4 0x00
v2 = unarrow v0, v1
return v2
}

; block0:
; sqxtun v0.4h, v0.4s
; ret

function %unarrow_i64x2_zero(i64x2) -> i32x4 {
block0(v0: i64x2):
v1 = vconst.i64x2 0x00
v2 = unarrow v0, v1
return v2
}

; block0:
; sqxtun v0.2s, v0.2d
; ret

function %uunarrow_i16x8_zero(i16x8) -> i8x16 {
block0(v0: i16x8):
v1 = vconst.i16x8 0x00
v2 = uunarrow v0, v1
return v2
}

; block0:
; uqxtn v0.8b, v0.8h
; ret

function %uunarrow_i32x4_zero(i32x4) -> i16x8 {
block0(v0: i32x4):
v1 = vconst.i32x4 0x00
v2 = uunarrow v0, v1
return v2
}

; block0:
; uqxtn v0.4h, v0.4s
; ret

function %uunarrow_i64x2_zero(i64x2) -> i32x4 {
block0(v0: i64x2):
v1 = vconst.i64x2 0x00
v2 = uunarrow v0, v1
return v2
}

; block0:
; uqxtn v0.2s, v0.2d
; ret