wasmtime/cranelift/filetests/filetests/isa/aarch64/simd-narrow.clif
Chris Fallin ae5fe8a728 aarch64: fix up regalloc2 semantics. (#4830)
This PR removes all uses of modify-operands in the aarch64 backend,
replacing them with reused-input operands instead. This has the nice
effect of removing a bunch of move instructions and more clearly
representing inputs and outputs.

This PR also removes the explicit use of pinned vregs in the aarch64
backend, instead using fixed-register constraints on the operands when
insts or pseudo-inst sequences require certain registers.

This is the second PR in the regalloc-semantics cleanup series; after
the remaining backend (s390x) and the ABI code are cleaned up as well,
we'll be able to simplify the regalloc2 frontend.
2022-09-01 21:25:20 +00:00
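As a rough illustration of the two constraint kinds the commit message mentions (this is not code from the PR; the regalloc2 crate API shown, the vreg/preg indices, and the register class are all assumed for the example): a reused-input operand tells the allocator that an instruction's output must land in the same register as one of its inputs, while a fixed-register constraint pins an operand to a specific physical register instead of routing the value through a pinned vreg.

use regalloc2::{Operand, PReg, RegClass, VReg};

fn main() {
    // Hypothetical virtual registers for one instruction's inputs and output.
    let lhs = VReg::new(0, RegClass::Float);
    let rhs = VReg::new(1, RegClass::Float);
    let dst = VReg::new(2, RegClass::Float);

    // Reused-input form: the def is constrained to the same physical register
    // as operand 0, so no "modify" operand (and no extra move) is needed.
    let reused_input = [
        Operand::reg_use(lhs),
        Operand::reg_use(rhs),
        Operand::reg_reuse_def(dst, 0),
    ];

    // Fixed-register form: a use and a def pinned to a specific physical
    // register for this instruction only.
    let fixed = [
        Operand::reg_fixed_use(lhs, PReg::new(0, RegClass::Float)),
        Operand::reg_fixed_def(dst, PReg::new(0, RegClass::Float)),
    ];

    println!("{:?}", reused_input);
    println!("{:?}", fixed);
}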

test compile precise-output
set unwind_info=false
target aarch64

function %snarrow_i16x4(i16x4, i16x4) -> i8x8 {
block0(v0: i16x4, v1: i16x4):
    v2 = snarrow v0, v1
    return v2
}

; block0:
; mov v0.d[1], v0.d[1], v1.d[0]
; sqxtn v0.8b, v0.8h
; ret

function %snarrow_i16x8(i16x8, i16x8) -> i8x16 {
block0(v0: i16x8, v1: i16x8):
    v2 = snarrow v0, v1
    return v2
}

; block0:
; sqxtn v0.8b, v0.8h
; sqxtn2 v0.16b, v0.16b, v1.8h
; ret

function %snarrow_i32x2(i32x2, i32x2) -> i16x4 {
block0(v0: i32x2, v1: i32x2):
    v2 = snarrow v0, v1
    return v2
}

; block0:
; mov v0.d[1], v0.d[1], v1.d[0]
; sqxtn v0.4h, v0.4s
; ret

function %snarrow_i32x4(i32x4, i32x4) -> i16x8 {
block0(v0: i32x4, v1: i32x4):
    v2 = snarrow v0, v1
    return v2
}

; block0:
; sqxtn v0.4h, v0.4s
; sqxtn2 v0.8h, v0.8h, v1.4s
; ret

function %snarrow_i64x2(i64x2, i64x2) -> i32x4 {
block0(v0: i64x2, v1: i64x2):
    v2 = snarrow v0, v1
    return v2
}

; block0:
; sqxtn v0.2s, v0.2d
; sqxtn2 v0.4s, v0.4s, v1.2d
; ret

function %unarrow_i16x4(i16x4, i16x4) -> i8x8 {
block0(v0: i16x4, v1: i16x4):
    v2 = unarrow v0, v1
    return v2
}

; block0:
; mov v0.d[1], v0.d[1], v1.d[0]
; sqxtun v0.8b, v0.8h
; ret

function %unarrow_i16x8(i16x8, i16x8) -> i8x16 {
block0(v0: i16x8, v1: i16x8):
    v2 = unarrow v0, v1
    return v2
}

; block0:
; sqxtun v0.8b, v0.8h
; sqxtun2 v0.16b, v0.16b, v1.8h
; ret

function %unarrow_i32x2(i32x2, i32x2) -> i16x4 {
block0(v0: i32x2, v1: i32x2):
    v2 = unarrow v0, v1
    return v2
}

; block0:
; mov v0.d[1], v0.d[1], v1.d[0]
; sqxtun v0.4h, v0.4s
; ret

function %unarrow_i32x4(i32x4, i32x4) -> i16x8 {
block0(v0: i32x4, v1: i32x4):
    v2 = unarrow v0, v1
    return v2
}

; block0:
; sqxtun v0.4h, v0.4s
; sqxtun2 v0.8h, v0.8h, v1.4s
; ret

function %unarrow_i64x2(i64x2, i64x2) -> i32x4 {
block0(v0: i64x2, v1: i64x2):
    v2 = unarrow v0, v1
    return v2
}

; block0:
; sqxtun v0.2s, v0.2d
; sqxtun2 v0.4s, v0.4s, v1.2d
; ret

function %uunarrow_i16x4(i16x4, i16x4) -> i8x8 {
block0(v0: i16x4, v1: i16x4):
    v2 = uunarrow v0, v1
    return v2
}

; block0:
; mov v0.d[1], v0.d[1], v1.d[0]
; uqxtn v0.8b, v0.8h
; ret

function %uunarrow_i16x8(i16x8, i16x8) -> i8x16 {
block0(v0: i16x8, v1: i16x8):
    v2 = uunarrow v0, v1
    return v2
}

; block0:
; uqxtn v0.8b, v0.8h
; uqxtn2 v0.16b, v0.16b, v1.8h
; ret

function %uunarrow_i32x2(i32x2, i32x2) -> i16x4 {
block0(v0: i32x2, v1: i32x2):
    v2 = uunarrow v0, v1
    return v2
}

; block0:
; mov v0.d[1], v0.d[1], v1.d[0]
; uqxtn v0.4h, v0.4s
; ret

function %uunarrow_i32x4(i32x4, i32x4) -> i16x8 {
block0(v0: i32x4, v1: i32x4):
    v2 = uunarrow v0, v1
    return v2
}

; block0:
; uqxtn v0.4h, v0.4s
; uqxtn2 v0.8h, v0.8h, v1.4s
; ret

function %uunarrow_i64x2(i64x2, i64x2) -> i32x4 {
block0(v0: i64x2, v1: i64x2):
    v2 = uunarrow v0, v1
    return v2
}

; block0:
; uqxtn v0.2s, v0.2d
; uqxtn2 v0.4s, v0.4s, v1.2d
; ret

function %snarrow_i16x8_zero(i16x8) -> i8x16 {
block0(v0: i16x8):
    v1 = vconst.i16x8 0x00
    v2 = snarrow v0, v1
    return v2
}

; block0:
; sqxtn v0.8b, v0.8h
; ret

function %snarrow_i32x4_zero(i32x4) -> i16x8 {
block0(v0: i32x4):
    v1 = vconst.i32x4 0x00
    v2 = snarrow v0, v1
    return v2
}

; block0:
; sqxtn v0.4h, v0.4s
; ret

function %snarrow_i64x2_zero(i64x2) -> i32x4 {
block0(v0: i64x2):
    v1 = vconst.i64x2 0x00
    v2 = snarrow v0, v1
    return v2
}

; block0:
; sqxtn v0.2s, v0.2d
; ret

function %unarrow_i16x8_zero(i16x8) -> i8x16 {
block0(v0: i16x8):
    v1 = vconst.i16x8 0x00
    v2 = unarrow v0, v1
    return v2
}

; block0:
; sqxtun v0.8b, v0.8h
; ret

function %unarrow_i32x4_zero(i32x4) -> i16x8 {
block0(v0: i32x4):
    v1 = vconst.i32x4 0x00
    v2 = unarrow v0, v1
    return v2
}

; block0:
; sqxtun v0.4h, v0.4s
; ret

function %unarrow_i64x2_zero(i64x2) -> i32x4 {
block0(v0: i64x2):
    v1 = vconst.i64x2 0x00
    v2 = unarrow v0, v1
    return v2
}

; block0:
; sqxtun v0.2s, v0.2d
; ret

function %uunarrow_i16x8_zero(i16x8) -> i8x16 {
block0(v0: i16x8):
    v1 = vconst.i16x8 0x00
    v2 = uunarrow v0, v1
    return v2
}

; block0:
; uqxtn v0.8b, v0.8h
; ret

function %uunarrow_i32x4_zero(i32x4) -> i16x8 {
block0(v0: i32x4):
    v1 = vconst.i32x4 0x00
    v2 = uunarrow v0, v1
    return v2
}

; block0:
; uqxtn v0.4h, v0.4s
; ret

function %uunarrow_i64x2_zero(i64x2) -> i32x4 {
block0(v0: i64x2):
    v1 = vconst.i64x2 0x00
    v2 = uunarrow v0, v1
    return v2
}

; block0:
; uqxtn v0.2s, v0.2d
; ret