wasmtime/cranelift/filetests/filetests/isa/aarch64/simd-narrow.clif
Commit 05cbd667c7 (Chris Fallin, 2022-09-21): Cranelift: use regalloc2 constraints on caller side of ABI code. (#4892)
* Cranelift: use regalloc2 constraints on caller side of ABI code.

This PR updates the shared ABI code and backends to use register-operand
constraints rather than explicit pinned-vreg moves for register
arguments and return values.
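
As a rough sketch (not the actual Cranelift ABI code; the helper name
below is made up), the difference for one register argument looks like
this, using regalloc2's public `Operand` constructors:

    use regalloc2::{Operand, PReg, RegClass, VReg};

    // Before: lower `arg` with an explicit move into the pinned vreg
    // aliasing x0, and have the call implicitly read that pinned vreg.
    //
    // After: no explicit move; the call's operand list carries a
    // constraint saying "use `arg`, and it must be in x0 at this
    // instruction", and regalloc2 places the value there itself.
    fn call_arg_operand(arg: VReg, abi_reg: PReg) -> Operand {
        Operand::reg_fixed_use(arg, abi_reg)
    }

    fn main() {
        let arg = VReg::new(0, RegClass::Int);
        let x0 = PReg::new(0, RegClass::Int); // first int-arg register
        println!("{:?}", call_arg_operand(arg, x0));
    }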

The s390x backend was not updated, because it has its own implementation
of ABI code. Ideally we could converge back to the code shared by x64
and aarch64 (which didn't exist when s390x ported calls to ISLE, so the
current situation is understandable, to be clear!). I'll leave this for
future work.

This PR exposed several places where regalloc2 needed to be a bit more
flexible with constraints; it requires regalloc2#74 to be merged and
pulled in.

* Update to regalloc2 0.3.3.

In addition to the version bump, this required removing two asserts, as
`SpillSlot`s no longer carry their class (so we can't assert that they
have the correct class).

* Review comments.

* Filetest updates.

* Add cargo-vet audit for regalloc2 0.3.2 -> 0.3.3 upgrade.

* Update to regalloc2 0.4.0.

test compile precise-output
set unwind_info=false
target aarch64
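
; This is a `precise-output` test: each `; block0:` comment block below is
; the exact VCode expected from compilation. 64-bit inputs are first
; combined into a single vector (the `mov` pair) and narrowed in one step;
; 128-bit inputs use the base and "2" forms of the narrowing instruction
; to fill the low and high halves of the result.

; snarrow: signed input, signed saturating narrow (sqxtn / sqxtn2).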
function %snarrow_i16x4(i16x4, i16x4) -> i8x8 {
block0(v0: i16x4, v1: i16x4):
  v2 = snarrow v0, v1
  return v2
}

; block0:
; mov v4.16b, v0.16b
; mov v4.d[1], v4.d[1], v1.d[0]
; sqxtn v0.8b, v4.8h
; ret

function %snarrow_i16x8(i16x8, i16x8) -> i8x16 {
block0(v0: i16x8, v1: i16x8):
  v2 = snarrow v0, v1
  return v2
}

; block0:
; sqxtn v0.8b, v0.8h
; sqxtn2 v0.16b, v0.16b, v1.8h
; ret

function %snarrow_i32x2(i32x2, i32x2) -> i16x4 {
block0(v0: i32x2, v1: i32x2):
  v2 = snarrow v0, v1
  return v2
}

; block0:
; mov v4.16b, v0.16b
; mov v4.d[1], v4.d[1], v1.d[0]
; sqxtn v0.4h, v4.4s
; ret

function %snarrow_i32x4(i32x4, i32x4) -> i16x8 {
block0(v0: i32x4, v1: i32x4):
  v2 = snarrow v0, v1
  return v2
}

; block0:
; sqxtn v0.4h, v0.4s
; sqxtn2 v0.8h, v0.8h, v1.4s
; ret

function %snarrow_i64x2(i64x2, i64x2) -> i32x4 {
block0(v0: i64x2, v1: i64x2):
  v2 = snarrow v0, v1
  return v2
}

; block0:
; sqxtn v0.2s, v0.2d
; sqxtn2 v0.4s, v0.4s, v1.2d
; ret
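
; unarrow: signed input, unsigned saturating narrow (sqxtun / sqxtun2).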
function %unarrow_i16x4(i16x4, i16x4) -> i8x8 {
block0(v0: i16x4, v1: i16x4):
  v2 = unarrow v0, v1
  return v2
}

; block0:
; mov v4.16b, v0.16b
; mov v4.d[1], v4.d[1], v1.d[0]
; sqxtun v0.8b, v4.8h
; ret

function %unarrow_i16x8(i16x8, i16x8) -> i8x16 {
block0(v0: i16x8, v1: i16x8):
  v2 = unarrow v0, v1
  return v2
}

; block0:
; sqxtun v0.8b, v0.8h
; sqxtun2 v0.16b, v0.16b, v1.8h
; ret

function %unarrow_i32x2(i32x2, i32x2) -> i16x4 {
block0(v0: i32x2, v1: i32x2):
  v2 = unarrow v0, v1
  return v2
}

; block0:
; mov v4.16b, v0.16b
; mov v4.d[1], v4.d[1], v1.d[0]
; sqxtun v0.4h, v4.4s
; ret

function %unarrow_i32x4(i32x4, i32x4) -> i16x8 {
block0(v0: i32x4, v1: i32x4):
  v2 = unarrow v0, v1
  return v2
}

; block0:
; sqxtun v0.4h, v0.4s
; sqxtun2 v0.8h, v0.8h, v1.4s
; ret

function %unarrow_i64x2(i64x2, i64x2) -> i32x4 {
block0(v0: i64x2, v1: i64x2):
  v2 = unarrow v0, v1
  return v2
}

; block0:
; sqxtun v0.2s, v0.2d
; sqxtun2 v0.4s, v0.4s, v1.2d
; ret
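
; uunarrow: unsigned input, unsigned saturating narrow (uqxtn / uqxtn2).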
function %uunarrow_i16x4(i16x4, i16x4) -> i8x8 {
block0(v0: i16x4, v1: i16x4):
  v2 = uunarrow v0, v1
  return v2
}

; block0:
; mov v4.16b, v0.16b
; mov v4.d[1], v4.d[1], v1.d[0]
; uqxtn v0.8b, v4.8h
; ret

function %uunarrow_i16x8(i16x8, i16x8) -> i8x16 {
block0(v0: i16x8, v1: i16x8):
  v2 = uunarrow v0, v1
  return v2
}

; block0:
; uqxtn v0.8b, v0.8h
; uqxtn2 v0.16b, v0.16b, v1.8h
; ret

function %uunarrow_i32x2(i32x2, i32x2) -> i16x4 {
block0(v0: i32x2, v1: i32x2):
  v2 = uunarrow v0, v1
  return v2
}

; block0:
; mov v4.16b, v0.16b
; mov v4.d[1], v4.d[1], v1.d[0]
; uqxtn v0.4h, v4.4s
; ret

function %uunarrow_i32x4(i32x4, i32x4) -> i16x8 {
block0(v0: i32x4, v1: i32x4):
  v2 = uunarrow v0, v1
  return v2
}

; block0:
; uqxtn v0.4h, v0.4s
; uqxtn2 v0.8h, v0.8h, v1.4s
; ret

function %uunarrow_i64x2(i64x2, i64x2) -> i32x4 {
block0(v0: i64x2, v1: i64x2):
  v2 = uunarrow v0, v1
  return v2
}

; block0:
; uqxtn v0.2s, v0.2d
; uqxtn2 v0.4s, v0.4s, v1.2d
; ret
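
; A constant all-zero high half needs no second instruction: the base
; narrowing forms zero the upper half of the destination, so a single
; sqxtn/sqxtun/uqxtn suffices.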
function %snarrow_i16x8_zero(i16x8) -> i8x16 {
block0(v0: i16x8):
  v1 = vconst.i16x8 0x00
  v2 = snarrow v0, v1
  return v2
}

; block0:
; sqxtn v0.8b, v0.8h
; ret

function %snarrow_i32x4_zero(i32x4) -> i16x8 {
block0(v0: i32x4):
  v1 = vconst.i32x4 0x00
  v2 = snarrow v0, v1
  return v2
}

; block0:
; sqxtn v0.4h, v0.4s
; ret

function %snarrow_i64x2_zero(i64x2) -> i32x4 {
block0(v0: i64x2):
  v1 = vconst.i64x2 0x00
  v2 = snarrow v0, v1
  return v2
}

; block0:
; sqxtn v0.2s, v0.2d
; ret

function %unarrow_i16x8_zero(i16x8) -> i8x16 {
block0(v0: i16x8):
  v1 = vconst.i16x8 0x00
  v2 = unarrow v0, v1
  return v2
}

; block0:
; sqxtun v0.8b, v0.8h
; ret

function %unarrow_i32x4_zero(i32x4) -> i16x8 {
block0(v0: i32x4):
  v1 = vconst.i32x4 0x00
  v2 = unarrow v0, v1
  return v2
}

; block0:
; sqxtun v0.4h, v0.4s
; ret

function %unarrow_i64x2_zero(i64x2) -> i32x4 {
block0(v0: i64x2):
  v1 = vconst.i64x2 0x00
  v2 = unarrow v0, v1
  return v2
}

; block0:
; sqxtun v0.2s, v0.2d
; ret

function %uunarrow_i16x8_zero(i16x8) -> i8x16 {
block0(v0: i16x8):
  v1 = vconst.i16x8 0x00
  v2 = uunarrow v0, v1
  return v2
}

; block0:
; uqxtn v0.8b, v0.8h
; ret

function %uunarrow_i32x4_zero(i32x4) -> i16x8 {
block0(v0: i32x4):
  v1 = vconst.i32x4 0x00
  v2 = uunarrow v0, v1
  return v2
}

; block0:
; uqxtn v0.4h, v0.4s
; ret

function %uunarrow_i64x2_zero(i64x2) -> i32x4 {
block0(v0: i64x2):
  v1 = vconst.i64x2 0x00
  v2 = uunarrow v0, v1
  return v2
}

; block0:
; uqxtn v0.2s, v0.2d
; ret