This PR switches the default backend on x86, for both the `cranelift-codegen` crate and for Wasmtime, to the new (`MachInst`-style, `VCode`-based) backend that has been under development and testing for some time now. The old backend remains available: building with the `old-x86-backend` feature makes it the default again, and it can be requested explicitly via `BackendVariant::Legacy` from the appropriate APIs.

As part of this switch, the testing infrastructure gains some runtime-configurable plumbing so that tests can run against the appropriate backend: `clif-util test` can now parse a backend selector option from filetests and instantiate the correct backend. CI has been updated so that the old x86 backend continues to run its tests, just as we previously ran the new x64 backend separately.

Once we are satisfied that the new backend has caused no unforeseen issues and we will not need to revert, we will remove the old x86 backend entirely.
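For runtime selection, the variant APIs can be used directly from Rust. Below is a minimal sketch of requesting the legacy backend; `isa::lookup_variant` and the shape of the builder it returns are assumptions here rather than a confirmed signature, so treat this as illustrative only:

```rust
use cranelift_codegen::isa::{self, BackendVariant};
use cranelift_codegen::settings;
use target_lexicon::Triple;

fn main() {
    // `BackendVariant::Any` now resolves to the new MachInst/VCode backend on
    // x86_64; request the legacy backend explicitly instead. (`lookup_variant`
    // is an assumed entry point; the legacy variant is only compiled in when
    // the `old-x86-backend` feature is enabled.)
    let builder = isa::lookup_variant(Triple::host(), BackendVariant::Legacy)
        .expect("legacy x86 backend not available in this build");

    let flags = settings::Flags::new(settings::builder());
    let isa = builder.finish(flags);
    println!("selected backend: {} for {}", isa.name(), isa.triple());
}
```

In filetests, the same choice is expressed as a token on the `target` line; the `machinst` token in the test below is the selector that `clif-util test` now parses.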
```
test compile
set enable_simd
target x86_64 machinst has_ssse3 has_sse41

;; shuffle

function %shuffle_different_ssa_values() -> i8x16 {
block0:
    v0 = vconst.i8x16 0x00
    v1 = vconst.i8x16 0x01
    v2 = shuffle v0, v1, 0x11000000000000000000000000000000 ; pick the second lane of v1, the rest use the first lane of v0
    return v2
}
; check: load_const VCodeConstant(3), %xmm1
; nextln: load_const VCodeConstant(2), %xmm0
; nextln: load_const VCodeConstant(0), %xmm2
; nextln: pshufb %xmm2, %xmm1
; nextln: load_const VCodeConstant(1), %xmm2
; nextln: pshufb %xmm2, %xmm0
; nextln: orps %xmm1, %xmm0

function %shuffle_same_ssa_value() -> i8x16 {
block0:
    v1 = vconst.i8x16 0x01
    v2 = shuffle v1, v1, 0x13000000000000000000000000000000 ; pick the fourth lane of v1 and the rest from the first lane of v1
    return v2
}
; check: load_const VCodeConstant(1), %xmm0
; nextln: load_const VCodeConstant(0), %xmm1
; nextln: pshufb %xmm1, %xmm0

;; swizzle

function %swizzle() -> i8x16 {
block0:
    v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
    v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
    v2 = swizzle.i8x16 v0, v1
    return v2
}
; check: load_const VCodeConstant(1), %xmm1
; nextln: load_const VCodeConstant(1), %xmm0
; nextln: load_const VCodeConstant(0), %xmm2
; nextln: paddusb %xmm2, %xmm0
; nextln: pshufb %xmm0, %xmm1
; nextln: movdqa %xmm1, %xmm0

;; splat

function %splat_i8(i8) -> i8x16 {
block0(v0: i8):
    v1 = splat.i8x16 v0
    return v1
}
; check: uninit %xmm0
; nextln: pinsrb $$0, %rdi, %xmm0
; nextln: pxor %xmm1, %xmm1
; nextln: pshufb %xmm1, %xmm0

function %splat_b16() -> b16x8 {
block0:
    v0 = bconst.b16 true
    v1 = splat.b16x8 v0
    return v1
}
; check: uninit %xmm0
; nextln: pinsrw $$0, %rsi, %xmm0
; nextln: pinsrw $$1, %rsi, %xmm0
; nextln: pshufd $$0, %xmm0, %xmm0

function %splat_i32(i32) -> i32x4 {
block0(v0: i32):
    v1 = splat.i32x4 v0
    return v1
}
; check: uninit %xmm0
; nextln: pinsrd $$0, %rdi, %xmm0
; nextln: pshufd $$0, %xmm0, %xmm0

function %splat_f64(f64) -> f64x2 {
block0(v0: f64):
    v1 = splat.f64x2 v0
    return v1
}
; check: uninit %xmm1
; nextln: movsd %xmm0, %xmm1
; nextln: movlhps %xmm0, %xmm1

;; load*_zero

; Verify that a `load` followed by a `scalar_to_vector` (the CLIF translation of `load32_zero`) is
; lowered to a single MOVSS instruction.
function %load32_zero_coalesced(i64) -> i32x4 {
block0(v0: i64):
    v1 = load.i32 v0
    v2 = scalar_to_vector.i32x4 v1
    ; check: movss 0(%rdi), %xmm0
    return v2
}

;; Verify that `scalar_to_vector` (used by `load32_zero`) lowers as expected.
function %load32_zero_int(i32) -> i32x4 {
block0(v0: i32):
    v1 = scalar_to_vector.i32x4 v0
    ; check: movd %edi, %xmm0
    return v1
}

function %load32_zero_float(f32) -> f32x4 {
block0(v0: f32):
    v1 = scalar_to_vector.f32x4 v0
    ; regex: MOV=movap*
    ; check: pushq
    ; not: $MOV
    ; check: ret
    return v1
}
```
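Conversely, a filetest that still needs to exercise the old backend can pin itself to it in its header. A sketch, assuming `legacy` is the selector token complementing `machinst` above:

```
test compile
target x86_64 legacy
```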