This is a cherry-pick of a long-ago commit, 2d46637. The original message reads:

> Now that `SyntheticAmode` can refer to constants, there is no longer a
> need for a separate instruction format--standard load instructions will
> work.

Since then, the transition to ISLE and the use of `XmmLoadConst` in many more places make this change a larger diff than the original. The basic idea is the same, though: the extra indirection of `Inst::XmmLoadConst` is removed and replaced by a direct use of `VCodeConstant` as a `SyntheticAmode`. This has no effect on codegen, but the output in the CLIF tests is now clearer in that the actual instruction is displayed (e.g., `movdqu`) instead of a made-up one (`load_const`).
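To make the shape of the change concrete, here is a minimal sketch in the spirit of the x64 backend. The types below (`Reg`, `SseOpcode`, the `Inst` variants, and the exact `SyntheticAmode` variant name) are simplified stand-ins and assumptions for illustration, not the real definitions, which live under `cranelift/codegen/src/isa/x64/inst/`:

```rust
// Hypothetical, simplified stand-ins for the x64 backend types.
#[derive(Clone, Copy)]
struct Reg(u8);

#[derive(Clone, Copy)]
struct VCodeConstant(u32); // handle to an entry in the constant pool

enum SseOpcode {
    Movdqu,
}

enum SyntheticAmode {
    /// A real x64 addressing mode (base register + offset).
    Real { base: Reg, offset: i32 },
    /// An address that is resolved to a constant-pool entry at
    /// emission time.
    ConstantOffset(VCodeConstant),
}

enum Inst {
    /// Before: a dedicated pseudo-instruction, printed as the
    /// made-up `load_const`.
    XmmLoadConst { src: VCodeConstant, dst: Reg },
    /// After: an ordinary SSE load whose source can be a
    /// `SyntheticAmode`, printed as the real instruction (`movdqu`).
    XmmUnaryRmR { op: SseOpcode, src: SyntheticAmode, dst: Reg },
}

/// The change in spirit: rather than emitting the pseudo-instruction,
/// wrap the constant handle in a `SyntheticAmode` and emit a standard
/// load from it.
fn load_xmm_const(c: VCodeConstant, dst: Reg) -> Inst {
    Inst::XmmUnaryRmR {
        op: SseOpcode::Movdqu,
        src: SyntheticAmode::ConstantOffset(c),
        dst,
    }
}
```

With a variant like this, any lowering rule that previously emitted `XmmLoadConst` can emit an ordinary load instead, which is why the expected output in the test below shows plain `movdqu const(N)` instructions.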
test compile precise-output
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

;; shuffle

function %shuffle_different_ssa_values() -> i8x16 {
block0:
    v0 = vconst.i8x16 0x00
    v1 = vconst.i8x16 0x01
    v2 = shuffle v0, v1, 0x11000000000000000000000000000000 ;; pick the second lane of v1, the rest use the first lane of v0
    return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
;   movdqu const(3), %xmm0
;   movdqu const(2), %xmm5
;   movdqu const(0), %xmm3
;   pshufb %xmm0, %xmm3, %xmm0
;   movdqu const(1), %xmm7
;   pshufb %xmm5, %xmm7, %xmm5
;   por %xmm0, %xmm5, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   ret

function %shuffle_same_ssa_value() -> i8x16 {
block0:
    v1 = vconst.i8x16 0x01
    v2 = shuffle v1, v1, 0x13000000000000000000000000000000 ;; pick the fourth lane of v1 and the rest from the first lane of v1
    return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
;   movdqu const(1), %xmm0
;   movdqu const(0), %xmm2
;   pshufb %xmm0, %xmm2, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   ret

function %swizzle() -> i8x16 {
block0:
    v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
    v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
    v2 = swizzle.i8x16 v0, v1
    return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
;   movdqu const(1), %xmm0
;   movdqu const(1), %xmm3
;   movdqu const(0), %xmm4
;   paddusb %xmm3, %xmm4, %xmm3
;   pshufb %xmm0, %xmm3, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   ret

function %splat_i8(i8) -> i8x16 {
block0(v0: i8):
    v1 = splat.i8x16 v0
    return v1
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
;   uninit %xmm0
;   pinsrb $0, %xmm0, %rdi, %xmm0
;   pxor %xmm7, %xmm7, %xmm7
;   pshufb %xmm0, %xmm7, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   ret

function %splat_b16() -> b16x8 {
block0:
    v0 = bconst.b16 true
    v1 = splat.b16x8 v0
    return v1
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
;   movl $65535, %edi
;   uninit %xmm5
;   pinsrw $0, %xmm5, %rdi, %xmm5
;   pinsrw $1, %xmm5, %rdi, %xmm5
;   pshufd $0, %xmm5, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   ret

function %splat_i32(i32) -> i32x4 {
block0(v0: i32):
    v1 = splat.i32x4 v0
    return v1
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
;   uninit %xmm4
;   pinsrd $0, %xmm4, %rdi, %xmm4
;   pshufd $0, %xmm4, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   ret

function %splat_f64(f64) -> f64x2 {
block0(v0: f64):
    v1 = splat.f64x2 v0
    return v1
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
;   movdqa %xmm0, %xmm6
;   uninit %xmm0
;   movdqa %xmm6, %xmm7
;   movsd %xmm0, %xmm7, %xmm0
;   movlhps %xmm0, %xmm7, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   ret

function %load32_zero_coalesced(i64) -> i32x4 {
block0(v0: i64):
    v1 = load.i32 v0
    v2 = scalar_to_vector.i32x4 v1
    return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
;   movss 0(%rdi), %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   ret

function %load32_zero_int(i32) -> i32x4 {
block0(v0: i32):
    v1 = scalar_to_vector.i32x4 v0
    return v1
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
;   movd %edi, %xmm0
;   movq %rbp, %rsp
;   popq %rbp
;   ret

function %load32_zero_float(f32) -> f32x4 {
block0(v0: f32):
    v1 = scalar_to_vector.f32x4 v0
    return v1
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
;   movq %rbp, %rsp
;   popq %rbp
;   ret