test compile
set enable_simd
target x86_64 machinst has_ssse3 has_sse41

;; shuffle

function %shuffle_different_ssa_values() -> i8x16 {
block0:
    v0 = vconst.i8x16 0x00
    v1 = vconst.i8x16 0x01
    v2 = shuffle v0, v1, 0x11000000000000000000000000000000 ; pick the second lane of v1, the rest use the first lane of v0
    return v2
}
;; With two distinct inputs, each operand is shuffled by its own PSHUFB mask (out-of-range
;; indices zero a lane) and the two partial results are OR'd together.
; check: load_const VCodeConstant(3), %xmm1
; nextln: load_const VCodeConstant(2), %xmm0
; nextln: load_const VCodeConstant(0), %xmm2
; nextln: pshufb %xmm2, %xmm1
; nextln: load_const VCodeConstant(1), %xmm2
; nextln: pshufb %xmm2, %xmm0
; nextln: orps %xmm1, %xmm0

function %shuffle_same_ssa_value() -> i8x16 {
block0:
    v1 = vconst.i8x16 0x01
    v2 = shuffle v1, v1, 0x13000000000000000000000000000000 ; pick the fourth lane of v1 and the rest from the first lane of v1
    return v2
}
;; When both shuffle operands are the same SSA value, a single PSHUFB suffices.
; check: load_const VCodeConstant(1), %xmm0
; nextln: load_const VCodeConstant(0), %xmm1
; nextln: pshufb %xmm1, %xmm0

;; swizzle

function %swizzle() -> i8x16 {
block0:
    v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
    v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
    v2 = swizzle.i8x16 v0, v1
    return v2
}
;; PADDUSB saturating-adds a constant to the index vector so that any index >= 16 ends up
;; with its top bit set; PSHUFB then writes zero for those lanes, matching CLIF swizzle
;; semantics for out-of-range indices.
; check: load_const VCodeConstant(1), %xmm1
; nextln: load_const VCodeConstant(1), %xmm0
; nextln: load_const VCodeConstant(0), %xmm2
; nextln: paddusb %xmm2, %xmm0
; nextln: pshufb %xmm0, %xmm1
; nextln: movdqa %xmm1, %xmm0

;; splat

function %splat_i8(i8) -> i8x16 {
block0(v0: i8):
    v1 = splat.i8x16 v0
    return v1
}
;; Insert the scalar into lane 0, then broadcast it with a PSHUFB whose mask is all zeros.
; check: uninit %xmm0
; nextln: pinsrb $$0, %rdi, %xmm0
; nextln: pxor %xmm1, %xmm1
; nextln: pshufb %xmm1, %xmm0

function %splat_b16() -> b16x8 {
block0:
    v0 = bconst.b16 true
    v1 = splat.b16x8 v0
    return v1
}
;; The two PINSRW instructions place two copies of the scalar in the low 32 bits; PSHUFD
;; with immediate 0 then broadcasts that 32-bit group to all four positions.
; check: uninit %xmm0
; nextln: pinsrw $$0, %rsi, %xmm0
; nextln: pinsrw $$1, %rsi, %xmm0
; nextln: pshufd $$0, %xmm0, %xmm0

function %splat_i32(i32) -> i32x4 {
block0(v0: i32):
    v1 = splat.i32x4 v0
    return v1
}
; check: uninit %xmm0
; nextln: pinsrd $$0, %rdi, %xmm0
; nextln: pshufd $$0, %xmm0, %xmm0

function %splat_f64(f64) -> f64x2 {
block0(v0: f64):
    v1 = splat.f64x2 v0
    return v1
}
;; MOVSD copies the scalar into the low lane; MOVLHPS duplicates the low 64 bits into the
;; high 64 bits.
; check: uninit %xmm1
; nextln: movsd %xmm0, %xmm1
; nextln: movlhps %xmm0, %xmm1

;; load*_zero

;; Verify that a `load` followed by a `scalar_to_vector` (the CLIF translation of
;; `load32_zero`) is lowered to a single MOVSS instruction.
function %load32_zero_coalesced(i64) -> i32x4 {
block0(v0: i64):
    v1 = load.i32 v0
    v2 = scalar_to_vector.i32x4 v1
    ; check: movss 0(%rdi), %xmm0
    return v2
}

;; Verify that `scalar_to_vector` (used by `load32_zero`) lowers as expected.
function %load32_zero_int(i32) -> i32x4 {
block0(v0: i32):
    v1 = scalar_to_vector.i32x4 v0
    ; check: movd %edi, %xmm0
    return v1
}

;; Verify that `scalar_to_vector` of an F32 that is already in an XMM register emits no
;; move: no MOVAPS/MOVAPD may appear between the prologue and the return.
function %load32_zero_float(f32) -> f32x4 {
block0(v0: f32):
    v1 = scalar_to_vector.f32x4 v0
    ; regex: MOV=movap*
    ; check: pushq
    ; not: $MOV
    ; check: ret
    return v1
}