Only the i16x8 and i32x4 forms of imul are encoded in this commit, mainly because i8x16 and i64x2 multiplication do not have simple encodings on x86. i64x2 is not required by the SIMD spec, and there is discussion (https://github.com/WebAssembly/simd/pull/98#issuecomment-530092217) about removing i8x16.
167 lines
3.7 KiB
Plaintext
167 lines
3.7 KiB
Plaintext
; SIMD integer arithmetic tests (iadd, isub, ineg, imul) for x86_64 with SIMD enabled.
; Three test modes are requested below; the functions in this file opt into them through
; trailing marker comments (expected encoding bytes, filecheck directives, execution markers).
; NOTE(review) the exact semantics of each mode are defined by the filetest harness, not here.
test run
|
|
test binemit
|
|
test legalizer
|
|
set enable_simd
|
|
target x86_64 skylake
|
|
|
|
; Adds the i32x4 constants [1 1 1 1] and [1 2 3 4], pinning the emitted bytes of the
; vector iadd, and returns true when lanes 0 and 3 of the sum equal 2 and 5.
function %iadd_i32x4() -> b1 {
|
|
ebb0:
|
|
[-, %xmm0] v0 = vconst.i32x4 [1 1 1 1]
|
|
[-, %xmm1] v1 = vconst.i32x4 [1 2 3 4]
|
|
[-, %xmm0] v2 = iadd v0, v1 ; bin: 66 0f fe c1
|
|
|
|
v3 = extractlane v2, 0
|
|
v4 = icmp_imm eq v3, 2 ; lane 0 is 1 + 1
|
|
|
|
v5 = extractlane v2, 3
|
|
v6 = icmp_imm eq v5, 5 ; lane 3 is 1 + 4
|
|
; TODO replace extractlanes with vector comparison
|
|
|
|
v7 = band v4, v6 ; both lane comparisons must hold
|
|
return v7
|
|
}
|
|
|
|
; run
|
|
|
|
; Adds 2 to each lane of an all-255 i8x16 vector; returns true when lane 0 of the sum is 1,
; i.e. the 8-bit lane addition wrapped (255 + 2 = 257, and 257 mod 256 = 1).
function %iadd_i8x16_with_overflow() -> b1 {
|
|
ebb0:
|
|
[-, %xmm0] v0 = vconst.i8x16 [255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255]
|
|
[-, %xmm7] v1 = vconst.i8x16 [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
|
|
[-, %xmm0] v2 = iadd v0, v1 ; bin: 66 0f fc c7
|
|
|
|
v3 = extractlane v2, 0
|
|
v4 = icmp_imm eq v3, 1 ; only lane 0 is inspected
|
|
; TODO replace extractlane with vector comparison
|
|
|
|
return v4
|
|
}
|
|
|
|
; run
|
|
|
|
; Encoding-only case: takes two i16x8 arguments in %xmm1 and %xmm2 and pins the bytes
; emitted for their iadd, whose result is placed in %xmm1.
function %iadd_i16x8(i16x8, i16x8) -> i16x8 {
|
|
ebb0(v0: i16x8 [%xmm1], v1: i16x8 [%xmm2]):
|
|
[-, %xmm1] v2 = iadd v0, v1 ; bin: 66 0f fd ca
|
|
return v2
|
|
}
|
|
|
|
; Encoding-only case: takes two i64x2 arguments in %xmm3 and %xmm4 and pins the bytes
; emitted for their iadd, whose result is placed in %xmm3.
function %iadd_i64x2(i64x2, i64x2) -> i64x2 {
|
|
ebb0(v0: i64x2 [%xmm3], v1: i64x2 [%xmm4]):
|
|
[-, %xmm3] v2 = iadd v0, v1 ; bin: 66 0f d4 dc
|
|
return v2
|
|
}
|
|
|
|
; Subtracts [1 2 3 4] from [1 1 1 1] as i32x4, pinning the emitted bytes of the vector isub,
; and returns true when lane 0 is 0 and lane 1 is 0xffffffff (1 - 2 in 32 bits).
function %isub_i32x4() -> b1 {
|
|
ebb0:
|
|
[-, %xmm3] v0 = vconst.i32x4 [1 1 1 1]
|
|
[-, %xmm5] v1 = vconst.i32x4 [1 2 3 4]
|
|
[-, %xmm3] v2 = isub v0, v1 ; bin: 66 0f fa dd
|
|
|
|
v3 = extractlane v2, 0
|
|
v4 = icmp_imm eq v3, 0 ; lane 0 is 1 - 1
|
|
|
|
v5 = extractlane v2, 1
|
|
v6 = icmp_imm eq v5, 0xffffffff ; lane 1 is 1 - 2, all bits set
|
|
; TODO replace extractlanes with vector comparison
|
|
|
|
v7 = band v4, v6 ; both lane comparisons must hold
|
|
return v7
|
|
}
|
|
|
|
; run
|
|
|
|
; Encoding-only case: takes two i64x2 arguments in %xmm0 and %xmm1 and pins the bytes
; emitted for their isub, whose result is placed in %xmm0.
function %isub_i64x2(i64x2, i64x2) -> i64x2 {
|
|
ebb0(v0: i64x2 [%xmm0], v1: i64x2 [%xmm1]):
|
|
[-, %xmm0] v2 = isub v0, v1 ; bin: 66 0f fb c1
|
|
return v2
|
|
}
|
|
|
|
; Encoding-only case: takes two i16x8 arguments in %xmm3 and %xmm4 and pins the bytes
; emitted for their isub, whose result is placed in %xmm3.
function %isub_i16x8(i16x8, i16x8) -> i16x8 {
|
|
ebb0(v0: i16x8 [%xmm3], v1: i16x8 [%xmm4]):
|
|
[-, %xmm3] v2 = isub v0, v1 ; bin: 66 0f f9 dc
|
|
return v2
|
|
}
|
|
|
|
; Encoding-only case: takes two i8x16 arguments in %xmm3 and %xmm4 and pins the bytes
; emitted for their isub, whose result is placed in %xmm3.
function %isub_i8x16(i8x16, i8x16) -> i8x16 {
|
|
ebb0(v0: i8x16 [%xmm3], v1: i8x16 [%xmm4]):
|
|
[-, %xmm3] v2 = isub v0, v1 ; bin: 66 0f f8 dc
|
|
return v2
|
|
}
|
|
|
|
; Negates an i32x4 vector of ones. The filecheck directives expect ineg to be rewritten as
; a subtraction from a zero vector (v5); the function returns true when lane 0 is -1.
function %ineg_i32x4() -> b1 {
|
|
ebb0:
|
|
v0 = vconst.i32x4 [1 1 1 1]
|
|
v2 = ineg v0
|
|
; check: v5 = vconst.i32x4 0x00
|
|
; nextln: v2 = isub v5, v0
|
|
|
|
v3 = extractlane v2, 0
|
|
v4 = icmp_imm eq v3, -1
|
|
|
|
return v4 ; bin: c3
|
|
}
|
|
; run
|
|
|
|
; Legalization-only case: for i8x16, i16x8 and i64x2 the filecheck directives expect each
; ineg to be rewritten as an isub from a freshly created zero vconst (v6, v7, v8).
; The i16x8 and i64x2 inputs are raw bitcasts of the same all-zero i8x16 constant.
function %ineg_legalized() {
|
|
ebb0:
|
|
v0 = vconst.i8x16 0x00
|
|
v1 = ineg v0
|
|
; check: v6 = vconst.i8x16 0x00
|
|
; nextln: v1 = isub v6, v0
|
|
|
|
v2 = raw_bitcast.i16x8 v0
|
|
v3 = ineg v2
|
|
; check: v7 = vconst.i16x8 0x00
|
|
; nextln: v3 = isub v7, v2
|
|
|
|
v4 = raw_bitcast.i64x2 v0
|
|
v5 = ineg v4
|
|
; check: v8 = vconst.i64x2 0x00
|
|
; nextln: v5 = isub v8, v4
|
|
|
|
return ; bin: c3
|
|
}
|
|
|
|
; Multiplies [-1 0 1 -2147483647] by [2 2 2 2] as i32x4, pinning the emitted bytes of the
; vector imul, and returns true when lanes 0, 1 and 3 equal -2, 0 and 2 respectively.
; Lane 2 (1 * 2) is not inspected.
function %imul_i32x4() -> b1 {
|
|
ebb0:
|
|
[-, %xmm0] v0 = vconst.i32x4 [-1 0 1 -2147483647] ; i.e. -2147483647 == 0x80_00_00_01
|
|
[-, %xmm1] v1 = vconst.i32x4 [2 2 2 2]
|
|
[-, %xmm0] v2 = imul v0, v1 ; bin: 66 0f 38 40 c1
|
|
|
|
v3 = extractlane v2, 0
|
|
v4 = icmp_imm eq v3, -2
|
|
|
|
v5 = extractlane v2, 1
|
|
v6 = icmp_imm eq v5, 0
|
|
|
|
v7 = extractlane v2, 3
|
|
v8 = icmp_imm eq v7, 2 ; 0x80_00_00_01 * 2 == 0x1_00_00_00_02 (and the 1 is dropped)
|
|
|
|
v9 = band v4, v6
|
|
v10 = band v8, v9 ; conjunction of all three lane comparisons
|
|
return v10
|
|
}
|
|
; run
|
|
|
|
; Multiplies [-1 0 1 32767 0 0 0 0] by [2 2 2 2 0 0 0 0] as i16x8, pinning the emitted bytes
; of the vector imul, and returns true when lanes 0, 1 and 3 equal 0xfffe, 0 and 0xfffe.
; The result is the conjunction of all three lane comparisons (v10), so a wrong value in
; lane 1 or lane 3 fails the test instead of going unnoticed.
function %imul_i16x8() -> b1 {
|
|
ebb0:
|
|
[-, %xmm1] v0 = vconst.i16x8 [-1 0 1 32767 0 0 0 0] ; i.e. 32767 == 0x7f_ff
|
|
[-, %xmm2] v1 = vconst.i16x8 [2 2 2 2 0 0 0 0]
|
|
[-, %xmm1] v2 = imul v0, v1 ; bin: 66 0f d5 ca
|
|
|
|
v3 = extractlane v2, 0
|
|
v4 = icmp_imm eq v3, 0xfffe ; TODO -2 will not work here and below because v3 is being
|
|
; uextend-ed, not sextend-ed
|
|
|
|
v5 = extractlane v2, 1
|
|
v6 = icmp_imm eq v5, 0
|
|
|
|
v7 = extractlane v2, 3
|
|
v8 = icmp_imm eq v7, 0xfffe ; 0x7f_ff * 2 == 0xff_fe
|
|
|
|
v9 = band v4, v6
|
|
v10 = band v8, v9 ; conjunction of all three lane comparisons
|
|
|
|
return v10
|
|
}
|
|
; run
|