diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif similarity index 78% rename from cranelift/filetests/filetests/isa/x86/simd-arithmetic.clif rename to cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif index c9d6c4c372..cc2d7f03e1 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif @@ -1,6 +1,4 @@ -test run test binemit -test legalizer set enable_simd target x86_64 skylake @@ -15,14 +13,11 @@ ebb0: v5 = extractlane v2, 3 v6 = icmp_imm eq v5, 5 - ; TODO replace extractlanes with vector comparison v7 = band v4, v6 return v7 } -; run - function %iadd_i8x16_with_overflow() -> b1 { ebb0: [-, %xmm0] v0 = vconst.i8x16 [255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255] @@ -31,13 +26,10 @@ ebb0: v3 = extractlane v2, 0 v4 = icmp_imm eq v3, 1 - ; TODO replace extractlane with vector comparison return v4 } -; run - function %iadd_i16x8(i16x8, i16x8) -> i16x8 { ebb0(v0: i16x8 [%xmm1], v1: i16x8 [%xmm2]): [-, %xmm1] v2 = iadd v0, v1 ; bin: 66 0f fd ca @@ -61,14 +53,11 @@ ebb0: v5 = extractlane v2, 1 v6 = icmp_imm eq v5, 0xffffffff - ; TODO replace extractlanes with vector comparison v7 = band v4, v6 return v7 } -; run - function %isub_i64x2(i64x2, i64x2) -> i64x2 { ebb0(v0: i64x2 [%xmm0], v1: i64x2 [%xmm1]): [-, %xmm0] v2 = isub v0, v1 ; bin: 66 0f fb c1 @@ -87,40 +76,6 @@ ebb0(v0: i8x16 [%xmm3], v1: i8x16 [%xmm4]): return v2 } -function %ineg_i32x4() -> b1 { -ebb0: - v0 = vconst.i32x4 [1 1 1 1] - v2 = ineg v0 - ; check: v5 = vconst.i32x4 0x00 - ; nextln: v2 = isub v5, v0 - - v3 = extractlane v2, 0 - v4 = icmp_imm eq v3, -1 - - return v4 ; bin: c3 -} -; run - -function %ineg_legalized() { -ebb0: - v0 = vconst.i8x16 0x00 - v1 = ineg v0 - ; check: v6 = vconst.i8x16 0x00 - ; nextln: v1 = isub v6, v0 - - v2 = raw_bitcast.i16x8 v0 - v3 = ineg v2 - ; check: v7 = 
vconst.i16x8 0x00 - ; nextln: v3 = isub v7, v2 - - v4 = raw_bitcast.i64x2 v0 - v5 = ineg v4 - ; check: v8 = vconst.i64x2 0x00 - ; nextln: v5 = isub v8, v4 - - return ; bin: c3 -} - function %imul_i32x4() -> b1 { ebb0: [-, %xmm0] v0 = vconst.i32x4 [-1 0 1 -2147483647] ; e.g. -2147483647 == 0x80_00_00_01 @@ -140,7 +95,7 @@ ebb0: v10 = band v8, v9 return v10 } -; run + function %imul_i16x8() -> b1 { ebb0: @@ -149,8 +104,8 @@ ebb0: [-, %xmm1] v2 = imul v0, v1 ; bin: 66 0f d5 ca v3 = extractlane v2, 0 - v4 = icmp_imm eq v3, 0xfffe ; TODO -2 will not work here and below because v3 is being - ; uextend-ed, not sextend-ed + v4 = icmp_imm eq v3, 0xfffe ; 0xfffe == -2; -2 will not work here and below because v3 is + ; being uextend-ed, not sextend-ed v5 = extractlane v2, 1 v6 = icmp_imm eq v5, 0 @@ -163,7 +118,7 @@ ebb0: return v4 } -; run + function %sadd_sat_i8x16() -> b1 { ebb0: @@ -176,7 +131,7 @@ ebb0: return v4 } -; run + function %uadd_sat_i16x8() -> b1 { ebb0: @@ -189,16 +144,16 @@ ebb0: return v4 } -; run + function %sub_sat_i8x16() -> b1 { ebb0: -[-, %xmm2] v0 = vconst.i8x16 [128 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] ; 120 == 0x80 == -128 +[-, %xmm2] v0 = vconst.i8x16 [128 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] ; 128 == 0x80 == -128 [-, %xmm3] v1 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] [-, %xmm2] v2 = ssub_sat v0, v1 ; bin: 66 0f e8 d3 v3 = extractlane v2, 0 - v4 = icmp_imm eq v3, 0x80 ; still -128, TODO it's unclear why I can't use -128 here + v4 = icmp_imm eq v3, 0x80 ; 0x80 == -128 ; now re-use 0x80 as an unsigned 128 [-, %xmm2] v5 = usub_sat v0, v2 ; bin: 66 0f d8 d2 @@ -208,7 +163,7 @@ ebb0: v8 = band v4, v7 return v8 } -; run + function %sub_sat_i16x8() { ebb0: diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif new file mode 100644 index 0000000000..6155204899 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif @@ -0,0 +1,36 @@ 
+test legalizer +set enable_simd +target x86_64 skylake + +function %ineg_i32x4() -> b1 { +ebb0: + v0 = vconst.i32x4 [1 1 1 1] + v2 = ineg v0 + ; check: v5 = vconst.i32x4 0x00 + ; nextln: v2 = isub v5, v0 + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, -1 + + return v4 +} + +function %ineg_legalized() { +ebb0: + v0 = vconst.i8x16 0x00 + v1 = ineg v0 + ; check: v6 = vconst.i8x16 0x00 + ; nextln: v1 = isub v6, v0 + + v2 = raw_bitcast.i16x8 v0 + v3 = ineg v2 + ; check: v7 = vconst.i16x8 0x00 + ; nextln: v3 = isub v7, v2 + + v4 = raw_bitcast.i64x2 v0 + v5 = ineg v4 + ; check: v8 = vconst.i64x2 0x00 + ; nextln: v5 = isub v8, v4 + + return +} diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif new file mode 100644 index 0000000000..22bcf11bdd --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif @@ -0,0 +1,155 @@ +test run +set enable_simd +target x86_64 skylake + +function %iadd_i32x4() -> b1 { +ebb0: + v0 = vconst.i32x4 [1 1 1 1] + v1 = vconst.i32x4 [1 2 3 4] + v2 = iadd v0, v1 + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 2 + + v5 = extractlane v2, 3 + v6 = icmp_imm eq v5, 5 + ; TODO replace extractlanes with vector comparison + + v7 = band v4, v6 + return v7 +} +; run + +function %iadd_i8x16_with_overflow() -> b1 { +ebb0: + v0 = vconst.i8x16 [255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255] + v1 = vconst.i8x16 [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] + v2 = iadd v0, v1 + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 1 + ; TODO replace extractlane with vector comparison + + return v4 +} +; run + +function %isub_i32x4() -> b1 { +ebb0: + v0 = vconst.i32x4 [1 1 1 1] + v1 = vconst.i32x4 [1 2 3 4] + v2 = isub v0, v1 + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 0 + + v5 = extractlane v2, 1 + v6 = icmp_imm eq v5, 0xffffffff + ; TODO replace extractlanes with vector comparison + + v7 = band v4, v6 + return v7 +} +; run + + +function 
%ineg_i32x4() -> b1 { +ebb0: + v0 = vconst.i32x4 [1 1 1 1] + v2 = ineg v0 + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, -1 + + return v4 +} +; run + +function %imul_i32x4() -> b1 { +ebb0: + v0 = vconst.i32x4 [-1 0 1 -2147483647] ; e.g. -2147483647 == 0x80_00_00_01 + v1 = vconst.i32x4 [2 2 2 2] + v2 = imul v0, v1 + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, -2 + + v5 = extractlane v2, 1 + v6 = icmp_imm eq v5, 0 + + v7 = extractlane v2, 3 + v8 = icmp_imm eq v7, 2 ; 0x80_00_00_01 * 2 == 0x1_00_00_00_02 (and the 1 is dropped) + + v9 = band v4, v6 + v10 = band v8, v9 + return v10 +} +; run + +function %imul_i16x8() -> b1 { +ebb0: + v0 = vconst.i16x8 [-1 0 1 32767 0 0 0 0] ; e.g. 32767 == 0x7f_ff + v1 = vconst.i16x8 [2 2 2 2 0 0 0 0] + v2 = imul v0, v1 + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 0xfffe ; 0xfffe == -2; -2 will not work here and below because v3 is + ; being uextend-ed, not sextend-ed + + v5 = extractlane v2, 1 + v6 = icmp_imm eq v5, 0 + + v7 = extractlane v2, 3 + v8 = icmp_imm eq v7, 0xfffe ; 0x7f_ff * 2 == 0xff_fe + + v9 = band v4, v6 + v10 = band v8, v9 + + return v10 ; v10 combines the lane 0, 1 and 3 checks (v4, v6, v8) +} +; run + +function %sadd_sat_i8x16() -> b1 { +ebb0: + v0 = vconst.i8x16 [127 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + v1 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] + + v2 = sadd_sat v0, v1 + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 127 + + return v4 +} +; run + +function %uadd_sat_i16x8() -> b1 { +ebb0: + v0 = vconst.i16x8 [-1 0 0 0 0 0 0 0] + v1 = vconst.i16x8 [-1 1 1 1 1 1 1 1] + + v2 = uadd_sat v0, v1 + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 65535 + + return v4 +} +; run + +function %sub_sat_i8x16() -> b1 { +ebb0: + v0 = vconst.i8x16 [128 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] ; 128 == 0x80 == -128 + v1 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] + + v2 = ssub_sat v0, v1 + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 0x80 ; 0x80 == -128 + + ; now re-use 0x80 as an unsigned 128 + v5 = usub_sat v0, v2 + v6 = extractlane v5, 0 + v7 = icmp_imm eq v6, 0 + + v8 = band 
v4, v7 + return v8 +} +; run