From 057a0cf44eacf00dfa2a572b68095496604ef35e Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Wed, 18 Mar 2020 09:35:06 -0700 Subject: [PATCH] Organize SIMD arithmetic filetest; add REX-inducing register assignments --- .../isa/x86/simd-arithmetic-binemit.clif | 234 ++++-------------- .../isa/x86/simd-arithmetic-run.clif | 6 +- .../isa/x86/simd-comparison-binemit.clif | 12 +- 3 files changed, 59 insertions(+), 193 deletions(-) diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif index 6e5f7520e3..e5b5e4b28a 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif @@ -2,179 +2,57 @@ test binemit set enable_simd target x86_64 skylake -function %iadd_i32x4() -> b1 { -block0: -[-, %xmm0] v0 = vconst.i32x4 [1 1 1 1] -[-, %xmm1] v1 = vconst.i32x4 [1 2 3 4] -[-, %xmm0] v2 = iadd v0, v1 ; bin: 66 0f fe c1 +function %arithmetic_i8x16(i8x16, i8x16) { +block0(v0: i8x16 [%xmm6], v1: i8x16 [%xmm2]): +[-, %xmm6] v2 = iadd v0, v1 ; bin: 66 0f fc f2 +[-, %xmm6] v3 = isub v0, v1 ; bin: 66 0f f8 f2 +[-, %xmm6] v4 = sadd_sat v0, v1 ; bin: 66 0f ec f2 +[-, %xmm6] v5 = ssub_sat v0, v1 ; bin: 66 0f e8 f2 +[-, %xmm6] v6 = usub_sat v0, v1 ; bin: 66 0f d8 f2 +[-, %xmm6] v7 = avg_round v0, v1 ; bin: 66 0f e0 f2 - v3 = extractlane v2, 0 - v4 = icmp_imm eq v3, 2 - - v5 = extractlane v2, 3 - v6 = icmp_imm eq v5, 5 - - v7 = band v4, v6 - return v7 -} - -function %iadd_i8x16_with_overflow() -> b1 { -block0: -[-, %xmm0] v0 = vconst.i8x16 [255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255] -[-, %xmm7] v1 = vconst.i8x16 [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] -[-, %xmm0] v2 = iadd v0, v1 ; bin: 66 0f fc c7 - - v3 = extractlane v2, 0 - v4 = icmp_imm eq v3, 1 - - return v4 -} - -function %iadd_i16x8(i16x8, i16x8) -> i16x8 { -block0(v0: i16x8 [%xmm1], v1: i16x8 [%xmm2]): -[-, %xmm1] v2 = iadd v0, v1 ; bin: 66 0f fd ca - return v2 -} - -function %iadd_i64x2(i64x2, i64x2) -> i64x2 { -block0(v0: i64x2 [%xmm3], v1: i64x2 [%xmm4]): -[-, %xmm3] v2 = iadd v0, v1 ; bin: 66 0f d4 dc - return v2 -} - -function %isub_i32x4() -> b1 { -block0: -[-, %xmm3] v0 = vconst.i32x4 [1 1 1 1] -[-, %xmm5] v1 = vconst.i32x4 [1 2 3 4] -[-, %xmm3] v2 = isub v0, v1 ; bin: 66 0f fa dd - - v3 = extractlane v2, 0 - v4 = icmp_imm eq v3, 0 - - v5 = extractlane v2, 1 - v6 = icmp_imm eq v5, 0xffffffff - - v7 = band v4, v6 - return v7 -} - -function %isub_i64x2(i64x2, i64x2) -> i64x2 { -block0(v0: i64x2 [%xmm0], v1: i64x2 [%xmm1]): -[-, %xmm0] v2 = isub v0, v1 ; bin: 66 0f fb c1 - return v2 -} - -function %isub_i16x8(i16x8, i16x8) -> i16x8 { -block0(v0: i16x8 [%xmm3], v1: i16x8 [%xmm4]): -[-, %xmm3] v2 = isub v0, v1 ; bin: 66 0f f9 dc - return v2 -} - -function %isub_i8x16(i8x16, i8x16) -> i8x16 { -block0(v0: i8x16 [%xmm3], v1: i8x16 [%xmm4]): -[-, %xmm3] v2 = isub v0, v1 ; bin: 66 0f f8 dc - return v2 -} - -function %imul_i32x4() -> b1 { -block0: -[-, %xmm0] v0 = vconst.i32x4 [-1 0 1 0x80_00_00_01] -[-, %xmm1] v1 = vconst.i32x4 [2 2 2 2] -[-, %xmm0] v2 = imul v0, v1 ; bin: 66 0f 38 40 c1 - - v3 = extractlane v2, 0 - v4 = icmp_imm eq v3, -2 - - v5 = extractlane v2, 1 - v6 = icmp_imm eq v5, 0 - - v7 = extractlane v2, 3 - v8 = icmp_imm eq v7, 2 ; 0x80_00_00_01 * 2 == 0x1_00_00_00_02 (and the 1 is dropped) - - v9 = band v4, v6 - v10 = band v8, v9 - return v10 -} - - -function %imul_i16x8() -> b1 { -block0: -[-, %xmm1] v0 = vconst.i16x8 [-1 0 1 0x7f_ff 0 0 0 0] -[-, %xmm2] v1 = vconst.i16x8 [2 2 2 2 0 0 0 0] -[-, %xmm1] v2 = imul v0, v1 ; bin: 66 0f d5 ca - - v3 = extractlane v2, 0 - v4 = icmp_imm eq v3, 0xfffe ; 0xfffe == -2; -2 will not work here and below because v3 is - ; being uextend-ed, not sextend-ed - - v5 = extractlane v2, 1 - v6 = icmp_imm eq v5, 0 - - v7 = extractlane v2, 3 - v8 = icmp_imm eq v7, 0xfffe ; 0x7f_ff * 2 == 0xff_fe - - v9 = band v4, v6 - v10 = band v8, v9 - - return v4 -} - - -function %sadd_sat_i8x16() -> b1 { -block0: -[-, %xmm2] v0 = vconst.i8x16 [127 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] -[-, %xmm3] v1 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] - -[-, %xmm2] v2 = sadd_sat v0, v1 ; bin: 66 0f ec d3 - v3 = extractlane v2, 0 - v4 = icmp_imm eq v3, 127 - - return v4 -} - - -function %uadd_sat_i16x8() -> b1 { -block0: -[-, %xmm2] v0 = vconst.i16x8 [-1 0 0 0 0 0 0 0] -[-, %xmm3] v1 = vconst.i16x8 [-1 1 1 1 1 1 1 1] - -[-, %xmm2] v2 = uadd_sat v0, v1 ; bin: 66 0f dd d3 - v3 = extractlane v2, 0 - v4 = icmp_imm eq v3, 65535 - - return v4 -} - - -function %sub_sat_i8x16() -> b1 { -block0: -[-, %xmm2] v0 = vconst.i8x16 [128 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] ; 128 == 0x80 == -128 -[-, %xmm3] v1 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] - -[-, %xmm2] v2 = ssub_sat v0, v1 ; bin: 66 0f e8 d3 - v3 = extractlane v2, 0 - v4 = icmp_imm eq v3, 0x80 ; 0x80 == -128 - - ; now re-use 0x80 as an unsigned 128 -[-, %xmm2] v5 = usub_sat v0, v2 ; bin: 66 0f d8 d2 - v6 = extractlane v5, 0 - v7 = icmp_imm eq v6, 0 - - v8 = band v4, v7 - return v8 -} - - -function %sub_sat_i16x8() { -block0: -[-, %xmm3] v0 = vconst.i16x8 [0 0 0 0 0 0 0 0] -[-, %xmm5] v1 = vconst.i16x8 [1 1 1 1 1 1 1 1] -[-, %xmm3] v2 = ssub_sat v0, v1 ; bin: 66 0f e9 dd -[-, %xmm3] v3 = usub_sat v0, v1 ; bin: 66 0f d9 dd return } -function %float_arithmetic_f32x4(f32x4, f32x4) { +function %arithmetic_i16x8(i16x8, i16x8) { +block0(v0: i16x8 [%xmm3], v1: i16x8 [%xmm5]): +[-, %xmm3] v2 = iadd v0, v1 ; bin: 66 0f fd dd +[-, %xmm3] v3 = isub v0, v1 ; bin: 66 0f f9 dd +[-, %xmm3] v4 = imul v0, v1 ; bin: 66 0f d5 dd +[-, %xmm3] v5 = uadd_sat v0, v1 ; bin: 66 0f dd dd +[-, %xmm3] v6 = ssub_sat v0, v1 ; bin: 66 0f e9 dd +[-, %xmm3] v7 = usub_sat v0, v1 ; bin: 66 0f d9 dd +[-, %xmm3] v8 = avg_round v0, v1 ; bin: 66 0f e3 dd + + return +} + +function %arithmetic_i32x4(i32x4, i32x4) { +block0(v0: i32x4 [%xmm0], v1: i32x4 [%xmm1]): +[-, %xmm0] v2 = iadd v0, v1 ; bin: 66 0f fe c1 +[-, %xmm0] v3 = isub v0, v1 ; bin: 66 0f fa c1 +[-, %xmm0] v4 = imul v0, v1 ; bin: 66 0f 38 40 c1 + + return +} + +function %arithmetic_i64x2(i64x2, i64x2) { +block0(v0: i64x2 [%xmm0], v1: i64x2 [%xmm2]): +[-, %xmm0] v2 = iadd v0, v1 ; bin: 66 0f d4 c2 +[-, %xmm0] v3 = isub v0, v1 ; bin: 66 0f fb c2 + + return +} + +function %arithmetic_i64x2_rex(i64x2, i64x2) { +block0(v0: i64x2 [%xmm8], v1: i64x2 [%xmm10]): +[-, %xmm8] v2 = iadd v0, v1 ; bin: 66 45 0f d4 c2 +[-, %xmm8] v3 = isub v0, v1 ; bin: 66 45 0f fb c2 + + return +} + +function %arithmetic_f32x4(f32x4, f32x4) { block0(v0: f32x4 [%xmm3], v1: f32x4 [%xmm5]): [-, %xmm3] v2 = fadd v0, v1 ; bin: 0f 58 dd [-, %xmm3] v3 = fsub v0, v1 ; bin: 0f 5c dd @@ -186,7 +64,7 @@ block0(v0: f32x4 [%xmm3], v1: f32x4 [%xmm5]): return } -function %float_arithmetic_f32x4_rex(f32x4, f32x4) { +function %arithmetic_f32x4_rex(f32x4, f32x4) { block0(v0: f32x4 [%xmm3], v1: f32x4 [%xmm10]): [-, %xmm3] v2 = fadd v0, v1 ; bin: 41 0f 58 da [-, %xmm3] v3 = fsub v0, v1 ; bin: 41 0f 5c da @@ -198,7 +76,7 @@ block0(v0: f32x4 [%xmm3], v1: f32x4 [%xmm10]): return } -function %float_arithmetic_f64x2(f64x2, f64x2) { +function %arithmetic_f64x2(f64x2, f64x2) { block0(v0: f64x2 [%xmm3], v1: f64x2 [%xmm5]): [-, %xmm3] v2 = fadd v0, v1 ; bin: 66 0f 58 dd [-, %xmm3] v3 = fsub v0, v1 ; bin: 66 0f 5c dd @@ -210,7 +88,7 @@ block0(v0: f64x2 [%xmm3], v1: f64x2 [%xmm5]): return } -function %float_arithmetic_f64x2_rex(f64x2, f64x2) { +function %arithmetic_f64x2_rex(f64x2, f64x2) { block0(v0: f64x2 [%xmm11], v1: f64x2 [%xmm13]): [-, %xmm11] v2 = fadd v0, v1 ; bin: 66 45 0f 58 dd [-, %xmm11] v3 = fsub v0, v1 ; bin: 66 45 0f 5c dd @@ -221,15 +99,3 @@ block0(v0: f64x2 [%xmm11], v1: f64x2 [%xmm13]): [-, %xmm11] v8 = sqrt v0 ; bin: 66 45 0f 51 db return } - -function %average_rounding_i8x16(i8x16, i8x16) { -block0(v0: i8x16 [%xmm6], v1: i8x16 [%xmm2]): -[-, %xmm6] v2 = avg_round v0, v1 ; bin: 66 0f e0 f2 - return -} - -function %average_rounding_i16x8(i16x8, i16x8) { -block0(v0: i16x8 [%xmm6], v1: i16x8 [%xmm2]): -[-, %xmm6] v2 = avg_round v0, v1 ; bin: 66 0f e3 f2 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif index 71e1e79000..8f2b46f19e 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif @@ -34,10 +34,10 @@ block0: } ; run -function %isub_i32x4() -> b1 { +function %isub_i32x4_rex() -> b1 { block0: - v0 = vconst.i32x4 [1 1 1 1] - v1 = vconst.i32x4 [1 2 3 4] +[-,%xmm10] v0 = vconst.i32x4 [1 1 1 1] +[-,%xmm15] v1 = vconst.i32x4 [1 2 3 4] v2 = isub v0, v1 v3 = extractlane v2, 0 diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif index 70c1f2f8e5..0df2c73ccf 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif @@ -10,11 +10,11 @@ block0: return } -function %icmp_i16x8() { +function %icmp_i16x8_rex() { block0: [-, %xmm0] v0 = vconst.i16x8 0x00 -[-, %xmm7] v1 = vconst.i16x8 0xffffffffffffffffffffffffffffffff -[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 0f 75 c7 +[-, %xmm15] v1 = vconst.i16x8 0xffffffffffffffffffffffffffffffff +[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 41 0f 75 c7 return } @@ -26,11 +26,11 @@ block0: return } -function %icmp_i64x2() { +function %icmp_i64x2_rex() { block0: -[-, %xmm0] v0 = vconst.i64x2 0x00 +[-, %xmm8] v0 = vconst.i64x2 0x00 [-, %xmm1] v1 = vconst.i64x2 0xffffffffffffffffffffffffffffffff -[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 0f 38 29 c1 +[-, %xmm8] v2 = icmp eq v0, v1 ; bin: 66 44 0f 38 29 c1 return }