diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif
similarity index 78%
rename from cranelift/filetests/filetests/isa/x86/simd-arithmetic.clif
rename to cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif
index c9d6c4c372..cc2d7f03e1 100644
--- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif
@@ -1,6 +1,4 @@
-test run
 test binemit
-test legalizer
 set enable_simd
 target x86_64 skylake
 
@@ -15,14 +13,11 @@ ebb0:
 
     v5 = extractlane v2, 3
     v6 = icmp_imm eq v5, 5
-    ; TODO replace extractlanes with vector comparison
 
     v7 = band v4, v6
     return v7
 }
 
-; run
-
 function %iadd_i8x16_with_overflow() -> b1 {
 ebb0:
 [-, %xmm0]    v0 = vconst.i8x16 [255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255]
@@ -31,13 +26,10 @@ ebb0:
 
     v3 = extractlane v2, 0
     v4 = icmp_imm eq v3, 1
-    ; TODO replace extractlane with vector comparison
 
     return v4
 }
 
-; run
-
 function %iadd_i16x8(i16x8, i16x8) -> i16x8 {
 ebb0(v0: i16x8 [%xmm1], v1: i16x8 [%xmm2]):
 [-, %xmm1]  v2 = iadd v0, v1      ; bin: 66 0f fd ca
@@ -61,14 +53,11 @@ ebb0:
 
     v5 = extractlane v2, 1
     v6 = icmp_imm eq v5, 0xffffffff
-    ; TODO replace extractlanes with vector comparison
 
     v7 = band v4, v6
     return v7
 }
 
-; run
-
 function %isub_i64x2(i64x2, i64x2) -> i64x2 {
 ebb0(v0: i64x2 [%xmm0], v1: i64x2 [%xmm1]):
 [-, %xmm0]  v2 = isub v0, v1      ; bin: 66 0f fb c1
@@ -87,40 +76,6 @@ ebb0(v0: i8x16 [%xmm3], v1: i8x16 [%xmm4]):
     return v2
 }
 
-function %ineg_i32x4() -> b1 {
-ebb0:
-    v0 = vconst.i32x4 [1 1 1 1]
-    v2 = ineg v0
-    ; check: v5 = vconst.i32x4 0x00
-    ; nextln: v2 = isub v5, v0
-
-    v3 = extractlane v2, 0
-    v4 = icmp_imm eq v3, -1
-
-    return v4 ; bin: c3
-}
-; run
-
-function %ineg_legalized() {
-ebb0:
-    v0 = vconst.i8x16 0x00
-    v1 = ineg v0
-    ; check: v6 = vconst.i8x16 0x00
-    ; nextln: v1 = isub v6, v0
-
-    v2 = raw_bitcast.i16x8 v0
-    v3 = ineg v2
-    ; check: v7 = vconst.i16x8 0x00
-    ; nextln: v3 = isub v7, v2
-
-    v4 = raw_bitcast.i64x2 v0
-    v5 = ineg v4
-    ; check: v8 = vconst.i64x2 0x00
-    ; nextln: v5 = isub v8, v4
-
-    return ; bin: c3
-}
-
 function %imul_i32x4() -> b1 {
 ebb0:
 [-, %xmm0]    v0 = vconst.i32x4 [-1 0 1 -2147483647] ; e.g. -2147483647 == 0x80_00_00_01
@@ -140,7 +95,7 @@ ebb0:
     v10 = band v8, v9
     return v10
 }
-; run
+
 
 function %imul_i16x8() -> b1 {
 ebb0:
@@ -149,8 +104,8 @@ ebb0:
 [-, %xmm1]    v2 = imul v0, v1 ; bin: 66 0f d5 ca
 
     v3 = extractlane v2, 0
-    v4 = icmp_imm eq v3, 0xfffe ; TODO -2 will not work here and below because v3 is being
-    ; uextend-ed, not sextend-ed
+    v4 = icmp_imm eq v3, 0xfffe ; 0xfffe == -2; -2 will not work here and below because v3 is
+    ; being uextend-ed, not sextend-ed
 
     v5 = extractlane v2, 1
     v6 = icmp_imm eq v5, 0
@@ -163,7 +118,7 @@ ebb0:
 
     return v4
 }
-; run
+
 
 function %sadd_sat_i8x16() -> b1 {
 ebb0:
@@ -176,7 +131,7 @@ ebb0:
 
     return v4
 }
-; run
+
 
 function %uadd_sat_i16x8() -> b1 {
 ebb0:
@@ -189,16 +144,16 @@ ebb0:
 
     return v4
 }
-; run
+
 
 function %sub_sat_i8x16() -> b1 {
 ebb0:
-[-, %xmm2]    v0 = vconst.i8x16 [128 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] ; 120 == 0x80 == -128
+[-, %xmm2]    v0 = vconst.i8x16 [128 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] ; 128 == 0x80 == -128
 [-, %xmm3]    v1 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
 
 [-, %xmm2]    v2 = ssub_sat v0, v1 ; bin: 66 0f e8 d3
     v3 = extractlane v2, 0
-    v4 = icmp_imm eq v3, 0x80 ; still -128, TODO it's unclear why I can't use -128 here
+    v4 = icmp_imm eq v3, 0x80 ; 0x80 == -128
 
     ; now re-use 0x80 as an unsigned 128
 [-, %xmm2]    v5 = usub_sat v0, v2 ; bin: 66 0f d8 d2
@@ -208,7 +163,7 @@ ebb0:
     v8 = band v4, v7
     return v8
 }
-; run
+
 
 function %sub_sat_i16x8() {
 ebb0:
diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif
new file mode 100644
index 0000000000..6155204899
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif
@@ -0,0 +1,36 @@
+test legalizer
+set enable_simd
+target x86_64 skylake
+
+function %ineg_i32x4() -> b1 {
+ebb0:
+    v0 = vconst.i32x4 [1 1 1 1]
+    v2 = ineg v0 ; expected to legalize into a zero vector minus v0, per the directives below
+    ; check: v5 = vconst.i32x4 0x00
+    ; nextln: v2 = isub v5, v0
+
+    v3 = extractlane v2, 0
+    v4 = icmp_imm eq v3, -1 ; lane 0 of the negated vector: -(1) == -1
+
+    return v4
+}
+
+function %ineg_legalized() {
+ebb0:
+    v0 = vconst.i8x16 0x00
+    v1 = ineg v0 ; 8-bit lanes, expected to become a zero vector minus v0
+    ; check: v6 = vconst.i8x16 0x00
+    ; nextln: v1 = isub v6, v0
+
+    v2 = raw_bitcast.i16x8 v0
+    v3 = ineg v2 ; 16-bit lanes, same expected expansion
+    ; check: v7 = vconst.i16x8 0x00
+    ; nextln: v3 = isub v7, v2
+
+    v4 = raw_bitcast.i64x2 v0
+    v5 = ineg v4 ; 64-bit lanes, same expected expansion
+    ; check: v8 = vconst.i64x2 0x00
+    ; nextln: v5 = isub v8, v4
+
+    return
+}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif
new file mode 100644
index 0000000000..22bcf11bdd
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif
@@ -0,0 +1,155 @@
+test run
+set enable_simd
+target x86_64 skylake
+
+function %iadd_i32x4() -> b1 {
+ebb0:
+    v0 = vconst.i32x4 [1 1 1 1]
+    v1 = vconst.i32x4 [1 2 3 4]
+    v2 = iadd v0, v1 ; lane-wise sum, expected [2 3 4 5]
+
+    v3 = extractlane v2, 0
+    v4 = icmp_imm eq v3, 2 ; lane 0, 1 + 1 == 2
+
+    v5 = extractlane v2, 3
+    v6 = icmp_imm eq v5, 5 ; lane 3, 1 + 4 == 5
+    ; TODO replace extractlanes with vector comparison
+
+    v7 = band v4, v6 ; only lanes 0 and 3 are verified
+    return v7
+}
+; run
+
+function %iadd_i8x16_with_overflow() -> b1 {
+ebb0:
+    v0 = vconst.i8x16 [255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255]
+    v1 = vconst.i8x16 [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
+    v2 = iadd v0, v1 ; 255 + 2 == 257, which wraps to 1 in an 8-bit lane
+
+    v3 = extractlane v2, 0
+    v4 = icmp_imm eq v3, 1 ; lane 0 must hold the wrapped sum
+    ; TODO replace extractlane with vector comparison
+
+    return v4
+}
+; run
+
+function %isub_i32x4() -> b1 {
+ebb0:
+    v0 = vconst.i32x4 [1 1 1 1]
+    v1 = vconst.i32x4 [1 2 3 4]
+    v2 = isub v0, v1 ; lane-wise difference, expected [0 -1 -2 -3]
+
+    v3 = extractlane v2, 0
+    v4 = icmp_imm eq v3, 0 ; lane 0, 1 - 1 == 0
+
+    v5 = extractlane v2, 1
+    v6 = icmp_imm eq v5, 0xffffffff ; lane 1, 1 - 2 == -1, spelled as its 32-bit pattern
+    ; TODO replace extractlanes with vector comparison
+
+    v7 = band v4, v6 ; only lanes 0 and 1 are verified
+    return v7
+}
+; run
+
+
+function %ineg_i32x4() -> b1 {
+ebb0:
+    v0 = vconst.i32x4 [1 1 1 1]
+    v2 = ineg v0 ; every lane is expected to become -1
+
+    v3 = extractlane v2, 0
+    v4 = icmp_imm eq v3, -1 ; only lane 0 is verified
+
+    return v4
+}
+; run
+
+function %imul_i32x4() -> b1 {
+ebb0:
+    v0 = vconst.i32x4 [-1 0 1 -2147483647] ; e.g. -2147483647 == 0x80_00_00_01
+    v1 = vconst.i32x4 [2 2 2 2]
+    v2 = imul v0, v1 ; lane-wise product, keeping the low 32 bits of each lane
+
+    v3 = extractlane v2, 0
+    v4 = icmp_imm eq v3, -2 ; lane 0, -1 * 2 == -2
+
+    v5 = extractlane v2, 1
+    v6 = icmp_imm eq v5, 0 ; lane 1, 0 * 2 == 0
+
+    v7 = extractlane v2, 3
+    v8 = icmp_imm eq v7, 2 ; 0x80_00_00_01 * 2 == 0x1_00_00_00_02 (and the 1 is dropped)
+
+    v9 = band v4, v6
+    v10 = band v8, v9 ; lanes 0, 1 and 3 must all match (lane 2 is not verified)
+    return v10
+}
+; run
+
+function %imul_i16x8() -> b1 {
+ebb0:
+    v0 = vconst.i16x8 [-1 0 1 32767 0 0 0 0] ; e.g. 32767 == 0x7f_ff
+    v1 = vconst.i16x8 [2 2 2 2 0 0 0 0]
+    v2 = imul v0, v1
+
+    v3 = extractlane v2, 0
+    v4 = icmp_imm eq v3, 0xfffe ; 0xfffe == -2; -2 will not work here and below because v3 is
+    ; being uextend-ed, not sextend-ed
+
+    v5 = extractlane v2, 1
+    v6 = icmp_imm eq v5, 0
+
+    v7 = extractlane v2, 3
+    v8 = icmp_imm eq v7, 0xfffe ; 0x7f_ff * 2 == 0xff_fe
+
+    v9 = band v4, v6
+    v10 = band v8, v9 ; lanes 0, 1 and 3 must all match
+
+    return v10
+}
+; run
+
+function %sadd_sat_i8x16() -> b1 {
+ebb0:
+    v0 = vconst.i8x16 [127 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+    v1 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
+
+    v2 = sadd_sat v0, v1 ; lane 0, 127 + 1 exceeds the signed 8-bit maximum
+    v3 = extractlane v2, 0
+    v4 = icmp_imm eq v3, 127 ; the sum must saturate at 127 instead of wrapping
+
+    return v4
+}
+; run
+
+function %uadd_sat_i16x8() -> b1 {
+ebb0:
+    v0 = vconst.i16x8 [-1 0 0 0 0 0 0 0] ; -1 == 0xffff == 65535 when read as unsigned
+    v1 = vconst.i16x8 [-1 1 1 1 1 1 1 1]
+
+    v2 = uadd_sat v0, v1 ; lane 0, 65535 + 65535 exceeds the unsigned 16-bit maximum
+    v3 = extractlane v2, 0
+    v4 = icmp_imm eq v3, 65535 ; the sum must saturate at 65535
+
+    return v4
+}
+; run
+
+function %sub_sat_i8x16() -> b1 {
+ebb0:
+    v0 = vconst.i8x16 [128 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] ; 128 == 0x80 == -128
+    v1 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
+
+    v2 = ssub_sat v0, v1 ; lane 0, -128 - 1 is below the signed 8-bit minimum
+    v3 = extractlane v2, 0
+    v4 = icmp_imm eq v3, 0x80 ; 0x80 == -128, the saturated result
+
+    ; now re-use 0x80 as an unsigned 128
+    v5 = usub_sat v0, v2 ; lane 0, 128 - 128 as unsigned values
+    v6 = extractlane v5, 0
+    v7 = icmp_imm eq v6, 0
+
+    v8 = band v4, v7 ; both the signed and the unsigned lane-0 results must match
+    return v8
+}
+; run