x64: Refactor vector_all_ones, and remove buggy sse_cmp_op (#4728)

The sse_cmp_op rule had cases that would produce SseOperand values that aren't legal to use with MInst.XmmRmR, and was only used in vector_all_ones when constructing an XmmRmR value. Additionally, vector_all_ones always called sse_cmp_op with the same type, so the other cases were redundant. The solution in this PR is to remove sse_cmp_op entirely and inline a call to x64_pcmpeqd directly in vector_all_ones, and remove the unused argument from vector_all_ones.
2022-08-17 14:30:52 -07:00
parent 1481721c9d
commit 0a71df6a37
2 changed files with 18 additions and 34 deletions
--- a/cranelift/codegen/src/isa/x64/inst.isle
+++ b/cranelift/codegen/src/isa/x64/inst.isle
@@ -1496,35 +1496,19 @@
 (decl sse_xor (Type Xmm XmmMem) Xmm)
 (rule (sse_xor ty x y) (xmm_rm_r ty (sse_xor_op ty) x y))

-;; Determine the appropriate operation to compare two vectors of the specified
-;; type.
-(decl sse_cmp_op (Type) SseOpcode)
-(rule (sse_cmp_op (multi_lane 8 16)) (SseOpcode.Pcmpeqb))
-(rule (sse_cmp_op (multi_lane 16 8)) (SseOpcode.Pcmpeqw))
-(rule (sse_cmp_op (multi_lane 32 4)) (SseOpcode.Pcmpeqd))
-(rule (sse_cmp_op (multi_lane 64 2)) (SseOpcode.Pcmpeqq))
-(rule (sse_cmp_op $F32X4) (SseOpcode.Cmpps))
-(rule (sse_cmp_op $F64X2) (SseOpcode.Cmppd))
-
-;; Generates a register value which has an all-ones pattern of the specified
-;; type.
+;; Generates a register value which has an all-ones pattern.
 ;;
 ;; Note that this is accomplished by comparing a fresh register with itself,
 ;; which for integers is always true. Also note that the comparison is always
-;; done for integers, it doesn't actually take the input `ty` into account. This
-;; is because we're comparing a fresh register to itself and we don't know the
-;; previous contents of the register. If a floating-point comparison is used
-;; then it runs the risk of comparing NaN against NaN and not actually producing
-;; an all-ones mask. By using integer comparision operations we're guaranteeed
-;; that everything is equal to itself.
-(decl vector_all_ones (Type) Xmm)
-(rule (vector_all_ones ty)
-      (let ((r WritableXmm (temp_writable_xmm))
-            (_ Unit (emit (MInst.XmmRmR (sse_cmp_op $I32X4)
-                                        r
-                                        r
-                                        r))))
-        r))
+;; done for integers. This is because we're comparing a fresh register to itself
+;; and we don't know the previous contents of the register. If a floating-point
+;; comparison is used then it runs the risk of comparing NaN against NaN and not
+;; actually producing an all-ones mask. By using integer comparision operations
+;; we're guaranteeed that everything is equal to itself.
+(decl vector_all_ones () Xmm)
+(rule (vector_all_ones)
+      (let ((r WritableXmm (temp_writable_xmm)))
+        (x64_pcmpeqd r r)))

 ;; Helper for creating an SSE register holding an `i64x2` from two `i64` values.
 (decl make_i64x2_from_lanes (GprMem GprMem) Xmm)
--- a/cranelift/codegen/src/isa/x64/lower.isle
+++ b/cranelift/codegen/src/isa/x64/lower.isle
@@ -1197,13 +1197,13 @@
 ;; Special case for `f32x4.abs`.
 (rule (lower (has_type $F32X4 (fabs x)))
      (x64_andps x
-             (x64_psrld (vector_all_ones $F32X4)
+             (x64_psrld (vector_all_ones)
                    (RegMemImm.Imm 1))))

 ;; Special case for `f64x2.abs`.
 (rule (lower (has_type $F64X2 (fabs x)))
      (x64_andpd x
-             (x64_psrlq (vector_all_ones $F64X2)
+             (x64_psrlq (vector_all_ones)
                    (RegMemImm.Imm 1))))

 ;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1232,7 +1232,7 @@
 ;; Special case for vector-types where bit-negation is an xor against an
 ;; all-one value
 (rule (lower (has_type ty @ (multi_lane _bits _lanes) (bnot x)))
-      (sse_xor ty x (vector_all_ones ty)))
+      (sse_xor ty x (vector_all_ones)))

 ;;;; Rules for `bitselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

@@ -1486,7 +1486,7 @@
 ;; (PCMPEQ*) and then invert the bits (PXOR with all 1s).
 (rule (lower (icmp (IntCC.NotEqual) a @ (value_type (ty_vec128 ty)) b))
      (let ((checked Xmm (x64_pcmpeq ty a b))
-            (all_ones Xmm (vector_all_ones ty)))
+            (all_ones Xmm (vector_all_ones)))
           (x64_pxor checked all_ones)))
 ;; Signed comparisons have a single-instruction lowering, unlike their unsigned
 ;; counterparts. These latter instructions use the unsigned min/max
@@ -1503,7 +1503,7 @@
            (xmm_b Xmm (put_in_xmm b))
            (max Xmm (x64_pmaxu ty xmm_a xmm_b))
            (eq Xmm (x64_pcmpeq ty max xmm_b))
-            (all_ones Xmm (vector_all_ones ty)))
+            (all_ones Xmm (vector_all_ones)))
           (x64_pxor eq all_ones)))
 (rule (lower (icmp (IntCC.UnsignedLessThan) a @ (value_type (ty_vec128 ty)) b))
      ;; N.B.: see note above.
@@ -1511,7 +1511,7 @@
            (xmm_b Xmm (put_in_xmm b))
            (min Xmm (x64_pminu ty xmm_a xmm_b))
            (eq Xmm (x64_pcmpeq ty min xmm_b))
-            (all_ones Xmm (vector_all_ones ty)))
+            (all_ones Xmm (vector_all_ones)))
           (x64_pxor eq all_ones)))
 ;; To lower signed and unsigned *-or-equals comparisons, we find the minimum
 ;; number (PMIN[U|S]*) and compare that to one of the terms (PCMPEQ*). Note that
@@ -1533,11 +1533,11 @@
 ;; 1s), emitting one more instruction than the smaller-lane versions.
 (rule (lower (icmp (IntCC.SignedGreaterThanOrEqual) a @ (value_type $I64X2) b))
      (let ((checked Xmm (x64_pcmpgt $I64X2 b a))
-            (all_ones Xmm (vector_all_ones $I64X2)))
+            (all_ones Xmm (vector_all_ones)))
           (x64_pxor checked all_ones)))
 (rule (lower (icmp (IntCC.SignedLessThanOrEqual) a @ (value_type $I64X2) b))
      (let ((checked Xmm (x64_pcmpgt $I64X2 a b))
-            (all_ones Xmm (vector_all_ones $I64X2)))
+            (all_ones Xmm (vector_all_ones)))
           (x64_pxor checked all_ones)))
 ;; TODO: not used by WebAssembly translation
 ;; (rule (lower (icmp (IntCC.UnsignedGreaterThanOrEqual) a @ (value_type $I64X2) b))