x64: Refactor vector_all_ones, and remove buggy sse_cmp_op (#4728)
The sse_cmp_op rule had cases that would produce SseOpcode values that aren't legal to use with MInst.XmmRmR, and it was only used in vector_all_ones when constructing an XmmRmR value. Additionally, vector_all_ones always called sse_cmp_op with the same type, so the other cases were redundant. The solution in this PR is to remove sse_cmp_op entirely, inline a call to x64_pcmpeqd directly in vector_all_ones, and remove the unused argument from vector_all_ones.
This commit is contained in:
@@ -1496,35 +1496,19 @@
|
||||
(decl sse_xor (Type Xmm XmmMem) Xmm)
|
||||
(rule (sse_xor ty x y) (xmm_rm_r ty (sse_xor_op ty) x y))
|
||||
|
||||
;; Determine the appropriate operation to compare two vectors of the specified
|
||||
;; type.
|
||||
(decl sse_cmp_op (Type) SseOpcode)
|
||||
(rule (sse_cmp_op (multi_lane 8 16)) (SseOpcode.Pcmpeqb))
|
||||
(rule (sse_cmp_op (multi_lane 16 8)) (SseOpcode.Pcmpeqw))
|
||||
(rule (sse_cmp_op (multi_lane 32 4)) (SseOpcode.Pcmpeqd))
|
||||
(rule (sse_cmp_op (multi_lane 64 2)) (SseOpcode.Pcmpeqq))
|
||||
(rule (sse_cmp_op $F32X4) (SseOpcode.Cmpps))
|
||||
(rule (sse_cmp_op $F64X2) (SseOpcode.Cmppd))
|
||||
|
||||
;; Generates a register value which has an all-ones pattern of the specified
|
||||
;; type.
|
||||
;; Generates a register value which has an all-ones pattern.
|
||||
;;
|
||||
;; Note that this is accomplished by comparing a fresh register with itself,
|
||||
;; which for integers is always true. Also note that the comparison is always
|
||||
;; done for integers, it doesn't actually take the input `ty` into account. This
|
||||
;; is because we're comparing a fresh register to itself and we don't know the
|
||||
;; previous contents of the register. If a floating-point comparison is used
|
||||
;; then it runs the risk of comparing NaN against NaN and not actually producing
|
||||
;; an all-ones mask. By using integer comparison operations we're guaranteed
|
||||
;; that everything is equal to itself.
|
||||
(decl vector_all_ones (Type) Xmm)
|
||||
(rule (vector_all_ones ty)
|
||||
(let ((r WritableXmm (temp_writable_xmm))
|
||||
(_ Unit (emit (MInst.XmmRmR (sse_cmp_op $I32X4)
|
||||
r
|
||||
r
|
||||
r))))
|
||||
r))
|
||||
;; done for integers. This is because we're comparing a fresh register to itself
|
||||
;; and we don't know the previous contents of the register. If a floating-point
|
||||
;; comparison is used then it runs the risk of comparing NaN against NaN and not
|
||||
;; actually producing an all-ones mask. By using integer comparison operations
|
||||
;; we're guaranteed that everything is equal to itself.
|
||||
(decl vector_all_ones () Xmm)
|
||||
(rule (vector_all_ones)
|
||||
(let ((r WritableXmm (temp_writable_xmm)))
|
||||
(x64_pcmpeqd r r)))
|
||||
|
||||
;; Helper for creating an SSE register holding an `i64x2` from two `i64` values.
|
||||
(decl make_i64x2_from_lanes (GprMem GprMem) Xmm)
|
||||
|
||||
Reference in New Issue
Block a user