diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index e0ea0a6c7b..d72451d057 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -1496,35 +1496,19 @@ (decl sse_xor (Type Xmm XmmMem) Xmm) (rule (sse_xor ty x y) (xmm_rm_r ty (sse_xor_op ty) x y)) -;; Determine the appropriate operation to compare two vectors of the specified -;; type. -(decl sse_cmp_op (Type) SseOpcode) -(rule (sse_cmp_op (multi_lane 8 16)) (SseOpcode.Pcmpeqb)) -(rule (sse_cmp_op (multi_lane 16 8)) (SseOpcode.Pcmpeqw)) -(rule (sse_cmp_op (multi_lane 32 4)) (SseOpcode.Pcmpeqd)) -(rule (sse_cmp_op (multi_lane 64 2)) (SseOpcode.Pcmpeqq)) -(rule (sse_cmp_op $F32X4) (SseOpcode.Cmpps)) -(rule (sse_cmp_op $F64X2) (SseOpcode.Cmppd)) - -;; Generates a register value which has an all-ones pattern of the specified -;; type. +;; Generates a register value which has an all-ones pattern. ;; ;; Note that this is accomplished by comparing a fresh register with itself, ;; which for integers is always true. Also note that the comparison is always -;; done for integers, it doesn't actually take the input `ty` into account. This -;; is because we're comparing a fresh register to itself and we don't know the -;; previous contents of the register. If a floating-point comparison is used -;; then it runs the risk of comparing NaN against NaN and not actually producing -;; an all-ones mask. By using integer comparision operations we're guaranteeed -;; that everything is equal to itself. -(decl vector_all_ones (Type) Xmm) -(rule (vector_all_ones ty) - (let ((r WritableXmm (temp_writable_xmm)) - (_ Unit (emit (MInst.XmmRmR (sse_cmp_op $I32X4) - r - r - r)))) - r)) +;; done for integers. This is because we're comparing a fresh register to itself +;; and we don't know the previous contents of the register. If a floating-point +;; comparison is used then it runs the risk of comparing NaN against NaN and not +;; actually producing an all-ones mask. By using integer comparision operations +;; we're guaranteeed that everything is equal to itself. +(decl vector_all_ones () Xmm) +(rule (vector_all_ones) + (let ((r WritableXmm (temp_writable_xmm))) + (x64_pcmpeqd r r))) ;; Helper for creating an SSE register holding an `i64x2` from two `i64` values. (decl make_i64x2_from_lanes (GprMem GprMem) Xmm) diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index b665d66d6c..8a1e39be26 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -1197,13 +1197,13 @@ ;; Special case for `f32x4.abs`. (rule (lower (has_type $F32X4 (fabs x))) (x64_andps x - (x64_psrld (vector_all_ones $F32X4) + (x64_psrld (vector_all_ones) (RegMemImm.Imm 1)))) ;; Special case for `f64x2.abs`. (rule (lower (has_type $F64X2 (fabs x))) (x64_andpd x - (x64_psrlq (vector_all_ones $F64X2) + (x64_psrlq (vector_all_ones) (RegMemImm.Imm 1)))) ;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1232,7 +1232,7 @@ ;; Special case for vector-types where bit-negation is an xor against an ;; all-one value (rule (lower (has_type ty @ (multi_lane _bits _lanes) (bnot x))) - (sse_xor ty x (vector_all_ones ty))) + (sse_xor ty x (vector_all_ones))) ;;;; Rules for `bitselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1486,7 +1486,7 @@ ;; (PCMPEQ*) and then invert the bits (PXOR with all 1s). (rule (lower (icmp (IntCC.NotEqual) a @ (value_type (ty_vec128 ty)) b)) (let ((checked Xmm (x64_pcmpeq ty a b)) - (all_ones Xmm (vector_all_ones ty))) + (all_ones Xmm (vector_all_ones))) (x64_pxor checked all_ones))) ;; Signed comparisons have a single-instruction lowering, unlike their unsigned ;; counterparts. These latter instructions use the unsigned min/max @@ -1503,7 +1503,7 @@ (xmm_b Xmm (put_in_xmm b)) (max Xmm (x64_pmaxu ty xmm_a xmm_b)) (eq Xmm (x64_pcmpeq ty max xmm_b)) - (all_ones Xmm (vector_all_ones ty))) + (all_ones Xmm (vector_all_ones))) (x64_pxor eq all_ones))) (rule (lower (icmp (IntCC.UnsignedLessThan) a @ (value_type (ty_vec128 ty)) b)) ;; N.B.: see note above. @@ -1511,7 +1511,7 @@ (xmm_b Xmm (put_in_xmm b)) (min Xmm (x64_pminu ty xmm_a xmm_b)) (eq Xmm (x64_pcmpeq ty min xmm_b)) - (all_ones Xmm (vector_all_ones ty))) + (all_ones Xmm (vector_all_ones))) (x64_pxor eq all_ones))) ;; To lower signed and unsigned *-or-equals comparisons, we find the minimum ;; number (PMIN[U|S]*) and compare that to one of the terms (PCMPEQ*). Note that @@ -1533,11 +1533,11 @@ ;; 1s), emitting one more instruction than the smaller-lane versions. (rule (lower (icmp (IntCC.SignedGreaterThanOrEqual) a @ (value_type $I64X2) b)) (let ((checked Xmm (x64_pcmpgt $I64X2 b a)) - (all_ones Xmm (vector_all_ones $I64X2))) + (all_ones Xmm (vector_all_ones))) (x64_pxor checked all_ones))) (rule (lower (icmp (IntCC.SignedLessThanOrEqual) a @ (value_type $I64X2) b)) (let ((checked Xmm (x64_pcmpgt $I64X2 a b)) - (all_ones Xmm (vector_all_ones $I64X2))) + (all_ones Xmm (vector_all_ones))) (x64_pxor checked all_ones))) ;; TODO: not used by WebAssembly translation ;; (rule (lower (icmp (IntCC.UnsignedGreaterThanOrEqual) a @ (value_type $I64X2) b))