cranelift: Support bnot, band, bor, bxor for x86_64. (#5036)
* Support `bnot`, `band`, `bor`, `bxor` for x86_64.
* Fix-up to handle `B{8,16,32,64}` type on bitops
* Fix-up conflict.
This commit is contained in:
@@ -1531,6 +1531,8 @@
|
||||
(decl sse_xor_op (Type) SseOpcode)
|
||||
(rule 1 (sse_xor_op $F32X4) (SseOpcode.Xorps))
|
||||
(rule 1 (sse_xor_op $F64X2) (SseOpcode.Xorpd))
|
||||
(rule 1 (sse_xor_op $F32) (SseOpcode.Xorps))
|
||||
(rule 1 (sse_xor_op $F64) (SseOpcode.Xorpd))
|
||||
|
||||
;; Priority 0 because multi_lane overlaps with the explicit $F32X4/$F64X2 type
|
||||
;; patterns.
|
||||
|
||||
@@ -242,46 +242,54 @@
|
||||
;; `{i,b}64` and smaller.
|
||||
|
||||
;; And two registers.
|
||||
(rule 0 (lower (has_type (fits_in_64 ty) (band x y)))
|
||||
(rule 0 (lower (has_type ty (band x y)))
|
||||
(if (ty_int_ref_scalar_64 ty))
|
||||
(x64_and ty x y))
|
||||
|
||||
;; And with a memory operand.
|
||||
|
||||
(rule 1 (lower (has_type (fits_in_64 ty)
|
||||
(band x (sinkable_load y))))
|
||||
(rule 1 (lower (has_type ty (band x (sinkable_load y))))
|
||||
(if (ty_int_ref_scalar_64 ty))
|
||||
(x64_and ty x
|
||||
(sink_load_to_gpr_mem_imm y)))
|
||||
|
||||
(rule 2 (lower (has_type (fits_in_64 ty)
|
||||
(band (sinkable_load x) y)))
|
||||
(rule 2 (lower (has_type ty (band (sinkable_load x) y)))
|
||||
(if (ty_int_ref_scalar_64 ty))
|
||||
(x64_and ty
|
||||
y
|
||||
(sink_load_to_gpr_mem_imm x)))
|
||||
|
||||
;; And with an immediate.
|
||||
|
||||
(rule 3 (lower (has_type (fits_in_64 ty)
|
||||
(band x (simm32_from_value y))))
|
||||
(rule 3 (lower (has_type ty (band x (simm32_from_value y))))
|
||||
(if (ty_int_ref_scalar_64 ty))
|
||||
(x64_and ty x y))
|
||||
|
||||
(rule 4 (lower (has_type (fits_in_64 ty)
|
||||
(band (simm32_from_value x) y)))
|
||||
(rule 4 (lower (has_type ty (band (simm32_from_value x) y)))
|
||||
(if (ty_int_ref_scalar_64 ty))
|
||||
(x64_and ty y x))
|
||||
|
||||
;; f32 and f64
|
||||
|
||||
(rule 5 (lower (has_type (ty_scalar_float ty) (band x y)))
|
||||
(sse_and ty x y))
|
||||
|
||||
;; SSE.
|
||||
|
||||
(decl sse_and (Type Xmm XmmMem) Xmm)
|
||||
(rule (sse_and $F32X4 x y) (x64_andps x y))
|
||||
(rule (sse_and $F64X2 x y) (x64_andpd x y))
|
||||
(rule (sse_and $F32 x y) (x64_andps x y))
|
||||
(rule (sse_and $F64 x y) (x64_andpd x y))
|
||||
(rule -1 (sse_and (multi_lane _bits _lanes) x y) (x64_pand x y))
|
||||
|
||||
(rule 5 (lower (has_type ty @ (multi_lane _bits _lanes)
|
||||
(rule 6 (lower (has_type ty @ (multi_lane _bits _lanes)
|
||||
(band x y)))
|
||||
(sse_and ty x y))
|
||||
|
||||
;; `i128`.
|
||||
|
||||
(rule 6 (lower (has_type $I128 (band x y)))
|
||||
(rule 7 (lower (has_type $I128 (band x y)))
|
||||
(let ((x_regs ValueRegs x)
|
||||
(x_lo Gpr (value_regs_get_gpr x_regs 0))
|
||||
(x_hi Gpr (value_regs_get_gpr x_regs 1))
|
||||
@@ -296,39 +304,47 @@
|
||||
;; `{i,b}64` and smaller.
|
||||
|
||||
;; Or two registers.
|
||||
(rule 0 (lower (has_type (fits_in_64 ty) (bor x y)))
|
||||
(rule 0 (lower (has_type ty (bor x y)))
|
||||
(if (ty_int_ref_scalar_64 ty))
|
||||
(x64_or ty x y))
|
||||
|
||||
;; Or with a memory operand.
|
||||
|
||||
(rule 1 (lower (has_type (fits_in_64 ty)
|
||||
(bor x (sinkable_load y))))
|
||||
(rule 1 (lower (has_type ty (bor x (sinkable_load y))))
|
||||
(if (ty_int_ref_scalar_64 ty))
|
||||
(x64_or ty x
|
||||
(sink_load_to_gpr_mem_imm y)))
|
||||
|
||||
(rule 2 (lower (has_type (fits_in_64 ty)
|
||||
(bor (sinkable_load x) y)))
|
||||
(rule 2 (lower (has_type ty (bor (sinkable_load x) y)))
|
||||
(if (ty_int_ref_scalar_64 ty))
|
||||
(x64_or ty y
|
||||
(sink_load_to_gpr_mem_imm x)))
|
||||
|
||||
;; Or with an immediate.
|
||||
|
||||
(rule 3 (lower (has_type (fits_in_64 ty)
|
||||
(bor x (simm32_from_value y))))
|
||||
(rule 3 (lower (has_type ty (bor x (simm32_from_value y))))
|
||||
(if (ty_int_ref_scalar_64 ty))
|
||||
(x64_or ty x y))
|
||||
|
||||
(rule 4 (lower (has_type (fits_in_64 ty)
|
||||
(bor (simm32_from_value x) y)))
|
||||
(rule 4 (lower (has_type ty (bor (simm32_from_value x) y)))
|
||||
(if (ty_int_ref_scalar_64 ty))
|
||||
(x64_or ty y x))
|
||||
|
||||
;; f32 and f64
|
||||
|
||||
(rule 5 (lower (has_type (ty_scalar_float ty) (bor x y)))
|
||||
(sse_or ty x y))
|
||||
|
||||
;; SSE.
|
||||
|
||||
(decl sse_or (Type Xmm XmmMem) Xmm)
|
||||
(rule (sse_or $F32X4 x y) (x64_orps x y))
|
||||
(rule (sse_or $F64X2 x y) (x64_orpd x y))
|
||||
(rule (sse_or $F32 x y) (x64_orps x y))
|
||||
(rule (sse_or $F64 x y) (x64_orpd x y))
|
||||
(rule -1 (sse_or (multi_lane _bits _lanes) x y) (x64_por x y))
|
||||
|
||||
(rule 5 (lower (has_type ty @ (multi_lane _bits _lanes)
|
||||
(rule 6 (lower (has_type ty @ (multi_lane _bits _lanes)
|
||||
(bor x y)))
|
||||
(sse_or ty x y))
|
||||
|
||||
@@ -343,7 +359,7 @@
|
||||
(value_gprs (x64_or $I64 x_lo y_lo)
|
||||
(x64_or $I64 x_hi y_hi))))
|
||||
|
||||
(rule 6 (lower (has_type $I128 (bor x y)))
|
||||
(rule 7 (lower (has_type $I128 (bor x y)))
|
||||
(or_i128 x y))
|
||||
|
||||
;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
@@ -351,39 +367,45 @@
|
||||
;; `{i,b}64` and smaller.
|
||||
|
||||
;; Xor two registers.
|
||||
(rule 0 (lower (has_type (fits_in_64 ty) (bxor x y)))
|
||||
(rule 0 (lower (has_type ty (bxor x y)))
|
||||
(if (ty_int_ref_scalar_64 ty))
|
||||
(x64_xor ty x y))
|
||||
|
||||
;; Xor with a memory operand.
|
||||
|
||||
(rule 1 (lower (has_type (fits_in_64 ty)
|
||||
(bxor x (sinkable_load y))))
|
||||
(rule 1 (lower (has_type ty (bxor x (sinkable_load y))))
|
||||
(if (ty_int_ref_scalar_64 ty))
|
||||
(x64_xor ty x
|
||||
(sink_load_to_gpr_mem_imm y)))
|
||||
|
||||
(rule 2 (lower (has_type (fits_in_64 ty)
|
||||
(bxor (sinkable_load x) y)))
|
||||
(rule 2 (lower (has_type ty (bxor (sinkable_load x) y)))
|
||||
(if (ty_int_ref_scalar_64 ty))
|
||||
(x64_xor ty y
|
||||
(sink_load_to_gpr_mem_imm x)))
|
||||
|
||||
;; Xor with an immediate.
|
||||
|
||||
(rule 3 (lower (has_type (fits_in_64 ty)
|
||||
(bxor x (simm32_from_value y))))
|
||||
(rule 3 (lower (has_type ty (bxor x (simm32_from_value y))))
|
||||
(if (ty_int_ref_scalar_64 ty))
|
||||
(x64_xor ty x y))
|
||||
|
||||
(rule 4 (lower (has_type (fits_in_64 ty)
|
||||
(bxor (simm32_from_value x) y)))
|
||||
(rule 4 (lower (has_type ty (bxor (simm32_from_value x) y)))
|
||||
(if (ty_int_ref_scalar_64 ty))
|
||||
(x64_xor ty y x))
|
||||
|
||||
;; f32 and f64
|
||||
|
||||
(rule 5 (lower (has_type (ty_scalar_float ty) (bxor x y)))
|
||||
(sse_xor ty x y))
|
||||
|
||||
;; SSE.
|
||||
|
||||
(rule 5 (lower (has_type ty @ (multi_lane _bits _lanes) (bxor x y)))
|
||||
(rule 6 (lower (has_type ty @ (multi_lane _bits _lanes) (bxor x y)))
|
||||
(sse_xor ty x y))
|
||||
|
||||
;; `{i,b}128`.
|
||||
|
||||
(rule 6 (lower (has_type $I128 (bxor x y)))
|
||||
(rule 7 (lower (has_type $I128 (bxor x y)))
|
||||
(let ((x_regs ValueRegs x)
|
||||
(x_lo Gpr (value_regs_get_gpr x_regs 0))
|
||||
(x_hi Gpr (value_regs_get_gpr x_regs 1))
|
||||
@@ -1220,9 +1242,11 @@
|
||||
|
||||
;; `{i,b}64` and smaller.
|
||||
|
||||
(rule -2 (lower (has_type (fits_in_64 ty) (bnot x)))
|
||||
(rule -2 (lower (has_type ty (bnot x)))
|
||||
(if (ty_int_ref_scalar_64 ty))
|
||||
(x64_not ty x))
|
||||
|
||||
|
||||
;; `i128`.
|
||||
|
||||
(decl i128_not (Value) ValueRegs)
|
||||
@@ -1236,6 +1260,11 @@
|
||||
(rule (lower (has_type $I128 (bnot x)))
|
||||
(i128_not x))
|
||||
|
||||
;; f32 and f64
|
||||
|
||||
(rule -3 (lower (has_type (ty_scalar_float ty) (bnot x)))
|
||||
(sse_xor ty x (vector_all_ones)))
|
||||
|
||||
;; Special case for vector-types where bit-negation is an xor against an
|
||||
;; all-ones value.
|
||||
(rule -1 (lower (has_type ty @ (multi_lane _bits _lanes) (bnot x)))
|
||||
|
||||
Reference in New Issue
Block a user