cranelift: Support bnot, band, bor, bxor for x86_64. (#5036)

* Support `bnot`, `band`, `bor`, `bxor` for x86_64.

* Fix-up to handle `B{8,16,32,64}` type on bitops

* Fix-up conflict.
This commit is contained in:
Jun Ryung Ju
2022-11-19 00:45:54 +09:00
committed by GitHub
parent 7e4077805b
commit e5f93d9ec0
3 changed files with 67 additions and 34 deletions

View File

@@ -1531,6 +1531,8 @@
(decl sse_xor_op (Type) SseOpcode)
(rule 1 (sse_xor_op $F32X4) (SseOpcode.Xorps))
(rule 1 (sse_xor_op $F64X2) (SseOpcode.Xorpd))
(rule 1 (sse_xor_op $F32) (SseOpcode.Xorps))
(rule 1 (sse_xor_op $F64) (SseOpcode.Xorpd))
;; Priority 0 because multi_lane overlaps with the explicit vector type
;; patterns (`$F32X4` and `$F64X2`) above.

View File

@@ -242,46 +242,54 @@
;; `{i,b}64` and smaller.
;; And two registers.
(rule 0 (lower (has_type (fits_in_64 ty) (band x y)))
(rule 0 (lower (has_type ty (band x y)))
(if (ty_int_ref_scalar_64 ty))
(x64_and ty x y))
;; And with a memory operand.
(rule 1 (lower (has_type (fits_in_64 ty)
(band x (sinkable_load y))))
(rule 1 (lower (has_type ty (band x (sinkable_load y))))
(if (ty_int_ref_scalar_64 ty))
(x64_and ty x
(sink_load_to_gpr_mem_imm y)))
(rule 2 (lower (has_type (fits_in_64 ty)
(band (sinkable_load x) y)))
(rule 2 (lower (has_type ty (band (sinkable_load x) y)))
(if (ty_int_ref_scalar_64 ty))
(x64_and ty
y
(sink_load_to_gpr_mem_imm x)))
;; And with an immediate.
(rule 3 (lower (has_type (fits_in_64 ty)
(band x (simm32_from_value y))))
(rule 3 (lower (has_type ty (band x (simm32_from_value y))))
(if (ty_int_ref_scalar_64 ty))
(x64_and ty x y))
(rule 4 (lower (has_type (fits_in_64 ty)
(band (simm32_from_value x) y)))
(rule 4 (lower (has_type ty (band (simm32_from_value x) y)))
(if (ty_int_ref_scalar_64 ty))
(x64_and ty y x))
;; f32 and f64
(rule 5 (lower (has_type (ty_scalar_float ty) (band x y)))
(sse_and ty x y))
;; SSE.
;; Helper: emit the SSE bitwise-AND instruction selected by the given type.
;; Takes the type plus an `Xmm` and an `XmmMem` operand and yields an `Xmm`.
(decl sse_and (Type Xmm XmmMem) Xmm)
;; Single-precision types (vector and scalar) use `x64_andps`;
;; double-precision types (vector and scalar) use `x64_andpd`.
(rule (sse_and $F32X4 x y) (x64_andps x y))
(rule (sse_and $F64X2 x y) (x64_andpd x y))
(rule (sse_and $F32 x y) (x64_andps x y))
(rule (sse_and $F64 x y) (x64_andpd x y))
;; Generic fallback for any other `multi_lane` type, using `x64_pand`.
;; NOTE(review): the explicit priority -1 is presumably there so the
;; unannotated explicit-type rules above win where they overlap with
;; `multi_lane` -- confirm against ISLE's priority rules.
(rule -1 (sse_and (multi_lane _bits _lanes) x y) (x64_pand x y))
(rule 5 (lower (has_type ty @ (multi_lane _bits _lanes)
(rule 6 (lower (has_type ty @ (multi_lane _bits _lanes)
(band x y)))
(sse_and ty x y))
;; `i128`.
(rule 6 (lower (has_type $I128 (band x y)))
(rule 7 (lower (has_type $I128 (band x y)))
(let ((x_regs ValueRegs x)
(x_lo Gpr (value_regs_get_gpr x_regs 0))
(x_hi Gpr (value_regs_get_gpr x_regs 1))
@@ -296,39 +304,47 @@
;; `{i,b}64` and smaller.
;; Or two registers.
(rule 0 (lower (has_type (fits_in_64 ty) (bor x y)))
(rule 0 (lower (has_type ty (bor x y)))
(if (ty_int_ref_scalar_64 ty))
(x64_or ty x y))
;; Or with a memory operand.
(rule 1 (lower (has_type (fits_in_64 ty)
(bor x (sinkable_load y))))
(rule 1 (lower (has_type ty (bor x (sinkable_load y))))
(if (ty_int_ref_scalar_64 ty))
(x64_or ty x
(sink_load_to_gpr_mem_imm y)))
(rule 2 (lower (has_type (fits_in_64 ty)
(bor (sinkable_load x) y)))
(rule 2 (lower (has_type ty (bor (sinkable_load x) y)))
(if (ty_int_ref_scalar_64 ty))
(x64_or ty y
(sink_load_to_gpr_mem_imm x)))
;; Or with an immediate.
(rule 3 (lower (has_type (fits_in_64 ty)
(bor x (simm32_from_value y))))
(rule 3 (lower (has_type ty (bor x (simm32_from_value y))))
(if (ty_int_ref_scalar_64 ty))
(x64_or ty x y))
(rule 4 (lower (has_type (fits_in_64 ty)
(bor (simm32_from_value x) y)))
(rule 4 (lower (has_type ty (bor (simm32_from_value x) y)))
(if (ty_int_ref_scalar_64 ty))
(x64_or ty y x))
;; f32 and f64
(rule 5 (lower (has_type (ty_scalar_float ty) (bor x y)))
(sse_or ty x y))
;; SSE.
;; Helper: emit the SSE bitwise-OR instruction selected by the given type.
;; Takes the type plus an `Xmm` and an `XmmMem` operand and yields an `Xmm`.
(decl sse_or (Type Xmm XmmMem) Xmm)
;; Single-precision types (vector and scalar) use `x64_orps`;
;; double-precision types (vector and scalar) use `x64_orpd`.
(rule (sse_or $F32X4 x y) (x64_orps x y))
(rule (sse_or $F64X2 x y) (x64_orpd x y))
(rule (sse_or $F32 x y) (x64_orps x y))
(rule (sse_or $F64 x y) (x64_orpd x y))
;; Generic fallback for any other `multi_lane` type, using `x64_por`.
;; NOTE(review): the explicit priority -1 is presumably there so the
;; unannotated explicit-type rules above win where they overlap with
;; `multi_lane` -- confirm against ISLE's priority rules.
(rule -1 (sse_or (multi_lane _bits _lanes) x y) (x64_por x y))
(rule 5 (lower (has_type ty @ (multi_lane _bits _lanes)
(rule 6 (lower (has_type ty @ (multi_lane _bits _lanes)
(bor x y)))
(sse_or ty x y))
@@ -343,7 +359,7 @@
(value_gprs (x64_or $I64 x_lo y_lo)
(x64_or $I64 x_hi y_hi))))
(rule 6 (lower (has_type $I128 (bor x y)))
(rule 7 (lower (has_type $I128 (bor x y)))
(or_i128 x y))
;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -351,39 +367,45 @@
;; `{i,b}64` and smaller.
;; Xor two registers.
(rule 0 (lower (has_type (fits_in_64 ty) (bxor x y)))
(rule 0 (lower (has_type ty (bxor x y)))
(if (ty_int_ref_scalar_64 ty))
(x64_xor ty x y))
;; Xor with a memory operand.
(rule 1 (lower (has_type (fits_in_64 ty)
(bxor x (sinkable_load y))))
(rule 1 (lower (has_type ty (bxor x (sinkable_load y))))
(if (ty_int_ref_scalar_64 ty))
(x64_xor ty x
(sink_load_to_gpr_mem_imm y)))
(rule 2 (lower (has_type (fits_in_64 ty)
(bxor (sinkable_load x) y)))
(rule 2 (lower (has_type ty (bxor (sinkable_load x) y)))
(if (ty_int_ref_scalar_64 ty))
(x64_xor ty y
(sink_load_to_gpr_mem_imm x)))
;; Xor with an immediate.
(rule 3 (lower (has_type (fits_in_64 ty)
(bxor x (simm32_from_value y))))
(rule 3 (lower (has_type ty (bxor x (simm32_from_value y))))
(if (ty_int_ref_scalar_64 ty))
(x64_xor ty x y))
(rule 4 (lower (has_type (fits_in_64 ty)
(bxor (simm32_from_value x) y)))
(rule 4 (lower (has_type ty (bxor (simm32_from_value x) y)))
(if (ty_int_ref_scalar_64 ty))
(x64_xor ty y x))
;; f32 and f64
(rule 5 (lower (has_type (ty_scalar_float ty) (bxor x y)))
(sse_xor ty x y))
;; SSE.
(rule 5 (lower (has_type ty @ (multi_lane _bits _lanes) (bxor x y)))
(rule 6 (lower (has_type ty @ (multi_lane _bits _lanes) (bxor x y)))
(sse_xor ty x y))
;; `{i,b}128`.
(rule 6 (lower (has_type $I128 (bxor x y)))
(rule 7 (lower (has_type $I128 (bxor x y)))
(let ((x_regs ValueRegs x)
(x_lo Gpr (value_regs_get_gpr x_regs 0))
(x_hi Gpr (value_regs_get_gpr x_regs 1))
@@ -1220,9 +1242,11 @@
;; `i64` and smaller.
(rule -2 (lower (has_type (fits_in_64 ty) (bnot x)))
(rule -2 (lower (has_type ty (bnot x)))
(if (ty_int_ref_scalar_64 ty))
(x64_not ty x))
;; `i128`.
(decl i128_not (Value) ValueRegs)
@@ -1236,6 +1260,11 @@
(rule (lower (has_type $I128 (bnot x)))
(i128_not x))
;; f32 and f64: lower `bnot` on a scalar float type as an `sse_xor` of the
;; operand with the value produced by `(vector_all_ones)`.
;; NOTE(review): `vector_all_ones` is defined outside this view; it is
;; presumably an all-ones XMM value, making the xor a bitwise complement --
;; confirm at its definition.
(rule -3 (lower (has_type (ty_scalar_float ty) (bnot x)))
(sse_xor ty x (vector_all_ones)))
;; Special case for vector-types where bit-negation is an xor against an
;; all-one value
(rule -1 (lower (has_type ty @ (multi_lane _bits _lanes) (bnot x)))