cranelift: Support bnot, band, bor, bxor for x86_64. (#5036)

* Support `bnot`, `band`, `bor`, `bxor` for x86_64.

* Fix-up to handle `B{8,16,32,64}` type on bitops

* Fix-up conflict.
This commit is contained in:
Jun Ryung Ju
2022-11-19 00:45:54 +09:00
committed by GitHub
parent 7e4077805b
commit e5f93d9ec0
3 changed files with 67 additions and 34 deletions

View File

@@ -1531,6 +1531,8 @@
(decl sse_xor_op (Type) SseOpcode) (decl sse_xor_op (Type) SseOpcode)
(rule 1 (sse_xor_op $F32X4) (SseOpcode.Xorps)) (rule 1 (sse_xor_op $F32X4) (SseOpcode.Xorps))
(rule 1 (sse_xor_op $F64X2) (SseOpcode.Xorpd)) (rule 1 (sse_xor_op $F64X2) (SseOpcode.Xorpd))
;; Scalar floats select the same opcodes as the corresponding packed-float
;; vector types: `$F32` -> `Xorps` (like `$F32X4`), `$F64` -> `Xorpd` (like
;; `$F64X2`).
(rule 1 (sse_xor_op $F32) (SseOpcode.Xorps))
(rule 1 (sse_xor_op $F64) (SseOpcode.Xorpd))
;; Priority 0 because multi_lane overlaps with the previous two explicit type ;; Priority 0 because multi_lane overlaps with the explicit vector type
;; patterns. ;; patterns (`$F32X4`, `$F64X2`) above.

View File

@@ -242,46 +242,54 @@
;; `{i,b}64` and smaller. ;; `{i,b}64` and smaller.
;; And two registers. ;; And two registers.
(rule 0 (lower (has_type (fits_in_64 ty) (band x y))) (rule 0 (lower (has_type ty (band x y)))
(if (ty_int_ref_scalar_64 ty))
(x64_and ty x y)) (x64_and ty x y))
;; And with a memory operand. ;; And with a memory operand.
(rule 1 (lower (has_type (fits_in_64 ty) (rule 1 (lower (has_type ty (band x (sinkable_load y))))
(band x (sinkable_load y)))) (if (ty_int_ref_scalar_64 ty))
(x64_and ty x (x64_and ty x
(sink_load_to_gpr_mem_imm y))) (sink_load_to_gpr_mem_imm y)))
(rule 2 (lower (has_type (fits_in_64 ty) (rule 2 (lower (has_type ty (band (sinkable_load x) y)))
(band (sinkable_load x) y))) (if (ty_int_ref_scalar_64 ty))
(x64_and ty (x64_and ty
y y
(sink_load_to_gpr_mem_imm x))) (sink_load_to_gpr_mem_imm x)))
;; And with an immediate. ;; And with an immediate.
(rule 3 (lower (has_type (fits_in_64 ty) (rule 3 (lower (has_type ty (band x (simm32_from_value y))))
(band x (simm32_from_value y)))) (if (ty_int_ref_scalar_64 ty))
(x64_and ty x y)) (x64_and ty x y))
(rule 4 (lower (has_type (fits_in_64 ty) (rule 4 (lower (has_type ty (band (simm32_from_value x) y)))
(band (simm32_from_value x) y))) (if (ty_int_ref_scalar_64 ty))
(x64_and ty y x)) (x64_and ty y x))
;; `f32` and `f64`: a bitwise AND of scalar floats is lowered through
;; `sse_and`, which takes `Xmm`/`XmmMem` operands and has explicit `$F32` and
;; `$F64` cases.
(rule 5 (lower (has_type (ty_scalar_float ty) (band x y)))
(sse_and ty x y))
;; SSE. ;; SSE.
(decl sse_and (Type Xmm XmmMem) Xmm) (decl sse_and (Type Xmm XmmMem) Xmm)
(rule (sse_and $F32X4 x y) (x64_andps x y)) (rule (sse_and $F32X4 x y) (x64_andps x y))
(rule (sse_and $F64X2 x y) (x64_andpd x y)) (rule (sse_and $F64X2 x y) (x64_andpd x y))
;; Scalar floats use the same instructions as the corresponding packed-float
;; vector types: `andps` for `$F32`, `andpd` for `$F64`.
(rule (sse_and $F32 x y) (x64_andps x y))
(rule (sse_and $F64 x y) (x64_andpd x y))
(rule -1 (sse_and (multi_lane _bits _lanes) x y) (x64_pand x y)) (rule -1 (sse_and (multi_lane _bits _lanes) x y) (x64_pand x y))
(rule 5 (lower (has_type ty @ (multi_lane _bits _lanes) (rule 6 (lower (has_type ty @ (multi_lane _bits _lanes)
(band x y))) (band x y)))
(sse_and ty x y)) (sse_and ty x y))
;; `i128`. ;; `i128`.
(rule 6 (lower (has_type $I128 (band x y))) (rule 7 (lower (has_type $I128 (band x y)))
(let ((x_regs ValueRegs x) (let ((x_regs ValueRegs x)
(x_lo Gpr (value_regs_get_gpr x_regs 0)) (x_lo Gpr (value_regs_get_gpr x_regs 0))
(x_hi Gpr (value_regs_get_gpr x_regs 1)) (x_hi Gpr (value_regs_get_gpr x_regs 1))
@@ -296,39 +304,47 @@
;; `{i,b}64` and smaller. ;; `{i,b}64` and smaller.
;; Or two registers. ;; Or two registers.
(rule 0 (lower (has_type (fits_in_64 ty) (bor x y))) (rule 0 (lower (has_type ty (bor x y)))
(if (ty_int_ref_scalar_64 ty))
(x64_or ty x y)) (x64_or ty x y))
;; Or with a memory operand. ;; Or with a memory operand.
(rule 1 (lower (has_type (fits_in_64 ty) (rule 1 (lower (has_type ty (bor x (sinkable_load y))))
(bor x (sinkable_load y)))) (if (ty_int_ref_scalar_64 ty))
(x64_or ty x (x64_or ty x
(sink_load_to_gpr_mem_imm y))) (sink_load_to_gpr_mem_imm y)))
(rule 2 (lower (has_type (fits_in_64 ty) (rule 2 (lower (has_type ty (bor (sinkable_load x) y)))
(bor (sinkable_load x) y))) (if (ty_int_ref_scalar_64 ty))
(x64_or ty y (x64_or ty y
(sink_load_to_gpr_mem_imm x))) (sink_load_to_gpr_mem_imm x)))
;; Or with an immediate. ;; Or with an immediate.
(rule 3 (lower (has_type (fits_in_64 ty) (rule 3 (lower (has_type ty (bor x (simm32_from_value y))))
(bor x (simm32_from_value y)))) (if (ty_int_ref_scalar_64 ty))
(x64_or ty x y)) (x64_or ty x y))
(rule 4 (lower (has_type (fits_in_64 ty) (rule 4 (lower (has_type ty (bor (simm32_from_value x) y)))
(bor (simm32_from_value x) y))) (if (ty_int_ref_scalar_64 ty))
(x64_or ty y x)) (x64_or ty y x))
;; `f32` and `f64`: a bitwise OR of scalar floats is lowered through `sse_or`,
;; which takes `Xmm`/`XmmMem` operands and has explicit `$F32` and `$F64`
;; cases.
(rule 5 (lower (has_type (ty_scalar_float ty) (bor x y)))
(sse_or ty x y))
;; SSE. ;; SSE.
(decl sse_or (Type Xmm XmmMem) Xmm) (decl sse_or (Type Xmm XmmMem) Xmm)
(rule (sse_or $F32X4 x y) (x64_orps x y)) (rule (sse_or $F32X4 x y) (x64_orps x y))
(rule (sse_or $F64X2 x y) (x64_orpd x y)) (rule (sse_or $F64X2 x y) (x64_orpd x y))
;; Scalar floats use the same instructions as the corresponding packed-float
;; vector types: `orps` for `$F32`, `orpd` for `$F64`.
(rule (sse_or $F32 x y) (x64_orps x y))
(rule (sse_or $F64 x y) (x64_orpd x y))
(rule -1 (sse_or (multi_lane _bits _lanes) x y) (x64_por x y)) (rule -1 (sse_or (multi_lane _bits _lanes) x y) (x64_por x y))
(rule 5 (lower (has_type ty @ (multi_lane _bits _lanes) (rule 6 (lower (has_type ty @ (multi_lane _bits _lanes)
(bor x y))) (bor x y)))
(sse_or ty x y)) (sse_or ty x y))
@@ -343,7 +359,7 @@
(value_gprs (x64_or $I64 x_lo y_lo) (value_gprs (x64_or $I64 x_lo y_lo)
(x64_or $I64 x_hi y_hi)))) (x64_or $I64 x_hi y_hi))))
(rule 6 (lower (has_type $I128 (bor x y))) (rule 7 (lower (has_type $I128 (bor x y)))
(or_i128 x y)) (or_i128 x y))
;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -351,39 +367,45 @@
;; `{i,b}64` and smaller. ;; `{i,b}64` and smaller.
;; Xor two registers. ;; Xor two registers.
(rule 0 (lower (has_type (fits_in_64 ty) (bxor x y))) (rule 0 (lower (has_type ty (bxor x y)))
(if (ty_int_ref_scalar_64 ty))
(x64_xor ty x y)) (x64_xor ty x y))
;; Xor with a memory operand. ;; Xor with a memory operand.
(rule 1 (lower (has_type (fits_in_64 ty) (rule 1 (lower (has_type ty (bxor x (sinkable_load y))))
(bxor x (sinkable_load y)))) (if (ty_int_ref_scalar_64 ty))
(x64_xor ty x (x64_xor ty x
(sink_load_to_gpr_mem_imm y))) (sink_load_to_gpr_mem_imm y)))
(rule 2 (lower (has_type (fits_in_64 ty) (rule 2 (lower (has_type ty (bxor (sinkable_load x) y)))
(bxor (sinkable_load x) y))) (if (ty_int_ref_scalar_64 ty))
(x64_xor ty y (x64_xor ty y
(sink_load_to_gpr_mem_imm x))) (sink_load_to_gpr_mem_imm x)))
;; Xor with an immediate. ;; Xor with an immediate.
(rule 3 (lower (has_type (fits_in_64 ty) (rule 3 (lower (has_type ty (bxor x (simm32_from_value y))))
(bxor x (simm32_from_value y)))) (if (ty_int_ref_scalar_64 ty))
(x64_xor ty x y)) (x64_xor ty x y))
(rule 4 (lower (has_type (fits_in_64 ty) (rule 4 (lower (has_type ty (bxor (simm32_from_value x) y)))
(bxor (simm32_from_value x) y))) (if (ty_int_ref_scalar_64 ty))
(x64_xor ty y x)) (x64_xor ty y x))
;; `f32` and `f64`: a bitwise XOR of scalar floats is lowered through
;; `sse_xor`, the same helper the vector (`multi_lane`) rule uses.
(rule 5 (lower (has_type (ty_scalar_float ty) (bxor x y)))
(sse_xor ty x y))
;; SSE. ;; SSE.
(rule 5 (lower (has_type ty @ (multi_lane _bits _lanes) (bxor x y))) (rule 6 (lower (has_type ty @ (multi_lane _bits _lanes) (bxor x y)))
(sse_xor ty x y)) (sse_xor ty x y))
;; `{i,b}128`. ;; `{i,b}128`.
(rule 6 (lower (has_type $I128 (bxor x y))) (rule 7 (lower (has_type $I128 (bxor x y)))
(let ((x_regs ValueRegs x) (let ((x_regs ValueRegs x)
(x_lo Gpr (value_regs_get_gpr x_regs 0)) (x_lo Gpr (value_regs_get_gpr x_regs 0))
(x_hi Gpr (value_regs_get_gpr x_regs 1)) (x_hi Gpr (value_regs_get_gpr x_regs 1))
@@ -1220,9 +1242,11 @@
;; `i64` and smaller. ;; `i64` and smaller.
(rule -2 (lower (has_type (fits_in_64 ty) (bnot x))) (rule -2 (lower (has_type ty (bnot x)))
(if (ty_int_ref_scalar_64 ty))
(x64_not ty x)) (x64_not ty x))
;; `i128`. ;; `i128`.
(decl i128_not (Value) ValueRegs) (decl i128_not (Value) ValueRegs)
@@ -1236,6 +1260,11 @@
(rule (lower (has_type $I128 (bnot x))) (rule (lower (has_type $I128 (bnot x)))
(i128_not x)) (i128_not x))
;; `f32` and `f64`: bit-negation of a scalar float is lowered as an xor
;; (`sse_xor`) of the operand against an all-ones value (`vector_all_ones`).
(rule -3 (lower (has_type (ty_scalar_float ty) (bnot x)))
(sse_xor ty x (vector_all_ones)))
;; Special case for vector-types where bit-negation is an xor against an ;; Special case for vector-types where bit-negation is an xor against an
;; all-one value ;; all-one value
(rule -1 (lower (has_type ty @ (multi_lane _bits _lanes) (bnot x))) (rule -1 (lower (has_type ty @ (multi_lane _bits _lanes) (bnot x)))

View File

@@ -1,4 +1,6 @@
test interpret test interpret
test run
target x86_64
function %bnot_f32(f32) -> f32 { function %bnot_f32(f32) -> f32 {
block0(v0: f32): block0(v0: f32):