cranelift: Support bnot, band, bor, bxor for x86_64. (#5036)
* Support `bnot`, `band`, `bor`, `bxor` for x86_64.
* Fix-up to handle `B{8,16,32,64}` type on bitops
* Fix-up conflict.
This commit is contained in:
@@ -1531,6 +1531,8 @@
|
|||||||
(decl sse_xor_op (Type) SseOpcode)
|
(decl sse_xor_op (Type) SseOpcode)
|
||||||
(rule 1 (sse_xor_op $F32X4) (SseOpcode.Xorps))
|
(rule 1 (sse_xor_op $F32X4) (SseOpcode.Xorps))
|
||||||
(rule 1 (sse_xor_op $F64X2) (SseOpcode.Xorpd))
|
(rule 1 (sse_xor_op $F64X2) (SseOpcode.Xorpd))
|
||||||
|
(rule 1 (sse_xor_op $F32) (SseOpcode.Xorps))
|
||||||
|
(rule 1 (sse_xor_op $F64) (SseOpcode.Xorpd))
|
||||||
|
|
||||||
;; Priority 0 because multi_lane overlaps with the explicit $F32X4/$F64X2
|
;; Priority 0 because multi_lane overlaps with the explicit $F32X4/$F64X2
|
||||||
;; patterns.
|
;; patterns.
|
||||||
|
|||||||
@@ -242,46 +242,54 @@
|
|||||||
;; `{i,b}64` and smaller.
|
;; `{i,b}64` and smaller.
|
||||||
|
|
||||||
;; And two registers.
|
;; And two registers.
|
||||||
(rule 0 (lower (has_type (fits_in_64 ty) (band x y)))
|
(rule 0 (lower (has_type ty (band x y)))
|
||||||
|
(if (ty_int_ref_scalar_64 ty))
|
||||||
(x64_and ty x y))
|
(x64_and ty x y))
|
||||||
|
|
||||||
;; And with a memory operand.
|
;; And with a memory operand.
|
||||||
|
|
||||||
(rule 1 (lower (has_type (fits_in_64 ty)
|
(rule 1 (lower (has_type ty (band x (sinkable_load y))))
|
||||||
(band x (sinkable_load y))))
|
(if (ty_int_ref_scalar_64 ty))
|
||||||
(x64_and ty x
|
(x64_and ty x
|
||||||
(sink_load_to_gpr_mem_imm y)))
|
(sink_load_to_gpr_mem_imm y)))
|
||||||
|
|
||||||
(rule 2 (lower (has_type (fits_in_64 ty)
|
(rule 2 (lower (has_type ty (band (sinkable_load x) y)))
|
||||||
(band (sinkable_load x) y)))
|
(if (ty_int_ref_scalar_64 ty))
|
||||||
(x64_and ty
|
(x64_and ty
|
||||||
y
|
y
|
||||||
(sink_load_to_gpr_mem_imm x)))
|
(sink_load_to_gpr_mem_imm x)))
|
||||||
|
|
||||||
;; And with an immediate.
|
;; And with an immediate.
|
||||||
|
|
||||||
(rule 3 (lower (has_type (fits_in_64 ty)
|
(rule 3 (lower (has_type ty (band x (simm32_from_value y))))
|
||||||
(band x (simm32_from_value y))))
|
(if (ty_int_ref_scalar_64 ty))
|
||||||
(x64_and ty x y))
|
(x64_and ty x y))
|
||||||
|
|
||||||
(rule 4 (lower (has_type (fits_in_64 ty)
|
(rule 4 (lower (has_type ty (band (simm32_from_value x) y)))
|
||||||
(band (simm32_from_value x) y)))
|
(if (ty_int_ref_scalar_64 ty))
|
||||||
(x64_and ty y x))
|
(x64_and ty y x))
|
||||||
|
|
||||||
|
;; f32 and f64
|
||||||
|
|
||||||
|
(rule 5 (lower (has_type (ty_scalar_float ty) (band x y)))
|
||||||
|
(sse_and ty x y))
|
||||||
|
|
||||||
;; SSE.
|
;; SSE.
|
||||||
|
|
||||||
(decl sse_and (Type Xmm XmmMem) Xmm)
|
(decl sse_and (Type Xmm XmmMem) Xmm)
|
||||||
(rule (sse_and $F32X4 x y) (x64_andps x y))
|
(rule (sse_and $F32X4 x y) (x64_andps x y))
|
||||||
(rule (sse_and $F64X2 x y) (x64_andpd x y))
|
(rule (sse_and $F64X2 x y) (x64_andpd x y))
|
||||||
|
(rule (sse_and $F32 x y) (x64_andps x y))
|
||||||
|
(rule (sse_and $F64 x y) (x64_andpd x y))
|
||||||
(rule -1 (sse_and (multi_lane _bits _lanes) x y) (x64_pand x y))
|
(rule -1 (sse_and (multi_lane _bits _lanes) x y) (x64_pand x y))
|
||||||
|
|
||||||
(rule 5 (lower (has_type ty @ (multi_lane _bits _lanes)
|
(rule 6 (lower (has_type ty @ (multi_lane _bits _lanes)
|
||||||
(band x y)))
|
(band x y)))
|
||||||
(sse_and ty x y))
|
(sse_and ty x y))
|
||||||
|
|
||||||
;; `i128`.
|
;; `i128`.
|
||||||
|
|
||||||
(rule 6 (lower (has_type $I128 (band x y)))
|
(rule 7 (lower (has_type $I128 (band x y)))
|
||||||
(let ((x_regs ValueRegs x)
|
(let ((x_regs ValueRegs x)
|
||||||
(x_lo Gpr (value_regs_get_gpr x_regs 0))
|
(x_lo Gpr (value_regs_get_gpr x_regs 0))
|
||||||
(x_hi Gpr (value_regs_get_gpr x_regs 1))
|
(x_hi Gpr (value_regs_get_gpr x_regs 1))
|
||||||
@@ -296,39 +304,47 @@
|
|||||||
;; `{i,b}64` and smaller.
|
;; `{i,b}64` and smaller.
|
||||||
|
|
||||||
;; Or two registers.
|
;; Or two registers.
|
||||||
(rule 0 (lower (has_type (fits_in_64 ty) (bor x y)))
|
(rule 0 (lower (has_type ty (bor x y)))
|
||||||
|
(if (ty_int_ref_scalar_64 ty))
|
||||||
(x64_or ty x y))
|
(x64_or ty x y))
|
||||||
|
|
||||||
;; Or with a memory operand.
|
;; Or with a memory operand.
|
||||||
|
|
||||||
(rule 1 (lower (has_type (fits_in_64 ty)
|
(rule 1 (lower (has_type ty (bor x (sinkable_load y))))
|
||||||
(bor x (sinkable_load y))))
|
(if (ty_int_ref_scalar_64 ty))
|
||||||
(x64_or ty x
|
(x64_or ty x
|
||||||
(sink_load_to_gpr_mem_imm y)))
|
(sink_load_to_gpr_mem_imm y)))
|
||||||
|
|
||||||
(rule 2 (lower (has_type (fits_in_64 ty)
|
(rule 2 (lower (has_type ty (bor (sinkable_load x) y)))
|
||||||
(bor (sinkable_load x) y)))
|
(if (ty_int_ref_scalar_64 ty))
|
||||||
(x64_or ty y
|
(x64_or ty y
|
||||||
(sink_load_to_gpr_mem_imm x)))
|
(sink_load_to_gpr_mem_imm x)))
|
||||||
|
|
||||||
;; Or with an immediate.
|
;; Or with an immediate.
|
||||||
|
|
||||||
(rule 3 (lower (has_type (fits_in_64 ty)
|
(rule 3 (lower (has_type ty (bor x (simm32_from_value y))))
|
||||||
(bor x (simm32_from_value y))))
|
(if (ty_int_ref_scalar_64 ty))
|
||||||
(x64_or ty x y))
|
(x64_or ty x y))
|
||||||
|
|
||||||
(rule 4 (lower (has_type (fits_in_64 ty)
|
(rule 4 (lower (has_type ty (bor (simm32_from_value x) y)))
|
||||||
(bor (simm32_from_value x) y)))
|
(if (ty_int_ref_scalar_64 ty))
|
||||||
(x64_or ty y x))
|
(x64_or ty y x))
|
||||||
|
|
||||||
|
;; f32 and f64
|
||||||
|
|
||||||
|
(rule 5 (lower (has_type (ty_scalar_float ty) (bor x y)))
|
||||||
|
(sse_or ty x y))
|
||||||
|
|
||||||
;; SSE.
|
;; SSE.
|
||||||
|
|
||||||
(decl sse_or (Type Xmm XmmMem) Xmm)
|
(decl sse_or (Type Xmm XmmMem) Xmm)
|
||||||
(rule (sse_or $F32X4 x y) (x64_orps x y))
|
(rule (sse_or $F32X4 x y) (x64_orps x y))
|
||||||
(rule (sse_or $F64X2 x y) (x64_orpd x y))
|
(rule (sse_or $F64X2 x y) (x64_orpd x y))
|
||||||
|
(rule (sse_or $F32 x y) (x64_orps x y))
|
||||||
|
(rule (sse_or $F64 x y) (x64_orpd x y))
|
||||||
(rule -1 (sse_or (multi_lane _bits _lanes) x y) (x64_por x y))
|
(rule -1 (sse_or (multi_lane _bits _lanes) x y) (x64_por x y))
|
||||||
|
|
||||||
(rule 5 (lower (has_type ty @ (multi_lane _bits _lanes)
|
(rule 6 (lower (has_type ty @ (multi_lane _bits _lanes)
|
||||||
(bor x y)))
|
(bor x y)))
|
||||||
(sse_or ty x y))
|
(sse_or ty x y))
|
||||||
|
|
||||||
@@ -343,7 +359,7 @@
|
|||||||
(value_gprs (x64_or $I64 x_lo y_lo)
|
(value_gprs (x64_or $I64 x_lo y_lo)
|
||||||
(x64_or $I64 x_hi y_hi))))
|
(x64_or $I64 x_hi y_hi))))
|
||||||
|
|
||||||
(rule 6 (lower (has_type $I128 (bor x y)))
|
(rule 7 (lower (has_type $I128 (bor x y)))
|
||||||
(or_i128 x y))
|
(or_i128 x y))
|
||||||
|
|
||||||
;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
@@ -351,39 +367,45 @@
|
|||||||
;; `{i,b}64` and smaller.
|
;; `{i,b}64` and smaller.
|
||||||
|
|
||||||
;; Xor two registers.
|
;; Xor two registers.
|
||||||
(rule 0 (lower (has_type (fits_in_64 ty) (bxor x y)))
|
(rule 0 (lower (has_type ty (bxor x y)))
|
||||||
|
(if (ty_int_ref_scalar_64 ty))
|
||||||
(x64_xor ty x y))
|
(x64_xor ty x y))
|
||||||
|
|
||||||
;; Xor with a memory operand.
|
;; Xor with a memory operand.
|
||||||
|
|
||||||
(rule 1 (lower (has_type (fits_in_64 ty)
|
(rule 1 (lower (has_type ty (bxor x (sinkable_load y))))
|
||||||
(bxor x (sinkable_load y))))
|
(if (ty_int_ref_scalar_64 ty))
|
||||||
(x64_xor ty x
|
(x64_xor ty x
|
||||||
(sink_load_to_gpr_mem_imm y)))
|
(sink_load_to_gpr_mem_imm y)))
|
||||||
|
|
||||||
(rule 2 (lower (has_type (fits_in_64 ty)
|
(rule 2 (lower (has_type ty (bxor (sinkable_load x) y)))
|
||||||
(bxor (sinkable_load x) y)))
|
(if (ty_int_ref_scalar_64 ty))
|
||||||
(x64_xor ty y
|
(x64_xor ty y
|
||||||
(sink_load_to_gpr_mem_imm x)))
|
(sink_load_to_gpr_mem_imm x)))
|
||||||
|
|
||||||
;; Xor with an immediate.
|
;; Xor with an immediate.
|
||||||
|
|
||||||
(rule 3 (lower (has_type (fits_in_64 ty)
|
(rule 3 (lower (has_type ty (bxor x (simm32_from_value y))))
|
||||||
(bxor x (simm32_from_value y))))
|
(if (ty_int_ref_scalar_64 ty))
|
||||||
(x64_xor ty x y))
|
(x64_xor ty x y))
|
||||||
|
|
||||||
(rule 4 (lower (has_type (fits_in_64 ty)
|
(rule 4 (lower (has_type ty (bxor (simm32_from_value x) y)))
|
||||||
(bxor (simm32_from_value x) y)))
|
(if (ty_int_ref_scalar_64 ty))
|
||||||
(x64_xor ty y x))
|
(x64_xor ty y x))
|
||||||
|
|
||||||
|
;; f32 and f64
|
||||||
|
|
||||||
|
(rule 5 (lower (has_type (ty_scalar_float ty) (bxor x y)))
|
||||||
|
(sse_xor ty x y))
|
||||||
|
|
||||||
;; SSE.
|
;; SSE.
|
||||||
|
|
||||||
(rule 5 (lower (has_type ty @ (multi_lane _bits _lanes) (bxor x y)))
|
(rule 6 (lower (has_type ty @ (multi_lane _bits _lanes) (bxor x y)))
|
||||||
(sse_xor ty x y))
|
(sse_xor ty x y))
|
||||||
|
|
||||||
;; `{i,b}128`.
|
;; `{i,b}128`.
|
||||||
|
|
||||||
(rule 6 (lower (has_type $I128 (bxor x y)))
|
(rule 7 (lower (has_type $I128 (bxor x y)))
|
||||||
(let ((x_regs ValueRegs x)
|
(let ((x_regs ValueRegs x)
|
||||||
(x_lo Gpr (value_regs_get_gpr x_regs 0))
|
(x_lo Gpr (value_regs_get_gpr x_regs 0))
|
||||||
(x_hi Gpr (value_regs_get_gpr x_regs 1))
|
(x_hi Gpr (value_regs_get_gpr x_regs 1))
|
||||||
@@ -1220,9 +1242,11 @@
|
|||||||
|
|
||||||
;; `{i,b}64` and smaller.
|
;; `{i,b}64` and smaller.
|
||||||
|
|
||||||
(rule -2 (lower (has_type (fits_in_64 ty) (bnot x)))
|
(rule -2 (lower (has_type ty (bnot x)))
|
||||||
|
(if (ty_int_ref_scalar_64 ty))
|
||||||
(x64_not ty x))
|
(x64_not ty x))
|
||||||
|
|
||||||
|
|
||||||
;; `i128`.
|
;; `i128`.
|
||||||
|
|
||||||
(decl i128_not (Value) ValueRegs)
|
(decl i128_not (Value) ValueRegs)
|
||||||
@@ -1236,6 +1260,11 @@
|
|||||||
(rule (lower (has_type $I128 (bnot x)))
|
(rule (lower (has_type $I128 (bnot x)))
|
||||||
(i128_not x))
|
(i128_not x))
|
||||||
|
|
||||||
|
;; f32 and f64
|
||||||
|
|
||||||
|
(rule -3 (lower (has_type (ty_scalar_float ty) (bnot x)))
|
||||||
|
(sse_xor ty x (vector_all_ones)))
|
||||||
|
|
||||||
;; Special case for vector-types where bit-negation is an xor against an
|
;; Special case for vector-types where bit-negation is an xor against an
|
||||||
;; all-one value
|
;; all-one value
|
||||||
(rule -1 (lower (has_type ty @ (multi_lane _bits _lanes) (bnot x)))
|
(rule -1 (lower (has_type ty @ (multi_lane _bits _lanes) (bnot x)))
|
||||||
|
|||||||
@@ -1,4 +1,6 @@
|
|||||||
test interpret
|
test interpret
|
||||||
|
test run
|
||||||
|
target x86_64
|
||||||
|
|
||||||
function %bnot_f32(f32) -> f32 {
|
function %bnot_f32(f32) -> f32 {
|
||||||
block0(v0: f32):
|
block0(v0: f32):
|
||||||
|
|||||||
Reference in New Issue
Block a user