Legalize b{and,or,xor}_not into component instructions (#5709)
* Remove trailing whitespace in `lower.isle` files
* Legalize the `band_not` instruction into simpler form
This commit legalizes the `band_not` instruction into `band`-of-`bnot`,
i.e. two separate instructions. This is intended to assist with egraph-based
optimizations where the `band_not` instruction doesn't have to be
specifically included in other bit-operation-patterns.
Lowerings of the `band_not` instruction have been moved to a
specialization of the `band` instruction.
* Legalize `bor_not` into components
Same as prior commit, but for the `bor_not` instruction.
* Legalize bxor_not into bxor-of-bnot
Same as prior commits. I think this also ended up fixing a bug in the
s390x backend where `bxor_not x y` was actually translated as `bnot
(bxor x y)` by accident given the test update changes.
* Simplify not-fused operands for riscv64
Looks like some delegated-to rules have special-cases for "if this
feature is enabled use the fused instruction" so move the clause for
testing the feature up to the lowering phase to help trigger other rules
if the feature isn't enabled. This should make the riscv64 backend more
consistent with how other backends are implemented.
* Remove B{and,or,xor}Not from cost of egraph metrics
These shouldn't ever reach egraphs now that they're legalized away.
* Add an egraph optimization for `x^-1 => ~x`
This adds a simplification node to translate xor-against-minus-1 to a
`bnot` instruction. This helps trigger various other optimizations in
the egraph implementation and also various backend lowering rules for
instructions. This is chiefly useful as wasm doesn't have a `bnot`
equivalent, so it's encoded as `x^-1`.
* Add a wasm test for end-to-end bitwise lowerings
Test that end-to-end various optimizations are being applied for input
wasm modules.
* Specifically don't self-update rustup on CI
I forget why this was here originally, but this is failing on Windows
CI. In general there's no need to update rustup, so leave it as-is.
* Clean up some aarch64 lowering rules
Previously a 32/64 split was necessary due to the `ALUOp` being different,
but that's been refactored away now, so there's no longer any need for
duplicate rules.
* Narrow a x64 lowering rule
This previously made more sense when it was `band_not` and rarely used,
but be more specific in the type-filter on this rule that it's only
applicable to SIMD types with lanes.
* Simplify xor-against-minus-1 rule
No need to have the commutative version since constants are already
shuffled to the right-hand side for egraphs.
* Optimize band-of-bnot when bnot is on the left
Use some more rules in the egraph algebraic optimizations to
canonicalize band/bor/bxor with a `bnot` operand to put the operand on
the right. That way the lowerings in the backends only have to list the
rule once, with the operand on the right, to optimize both styles of
input.
* Add commutative lowering rules
* Update cranelift/codegen/src/isa/x64/lower.isle
Co-authored-by: Jamey Sharp <jamey@minilop.net>
---------
Co-authored-by: Jamey Sharp <jamey@minilop.net>
This commit is contained in:
@@ -40,14 +40,14 @@
|
||||
(rule 2 (lower (has_type (fits_in_64 ty) (iadd (imm12_from_value x) y)))
|
||||
(alu_rr_imm12 (select_addi ty) y x))
|
||||
|
||||
(rule
|
||||
(rule
|
||||
(lower (has_type $I128 (iadd x y)))
|
||||
(let
|
||||
( ;; low part.
|
||||
(low Reg (alu_add (value_regs_get x 0) (value_regs_get y 0)))
|
||||
;; compute carry.
|
||||
(carry Reg (alu_rrr (AluOPRRR.SltU) low (value_regs_get y 0)))
|
||||
;;
|
||||
;;
|
||||
(high_tmp Reg (alu_add (value_regs_get x 1) (value_regs_get y 1)))
|
||||
;; add carry.
|
||||
(high Reg (alu_add high_tmp carry)))
|
||||
@@ -158,19 +158,19 @@
|
||||
(alu_rrr (AluOPRRR.Remuw) (ext_int_if_need $false x ty) y2)))
|
||||
|
||||
(rule -1 (lower (has_type (fits_in_16 ty) (srem x y)))
|
||||
(let
|
||||
(let
|
||||
((y2 Reg (ext_int_if_need $true y ty))
|
||||
(_ InstOutput (gen_div_by_zero y2)))
|
||||
(alu_rrr (AluOPRRR.Remw) (ext_int_if_need $true x ty) y2)))
|
||||
|
||||
(rule (lower (has_type $I32 (srem x y)))
|
||||
(let
|
||||
(let
|
||||
((y2 Reg (ext_int_if_need $true y $I32))
|
||||
(_ InstOutput (gen_div_by_zero y2)))
|
||||
(alu_rrr (AluOPRRR.Remw) x y2)))
|
||||
|
||||
(rule (lower (has_type $I32 (urem x y)))
|
||||
(let
|
||||
(let
|
||||
((y2 Reg (ext_int_if_need $false y $I32))
|
||||
(_ InstOutput (gen_div_by_zero y2)))
|
||||
(alu_rrr (AluOPRRR.Remuw) x y2)))
|
||||
@@ -204,6 +204,29 @@
|
||||
(rule (lower (has_type $F64 (band x y)))
|
||||
(lower_float_binary (AluOPRRR.And) x y $F64))
|
||||
|
||||
;; Specialized lowerings for `(band x (bnot y))` which is additionally produced
|
||||
;; by Cranelift's `band_not` instruction that is legalized into the simpler
|
||||
;; forms early on.
|
||||
|
||||
(rule 3 (lower (has_type (fits_in_64 ty) (band x (bnot y))))
|
||||
(if-let $true (has_b))
|
||||
(gen_andn x y))
|
||||
(rule 4 (lower (has_type (fits_in_64 ty) (band (bnot y) x)))
|
||||
(if-let $true (has_b))
|
||||
(gen_andn x y))
|
||||
(rule 5 (lower (has_type $I128 (band x (bnot y))))
|
||||
(if-let $true (has_b))
|
||||
(let
|
||||
((low Reg (gen_andn (value_regs_get x 0) (value_regs_get y 0)))
|
||||
(high Reg (gen_andn (value_regs_get x 1) (value_regs_get y 1))))
|
||||
(value_regs low high)))
|
||||
(rule 6 (lower (has_type $I128 (band (bnot y) x)))
|
||||
(if-let $true (has_b))
|
||||
(let
|
||||
((low Reg (gen_andn (value_regs_get x 0) (value_regs_get y 0)))
|
||||
(high Reg (gen_andn (value_regs_get x 1) (value_regs_get y 1))))
|
||||
(value_regs low high)))
|
||||
|
||||
|
||||
;;;; Rules for `or` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
(rule -1 (lower (has_type (fits_in_64 ty) (bor x y)))
|
||||
@@ -222,6 +245,30 @@
|
||||
(rule (lower (has_type $F64 (bor x y)))
|
||||
(lower_float_binary (AluOPRRR.Or) x y $F64))
|
||||
|
||||
;; Specialized lowerings for `(bor x (bnot y))` which is additionally produced
|
||||
;; by Cranelift's `bor_not` instruction that is legalized into the simpler
|
||||
;; forms early on.
|
||||
|
||||
(rule 3 (lower (has_type (fits_in_64 ty) (bor x (bnot y))))
|
||||
(if-let $true (has_b))
|
||||
(gen_orn x y))
|
||||
(rule 4 (lower (has_type (fits_in_64 ty) (bor (bnot y) x)))
|
||||
(if-let $true (has_b))
|
||||
(gen_orn x y))
|
||||
|
||||
(rule 5 (lower (has_type $I128 (bor x (bnot y))))
|
||||
(if-let $true (has_b))
|
||||
(let
|
||||
((low Reg (gen_orn (value_regs_get x 0) (value_regs_get y 0)))
|
||||
(high Reg (gen_orn (value_regs_get x 1) (value_regs_get y 1))))
|
||||
(value_regs low high)))
|
||||
(rule 6 (lower (has_type $I128 (bor (bnot y) x)))
|
||||
(if-let $true (has_b))
|
||||
(let
|
||||
((low Reg (gen_orn (value_regs_get x 0) (value_regs_get y 0)))
|
||||
(high Reg (gen_orn (value_regs_get x 1) (value_regs_get y 1))))
|
||||
(value_regs low high)))
|
||||
|
||||
|
||||
;;;; Rules for `xor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
(rule -1 (lower (has_type (fits_in_64 ty) (bxor x y)))
|
||||
@@ -289,15 +336,6 @@
|
||||
(lower_extend x $true (ty_bits in) (ty_bits out)))
|
||||
|
||||
|
||||
;;;; Rules for `band_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
(rule (lower (has_type (fits_in_64 ty) (band_not x y)))
|
||||
(gen_andn x y))
|
||||
(rule 1 (lower (has_type $I128 (band_not x y)))
|
||||
(let
|
||||
((low Reg (gen_andn (value_regs_get x 0) (value_regs_get y 0)))
|
||||
(high Reg (gen_andn (value_regs_get x 1) (value_regs_get y 1))))
|
||||
(value_regs low high)))
|
||||
|
||||
;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
(rule (lower (has_type (fits_in_64 ty) (popcnt x)))
|
||||
(lower_popcnt x ty))
|
||||
@@ -397,29 +435,6 @@
|
||||
(lower_i128_rotr x y))
|
||||
|
||||
|
||||
;;;; Rules for `bxor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; notice x y order!!!
|
||||
(rule (lower (has_type (fits_in_64 ty) (bxor_not x y)))
|
||||
(gen_xor_not x y))
|
||||
(rule 1 (lower (has_type $I128 (bxor_not x y)))
|
||||
(let
|
||||
((low Reg (gen_xor_not (value_regs_get x 0) (value_regs_get y 0)))
|
||||
(high Reg (gen_xor_not (value_regs_get x 1) (value_regs_get y 1))))
|
||||
(value_regs low high)
|
||||
)
|
||||
)
|
||||
|
||||
;;;; Rules for `bor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
(rule (lower (has_type (fits_in_64 ty) (bor_not x y)))
|
||||
(gen_orn x y))
|
||||
|
||||
(rule 1 (lower (has_type $I128 (bor_not x y)))
|
||||
(let
|
||||
((low Reg (gen_orn (value_regs_get x 0) (value_regs_get y 0)))
|
||||
(high Reg (gen_orn (value_regs_get x 1) (value_regs_get y 1))))
|
||||
(value_regs low high)))
|
||||
|
||||
|
||||
;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
(rule (lower (has_type (fits_in_64 ty) (cls x)))
|
||||
(lower_cls x ty))
|
||||
@@ -428,12 +443,12 @@
|
||||
|
||||
|
||||
;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
(rule
|
||||
(rule
|
||||
(lower (has_type ty (fabs x)))
|
||||
(gen_fabs x ty))
|
||||
|
||||
;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
(rule
|
||||
(rule
|
||||
(lower (has_type ty (fneg x)))
|
||||
(fpu_rrr (f_copy_neg_sign_op ty) ty x x))
|
||||
|
||||
@@ -458,35 +473,35 @@
|
||||
|
||||
;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
(rule -1
|
||||
;;
|
||||
(lower
|
||||
;;
|
||||
(lower
|
||||
(has_type (valid_atomic_transaction ty) (atomic_rmw flags op addr x)))
|
||||
(gen_atomic (get_atomic_rmw_op ty op) addr x (atomic_amo)))
|
||||
|
||||
;;; for I8 and I16
|
||||
(rule 1
|
||||
(lower
|
||||
(lower
|
||||
(has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags op addr x)))
|
||||
(gen_atomic_rmw_loop op ty addr x))
|
||||
|
||||
;;; Special case for I8 and I16 max, min, etc.,
|
||||
;;; because the value needs to be zero-extended or sign-extended first.
|
||||
(rule 2
|
||||
(lower
|
||||
(lower
|
||||
(has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags (is_atomic_rmw_max_etc op $true) addr x)))
|
||||
(gen_atomic_rmw_loop op ty addr (ext_int_if_need $true x ty)))
|
||||
|
||||
|
||||
(rule 2
|
||||
;;
|
||||
(lower
|
||||
(lower
|
||||
(has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags (is_atomic_rmw_max_etc op $false) addr x)))
|
||||
;;
|
||||
(gen_atomic_rmw_loop op ty addr (ext_int_if_need $false x ty)))
|
||||
|
||||
;;;;; Rules for `AtomicRmwOp.Sub`
|
||||
(rule
|
||||
(lower
|
||||
(lower
|
||||
(has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Sub) addr x)))
|
||||
(let
|
||||
((tmp WritableReg (temp_writable_reg ty))
|
||||
@@ -504,7 +519,7 @@
|
||||
|
||||
;;;;; Rules for `AtomicRmwOp.Nand`
|
||||
(rule
|
||||
(lower
|
||||
(lower
|
||||
(has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Nand) addr x)))
|
||||
(gen_atomic_rmw_loop (AtomicRmwOp.Nand) ty addr x))
|
||||
|
||||
@@ -512,13 +527,13 @@
|
||||
(extern extractor is_atomic_rmw_max_etc is_atomic_rmw_max_etc)
|
||||
|
||||
;;;;; Rules for `atomic load`;;;;;;;;;;;;;;;;;
|
||||
(rule
|
||||
(rule
|
||||
(lower (has_type (valid_atomic_transaction ty) (atomic_load flags p)))
|
||||
(gen_atomic_load p ty))
|
||||
|
||||
|
||||
;;;;; Rules for `atomic store`;;;;;;;;;;;;;;;;;
|
||||
(rule
|
||||
(rule
|
||||
(lower (atomic_store flags src @ (value_type (valid_atomic_transaction ty)) p))
|
||||
(gen_atomic_store p ty src))
|
||||
|
||||
@@ -562,37 +577,37 @@
|
||||
|
||||
|
||||
;;;;; Rules for float arithmetic
|
||||
(rule
|
||||
(rule
|
||||
(lower (has_type ty (fadd x y)))
|
||||
(fpu_rrr (f_arithmatic_op ty (Opcode.Fadd)) ty x y))
|
||||
(rule
|
||||
(rule
|
||||
(lower (has_type ty (fsub x y)))
|
||||
(fpu_rrr (f_arithmatic_op ty (Opcode.Fsub)) ty x y))
|
||||
(rule
|
||||
(rule
|
||||
(lower (has_type ty (fmul x y)))
|
||||
(fpu_rrr (f_arithmatic_op ty (Opcode.Fmul)) ty x y))
|
||||
(rule
|
||||
(rule
|
||||
(lower (has_type ty (fdiv x y)))
|
||||
(fpu_rrr (f_arithmatic_op ty (Opcode.Fdiv)) ty x y))
|
||||
|
||||
(rule
|
||||
(rule
|
||||
(lower (has_type ty (fmin x y)))
|
||||
(gen_float_select (FloatSelectOP.Min) x y ty))
|
||||
|
||||
(rule
|
||||
(rule
|
||||
(lower (has_type ty (fmin_pseudo x y)))
|
||||
(gen_float_select_pseudo (FloatSelectOP.Min) x y ty))
|
||||
|
||||
(rule
|
||||
(rule
|
||||
(lower (has_type ty (fmax x y)))
|
||||
(gen_float_select (FloatSelectOP.Max) x y ty))
|
||||
|
||||
(rule
|
||||
(rule
|
||||
(lower (has_type ty (fmax_pseudo x y)))
|
||||
(gen_float_select_pseudo (FloatSelectOP.Max) x y ty))
|
||||
|
||||
;;;;; Rules for `stack_addr`;;;;;;;;;
|
||||
(rule
|
||||
(rule
|
||||
(lower (stack_addr ss offset))
|
||||
(gen_stack_addr ss offset))
|
||||
|
||||
@@ -624,7 +639,7 @@
|
||||
(gen_bitselect ty c x y))
|
||||
|
||||
;;;;; Rules for `isplit`;;;;;;;;;
|
||||
(rule
|
||||
(rule
|
||||
(lower (isplit x))
|
||||
(let
|
||||
((t1 Reg (gen_move2 (value_regs_get x 0) $I64 $I64))
|
||||
@@ -632,7 +647,7 @@
|
||||
(output_pair t1 t2)))
|
||||
|
||||
;;;;; Rules for `iconcat`;;;;;;;;;
|
||||
(rule
|
||||
(rule
|
||||
(lower (has_type $I128 (iconcat x y)))
|
||||
(let
|
||||
((t1 Reg (gen_move2 x $I64 $I64))
|
||||
@@ -716,16 +731,16 @@
|
||||
(gen_load_128 p offset flags))
|
||||
|
||||
;;;;; Rules for `istore8`;;;;;;;;;
|
||||
(rule
|
||||
(rule
|
||||
(lower (istore8 flags x p offset))
|
||||
(gen_store p offset (StoreOP.Sb) flags x))
|
||||
;;;;; Rules for `istore16`;;;;;;;;;
|
||||
(rule
|
||||
(rule
|
||||
(lower (istore16 flags x p offset))
|
||||
(gen_store p offset (StoreOP.Sh) flags x))
|
||||
|
||||
;;;;; Rules for `istore32`;;;;;;;;;
|
||||
(rule
|
||||
(rule
|
||||
(lower (istore32 flags x p offset))
|
||||
(gen_store p offset (StoreOP.Sw) flags x))
|
||||
|
||||
@@ -803,22 +818,22 @@
|
||||
(gen_moves v in_ty out))
|
||||
|
||||
;;;;; Rules for `ceil`;;;;;;;;;
|
||||
(rule
|
||||
(rule
|
||||
(lower (has_type ty (ceil x)))
|
||||
(gen_float_round (FloatRoundOP.Ceil) x ty)
|
||||
)
|
||||
|
||||
;;;;; Rules for `floor`;;;;;;;;;
|
||||
(rule
|
||||
(rule
|
||||
(lower (has_type ty (floor x)))
|
||||
(gen_float_round (FloatRoundOP.Floor) x ty))
|
||||
;;;;; Rules for `trunc`;;;;;;;;;
|
||||
(rule
|
||||
(rule
|
||||
(lower (has_type ty (trunc x)))
|
||||
(gen_float_round (FloatRoundOP.Trunc) x ty))
|
||||
|
||||
;;;;; Rules for `nearest`;;;;;;;;;
|
||||
(rule
|
||||
(rule
|
||||
(lower (has_type ty (nearest x)))
|
||||
(gen_float_round (FloatRoundOP.Nearest) x ty))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user