x64: Tidy up some handling of sinkable loads (#5840)

This commit refactors how sinkable loads are handled in the x64 backend.
The intention is to bring most of the sinkable-load handling up to date
with the current state of the backend, which has changed since these
rules were originally introduced; most notably, ISLE now supports
automatic conversions between types. For example, the `Value` type can be
automatically converted to `RegMem` to perform load sinking, but some
rules are still explicitly doing that matching themselves.

Here I've removed explicit handling of immediates and sinkable loads
when they're the right-hand side of an operation. These cases are
already handled by the "base case" when converting a `Value` to a
`RegMemImm`. Instead, only rules explicitly for left-hand-side immediates
and sinkable loads remain. This helps cut down on the number of explicit
rules needed.

Additionally, in the same manner that `Value` can be automatically
converted to `RegMem`, I've added automatic conversions from
`SinkableLoad` to `RegMem` and the various other newtypes. This helps
cut down a bit on rule verbosity, since explicit `sink_load_*` calls are
largely no longer necessary.
This commit is contained in:
Alex Crichton
2023-02-21 12:15:08 -06:00
committed by GitHub
parent 0f51338def
commit e6a5ec3fde
2 changed files with 32 additions and 113 deletions

View File

@@ -4301,6 +4301,10 @@
;; Implicit type conversions. Each `(convert From To term)` form registers
;; `term` as the conversion ISLE applies automatically when a value of type
;; `From` appears where a value of type `To` is expected.
(convert IntCC CC intcc_to_cc)
(convert AtomicRmwOp MachAtomicRmwOp atomic_rmw_op_to_mach_atomic_rmw_op)
;; Conversions from `SinkableLoad` to memory-capable operand types, so rules
;; can pass a matched sinkable load directly without spelling out a
;; `sink_load*` call.
(convert SinkableLoad RegMem sink_load)
(convert SinkableLoad GprMemImm sink_load_to_gpr_mem_imm)
(convert SinkableLoad XmmMem sink_load_to_xmm_mem)
;; Convert a plain `Reg` into an `XmmMem` operand: the register is first
;; wrapped with `xmm_new`, and the result is then passed through
;; `xmm_to_xmm_mem`.
(decl reg_to_xmm_mem (Reg) XmmMem)
(rule (reg_to_xmm_mem r)
(xmm_to_xmm_mem (xmm_new r)))

View File

@@ -46,29 +46,15 @@
(iadd x y)))
(x64_add ty x y))
;; Add a register and an immediate.
;; The above case handles when the rhs is an immediate or a sinkable load, but
;; additionally handle the cases where the lhs meets these criteria.
(rule -4 (lower (has_type (fits_in_64 ty)
(iadd x (simm32_from_value y))))
(x64_add ty x y))
(rule -3 (lower (has_type (fits_in_64 ty)
(iadd (simm32_from_value x) y)))
(x64_add ty y x))
;; Add a register and memory.
(rule -2 (lower (has_type (fits_in_64 ty)
(iadd x (sinkable_load y))))
(x64_add ty
x
(sink_load_to_gpr_mem_imm y)))
(rule -1 (lower (has_type (fits_in_64 ty)
(rule -3 (lower (has_type (fits_in_64 ty)
(iadd (sinkable_load x) y)))
(x64_add ty
y
(sink_load_to_gpr_mem_imm x)))
(x64_add ty y x))
;; SSE.
@@ -144,17 +130,6 @@
(isub x y)))
(x64_sub ty x y))
;; Sub a register and an immediate.
(rule -2 (lower (has_type (fits_in_64 ty)
(isub x (simm32_from_value y))))
(x64_sub ty x y))
;; Sub a register and memory.
(rule -1 (lower (has_type (fits_in_64 ty)
(isub x (sinkable_load y))))
(x64_sub ty x
(sink_load_to_gpr_mem_imm y)))
;; SSE.
(rule (lower (has_type (multi_lane 8 16)
@@ -216,26 +191,14 @@
(if (ty_int_ref_scalar_64 ty))
(x64_and ty x y))
;; And with a memory operand.
;; The above case automatically handles when the rhs is an immediate or a
;; sinkable load, but additionally handle the lhs here.
(rule 1 (lower (has_type ty (band x (sinkable_load y))))
(rule 1 (lower (has_type ty (band (sinkable_load x) y)))
(if (ty_int_ref_scalar_64 ty))
(x64_and ty x
(sink_load_to_gpr_mem_imm y)))
(x64_and ty y x))
(rule 2 (lower (has_type ty (band (sinkable_load x) y)))
(if (ty_int_ref_scalar_64 ty))
(x64_and ty
y
(sink_load_to_gpr_mem_imm x)))
;; And with an immediate.
(rule 3 (lower (has_type ty (band x (simm32_from_value y))))
(if (ty_int_ref_scalar_64 ty))
(x64_and ty x y))
(rule 4 (lower (has_type ty (band (simm32_from_value x) y)))
(rule 2 (lower (has_type ty (band (simm32_from_value x) y)))
(if (ty_int_ref_scalar_64 ty))
(x64_and ty y x))
@@ -308,25 +271,14 @@
(if (ty_int_ref_scalar_64 ty))
(x64_or ty x y))
;; Or with a memory operand.
;; Handle immediates/sinkable loads on the lhs in addition to the automatic
;; handling of the rhs above
(rule 1 (lower (has_type ty (bor x (sinkable_load y))))
(rule 1 (lower (has_type ty (bor (sinkable_load x) y)))
(if (ty_int_ref_scalar_64 ty))
(x64_or ty x
(sink_load_to_gpr_mem_imm y)))
(x64_or ty y x))
(rule 2 (lower (has_type ty (bor (sinkable_load x) y)))
(if (ty_int_ref_scalar_64 ty))
(x64_or ty y
(sink_load_to_gpr_mem_imm x)))
;; Or with an immediate.
(rule 3 (lower (has_type ty (bor x (simm32_from_value y))))
(if (ty_int_ref_scalar_64 ty))
(x64_or ty x y))
(rule 4 (lower (has_type ty (bor (simm32_from_value x) y)))
(rule 2 (lower (has_type ty (bor (simm32_from_value x) y)))
(if (ty_int_ref_scalar_64 ty))
(x64_or ty y x))
@@ -371,23 +323,12 @@
(if (ty_int_ref_scalar_64 ty))
(x64_xor ty x y))
;; Xor with a memory operand.
;; Handle xor with lhs immediates/sinkable loads in addition to the automatic
;; handling of the rhs above.
(rule 1 (lower (has_type ty (bxor x (sinkable_load y))))
(rule 1 (lower (has_type ty (bxor (sinkable_load x) y)))
(if (ty_int_ref_scalar_64 ty))
(x64_xor ty x
(sink_load_to_gpr_mem_imm y)))
(rule 2 (lower (has_type ty (bxor (sinkable_load x) y)))
(if (ty_int_ref_scalar_64 ty))
(x64_xor ty y
(sink_load_to_gpr_mem_imm x)))
;; Xor with an immediate.
(rule 3 (lower (has_type ty (bxor x (simm32_from_value y))))
(if (ty_int_ref_scalar_64 ty))
(x64_xor ty x y))
(x64_xor ty y x))
(rule 4 (lower (has_type ty (bxor (simm32_from_value x) y)))
(if (ty_int_ref_scalar_64 ty))
@@ -841,28 +782,15 @@
(rule -5 (lower (has_type (fits_in_64 ty) (imul x y)))
(x64_mul ty x y))
;; Multiply a register and an immediate.
(rule -3 (lower (has_type (fits_in_64 ty)
(imul x (simm32_from_value y))))
(x64_mul ty x y))
;; Handle multiplication where the lhs is an immediate or sinkable load in
;; addition to the automatic rhs handling above.
(rule -4 (lower (has_type (fits_in_64 ty)
(imul (simm32_from_value x) y)))
(x64_mul ty y x))
;; Multiply a register and a memory load.
(rule -2 (lower (has_type (fits_in_64 ty)
(imul x (sinkable_load y))))
(x64_mul ty
x
(sink_load_to_gpr_mem_imm y)))
(rule -1 (lower (has_type (fits_in_64 ty)
(rule -3 (lower (has_type (fits_in_64 ty)
(imul (sinkable_load x) y)))
(x64_mul ty y
(sink_load_to_gpr_mem_imm x)))
(x64_mul ty y x))
;; `i128`.
@@ -1459,32 +1387,19 @@
(x64_add_with_flags_paired ty a b)
(trap_if (CC.B) tc)))
;; Add a register and an immediate.
;; Handle lhs immediates/sinkable loads in addition to the automatic rhs
;; handling above.
(rule 1 (lower (has_type (fits_in_64 ty)
(uadd_overflow_trap a (simm32_from_value b) tc)))
(with_flags
(x64_add_with_flags_paired ty a b)
(trap_if (CC.B) tc)))
(rule 2 (lower (has_type (fits_in_64 ty)
(uadd_overflow_trap (simm32_from_value a) b tc)))
(with_flags
(x64_add_with_flags_paired ty b a)
(trap_if (CC.B) tc)))
;; Add a register and memory.
(rule 3 (lower (has_type (fits_in_64 ty)
(uadd_overflow_trap a (sinkable_load b) tc)))
(with_flags
(x64_add_with_flags_paired ty a (sink_load_to_gpr_mem_imm b))
(trap_if (CC.B) tc)))
(rule 4 (lower (has_type (fits_in_64 ty)
(rule 2 (lower (has_type (fits_in_64 ty)
(uadd_overflow_trap (sinkable_load a) b tc)))
(with_flags
(x64_add_with_flags_paired ty b (sink_load_to_gpr_mem_imm a))
(x64_add_with_flags_paired ty b a)
(trap_if (CC.B) tc)))
;;;; Rules for `resumable_trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -3603,9 +3518,9 @@
;; Case 3: when presented with `load + scalar_to_vector`, coalesce into a single
;; MOVSS/MOVSD instruction.
(rule 2 (lower (scalar_to_vector (and (sinkable_load src) (value_type (ty_32 _)))))
(x64_movss_load (sink_load_to_xmm_mem src)))
(x64_movss_load src))
(rule 3 (lower (scalar_to_vector (and (sinkable_load src) (value_type (ty_64 _)))))
(x64_movsd_load (sink_load_to_xmm_mem src)))
(x64_movsd_load src))
;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;