From e6a5ec3fdece449a422ce12aff98d4bc844f882f Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 21 Feb 2023 12:15:08 -0600 Subject: [PATCH] x64: Tidy up some handling of sinkable loads (#5840) This commit refactors a bit about how sinkable loads are handled in the x64 backend. The intention is to bring most handling around sinkable loads up to date with the current state of the backend since things have changed since these were originally introduced, namely automatic conversions between types in ISLE. For example the `Value` type can be automatically converted to `RegMem` to perform load sinking, but some rules are still explicitly doing matching themselves. Here I've removed explicit handling of immediates and sinkable loads when they're the right-hand-side of an operation. These cases are already handled by the "base case" when converting a `Value` to a `RegMemImm`. Instead only rules explicitly for left-hand-side immediates and sinkable loads remain. This helps cut down on the number of explicit rules needed. Additionally in the same manner that `Value` can be automatically converted to `RegMem` I've added automatic conversions from `SinkableLoad` to `RegMem` and the various other newtypes. This helps cut down a bit on rule verbosity where `sink_load_*` is largely no longer necessary. 
--- cranelift/codegen/src/isa/x64/inst.isle | 4 + cranelift/codegen/src/isa/x64/lower.isle | 141 +++++------------------ 2 files changed, 32 insertions(+), 113 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 3da3aa66c5..d5d8c572a9 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -4301,6 +4301,10 @@ (convert IntCC CC intcc_to_cc) (convert AtomicRmwOp MachAtomicRmwOp atomic_rmw_op_to_mach_atomic_rmw_op) +(convert SinkableLoad RegMem sink_load) +(convert SinkableLoad GprMemImm sink_load_to_gpr_mem_imm) +(convert SinkableLoad XmmMem sink_load_to_xmm_mem) + (decl reg_to_xmm_mem (Reg) XmmMem) (rule (reg_to_xmm_mem r) (xmm_to_xmm_mem (xmm_new r))) diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index d8a89c2af0..2dd9fc1bfe 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -46,29 +46,15 @@ (iadd x y))) (x64_add ty x y)) -;; Add a register and an immediate. +;; The above case handles when the rhs is an immediate or a sinkable load, but +;; additionally handle the case where the lhs meets these criteria. (rule -4 (lower (has_type (fits_in_64 ty) - (iadd x (simm32_from_value y)))) - (x64_add ty x y)) - -(rule -3 (lower (has_type (fits_in_64 ty) (iadd (simm32_from_value x) y))) (x64_add ty y x)) - -;; Add a register and memory. - -(rule -2 (lower (has_type (fits_in_64 ty) - (iadd x (sinkable_load y)))) - (x64_add ty - x - (sink_load_to_gpr_mem_imm y))) - -(rule -1 (lower (has_type (fits_in_64 ty) +(rule -3 (lower (has_type (fits_in_64 ty) (iadd (sinkable_load x) y))) - (x64_add ty - y - (sink_load_to_gpr_mem_imm x))) + (x64_add ty y x)) ;; SSE. @@ -144,17 +130,6 @@ (isub x y))) (x64_sub ty x y)) -;; Sub a register and an immediate. -(rule -2 (lower (has_type (fits_in_64 ty) - (isub x (simm32_from_value y)))) - (x64_sub ty x y)) - -;; Sub a register and memory. 
-(rule -1 (lower (has_type (fits_in_64 ty) - (isub x (sinkable_load y)))) - (x64_sub ty x - (sink_load_to_gpr_mem_imm y))) - ;; SSE. (rule (lower (has_type (multi_lane 8 16) @@ -216,26 +191,14 @@ (if (ty_int_ref_scalar_64 ty)) (x64_and ty x y)) -;; And with a memory operand. +;; The above case automatically handles when the rhs is an immediate or a +;; sinkable load, but additionally handle the lhs here. -(rule 1 (lower (has_type ty (band x (sinkable_load y)))) +(rule 1 (lower (has_type ty (band (sinkable_load x) y))) (if (ty_int_ref_scalar_64 ty)) - (x64_and ty x - (sink_load_to_gpr_mem_imm y))) + (x64_and ty y x)) -(rule 2 (lower (has_type ty (band (sinkable_load x) y))) - (if (ty_int_ref_scalar_64 ty)) - (x64_and ty - y - (sink_load_to_gpr_mem_imm x))) - -;; And with an immediate. - -(rule 3 (lower (has_type ty (band x (simm32_from_value y)))) - (if (ty_int_ref_scalar_64 ty)) - (x64_and ty x y)) - -(rule 4 (lower (has_type ty (band (simm32_from_value x) y))) +(rule 2 (lower (has_type ty (band (simm32_from_value x) y))) (if (ty_int_ref_scalar_64 ty)) (x64_and ty y x)) @@ -308,25 +271,14 @@ (if (ty_int_ref_scalar_64 ty)) (x64_or ty x y)) -;; Or with a memory operand. +;; Handle immediates/sinkable loads on the lhs in addition to the automatic +;; handling of the rhs above -(rule 1 (lower (has_type ty (bor x (sinkable_load y)))) +(rule 1 (lower (has_type ty (bor (sinkable_load x) y))) (if (ty_int_ref_scalar_64 ty)) - (x64_or ty x - (sink_load_to_gpr_mem_imm y))) + (x64_or ty y x)) -(rule 2 (lower (has_type ty (bor (sinkable_load x) y))) - (if (ty_int_ref_scalar_64 ty)) - (x64_or ty y - (sink_load_to_gpr_mem_imm x))) - -;; Or with an immediate. 
- -(rule 3 (lower (has_type ty (bor x (simm32_from_value y)))) - (if (ty_int_ref_scalar_64 ty)) - (x64_or ty x y)) - -(rule 4 (lower (has_type ty (bor (simm32_from_value x) y))) +(rule 2 (lower (has_type ty (bor (simm32_from_value x) y))) (if (ty_int_ref_scalar_64 ty)) (x64_or ty y x)) @@ -371,23 +323,12 @@ (if (ty_int_ref_scalar_64 ty)) (x64_xor ty x y)) -;; Xor with a memory operand. +;; Handle xor with lhs immediates/sinkable loads in addition to the automatic +;; handling of the rhs above. -(rule 1 (lower (has_type ty (bxor x (sinkable_load y)))) +(rule 1 (lower (has_type ty (bxor (sinkable_load x) y))) (if (ty_int_ref_scalar_64 ty)) - (x64_xor ty x - (sink_load_to_gpr_mem_imm y))) - -(rule 2 (lower (has_type ty (bxor (sinkable_load x) y))) - (if (ty_int_ref_scalar_64 ty)) - (x64_xor ty y - (sink_load_to_gpr_mem_imm x))) - -;; Xor with an immediate. - -(rule 3 (lower (has_type ty (bxor x (simm32_from_value y)))) - (if (ty_int_ref_scalar_64 ty)) - (x64_xor ty x y)) + (x64_xor ty y x)) (rule 4 (lower (has_type ty (bxor (simm32_from_value x) y))) (if (ty_int_ref_scalar_64 ty)) @@ -841,28 +782,15 @@ (rule -5 (lower (has_type (fits_in_64 ty) (imul x y))) (x64_mul ty x y)) -;; Multiply a register and an immediate. - -(rule -3 (lower (has_type (fits_in_64 ty) - (imul x (simm32_from_value y)))) - (x64_mul ty x y)) +;; Handle multiplication where the lhs is an immediate or sinkable load in +;; addition to the automatic rhs handling above. (rule -4 (lower (has_type (fits_in_64 ty) (imul (simm32_from_value x) y))) (x64_mul ty y x)) - -;; Multiply a register and a memory load. - -(rule -2 (lower (has_type (fits_in_64 ty) - (imul x (sinkable_load y)))) - (x64_mul ty - x - (sink_load_to_gpr_mem_imm y))) - -(rule -1 (lower (has_type (fits_in_64 ty) +(rule -3 (lower (has_type (fits_in_64 ty) (imul (sinkable_load x) y))) - (x64_mul ty y - (sink_load_to_gpr_mem_imm x))) + (x64_mul ty y x)) ;; `i128`. 
@@ -1459,32 +1387,19 @@ (x64_add_with_flags_paired ty a b) (trap_if (CC.B) tc))) -;; Add a register and an immediate. +;; Handle lhs immediates/sinkable loads in addition to the automatic rhs +;; handling above. (rule 1 (lower (has_type (fits_in_64 ty) - (uadd_overflow_trap a (simm32_from_value b) tc))) - (with_flags - (x64_add_with_flags_paired ty a b) - (trap_if (CC.B) tc))) - -(rule 2 (lower (has_type (fits_in_64 ty) (uadd_overflow_trap (simm32_from_value a) b tc))) (with_flags (x64_add_with_flags_paired ty b a) (trap_if (CC.B) tc))) -;; Add a register and memory. - -(rule 3 (lower (has_type (fits_in_64 ty) - (uadd_overflow_trap a (sinkable_load b) tc))) - (with_flags - (x64_add_with_flags_paired ty a (sink_load_to_gpr_mem_imm b)) - (trap_if (CC.B) tc))) - -(rule 4 (lower (has_type (fits_in_64 ty) +(rule 2 (lower (has_type (fits_in_64 ty) (uadd_overflow_trap (sinkable_load a) b tc))) (with_flags - (x64_add_with_flags_paired ty b (sink_load_to_gpr_mem_imm a)) + (x64_add_with_flags_paired ty b a) (trap_if (CC.B) tc))) ;;;; Rules for `resumable_trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -3603,9 +3518,9 @@ ;; Case 3: when presented with `load + scalar_to_vector`, coalesce into a single ;; MOVSS/MOVSD instruction. (rule 2 (lower (scalar_to_vector (and (sinkable_load src) (value_type (ty_32 _))))) - (x64_movss_load (sink_load_to_xmm_mem src))) + (x64_movss_load src)) (rule 3 (lower (scalar_to_vector (and (sinkable_load src) (value_type (ty_64 _))))) - (x64_movsd_load (sink_load_to_xmm_mem src))) + (x64_movsd_load src)) ;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;