diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 3da3aa66c5..d5d8c572a9 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -4301,6 +4301,10 @@ (convert IntCC CC intcc_to_cc) (convert AtomicRmwOp MachAtomicRmwOp atomic_rmw_op_to_mach_atomic_rmw_op) +(convert SinkableLoad RegMem sink_load) +(convert SinkableLoad GprMemImm sink_load_to_gpr_mem_imm) +(convert SinkableLoad XmmMem sink_load_to_xmm_mem) + (decl reg_to_xmm_mem (Reg) XmmMem) (rule (reg_to_xmm_mem r) (xmm_to_xmm_mem (xmm_new r))) diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index d8a89c2af0..2dd9fc1bfe 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -46,29 +46,15 @@ (iadd x y))) (x64_add ty x y)) -;; Add a register and an immediate. +;; The above case handles when the rhs is an immediate or a sinkable load, but +;; additionally handle the lhs meeting these criteria. (rule -4 (lower (has_type (fits_in_64 ty) - (iadd x (simm32_from_value y)))) - (x64_add ty x y)) - -(rule -3 (lower (has_type (fits_in_64 ty) (iadd (simm32_from_value x) y))) (x64_add ty y x)) - -;; Add a register and memory. - -(rule -2 (lower (has_type (fits_in_64 ty) - (iadd x (sinkable_load y)))) - (x64_add ty - x - (sink_load_to_gpr_mem_imm y))) - -(rule -1 (lower (has_type (fits_in_64 ty) +(rule -3 (lower (has_type (fits_in_64 ty) (iadd (sinkable_load x) y))) - (x64_add ty - y - (sink_load_to_gpr_mem_imm x))) + (x64_add ty y x)) ;; SSE. @@ -144,17 +130,6 @@ (isub x y))) (x64_sub ty x y)) -;; Sub a register and an immediate. -(rule -2 (lower (has_type (fits_in_64 ty) - (isub x (simm32_from_value y)))) - (x64_sub ty x y)) - -;; Sub a register and memory. -(rule -1 (lower (has_type (fits_in_64 ty) - (isub x (sinkable_load y)))) - (x64_sub ty x - (sink_load_to_gpr_mem_imm y))) - ;; SSE. 
(rule (lower (has_type (multi_lane 8 16) @@ -216,26 +191,14 @@ (if (ty_int_ref_scalar_64 ty)) (x64_and ty x y)) -;; And with a memory operand. +;; The above case automatically handles when the rhs is an immediate or a +;; sinkable load, but additionally handle the lhs here. -(rule 1 (lower (has_type ty (band x (sinkable_load y)))) +(rule 1 (lower (has_type ty (band (sinkable_load x) y))) (if (ty_int_ref_scalar_64 ty)) - (x64_and ty x - (sink_load_to_gpr_mem_imm y))) + (x64_and ty y x)) -(rule 2 (lower (has_type ty (band (sinkable_load x) y))) - (if (ty_int_ref_scalar_64 ty)) - (x64_and ty - y - (sink_load_to_gpr_mem_imm x))) - -;; And with an immediate. - -(rule 3 (lower (has_type ty (band x (simm32_from_value y)))) - (if (ty_int_ref_scalar_64 ty)) - (x64_and ty x y)) - -(rule 4 (lower (has_type ty (band (simm32_from_value x) y))) +(rule 2 (lower (has_type ty (band (simm32_from_value x) y))) (if (ty_int_ref_scalar_64 ty)) (x64_and ty y x)) @@ -308,25 +271,14 @@ (if (ty_int_ref_scalar_64 ty)) (x64_or ty x y)) -;; Or with a memory operand. +;; Handle immediates/sinkable loads on the lhs in addition to the automatic +;; handling of the rhs above. -(rule 1 (lower (has_type ty (bor x (sinkable_load y)))) +(rule 1 (lower (has_type ty (bor (sinkable_load x) y))) (if (ty_int_ref_scalar_64 ty)) - (x64_or ty x - (sink_load_to_gpr_mem_imm y))) + (x64_or ty y x)) -(rule 2 (lower (has_type ty (bor (sinkable_load x) y))) - (if (ty_int_ref_scalar_64 ty)) - (x64_or ty y - (sink_load_to_gpr_mem_imm x))) - -;; Or with an immediate. - -(rule 3 (lower (has_type ty (bor x (simm32_from_value y)))) - (if (ty_int_ref_scalar_64 ty)) - (x64_or ty x y)) - -(rule 4 (lower (has_type ty (bor (simm32_from_value x) y))) +(rule 2 (lower (has_type ty (bor (simm32_from_value x) y))) (if (ty_int_ref_scalar_64 ty)) (x64_or ty y x)) @@ -371,23 +323,12 @@ (if (ty_int_ref_scalar_64 ty)) (x64_xor ty x y)) -;; Xor with a memory operand. 
+;; Handle xor with lhs immediates/sinkable loads in addition to the automatic +;; handling of the rhs above. -(rule 1 (lower (has_type ty (bxor x (sinkable_load y)))) +(rule 1 (lower (has_type ty (bxor (sinkable_load x) y))) (if (ty_int_ref_scalar_64 ty)) - (x64_xor ty x - (sink_load_to_gpr_mem_imm y))) - -(rule 2 (lower (has_type ty (bxor (sinkable_load x) y))) - (if (ty_int_ref_scalar_64 ty)) - (x64_xor ty y - (sink_load_to_gpr_mem_imm x))) - -;; Xor with an immediate. - -(rule 3 (lower (has_type ty (bxor x (simm32_from_value y)))) - (if (ty_int_ref_scalar_64 ty)) - (x64_xor ty x y)) + (x64_xor ty y x)) (rule 4 (lower (has_type ty (bxor (simm32_from_value x) y))) (if (ty_int_ref_scalar_64 ty)) @@ -841,28 +782,15 @@ (rule -5 (lower (has_type (fits_in_64 ty) (imul x y))) (x64_mul ty x y)) -;; Multiply a register and an immediate. - -(rule -3 (lower (has_type (fits_in_64 ty) - (imul x (simm32_from_value y)))) - (x64_mul ty x y)) +;; Handle multiplication where the lhs is an immediate or sinkable load in +;; addition to the automatic rhs handling above. (rule -4 (lower (has_type (fits_in_64 ty) (imul (simm32_from_value x) y))) (x64_mul ty y x)) - -;; Multiply a register and a memory load. - -(rule -2 (lower (has_type (fits_in_64 ty) - (imul x (sinkable_load y)))) - (x64_mul ty - x - (sink_load_to_gpr_mem_imm y))) - -(rule -1 (lower (has_type (fits_in_64 ty) +(rule -3 (lower (has_type (fits_in_64 ty) (imul (sinkable_load x) y))) - (x64_mul ty y - (sink_load_to_gpr_mem_imm x))) + (x64_mul ty y x)) ;; `i128`. @@ -1459,32 +1387,19 @@ (x64_add_with_flags_paired ty a b) (trap_if (CC.B) tc))) -;; Add a register and an immediate. +;; Handle lhs immediates/sinkable loads in addition to the automatic rhs +;; handling above. 
(rule 1 (lower (has_type (fits_in_64 ty) - (uadd_overflow_trap a (simm32_from_value b) tc))) - (with_flags - (x64_add_with_flags_paired ty a b) - (trap_if (CC.B) tc))) - -(rule 2 (lower (has_type (fits_in_64 ty) (uadd_overflow_trap (simm32_from_value a) b tc))) (with_flags (x64_add_with_flags_paired ty b a) (trap_if (CC.B) tc))) -;; Add a register and memory. - -(rule 3 (lower (has_type (fits_in_64 ty) - (uadd_overflow_trap a (sinkable_load b) tc))) - (with_flags - (x64_add_with_flags_paired ty a (sink_load_to_gpr_mem_imm b)) - (trap_if (CC.B) tc))) - -(rule 4 (lower (has_type (fits_in_64 ty) +(rule 2 (lower (has_type (fits_in_64 ty) (uadd_overflow_trap (sinkable_load a) b tc))) (with_flags - (x64_add_with_flags_paired ty b (sink_load_to_gpr_mem_imm a)) + (x64_add_with_flags_paired ty b a) ;; sinkable load `a` is converted implicitly via `sink_load_to_gpr_mem_imm`. (trap_if (CC.B) tc))) ;;;; Rules for `resumable_trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -3603,9 +3518,9 @@ ;; Case 3: when presented with `load + scalar_to_vector`, coalesce into a single ;; MOVSS/MOVSD instruction. (rule 2 (lower (scalar_to_vector (and (sinkable_load src) (value_type (ty_32 _))))) - (x64_movss_load (sink_load_to_xmm_mem src))) + (x64_movss_load src)) ;; sinkable load `src` is converted implicitly via `sink_load_to_xmm_mem`. (rule 3 (lower (scalar_to_vector (and (sinkable_load src) (value_type (ty_64 _))))) - (x64_movsd_load (sink_load_to_xmm_mem src))) + (x64_movsd_load src)) ;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;