s390x: Remove uses of copy_reg (#6253)
* Remove uses of `copy_reg` in s390x lowerings
* Update tests
* Add one copy back in for smulhi to avoid an inserted move
This commit is contained in:
@@ -408,8 +408,8 @@
|
||||
(y_hi Reg (vec_extract_lane $I64X2 y 0 (zero_reg)))
|
||||
(y_lo Reg (vec_extract_lane $I64X2 y 1 (zero_reg)))
|
||||
(lo_pair RegPair (umul_wide x_lo y_lo))
|
||||
(res_lo Reg (copy_reg $I64 (regpair_lo lo_pair)))
|
||||
(res_hi_1 Reg (copy_reg $I64 (regpair_hi lo_pair)))
|
||||
(res_lo Reg (regpair_lo lo_pair))
|
||||
(res_hi_1 Reg (regpair_hi lo_pair))
|
||||
(res_hi_2 Reg (mul_reg $I64 x_lo y_hi))
|
||||
(res_hi_3 Reg (mul_reg $I64 x_hi y_lo))
|
||||
(res_hi Reg (add_reg $I64 res_hi_3 (add_reg $I64 res_hi_2 res_hi_1))))
|
||||
@@ -435,7 +435,7 @@
|
||||
;; Multiply high part unsigned, 64-bit types. (Uses umul_wide.)
|
||||
(rule (lower (has_type $I64 (umulhi x y)))
|
||||
(let ((pair RegPair (umul_wide x y)))
|
||||
(copy_reg $I64 (regpair_hi pair))))
|
||||
(regpair_hi pair)))
|
||||
|
||||
;; Multiply high part unsigned, vector types with 8-, 16-, or 32-bit elements.
|
||||
(rule (lower (has_type $I8X16 (umulhi x y))) (vec_umulhi $I8X16 x y))
|
||||
@@ -447,10 +447,10 @@
|
||||
(rule (lower (has_type $I64X2 (umulhi x y)))
|
||||
(let ((pair_0 RegPair (umul_wide (vec_extract_lane $I64X2 x 0 (zero_reg))
|
||||
(vec_extract_lane $I64X2 y 0 (zero_reg))))
|
||||
(res_0 Reg (copy_reg $I64 (regpair_hi pair_0)))
|
||||
(res_0 Reg (regpair_hi pair_0))
|
||||
(pair_1 RegPair (umul_wide (vec_extract_lane $I64X2 x 1 (zero_reg))
|
||||
(vec_extract_lane $I64X2 y 1 (zero_reg))))
|
||||
(res_1 Reg (copy_reg $I64 (regpair_hi pair_1))))
|
||||
(res_1 Reg (regpair_hi pair_1)))
|
||||
(mov_to_vec128 $I64X2 res_0 res_1)))
|
||||
|
||||
|
||||
@@ -473,7 +473,7 @@
|
||||
;; Multiply high part signed, 64-bit types. (Uses smul_wide.)
|
||||
(rule (lower (has_type $I64 (smulhi x y)))
|
||||
(let ((pair RegPair (smul_wide x y)))
|
||||
(copy_reg $I64 (regpair_hi pair))))
|
||||
(regpair_hi pair)))
|
||||
|
||||
;; Multiply high part signed, vector types with 8-, 16-, or 32-bit elements.
|
||||
(rule (lower (has_type $I8X16 (smulhi x y))) (vec_smulhi $I8X16 x y))
|
||||
@@ -488,7 +488,7 @@
|
||||
(res_0 Reg (copy_reg $I64 (regpair_hi pair_0)))
|
||||
(pair_1 RegPair (smul_wide (vec_extract_lane $I64X2 x 1 (zero_reg))
|
||||
(vec_extract_lane $I64X2 y 1 (zero_reg))))
|
||||
(res_1 Reg (copy_reg $I64 (regpair_hi pair_1))))
|
||||
(res_1 Reg (regpair_hi pair_1)))
|
||||
(mov_to_vec128 $I64X2 res_0 res_1)))
|
||||
|
||||
|
||||
@@ -547,7 +547,7 @@
|
||||
;; Emit the actual divide instruction.
|
||||
(pair RegPair (udivmod ext_ty ext_x ext_y)))
|
||||
;; The quotient can be found in the low half of the result.
|
||||
(copy_reg ty (regpair_lo pair))))
|
||||
(regpair_lo pair)))
|
||||
|
||||
;; Implement `urem`. Same as `udiv`, but finds the remainder in
|
||||
;; the high half of the result register pair instead.
|
||||
@@ -557,7 +557,7 @@
|
||||
(ext_y Reg (put_in_reg_zext32 y))
|
||||
(ext_ty Type (ty_ext32 ty))
|
||||
(pair RegPair (udivmod ext_ty ext_x ext_y)))
|
||||
(copy_reg ty (regpair_hi pair))))
|
||||
(regpair_hi pair)))
|
||||
|
||||
|
||||
;;;; Rules for `sdiv` and `srem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
@@ -591,7 +591,7 @@
|
||||
;; Emit the actual divide instruction.
|
||||
(pair RegPair (sdivmod ext_ty ext_x ext_y)))
|
||||
;; The quotient can be found in the low half of the result.
|
||||
(copy_reg ty (regpair_lo pair))))
|
||||
(regpair_lo pair)))
|
||||
|
||||
;; Implement `srem`. Same as `sdiv`, but finds the remainder in
|
||||
;; the high half of the result register pair instead. Also, handle
|
||||
@@ -603,7 +603,7 @@
|
||||
(ext_ty Type (ty_ext32 ty))
|
||||
(checked_x Reg (maybe_avoid_srem_overflow OFcheck ext_ty ext_x ext_y))
|
||||
(pair RegPair (sdivmod ext_ty checked_x ext_y)))
|
||||
(copy_reg ty (regpair_hi pair))))
|
||||
(regpair_hi pair)))
|
||||
|
||||
;; Determine whether we need to perform an integer-overflow check.
|
||||
;;
|
||||
@@ -1190,7 +1190,7 @@
|
||||
;; 8-bit case: pass `x` and the immediate -56, i.e. -(64 - 8), to `add_simm16`.
;; NOTE(review): presumably removes the extra leading zeros counted when the
;; input is widened to 64 bits — confirm against the `clz` lowering.
(rule (clz_offset $I8 x) (add_simm16 $I8 x -56))
|
||||
;; 16-bit case: pass `x` and the immediate -48, i.e. -(64 - 16), to `add_simm16`.
(rule (clz_offset $I16 x) (add_simm16 $I16 x -48))
|
||||
;; 32-bit case: pass `x` and the immediate -32, i.e. -(64 - 32), to `add_simm16`.
(rule (clz_offset $I32 x) (add_simm16 $I32 x -32))
|
||||
(rule (clz_offset $I64 x) (copy_reg $I64 x))
|
||||
;; 64-bit case: no adjustment is applied; `x` is returned directly, without
;; an intermediate register copy.
(rule (clz_offset $I64 x) x)
|
||||
|
||||
;; Count leading zeros, via FLOGR on an input zero-extended to 64 bits,
|
||||
;; with the result compensated for the extra bits.
|
||||
|
||||
Reference in New Issue
Block a user