From d9753fac2b6c2a6671bbfbc33caab9c4f5255249 Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Fri, 21 Oct 2022 09:22:16 -0700 Subject: [PATCH] Remove uses of `reg_mod` from s390x (#5073) Remove uses of reg_mod from the s390x backend. This required moving away from using r0/r1 as the result registers from a few different pseudo instructions, standardizing instead on r2/r3. That change was necessary as regalloc2 will not correctly allocate registers that aren't listed in the allocatable set, which r0/r1 are not. Co-authored-by: Ulrich Weigand Co-authored-by: Chris Fallin --- cranelift/codegen/src/isa/s390x/inst.isle | 264 +++++++--------- cranelift/codegen/src/isa/s390x/inst/emit.rs | 217 ++++++++++--- .../codegen/src/isa/s390x/inst/emit_tests.rs | 151 ++++++++- cranelift/codegen/src/isa/s390x/inst/mod.rs | 298 ++++++++++++------ cranelift/codegen/src/isa/s390x/inst/regs.rs | 61 ++++ cranelift/codegen/src/isa/s390x/lower.isle | 28 +- cranelift/codegen/src/isa/s390x/lower/isle.rs | 33 +- cranelift/codegen/src/machinst/reg.rs | 12 - .../filetests/isa/s390x/arithmetic.clif | 295 +++++++++-------- .../isa/s390x/atomic_cas-little.clif | 16 +- .../filetests/filetests/isa/s390x/bitops.clif | 58 ++-- .../filetests/isa/s390x/condops.clif | 6 +- .../filetests/isa/s390x/conversions.clif | 48 +-- .../filetests/isa/s390x/div-traps.clif | 281 ++++++++++------- .../isa/s390x/floating-point-arch13.clif | 78 ++--- .../filetests/isa/s390x/floating-point.clif | 84 +++-- .../filetests/filetests/isa/s390x/icmp.clif | 8 +- .../filetests/isa/s390x/tls_elf.clif | 6 +- .../filetests/isa/s390x/vec-arithmetic.clif | 29 +- 19 files changed, 1215 insertions(+), 758 deletions(-) diff --git a/cranelift/codegen/src/isa/s390x/inst.isle b/cranelift/codegen/src/isa/s390x/inst.isle index 8bd725f878..d2f289bc45 100644 --- a/cranelift/codegen/src/isa/s390x/inst.isle +++ b/cranelift/codegen/src/isa/s390x/inst.isle @@ -83,35 +83,42 @@ (imm UImm32Shifted)) ;; A multiply operation with two register 
sources and a register pair destination. - ;; FIXME: The pair is hard-coded as %r0/%r1 because regalloc cannot handle pairs. (SMulWide + (rd WritableRegPair) (rn Reg) (rm Reg)) ;; A multiply operation with an in/out register pair, and an extra register source. ;; Only the lower half of the register pair is used as input. - ;; FIXME: The pair is hard-coded as %r0/%r1 because regalloc cannot handle pairs. (UMulWide + (rd WritableRegPair) + (ri Reg) (rn Reg)) ;; A divide operation with an in/out register pair, and an extra register source. ;; Only the lower half of the register pair is used as input. - ;; FIXME: The pair is hard-coded as %r0/%r1 because regalloc cannot handle pairs. (SDivMod32 + (rd WritableRegPair) + (ri Reg) (rn Reg)) (SDivMod64 + (rd WritableRegPair) + (ri Reg) (rn Reg)) ;; A divide operation with an in/out register pair, and an extra register source. - ;; FIXME: The pair is hard-coded as %r0/%r1 because regalloc cannot handle pairs. (UDivMod32 + (rd WritableRegPair) + (ri RegPair) (rn Reg)) (UDivMod64 + (rd WritableRegPair) + (ri RegPair) (rn Reg)) ;; A FLOGR operation with a register source and a register pair destination. - ;; FIXME The pair is hard-coded as %r0/%r1 because regalloc cannot handle pairs. (Flogr + (rd WritableRegPair) (rn Reg)) ;; A shift instruction with a register source, a register destination, @@ -128,6 +135,7 @@ (RxSBG (op RxSBGOp) (rd WritableReg) + (ri Reg) (rn Reg) (start_bit u8) (end_bit u8) @@ -220,12 +228,14 @@ ;; A 32-bit atomic compare-and-swap operation. (AtomicCas32 (rd WritableReg) + (ri Reg) (rn Reg) (mem MemArg)) ;; A 64-bit atomic compare-and-swap operation. (AtomicCas64 (rd WritableReg) + (ri Reg) (rn Reg) (mem MemArg)) @@ -428,11 +438,13 @@ ;; A 64-bit insert instruction with a shifted 16-bit immediate. (Insert64UImm16Shifted (rd WritableReg) + (ri Reg) (imm UImm16Shifted)) ;; A 64-bit insert instruction with a shifted 32-bit immediate. 
(Insert64UImm32Shifted (rd WritableReg) + (ri Reg) (imm UImm32Shifted)) ;; Load 32-bit access register into GPR. @@ -444,6 +456,7 @@ ;; (Identical operation to LoadAR, but considers rd to be use/def.) (InsertAR (rd WritableReg) + (ri Reg) (ar u8)) ;; A sign- or zero-extend operation. @@ -454,28 +467,33 @@ (from_bits u8) (to_bits u8)) - ;; A 32-bit conditional move instruction. + ;; A 32-bit conditional move instruction. `ri` is the value that's kept if + ;; the condition is false, `rm` is moved into `rd` if the condition is true. (CMov32 (rd WritableReg) (cond Cond) + (ri Reg) (rm Reg)) ;; A 64-bit conditional move instruction. (CMov64 (rd WritableReg) (cond Cond) + (ri Reg) (rm Reg)) ;; A 32-bit conditional move instruction with a 16-bit signed immediate. (CMov32SImm16 (rd WritableReg) (cond Cond) + (ri Reg) (imm i16)) ;; A 64-bit conditional move instruction with a 16-bit signed immediate. (CMov64SImm16 (rd WritableReg) (cond Cond) + (ri Reg) (imm i16)) ;; A 32-bit FPU move possibly implemented as vector instruction. @@ -492,12 +510,14 @@ (FpuCMov32 (rd WritableReg) (cond Cond) + (ri Reg) (rm Reg)) ;; A 64-bit conditional move FPU instruction, possibly as vector instruction. (FpuCMov64 (rd WritableReg) (cond Cond) + (ri Reg) (rm Reg)) ;; 1-op FPU instruction implemented as vector instruction with the W bit. @@ -737,6 +757,7 @@ (VecCMov (rd WritableReg) (cond Cond) + (ri Reg) (rm Reg)) ;; A 128-bit move instruction from two GPRs to a VR. @@ -779,6 +800,7 @@ (VecLoadLane (size u32) (rd WritableReg) + (ri Reg) (mem MemArg) (lane_imm u8)) @@ -794,6 +816,7 @@ (VecLoadLaneRev (size u32) (rd WritableReg) + (ri Reg) (mem MemArg) (lane_imm u8)) @@ -825,6 +848,7 @@ (VecInsertLane (size u32) (rd WritableReg) + (ri Reg) (rn Reg) (lane_imm u8) (lane_reg Reg)) @@ -851,6 +875,7 @@ (VecInsertLaneImm (size u32) (rd WritableReg) + (ri Reg) (imm i16) (lane_imm u8)) @@ -1948,47 +1973,44 @@ ;; Helpers for register pairs ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; A writable register pair. 
-(type WritableRegPair (enum (WritableRegPair (hi WritableReg) (lo WritableReg)))) +(type WritableRegPair (primitive WritableRegPair)) + +;; Construct a WritableRegPair from two registers. +(decl writable_regpair (WritableReg WritableReg) WritableRegPair) +(extern constructor writable_regpair writable_regpair) ;; Allocate a writable register pair. -;; FIXME: The pair is hard-coded as %r0/%r1 because regalloc cannot handle pairs. (decl temp_writable_regpair () WritableRegPair) (rule (temp_writable_regpair) - (WritableRegPair.WritableRegPair (writable_gpr 0) (writable_gpr 1))) - -;; Allocate a writable register pair and initialize it as a copy of the input. -;; FIXME: Because there is only a single hard-coded regpair, the copy is a no-op. -(decl copy_writable_regpair (RegPair) WritableRegPair) -(rule (copy_writable_regpair _src) (temp_writable_regpair)) + (writable_regpair (temp_writable_reg $I64) (temp_writable_reg $I64))) ;; Retrieve the high word of the writable register pair. (decl writable_regpair_hi (WritableRegPair) WritableReg) -(rule (writable_regpair_hi (WritableRegPair.WritableRegPair hi _)) hi) +(extern constructor writable_regpair_hi writable_regpair_hi) ;; Retrieve the low word of the writable register pair. (decl writable_regpair_lo (WritableRegPair) WritableReg) -(rule (writable_regpair_lo (WritableRegPair.WritableRegPair _ lo)) lo) +(extern constructor writable_regpair_lo writable_regpair_lo) ;; A (read-only) register pair. -(type RegPair (enum (RegPair (hi Reg) (lo Reg)))) +(type RegPair (primitive RegPair)) ;; Construct a register pair from a writable register pair. (decl writable_regpair_to_regpair (WritableRegPair) RegPair) -(rule (writable_regpair_to_regpair (WritableRegPair.WritableRegPair hi lo)) - (RegPair.RegPair hi lo)) +(rule (writable_regpair_to_regpair w) + (regpair (writable_regpair_hi w) (writable_regpair_lo w))) -;; Uninitalized register pair that can be used for piecewise initialization. 
-(decl uninitialized_regpair () RegPair) -(rule (uninitialized_regpair) - (temp_writable_regpair)) +;; Construct a regpair from two registers. +(decl regpair (Reg Reg) RegPair) +(extern constructor regpair regpair) ;; Retrieve the high word of the register pair. (decl regpair_hi (RegPair) Reg) -(rule (regpair_hi (RegPair.RegPair hi _)) hi) +(extern constructor regpair_hi regpair_hi) ;; Retrieve the low word of the register pair. (decl regpair_lo (RegPair) Reg) -(rule (regpair_lo (RegPair.RegPair _ lo)) lo) +(extern constructor regpair_lo regpair_lo) ;; Instruction creation helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -2060,43 +2082,42 @@ (decl smul_wide (Reg Reg) RegPair) (rule (smul_wide src1 src2) (let ((dst WritableRegPair (temp_writable_regpair)) - (_ Unit (emit (MInst.SMulWide src1 src2)))) + (_ Unit (emit (MInst.SMulWide dst src1 src2)))) dst)) ;; Helper for emitting `MInst.UMulWide` instructions. (decl umul_wide (Reg Reg) RegPair) (rule (umul_wide src1 src2) (let ((dst WritableRegPair (temp_writable_regpair)) - (_ Unit (emit (MInst.Mov64 (writable_regpair_lo dst) src2))) - (_ Unit (emit (MInst.UMulWide src1)))) + (_ Unit (emit (MInst.UMulWide dst src1 src2)))) dst)) ;; Helper for emitting `MInst.SDivMod32` instructions. -(decl sdivmod32 (RegPair Reg) RegPair) +(decl sdivmod32 (Reg Reg) RegPair) (rule (sdivmod32 src1 src2) - (let ((dst WritableRegPair (copy_writable_regpair src1)) - (_ Unit (emit (MInst.SDivMod32 src2)))) + (let ((dst WritableRegPair (temp_writable_regpair)) + (_ Unit (emit (MInst.SDivMod32 dst src1 src2)))) dst)) ;; Helper for emitting `MInst.SDivMod64` instructions. 
-(decl sdivmod64 (RegPair Reg) RegPair) +(decl sdivmod64 (Reg Reg) RegPair) (rule (sdivmod64 src1 src2) - (let ((dst WritableRegPair (copy_writable_regpair src1)) - (_ Unit (emit (MInst.SDivMod64 src2)))) + (let ((dst WritableRegPair (temp_writable_regpair)) + (_ Unit (emit (MInst.SDivMod64 dst src1 src2)))) dst)) ;; Helper for emitting `MInst.UDivMod32` instructions. (decl udivmod32 (RegPair Reg) RegPair) (rule (udivmod32 src1 src2) - (let ((dst WritableRegPair (copy_writable_regpair src1)) - (_ Unit (emit (MInst.UDivMod32 src2)))) + (let ((dst WritableRegPair (temp_writable_regpair)) + (_ Unit (emit (MInst.UDivMod32 dst src1 src2)))) dst)) ;; Helper for emitting `MInst.UDivMod64` instructions. (decl udivmod64 (RegPair Reg) RegPair) (rule (udivmod64 src1 src2) - (let ((dst WritableRegPair (copy_writable_regpair src1)) - (_ Unit (emit (MInst.UDivMod64 src2)))) + (let ((dst WritableRegPair (temp_writable_regpair)) + (_ Unit (emit (MInst.UDivMod64 dst src1 src2)))) dst)) ;; Helper for emitting `MInst.ShiftRR` instructions. @@ -2154,15 +2175,15 @@ ;; Helper for emitting `MInst.AtomicCas32` instructions. (decl atomic_cas32 (Reg Reg MemArg) Reg) (rule (atomic_cas32 src1 src2 mem) - (let ((dst WritableReg (copy_writable_reg $I32 src1)) - (_ Unit (emit (MInst.AtomicCas32 dst src2 mem)))) + (let ((dst WritableReg (temp_writable_reg $I32)) + (_ Unit (emit (MInst.AtomicCas32 dst src1 src2 mem)))) dst)) ;; Helper for emitting `MInst.AtomicCas64` instructions. (decl atomic_cas64 (Reg Reg MemArg) Reg) (rule (atomic_cas64 src1 src2 mem) - (let ((dst WritableReg (copy_writable_reg $I64 src1)) - (_ Unit (emit (MInst.AtomicCas64 dst src2 mem)))) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.AtomicCas64 dst src1 src2 mem)))) dst)) ;; Helper for emitting `MInst.Fence` instructions. @@ -2275,8 +2296,8 @@ ;; Helper for emitting `MInst.InsertAR` instructions. 
(decl insert_ar (Reg u8) Reg) (rule (insert_ar src ar) - (let ((dst WritableReg (copy_writable_reg $I64 src)) - (_ Unit (emit (MInst.InsertAR dst ar)))) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.InsertAR dst src ar)))) dst)) ;; Helper for emitting `MInst.FpuRR` instructions. @@ -2554,8 +2575,8 @@ ;; Helper for emitting `MInst.VecLoadLane` instructions. (decl vec_load_lane (Type Reg MemArg u8) Reg) (rule (vec_load_lane ty @ (multi_lane size _) src addr lane_imm) - (let ((dst WritableReg (copy_writable_reg ty src)) - (_ Unit (emit (MInst.VecLoadLane size dst addr lane_imm)))) + (let ((dst WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.VecLoadLane size dst src addr lane_imm)))) dst)) ;; Helper for emitting `MInst.VecLoadLaneUndef` instructions. @@ -2568,8 +2589,8 @@ ;; Helper for emitting `MInst.VecLoadLaneRev` instructions. (decl vec_load_lane_rev (Type Reg MemArg u8) Reg) (rule (vec_load_lane_rev ty @ (multi_lane size _) src addr lane_imm) - (let ((dst WritableReg (copy_writable_reg ty src)) - (_ Unit (emit (MInst.VecLoadLaneRev size dst addr lane_imm)))) + (let ((dst WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.VecLoadLaneRev size dst src addr lane_imm)))) dst)) ;; Helper for emitting `MInst.VecLoadLaneRevUndef` instructions. @@ -2592,8 +2613,8 @@ ;; Helper for emitting `MInst.VecInsertLane` instructions. (decl vec_insert_lane (Type Reg Reg u8 Reg) Reg) (rule (vec_insert_lane ty @ (multi_lane size _) src1 src2 lane_imm lane_reg) - (let ((dst WritableReg (copy_writable_reg ty src1)) - (_ Unit (emit (MInst.VecInsertLane size dst src2 lane_imm lane_reg)))) + (let ((dst WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.VecInsertLane size dst src1 src2 lane_imm lane_reg)))) dst)) ;; Helper for emitting `MInst.VecInsertLaneUndef` instructions. @@ -2613,8 +2634,8 @@ ;; Helper for emitting `MInst.VecInsertLaneImm` instructions. 
(decl vec_insert_lane_imm (Type Reg i16 u8) Reg) (rule (vec_insert_lane_imm ty @ (multi_lane size _) src imm lane_imm) - (let ((dst WritableReg (copy_writable_reg ty src)) - (_ Unit (emit (MInst.VecInsertLaneImm size dst imm lane_imm)))) + (let ((dst WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.VecInsertLaneImm size dst src imm lane_imm)))) dst)) ;; Helper for emitting `MInst.VecReplicateLane` instructions. @@ -2731,7 +2752,7 @@ (rule (push_rxsbg ib op (real_reg dst) r src start_bit end_bit rotate_amt) (if (same_reg dst r)) (let ((_ Unit (inst_builder_push ib - (MInst.RxSBG op dst src start_bit end_bit rotate_amt)))) + (MInst.RxSBG op dst r src start_bit end_bit rotate_amt)))) dst)) ;; Push a `MInst.UnaryRR` instruction to a sequence. @@ -2743,13 +2764,13 @@ ;; Push a `MInst.AtomicCas32` instruction to a sequence. (decl push_atomic_cas32 (VecMInstBuilder WritableReg Reg MemArg) Reg) (rule (push_atomic_cas32 ib (real_reg dst_src1) src2 mem) - (let ((_ Unit (inst_builder_push ib (MInst.AtomicCas32 dst_src1 src2 mem)))) + (let ((_ Unit (inst_builder_push ib (MInst.AtomicCas32 dst_src1 dst_src1 src2 mem)))) dst_src1)) ;; Push a `MInst.AtomicCas64` instruction to a sequence. (decl push_atomic_cas64 (VecMInstBuilder WritableReg Reg MemArg) Reg) (rule (push_atomic_cas64 ib (real_reg dst_src1) src2 mem) - (let ((_ Unit (inst_builder_push ib (MInst.AtomicCas64 dst_src1 src2 mem)))) + (let ((_ Unit (inst_builder_push ib (MInst.AtomicCas64 dst_src1 dst_src1 src2 mem)))) dst_src1)) ;; Push instructions to break out of the loop if condition is met. 
@@ -3003,11 +3024,11 @@ ;; Insertion, value fits in UImm16Shifted (rule 1 (emit_insert_imm dst (uimm16shifted_from_u64 n)) - (emit (MInst.Insert64UImm16Shifted dst n))) + (emit (MInst.Insert64UImm16Shifted dst dst n))) ;; Insertion, value fits in UImm32Shifted (rule (emit_insert_imm dst (uimm32shifted_from_u64 n)) - (emit (MInst.Insert64UImm32Shifted dst n))) + (emit (MInst.Insert64UImm32Shifted dst dst n))) ;; 32-bit floating-point type, any value. Loaded from literal pool. ;; TODO: use LZER to load 0.0 @@ -3063,22 +3084,6 @@ (rule 0 (vec_imm_splat (ty_vec128 ty) n) (vec_load_const_replicate ty n)) -;; Place an immediate into the low half of a register pair. -;; The high half is taken from the input. -(decl imm_regpair_lo (Type u64 RegPair) RegPair) -(rule (imm_regpair_lo ty n regpair) - (let ((dst WritableRegPair (copy_writable_regpair regpair)) - (_ Unit (emit_imm ty (writable_regpair_lo dst) n))) - dst)) - -;; Place an immediate into the high half of a register pair. -;; The low half is taken from the input. -(decl imm_regpair_hi (Type u64 RegPair) RegPair) -(rule (imm_regpair_hi ty n regpair) - (let ((dst WritableRegPair (copy_writable_regpair regpair)) - (_ Unit (emit_imm ty (writable_regpair_hi dst) n))) - dst)) - ;; Helpers for generating extensions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -3289,100 +3294,43 @@ (rule 2 (put_in_reg_sext64 val @ (value_type (gpr64_ty ty))) val) -;; Place `Value` into the low half of a register pair, zero-extending -;; to 32 bits if smaller. The high half is taken from the input. -(decl put_in_regpair_lo_zext32 (Value RegPair) RegPair) -(rule (put_in_regpair_lo_zext32 val regpair) - (let ((dst WritableRegPair (copy_writable_regpair regpair)) - (_ Unit (emit_put_in_reg_zext32 (writable_regpair_lo dst) val))) - dst)) - -;; Place `Value` into the low half of a register pair, sign-extending -;; to 32 bits if smaller. The high half is taken from the input. 
-(decl put_in_regpair_lo_sext32 (Value RegPair) RegPair) -(rule (put_in_regpair_lo_sext32 val regpair) - (let ((dst WritableRegPair (copy_writable_regpair regpair)) - (_ Unit (emit_put_in_reg_sext32 (writable_regpair_lo dst) val))) - dst)) - -;; Place `Value` into the low half of a register pair, zero-extending -;; to 64 bits if smaller. The high half is taken from the input. -(decl put_in_regpair_lo_zext64 (Value RegPair) RegPair) -(rule (put_in_regpair_lo_zext64 val regpair) - (let ((dst WritableRegPair (copy_writable_regpair regpair)) - (_ Unit (emit_put_in_reg_zext64 (writable_regpair_lo dst) val))) - dst)) - -;; Place `Value` into the low half of a register pair, sign-extending -;; to 64 bits if smaller. The high half is taken from the input. -(decl put_in_regpair_lo_sext64 (Value RegPair) RegPair) -(rule (put_in_regpair_lo_sext64 val regpair) - (let ((dst WritableRegPair (copy_writable_regpair regpair)) - (_ Unit (emit_put_in_reg_sext64 (writable_regpair_lo dst) val))) - dst)) - ;; Helpers for generating conditional moves ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Conditionally move immediate value into destination register. (Non-SSA form.) -(decl emit_cmov_imm (Type WritableReg Cond i16) ConsumesFlags) -(rule (emit_cmov_imm (gpr32_ty _ty) dst cond imm) - (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CMov32SImm16 dst cond imm) +(decl emit_cmov_imm (Type WritableReg Cond i16 Reg) ConsumesFlags) +(rule (emit_cmov_imm (gpr32_ty _ty) dst cond imm reg_false) + (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CMov32SImm16 dst cond reg_false imm) dst)) -(rule 1 (emit_cmov_imm (gpr64_ty _ty) dst cond imm) - (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CMov64SImm16 dst cond imm) +(rule 1 (emit_cmov_imm (gpr64_ty _ty) dst cond imm reg_false) + (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CMov64SImm16 dst cond reg_false imm) dst)) ;; Conditionally select between immediate and source register. 
(decl cmov_imm (Type Cond i16 Reg) ConsumesFlags) (rule (cmov_imm ty cond imm src) - (let ((dst WritableReg (copy_writable_reg ty src))) - (emit_cmov_imm ty dst cond imm))) - -;; Conditionally modify the low word of a register pair. -;; This cannot be ConsumesFlags since the return value is not a register. -(decl cmov_imm_regpair_lo (Type ProducesFlags Cond i16 RegPair) RegPair) -(rule (cmov_imm_regpair_lo ty producer cond imm src) - (let ((dst WritableRegPair (copy_writable_regpair src)) - (consumer ConsumesFlags (emit_cmov_imm ty (writable_regpair_lo dst) cond imm)) - (_ Reg (with_flags_reg producer consumer))) - dst)) - -;; Conditionally modify the high word of a register pair. -;; This cannot be ConsumesFlags since the return value is not a register. -(decl cmov_imm_regpair_hi (Type ProducesFlags Cond i16 RegPair) RegPair) -(rule (cmov_imm_regpair_hi ty producer cond imm src) - (let ((dst WritableRegPair (copy_writable_regpair src)) - (consumer ConsumesFlags (emit_cmov_imm ty (writable_regpair_hi dst) cond imm)) - (_ Reg (with_flags_reg producer consumer))) - dst)) + (let ((dst WritableReg (temp_writable_reg ty))) + (emit_cmov_imm ty dst cond imm src))) ;; Conditionally select between two source registers. (Non-SSA form.) 
-(decl emit_cmov_reg (Type WritableReg Cond Reg) ConsumesFlags) -(rule 1 (emit_cmov_reg (gpr32_ty _ty) dst cond src) - (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CMov32 dst cond src) +(decl emit_cmov_reg (Type WritableReg Cond Reg Reg) ConsumesFlags) +(rule 1 (emit_cmov_reg (gpr32_ty _ty) dst cond else src) + (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CMov32 dst cond else src) dst)) -(rule 2 (emit_cmov_reg (gpr64_ty _ty) dst cond src) - (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CMov64 dst cond src) +(rule 2 (emit_cmov_reg (gpr64_ty _ty) dst cond else src) + (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CMov64 dst cond else src) dst)) -(rule 3 (emit_cmov_reg $F32 dst cond src) - (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.FpuCMov32 dst cond src) +(rule 3 (emit_cmov_reg $F32 dst cond else src) + (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.FpuCMov32 dst cond else src) dst)) -(rule 3 (emit_cmov_reg $F64 dst cond src) - (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.FpuCMov64 dst cond src) +(rule 3 (emit_cmov_reg $F64 dst cond else src) + (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.FpuCMov64 dst cond else src) dst)) -(rule 0 (emit_cmov_reg (vr128_ty ty) dst cond src) - (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.VecCMov dst cond src) +(rule 0 (emit_cmov_reg (vr128_ty ty) dst cond else src) + (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.VecCMov dst cond else src) dst)) -;; Conditionally select between two source registers. 
-(decl cmov_reg (Type Cond Reg Reg) ConsumesFlags) -(rule (cmov_reg ty cond src1 src2) - (let ((dst WritableReg (copy_writable_reg ty src2))) - (emit_cmov_reg ty dst cond src1))) - - ;; Helpers for generating conditional traps ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (decl trap_if (ProducesFlags Cond TrapCode) Reg) @@ -3461,17 +3409,17 @@ (rule (select_bool_reg ty (ProducesBool.ProducesBool producer cond) reg_true reg_false) (let ((dst WritableReg (temp_writable_reg ty)) (_ Unit (emit_producer producer)) - (_ Unit (emit_mov ty dst reg_false)) - (_ Unit (emit_consumer (emit_cmov_reg ty dst cond reg_true)))) + (_ Unit (emit_consumer (emit_cmov_reg ty dst cond reg_false reg_true)))) dst)) ;; Use a boolean condition to select between two immediate values. (decl select_bool_imm (Type ProducesBool i16 u64) Reg) (rule (select_bool_imm ty (ProducesBool.ProducesBool producer cond) imm_true imm_false) (let ((dst WritableReg (temp_writable_reg ty)) + (reg_false WritableReg (temp_writable_reg ty)) (_ Unit (emit_producer producer)) - (_ Unit (emit_imm ty dst imm_false)) - (_ Unit (emit_consumer (emit_cmov_imm ty dst cond imm_true)))) + (_ Unit (emit_imm ty reg_false imm_false)) + (_ Unit (emit_consumer (emit_cmov_imm ty dst cond imm_true reg_false)))) dst)) ;; Lower a boolean condition to the values 1/0. This rule is only used in the @@ -3857,7 +3805,7 @@ ;; The flogr instruction returns 64 for zero input by default. (rule (clz_reg 64 x) (let ((dst WritableRegPair (temp_writable_regpair)) - (_ Unit (emit (MInst.Flogr x)))) + (_ Unit (emit (MInst.Flogr dst x)))) dst)) ;; If another zero return value was requested, we need to override the flogr @@ -3866,10 +3814,10 @@ ;; conditional move, and because flogr returns a register pair. 
(rule -1 (clz_reg zeroval x) (let ((dst WritableRegPair (temp_writable_regpair)) - (_ Unit (emit (MInst.Flogr x))) - (_ Unit (emit (MInst.CMov64SImm16 (writable_regpair_hi dst) - (intcc_as_cond (IntCC.Equal)) zeroval)))) - dst)) + (_ Unit (emit (MInst.Flogr dst x))) + (hi WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.CMov64SImm16 hi (intcc_as_cond (IntCC.Equal)) (regpair_hi dst) zeroval)))) + (regpair hi (regpair_lo dst)))) ;; Vector count leading zeros. (decl vecop_clz (Type) VecUnaryOp) @@ -4186,7 +4134,7 @@ ;; Helpers for generating `sdivmod` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(decl sdivmod (Type RegPair Reg) RegPair) +(decl sdivmod (Type Reg Reg) RegPair) (rule (sdivmod $I32 x y) (sdivmod32 x y)) (rule (sdivmod $I64 x y) (sdivmod64 x y)) diff --git a/cranelift/codegen/src/isa/s390x/inst/emit.rs b/cranelift/codegen/src/isa/s390x/inst/emit.rs index 4bfe07e5b4..a6db0f019a 100644 --- a/cranelift/codegen/src/isa/s390x/inst/emit.rs +++ b/cranelift/codegen/src/isa/s390x/inst/emit.rs @@ -11,6 +11,39 @@ use crate::trace; use core::convert::TryFrom; use regalloc2::Allocation; +/// Debug macro for testing that a regpair is valid: that the high register is even, and the low +/// register is one higher than the high register. +macro_rules! debug_assert_valid_regpair { + ($hi:expr, $lo:expr) => { + if cfg!(debug_assertions) { + match ($hi.to_real_reg(), $lo.to_real_reg()) { + (Some(hi), Some(lo)) => { + assert!( + hi.hw_enc() % 2 == 0, + "High register is not even: {}", + show_reg($hi) + ); + assert_eq!( + hi.hw_enc() + 1, + lo.hw_enc(), + "Low register is not valid: {}, {}", + show_reg($hi), + show_reg($lo) + ); + } + + _ => { + panic!( + "Expected real registers for {} {}", + show_reg($hi), + show_reg($lo) + ); + } + } + } + }; +} + /// Type(s) of memory instructions available for mem_finalize. pub struct MemInstType { /// True if 12-bit unsigned displacement is supported. 
@@ -1654,52 +1687,87 @@ impl MachInstEmit for Inst { put(sink, &enc_ril_a(opcode, rd.to_reg(), imm.bits)); } - &Inst::SMulWide { rn, rm } => { + &Inst::SMulWide { rd, rn, rm } => { let rn = allocs.next(rn); let rm = allocs.next(rm); + let rd1 = allocs.next_writable(rd.hi); + let rd2 = allocs.next_writable(rd.lo); + debug_assert_valid_regpair!(rd1.to_reg(), rd2.to_reg()); let opcode = 0xb9ec; // MGRK - put(sink, &enc_rrf_ab(opcode, gpr(0), rn, rm, 0)); + put(sink, &enc_rrf_ab(opcode, rd1.to_reg(), rn, rm, 0)); } - &Inst::UMulWide { rn } => { + &Inst::UMulWide { rd, ri, rn } => { let rn = allocs.next(rn); + let rd1 = allocs.next_writable(rd.hi); + let rd2 = allocs.next_writable(rd.lo); + debug_assert_valid_regpair!(rd1.to_reg(), rd2.to_reg()); + let ri = allocs.next(ri); + debug_assert_eq!(rd2.to_reg(), ri); let opcode = 0xb986; // MLGR - put(sink, &enc_rre(opcode, gpr(0), rn)); + put(sink, &enc_rre(opcode, rd1.to_reg(), rn)); } - &Inst::SDivMod32 { rn } => { + &Inst::SDivMod32 { rd, ri, rn } => { let rn = allocs.next(rn); + let rd1 = allocs.next_writable(rd.hi); + let rd2 = allocs.next_writable(rd.lo); + debug_assert_valid_regpair!(rd1.to_reg(), rd2.to_reg()); + let ri = allocs.next(ri); + debug_assert_eq!(rd2.to_reg(), ri); let opcode = 0xb91d; // DSGFR let trap_code = TrapCode::IntegerDivisionByZero; - put_with_trap(sink, &enc_rre(opcode, gpr(0), rn), trap_code); + put_with_trap(sink, &enc_rre(opcode, rd1.to_reg(), rn), trap_code); } - &Inst::SDivMod64 { rn } => { + &Inst::SDivMod64 { rd, ri, rn } => { let rn = allocs.next(rn); + let rd1 = allocs.next_writable(rd.hi); + let rd2 = allocs.next_writable(rd.lo); + debug_assert_valid_regpair!(rd1.to_reg(), rd2.to_reg()); + let ri = allocs.next(ri); + debug_assert_eq!(rd2.to_reg(), ri); let opcode = 0xb90d; // DSGR let trap_code = TrapCode::IntegerDivisionByZero; - put_with_trap(sink, &enc_rre(opcode, gpr(0), rn), trap_code); + put_with_trap(sink, &enc_rre(opcode, rd1.to_reg(), rn), trap_code); } - &Inst::UDivMod32 { rn 
} => { + &Inst::UDivMod32 { rd, ri, rn } => { let rn = allocs.next(rn); + let rd1 = allocs.next_writable(rd.hi); + let rd2 = allocs.next_writable(rd.lo); + debug_assert_valid_regpair!(rd1.to_reg(), rd2.to_reg()); + let ri1 = allocs.next(ri.hi); + let ri2 = allocs.next(ri.lo); + debug_assert_eq!(rd1.to_reg(), ri1); + debug_assert_eq!(rd2.to_reg(), ri2); let opcode = 0xb997; // DLR let trap_code = TrapCode::IntegerDivisionByZero; - put_with_trap(sink, &enc_rre(opcode, gpr(0), rn), trap_code); + put_with_trap(sink, &enc_rre(opcode, rd1.to_reg(), rn), trap_code); } - &Inst::UDivMod64 { rn } => { + &Inst::UDivMod64 { rd, ri, rn } => { let rn = allocs.next(rn); + let rd1 = allocs.next_writable(rd.hi); + let rd2 = allocs.next_writable(rd.lo); + debug_assert_valid_regpair!(rd1.to_reg(), rd2.to_reg()); + let ri1 = allocs.next(ri.hi); + let ri2 = allocs.next(ri.lo); + debug_assert_eq!(rd1.to_reg(), ri1); + debug_assert_eq!(rd2.to_reg(), ri2); let opcode = 0xb987; // DLGR let trap_code = TrapCode::IntegerDivisionByZero; - put_with_trap(sink, &enc_rre(opcode, gpr(0), rn), trap_code); + put_with_trap(sink, &enc_rre(opcode, rd1.to_reg(), rn), trap_code); } - &Inst::Flogr { rn } => { + &Inst::Flogr { rd, rn } => { let rn = allocs.next(rn); + let rd1 = allocs.next_writable(rd.hi); + let rd2 = allocs.next_writable(rd.lo); + debug_assert_valid_regpair!(rd1.to_reg(), rd2.to_reg()); let opcode = 0xb983; // FLOGR - put(sink, &enc_rre(opcode, gpr(0), rn)); + put(sink, &enc_rre(opcode, rd1.to_reg(), rn)); } &Inst::ShiftRR { @@ -1732,12 +1800,15 @@ impl MachInstEmit for Inst { &Inst::RxSBG { op, rd, + ri, rn, start_bit, end_bit, rotate_amt, } => { let rd = allocs.next_writable(rd); + let ri = allocs.next(ri); + debug_assert_eq!(rd.to_reg(), ri); let rn = allocs.next(rn); let opcode = match op { @@ -2069,8 +2140,21 @@ impl MachInstEmit for Inst { sink.bind_label(done_label); } &Inst::CondBreak { .. } => unreachable!(), // Only valid inside a Loop. 
- &Inst::AtomicCas32 { rd, rn, ref mem } | &Inst::AtomicCas64 { rd, rn, ref mem } => { + &Inst::AtomicCas32 { + rd, + ri, + rn, + ref mem, + } + | &Inst::AtomicCas64 { + rd, + ri, + rn, + ref mem, + } => { let rd = allocs.next_writable(rd); + let ri = allocs.next(ri); + debug_assert_eq!(rd.to_reg(), ri); let rn = allocs.next(rn); let mem = mem.with_allocs(&mut allocs); @@ -2280,22 +2364,28 @@ impl MachInstEmit for Inst { let opcode = 0xc01; // LGFI put(sink, &enc_ril_a(opcode, rd.to_reg(), imm as u32)); } - &Inst::CMov32 { rd, cond, rm } => { + &Inst::CMov32 { rd, cond, ri, rm } => { let rd = allocs.next_writable(rd); + let ri = allocs.next(ri); + debug_assert_eq!(rd.to_reg(), ri); let rm = allocs.next(rm); let opcode = 0xb9f2; // LOCR put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rm, cond.bits(), 0)); } - &Inst::CMov64 { rd, cond, rm } => { + &Inst::CMov64 { rd, cond, ri, rm } => { let rd = allocs.next_writable(rd); + let ri = allocs.next(ri); + debug_assert_eq!(rd.to_reg(), ri); let rm = allocs.next(rm); let opcode = 0xb9e2; // LOCGR put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rm, cond.bits(), 0)); } - &Inst::CMov32SImm16 { rd, cond, imm } => { + &Inst::CMov32SImm16 { rd, cond, ri, imm } => { let rd = allocs.next_writable(rd); + let ri = allocs.next(ri); + debug_assert_eq!(rd.to_reg(), ri); let opcode = 0xec42; // LOCHI put( @@ -2303,8 +2393,10 @@ impl MachInstEmit for Inst { &enc_rie_g(opcode, rd.to_reg(), imm as u16, cond.bits()), ); } - &Inst::CMov64SImm16 { rd, cond, imm } => { + &Inst::CMov64SImm16 { rd, cond, ri, imm } => { let rd = allocs.next_writable(rd); + let ri = allocs.next(ri); + debug_assert_eq!(rd.to_reg(), ri); let opcode = 0xec46; // LOCGHI put( @@ -2334,8 +2426,10 @@ impl MachInstEmit for Inst { }; put(sink, &enc_ril_a(opcode, rd.to_reg(), imm.bits)); } - &Inst::Insert64UImm16Shifted { rd, imm } => { + &Inst::Insert64UImm16Shifted { rd, ri, imm } => { let rd = allocs.next_writable(rd); + let ri = allocs.next(ri); + debug_assert_eq!(rd.to_reg(), 
ri); let opcode = match imm.shift { 0 => 0xa53, // IILL @@ -2346,8 +2440,10 @@ impl MachInstEmit for Inst { }; put(sink, &enc_ri_a(opcode, rd.to_reg(), imm.bits)); } - &Inst::Insert64UImm32Shifted { rd, imm } => { + &Inst::Insert64UImm32Shifted { rd, ri, imm } => { let rd = allocs.next_writable(rd); + let ri = allocs.next(ri); + debug_assert_eq!(rd.to_reg(), ri); let opcode = match imm.shift { 0 => 0xc09, // IILF @@ -2356,11 +2452,20 @@ impl MachInstEmit for Inst { }; put(sink, &enc_ril_a(opcode, rd.to_reg(), imm.bits)); } - &Inst::LoadAR { rd, ar } | &Inst::InsertAR { rd, ar } => { + &Inst::LoadAR { rd, ar } => { let rd = allocs.next_writable(rd); let opcode = 0xb24f; // EAR put(sink, &enc_rre(opcode, rd.to_reg(), gpr(ar))); } + + &Inst::InsertAR { rd, ri, ar } => { + let rd = allocs.next_writable(rd); + let ri = allocs.next(ri); + debug_assert_eq!(rd.to_reg(), ri); + + let opcode = 0xb24f; // EAR + put(sink, &enc_rre(opcode, rd.to_reg(), gpr(ar))); + } &Inst::LoadSymbolReloc { rd, ref symbol_reloc, @@ -2407,8 +2512,10 @@ impl MachInstEmit for Inst { put(sink, &enc_vrr_a(opcode, rd.to_reg(), rn, 0, 0, 0)); } } - &Inst::FpuCMov32 { rd, cond, rm } => { + &Inst::FpuCMov32 { rd, cond, ri, rm } => { let rd = allocs.next_writable(rd); + let ri = allocs.next(ri); + debug_assert_eq!(rd.to_reg(), ri); let rm = allocs.next(rm); if is_fpr(rd.to_reg()) && is_fpr(rm) { @@ -2423,8 +2530,10 @@ impl MachInstEmit for Inst { put(sink, &enc_vrr_a(opcode, rd.to_reg(), rm, 0, 0, 0)); } } - &Inst::FpuCMov64 { rd, cond, rm } => { + &Inst::FpuCMov64 { rd, cond, ri, rm } => { let rd = allocs.next_writable(rd); + let ri = allocs.next(ri); + debug_assert_eq!(rd.to_reg(), ri); let rm = allocs.next(rm); if is_fpr(rd.to_reg()) && is_fpr(rm) { @@ -3010,8 +3119,10 @@ impl MachInstEmit for Inst { let opcode = 0xe756; // VLR put(sink, &enc_vrr_a(opcode, rd.to_reg(), rn, 0, 0, 0)); } - &Inst::VecCMov { rd, cond, rm } => { + &Inst::VecCMov { rd, cond, ri, rm } => { let rd = allocs.next_writable(rd); 
+ let ri = allocs.next(ri); + debug_assert_eq!(rd.to_reg(), ri); let rm = allocs.next(rm); let opcode = 0xa74; // BCR @@ -3097,20 +3208,49 @@ impl MachInstEmit for Inst { }; put(sink, &enc_vri_a(opcode, rd.to_reg(), imm as u16, m3)); } - &Inst::VecLoadLane { size, rd, - ref mem, - lane_imm, - } - | &Inst::VecLoadLaneUndef { - size, - rd, + ri, ref mem, lane_imm, } | &Inst::VecLoadLaneRev { + size, + rd, + ri, + ref mem, + lane_imm, + } => { + let rd = allocs.next_writable(rd); + let ri = allocs.next(ri); + debug_assert_eq!(rd.to_reg(), ri); + let mem = mem.with_allocs(&mut allocs); + + let opcode_vrx = match (self, size) { + (&Inst::VecLoadLane { .. }, 8) => 0xe700, // VLEB + (&Inst::VecLoadLane { .. }, 16) => 0xe701, // VLEH + (&Inst::VecLoadLane { .. }, 32) => 0xe703, // VLEF + (&Inst::VecLoadLane { .. }, 64) => 0xe702, // VLEG + (&Inst::VecLoadLaneRev { .. }, 16) => 0xe601, // VLEBRH + (&Inst::VecLoadLaneRev { .. }, 32) => 0xe603, // VLEBRF + (&Inst::VecLoadLaneRev { .. }, 64) => 0xe602, // VLEBRG + _ => unreachable!(), + }; + + let rd = rd.to_reg(); + mem_vrx_emit( + rd, + &mem, + opcode_vrx, + lane_imm.into(), + true, + sink, + emit_info, + state, + ); + } + &Inst::VecLoadLaneUndef { size, rd, ref mem, @@ -3126,17 +3266,10 @@ impl MachInstEmit for Inst { let mem = mem.with_allocs(&mut allocs); let (opcode_vrx, opcode_rx, opcode_rxy) = match (self, size) { - (&Inst::VecLoadLane { .. }, 8) => (0xe700, None, None), // VLEB - (&Inst::VecLoadLane { .. }, 16) => (0xe701, None, None), // VLEH - (&Inst::VecLoadLane { .. }, 32) => (0xe703, None, None), // VLEF - (&Inst::VecLoadLane { .. }, 64) => (0xe702, None, None), // VLEG (&Inst::VecLoadLaneUndef { .. }, 8) => (0xe700, None, None), // VLEB (&Inst::VecLoadLaneUndef { .. }, 16) => (0xe701, None, None), // VLEH (&Inst::VecLoadLaneUndef { .. }, 32) => (0xe703, Some(0x78), Some(0xed64)), // VLEF, LE(Y) (&Inst::VecLoadLaneUndef { .. 
}, 64) => (0xe702, Some(0x68), Some(0xed65)), // VLEG, LD(Y) - (&Inst::VecLoadLaneRev { .. }, 16) => (0xe601, None, None), // VLEBRH - (&Inst::VecLoadLaneRev { .. }, 32) => (0xe603, None, None), // VLEBRF - (&Inst::VecLoadLaneRev { .. }, 64) => (0xe602, None, None), // VLEBRG (&Inst::VecLoadLaneRevUndef { .. }, 16) => (0xe601, None, None), // VLEBRH (&Inst::VecLoadLaneRevUndef { .. }, 32) => (0xe603, None, None), // VLEBRF (&Inst::VecLoadLaneRevUndef { .. }, 64) => (0xe602, None, None), // VLEBRG @@ -3207,11 +3340,14 @@ impl MachInstEmit for Inst { &Inst::VecInsertLane { size, rd, + ri, rn, lane_imm, lane_reg, } => { let rd = allocs.next_writable(rd); + let ri = allocs.next(ri); + debug_assert_eq!(rd.to_reg(), ri); let rn = allocs.next(rn); let lane_reg = allocs.next(lane_reg); @@ -3288,10 +3424,13 @@ impl MachInstEmit for Inst { &Inst::VecInsertLaneImm { size, rd, + ri, imm, lane_imm, } => { let rd = allocs.next_writable(rd); + let ri = allocs.next(ri); + debug_assert_eq!(rd.to_reg(), ri); let opcode = match size { 8 => 0xe740, // VLEIB diff --git a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs index 3abdac0638..27b0ba7dfe 100644 --- a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs @@ -2208,21 +2208,78 @@ fn test_s390x_binemit() { "clgite %r7, 65535", )); + let w_regpair = WritableRegPair { + hi: writable_gpr(2), + lo: writable_gpr(3), + }; + let regpair = RegPair { + hi: gpr(2), + lo: gpr(3), + }; + insns.push(( Inst::SMulWide { + rd: w_regpair, rn: gpr(5), rm: gpr(6), }, - "B9EC6005", - "mgrk %r0, %r5, %r6", + "B9EC6025", + "mgrk %r2, %r5, %r6", + )); + insns.push(( + Inst::UMulWide { + rd: w_regpair, + ri: gpr(3), + rn: gpr(5), + }, + "B9860025", + "mlgr %r2, %r5", + )); + insns.push(( + Inst::SDivMod32 { + rd: w_regpair, + ri: gpr(3), + rn: gpr(5), + }, + "B91D0025", + "dsgfr %r2, %r5", + )); + insns.push(( + Inst::SDivMod64 { + rd: w_regpair, 
+ ri: gpr(3), + rn: gpr(5), + }, + "B90D0025", + "dsgr %r2, %r5", + )); + insns.push(( + Inst::UDivMod32 { + rd: w_regpair, + ri: regpair, + rn: gpr(5), + }, + "B9970025", + "dlr %r2, %r5", + )); + insns.push(( + Inst::UDivMod64 { + rd: w_regpair, + ri: regpair, + rn: gpr(5), + }, + "B9870025", + "dlgr %r2, %r5", )); - insns.push((Inst::UMulWide { rn: gpr(5) }, "B9860005", "mlgr %r0, %r5")); - insns.push((Inst::SDivMod32 { rn: gpr(5) }, "B91D0005", "dsgfr %r0, %r5")); - insns.push((Inst::SDivMod64 { rn: gpr(5) }, "B90D0005", "dsgr %r0, %r5")); - insns.push((Inst::UDivMod32 { rn: gpr(5) }, "B9970005", "dlr %r0, %r5")); - insns.push((Inst::UDivMod64 { rn: gpr(5) }, "B9870005", "dlgr %r0, %r5")); - insns.push((Inst::Flogr { rn: gpr(5) }, "B9830005", "flogr %r0, %r5")); + insns.push(( + Inst::Flogr { + rd: w_regpair, + rn: gpr(5), + }, + "B9830025", + "flogr %r2, %r5", + )); insns.push(( Inst::ShiftRR { @@ -2581,6 +2638,7 @@ fn test_s390x_binemit() { Inst::RxSBG { op: RxSBGOp::Insert, rd: writable_gpr(4), + ri: gpr(4), rn: gpr(5), start_bit: 8, end_bit: 32, @@ -2593,6 +2651,7 @@ fn test_s390x_binemit() { Inst::RxSBG { op: RxSBGOp::And, rd: writable_gpr(4), + ri: gpr(4), rn: gpr(5), start_bit: 8, end_bit: 32, @@ -2605,6 +2664,7 @@ fn test_s390x_binemit() { Inst::RxSBG { op: RxSBGOp::Or, rd: writable_gpr(4), + ri: gpr(4), rn: gpr(5), start_bit: 8, end_bit: 32, @@ -2617,6 +2677,7 @@ fn test_s390x_binemit() { Inst::RxSBG { op: RxSBGOp::Xor, rd: writable_gpr(4), + ri: gpr(4), rn: gpr(5), start_bit: 8, end_bit: 32, @@ -3265,6 +3326,7 @@ fn test_s390x_binemit() { insns.push(( Inst::AtomicCas32 { rd: writable_gpr(4), + ri: gpr(4), rn: gpr(5), mem: MemArg::BXD12 { base: zero_reg(), @@ -3279,6 +3341,7 @@ fn test_s390x_binemit() { insns.push(( Inst::AtomicCas32 { rd: writable_gpr(4), + ri: gpr(4), rn: gpr(5), mem: MemArg::BXD12 { base: zero_reg(), @@ -3293,6 +3356,7 @@ fn test_s390x_binemit() { insns.push(( Inst::AtomicCas32 { rd: writable_gpr(4), + ri: gpr(4), rn: gpr(5), mem: 
MemArg::BXD20 { base: zero_reg(), @@ -3307,6 +3371,7 @@ fn test_s390x_binemit() { insns.push(( Inst::AtomicCas32 { rd: writable_gpr(4), + ri: gpr(4), rn: gpr(5), mem: MemArg::BXD20 { base: zero_reg(), @@ -3321,6 +3386,7 @@ fn test_s390x_binemit() { insns.push(( Inst::AtomicCas32 { rd: writable_gpr(4), + ri: gpr(4), rn: gpr(5), mem: MemArg::BXD12 { base: gpr(6), @@ -3335,6 +3401,7 @@ fn test_s390x_binemit() { insns.push(( Inst::AtomicCas32 { rd: writable_gpr(4), + ri: gpr(4), rn: gpr(5), mem: MemArg::BXD12 { base: gpr(6), @@ -3349,6 +3416,7 @@ fn test_s390x_binemit() { insns.push(( Inst::AtomicCas32 { rd: writable_gpr(4), + ri: gpr(4), rn: gpr(5), mem: MemArg::BXD20 { base: gpr(6), @@ -3363,6 +3431,7 @@ fn test_s390x_binemit() { insns.push(( Inst::AtomicCas32 { rd: writable_gpr(4), + ri: gpr(4), rn: gpr(5), mem: MemArg::BXD20 { base: gpr(6), @@ -3377,6 +3446,7 @@ fn test_s390x_binemit() { insns.push(( Inst::AtomicCas64 { rd: writable_gpr(4), + ri: gpr(4), rn: gpr(5), mem: MemArg::BXD20 { base: zero_reg(), @@ -3391,6 +3461,7 @@ fn test_s390x_binemit() { insns.push(( Inst::AtomicCas64 { rd: writable_gpr(4), + ri: gpr(4), rn: gpr(5), mem: MemArg::BXD20 { base: zero_reg(), @@ -3405,6 +3476,7 @@ fn test_s390x_binemit() { insns.push(( Inst::AtomicCas64 { rd: writable_gpr(4), + ri: gpr(4), rn: gpr(5), mem: MemArg::BXD20 { base: gpr(6), @@ -3419,6 +3491,7 @@ fn test_s390x_binemit() { insns.push(( Inst::AtomicCas64 { rd: writable_gpr(4), + ri: gpr(4), rn: gpr(5), mem: MemArg::BXD20 { base: gpr(6), @@ -6451,6 +6524,7 @@ fn test_s390x_binemit() { insns.push(( Inst::Insert64UImm16Shifted { rd: writable_gpr(8), + ri: gpr(8), imm: UImm16Shifted::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), }, "A583FFFF", @@ -6459,6 +6533,7 @@ fn test_s390x_binemit() { insns.push(( Inst::Insert64UImm16Shifted { rd: writable_gpr(8), + ri: gpr(8), imm: UImm16Shifted::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), }, "A582FFFF", @@ -6467,6 +6542,7 @@ fn test_s390x_binemit() { insns.push(( 
Inst::Insert64UImm16Shifted { rd: writable_gpr(8), + ri: gpr(8), imm: UImm16Shifted::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(), }, "A581FFFF", @@ -6475,6 +6551,7 @@ fn test_s390x_binemit() { insns.push(( Inst::Insert64UImm16Shifted { rd: writable_gpr(8), + ri: gpr(8), imm: UImm16Shifted::maybe_from_u64(0xffff_0000_0000_0000).unwrap(), }, "A580FFFF", @@ -6483,6 +6560,7 @@ fn test_s390x_binemit() { insns.push(( Inst::Insert64UImm32Shifted { rd: writable_gpr(8), + ri: gpr(8), imm: UImm32Shifted::maybe_from_u64(0x0000_0000_ffff_ffff).unwrap(), }, "C089FFFFFFFF", @@ -6491,6 +6569,7 @@ fn test_s390x_binemit() { insns.push(( Inst::Insert64UImm32Shifted { rd: writable_gpr(8), + ri: gpr(8), imm: UImm32Shifted::maybe_from_u64(0xffff_ffff_0000_0000).unwrap(), }, "C088FFFFFFFF", @@ -6501,6 +6580,7 @@ fn test_s390x_binemit() { Inst::CMov32 { rd: writable_gpr(8), cond: Cond::from_mask(1), + ri: gpr(8), rm: gpr(9), }, "B9F21089", @@ -6510,6 +6590,7 @@ fn test_s390x_binemit() { Inst::CMov64 { rd: writable_gpr(8), cond: Cond::from_mask(1), + ri: gpr(8), rm: gpr(9), }, "B9E21089", @@ -6521,6 +6602,7 @@ fn test_s390x_binemit() { rd: writable_gpr(8), cond: Cond::from_mask(1), imm: -32768, + ri: gpr(8), }, "EC8180000042", "lochio %r8, -32768", @@ -6530,6 +6612,7 @@ fn test_s390x_binemit() { rd: writable_gpr(8), cond: Cond::from_mask(1), imm: 32767, + ri: gpr(8), }, "EC817FFF0042", "lochio %r8, 32767", @@ -6539,6 +6622,7 @@ fn test_s390x_binemit() { rd: writable_gpr(8), cond: Cond::from_mask(1), imm: -32768, + ri: gpr(8), }, "EC8180000046", "locghio %r8, -32768", @@ -6548,6 +6632,7 @@ fn test_s390x_binemit() { rd: writable_gpr(8), cond: Cond::from_mask(1), imm: 32767, + ri: gpr(8), }, "EC817FFF0046", "locghio %r8, 32767", @@ -6996,6 +7081,7 @@ fn test_s390x_binemit() { }, Inst::AtomicCas32 { rd: writable_gpr(4), + ri: gpr(4), rn: gpr(5), mem: MemArg::BXD12 { base: gpr(6), @@ -7046,6 +7132,7 @@ fn test_s390x_binemit() { insns.push(( Inst::FpuCMov32 { rd: writable_vr(8), + ri: vr(8), 
rm: vr(4), cond: Cond::from_mask(1), }, @@ -7055,6 +7142,7 @@ fn test_s390x_binemit() { insns.push(( Inst::FpuCMov32 { rd: writable_vr(8), + ri: vr(8), rm: vr(20), cond: Cond::from_mask(1), }, @@ -7064,6 +7152,7 @@ fn test_s390x_binemit() { insns.push(( Inst::FpuCMov64 { rd: writable_vr(8), + ri: vr(8), rm: vr(4), cond: Cond::from_mask(1), }, @@ -7073,6 +7162,7 @@ fn test_s390x_binemit() { insns.push(( Inst::FpuCMov64 { rd: writable_vr(8), + ri: vr(8), rm: vr(20), cond: Cond::from_mask(1), }, @@ -10851,6 +10941,7 @@ fn test_s390x_binemit() { insns.push(( Inst::VecCMov { rd: writable_vr(8), + ri: vr(8), rm: vr(20), cond: Cond::from_mask(1), }, @@ -10982,6 +11073,7 @@ fn test_s390x_binemit() { Inst::VecLoadLane { size: 8, rd: writable_vr(17), + ri: vr(17), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -10997,6 +11089,7 @@ fn test_s390x_binemit() { Inst::VecLoadLane { size: 8, rd: writable_vr(17), + ri: vr(17), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -11012,6 +11105,7 @@ fn test_s390x_binemit() { Inst::VecLoadLane { size: 8, rd: writable_vr(17), + ri: vr(17), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -11027,6 +11121,7 @@ fn test_s390x_binemit() { Inst::VecLoadLane { size: 8, rd: writable_vr(17), + ri: vr(17), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -11042,6 +11137,7 @@ fn test_s390x_binemit() { Inst::VecLoadLane { size: 16, rd: writable_vr(17), + ri: vr(17), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -11057,6 +11153,7 @@ fn test_s390x_binemit() { Inst::VecLoadLane { size: 16, rd: writable_vr(17), + ri: vr(17), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -11072,6 +11169,7 @@ fn test_s390x_binemit() { Inst::VecLoadLane { size: 16, rd: writable_vr(17), + ri: vr(17), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -11087,6 +11185,7 @@ fn test_s390x_binemit() { Inst::VecLoadLane { size: 16, rd: writable_vr(17), + ri: vr(17), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -11102,6 
+11201,7 @@ fn test_s390x_binemit() { Inst::VecLoadLane { size: 32, rd: writable_vr(17), + ri: vr(17), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -11117,6 +11217,7 @@ fn test_s390x_binemit() { Inst::VecLoadLane { size: 32, rd: writable_vr(17), + ri: vr(17), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -11132,6 +11233,7 @@ fn test_s390x_binemit() { Inst::VecLoadLane { size: 32, rd: writable_vr(17), + ri: vr(17), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -11147,6 +11249,7 @@ fn test_s390x_binemit() { Inst::VecLoadLane { size: 32, rd: writable_vr(17), + ri: vr(17), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -11162,6 +11265,7 @@ fn test_s390x_binemit() { Inst::VecLoadLane { size: 64, rd: writable_vr(17), + ri: vr(17), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -11177,6 +11281,7 @@ fn test_s390x_binemit() { Inst::VecLoadLane { size: 64, rd: writable_vr(17), + ri: vr(17), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -11192,6 +11297,7 @@ fn test_s390x_binemit() { Inst::VecLoadLane { size: 64, rd: writable_vr(17), + ri: vr(17), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -11207,6 +11313,7 @@ fn test_s390x_binemit() { Inst::VecLoadLane { size: 64, rd: writable_vr(17), + ri: vr(17), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -12062,6 +12169,7 @@ fn test_s390x_binemit() { Inst::VecLoadLaneRev { size: 16, rd: writable_vr(1), + ri: vr(1), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -12077,6 +12185,7 @@ fn test_s390x_binemit() { Inst::VecLoadLaneRev { size: 16, rd: writable_vr(1), + ri: vr(1), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -12092,6 +12201,7 @@ fn test_s390x_binemit() { Inst::VecLoadLaneRev { size: 16, rd: writable_vr(1), + ri: vr(1), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -12107,6 +12217,7 @@ fn test_s390x_binemit() { Inst::VecLoadLaneRev { size: 16, rd: writable_vr(1), + ri: vr(1), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ 
-12122,6 +12233,7 @@ fn test_s390x_binemit() { Inst::VecLoadLaneRev { size: 32, rd: writable_vr(1), + ri: vr(1), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -12137,6 +12249,7 @@ fn test_s390x_binemit() { Inst::VecLoadLaneRev { size: 32, rd: writable_vr(1), + ri: vr(1), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -12152,6 +12265,7 @@ fn test_s390x_binemit() { Inst::VecLoadLaneRev { size: 32, rd: writable_vr(1), + ri: vr(1), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -12167,6 +12281,7 @@ fn test_s390x_binemit() { Inst::VecLoadLaneRev { size: 32, rd: writable_vr(1), + ri: vr(1), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -12182,6 +12297,7 @@ fn test_s390x_binemit() { Inst::VecLoadLaneRev { size: 64, rd: writable_vr(1), + ri: vr(1), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -12197,6 +12313,7 @@ fn test_s390x_binemit() { Inst::VecLoadLaneRev { size: 64, rd: writable_vr(1), + ri: vr(1), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -12212,6 +12329,7 @@ fn test_s390x_binemit() { Inst::VecLoadLaneRev { size: 64, rd: writable_vr(1), + ri: vr(1), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -12227,6 +12345,7 @@ fn test_s390x_binemit() { Inst::VecLoadLaneRev { size: 64, rd: writable_vr(1), + ri: vr(1), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -12783,6 +12902,7 @@ fn test_s390x_binemit() { Inst::VecInsertLane { size: 8, rd: writable_vr(8), + ri: vr(8), rn: gpr(4), lane_imm: 0, lane_reg: zero_reg(), @@ -12794,6 +12914,7 @@ fn test_s390x_binemit() { Inst::VecInsertLane { size: 8, rd: writable_vr(8), + ri: vr(8), rn: gpr(4), lane_imm: 255, lane_reg: zero_reg(), @@ -12805,6 +12926,7 @@ fn test_s390x_binemit() { Inst::VecInsertLane { size: 8, rd: writable_vr(24), + ri: vr(24), rn: gpr(4), lane_imm: 0, lane_reg: gpr(3), @@ -12816,6 +12938,7 @@ fn test_s390x_binemit() { Inst::VecInsertLane { size: 16, rd: writable_vr(8), + ri: vr(8), rn: gpr(4), lane_imm: 0, lane_reg: zero_reg(), @@ -12827,6 
+12950,7 @@ fn test_s390x_binemit() { Inst::VecInsertLane { size: 16, rd: writable_vr(8), + ri: vr(8), rn: gpr(4), lane_imm: 255, lane_reg: zero_reg(), @@ -12838,6 +12962,7 @@ fn test_s390x_binemit() { Inst::VecInsertLane { size: 16, rd: writable_vr(24), + ri: vr(24), rn: gpr(4), lane_imm: 0, lane_reg: gpr(3), @@ -12849,6 +12974,7 @@ fn test_s390x_binemit() { Inst::VecInsertLane { size: 32, rd: writable_vr(8), + ri: vr(8), rn: gpr(4), lane_imm: 0, lane_reg: zero_reg(), @@ -12860,6 +12986,7 @@ fn test_s390x_binemit() { Inst::VecInsertLane { size: 32, rd: writable_vr(8), + ri: vr(8), rn: gpr(4), lane_imm: 255, lane_reg: zero_reg(), @@ -12871,6 +12998,7 @@ fn test_s390x_binemit() { Inst::VecInsertLane { size: 32, rd: writable_vr(24), + ri: vr(24), rn: gpr(4), lane_imm: 0, lane_reg: gpr(3), @@ -12882,6 +13010,7 @@ fn test_s390x_binemit() { Inst::VecInsertLane { size: 64, rd: writable_vr(8), + ri: vr(8), rn: gpr(4), lane_imm: 0, lane_reg: zero_reg(), @@ -12893,6 +13022,7 @@ fn test_s390x_binemit() { Inst::VecInsertLane { size: 64, rd: writable_vr(8), + ri: vr(8), rn: gpr(4), lane_imm: 255, lane_reg: zero_reg(), @@ -12904,6 +13034,7 @@ fn test_s390x_binemit() { Inst::VecInsertLane { size: 64, rd: writable_vr(24), + ri: vr(24), rn: gpr(4), lane_imm: 0, lane_reg: gpr(3), @@ -13168,6 +13299,7 @@ fn test_s390x_binemit() { Inst::VecInsertLaneImm { size: 8, rd: writable_vr(20), + ri: vr(20), imm: 0x1234, lane_imm: 15, }, @@ -13178,6 +13310,7 @@ fn test_s390x_binemit() { Inst::VecInsertLaneImm { size: 16, rd: writable_vr(20), + ri: vr(20), imm: 0x1234, lane_imm: 7, }, @@ -13188,6 +13321,7 @@ fn test_s390x_binemit() { Inst::VecInsertLaneImm { size: 32, rd: writable_vr(20), + ri: vr(20), imm: 0x1234, lane_imm: 3, }, @@ -13198,6 +13332,7 @@ fn test_s390x_binemit() { Inst::VecInsertLaneImm { size: 64, rd: writable_vr(20), + ri: vr(20), imm: 0x1234, lane_imm: 1, }, diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs index 
08d4b85efd..cf5c34a65f 100644 --- a/cranelift/codegen/src/isa/s390x/inst/mod.rs +++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs @@ -68,6 +68,29 @@ fn inst_size_test() { assert_eq!(32, std::mem::size_of::<Inst>()); } +/// A register pair. Enum so it can be destructured in ISLE. +#[derive(Clone, Copy, Debug)] +pub struct RegPair { + pub hi: Reg, + pub lo: Reg, +} + +/// A writable register pair. Enum so it can be destructured in ISLE. +#[derive(Clone, Copy, Debug)] +pub struct WritableRegPair { + pub hi: Writable<Reg>, + pub lo: Writable<Reg>, +} + +impl WritableRegPair { + pub fn to_regpair(&self) -> RegPair { + RegPair { + hi: self.hi.to_reg(), + lo: self.lo.to_reg(), + } + } +} + /// Supported instruction sets #[allow(non_camel_case_types)] #[derive(Debug)] @@ -342,10 +365,18 @@ impl Inst { if let Some(imm) = UImm16Shifted::maybe_from_u64(lo) { // 16-bit shifted immediate - insts.push(Inst::Insert64UImm16Shifted { rd, imm }); + insts.push(Inst::Insert64UImm16Shifted { + rd, + ri: rd.to_reg(), + imm, + }); } else if let Some(imm) = UImm32Shifted::maybe_from_u64(lo) { // 32-bit shifted immediate - insts.push(Inst::Insert64UImm32Shifted { rd, imm }); + insts.push(Inst::Insert64UImm32Shifted { + rd, + ri: rd.to_reg(), + imm, + }); } else { unreachable!(); } @@ -508,31 +539,37 @@ fn s390x_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandC collector.reg_reuse_def(rd, 1); collector.reg_use(ri); } - &Inst::SMulWide { rn, rm, .. } => { + &Inst::SMulWide { rd, rn, rm } => { collector.reg_use(rn); collector.reg_use(rm); - collector.reg_def(writable_gpr(0)); - collector.reg_def(writable_gpr(1)); + // FIXME: The pair is hard-coded as %r2/%r3 because regalloc cannot handle pairs. If + // that changes, all the hard-coded uses of %r2/%r3 can be changed. + collector.reg_fixed_def(rd.hi, gpr(2)); + collector.reg_fixed_def(rd.lo, gpr(3)); } - &Inst::UMulWide { rn, .. 
} => { + &Inst::UMulWide { rd, ri, rn } => { collector.reg_use(rn); - collector.reg_def(writable_gpr(0)); - collector.reg_mod(writable_gpr(1)); + collector.reg_fixed_def(rd.hi, gpr(2)); + collector.reg_fixed_def(rd.lo, gpr(3)); + collector.reg_fixed_use(ri, gpr(3)); } - &Inst::SDivMod32 { rn, .. } | &Inst::SDivMod64 { rn, .. } => { + &Inst::SDivMod32 { rd, ri, rn } | &Inst::SDivMod64 { rd, ri, rn } => { collector.reg_use(rn); - collector.reg_def(writable_gpr(0)); - collector.reg_mod(writable_gpr(1)); + collector.reg_fixed_def(rd.hi, gpr(2)); + collector.reg_fixed_def(rd.lo, gpr(3)); + collector.reg_fixed_use(ri, gpr(3)); } - &Inst::UDivMod32 { rn, .. } | &Inst::UDivMod64 { rn, .. } => { + &Inst::UDivMod32 { rd, ri, rn } | &Inst::UDivMod64 { rd, ri, rn } => { collector.reg_use(rn); - collector.reg_mod(writable_gpr(0)); - collector.reg_mod(writable_gpr(1)); + collector.reg_fixed_def(rd.hi, gpr(2)); + collector.reg_fixed_def(rd.lo, gpr(3)); + collector.reg_fixed_use(ri.hi, gpr(2)); + collector.reg_fixed_use(ri.lo, gpr(3)); } - &Inst::Flogr { rn, .. } => { + &Inst::Flogr { rd, rn } => { collector.reg_use(rn); - collector.reg_def(writable_gpr(0)); - collector.reg_def(writable_gpr(1)); + collector.reg_fixed_def(rd.hi, gpr(2)); + collector.reg_fixed_def(rd.lo, gpr(3)); } &Inst::ShiftRR { rd, rn, shift_reg, .. @@ -543,8 +580,9 @@ fn s390x_get_operands VReg>(inst: &Inst, collector: &mut OperandC collector.reg_use(shift_reg); } } - &Inst::RxSBG { rd, rn, .. } => { - collector.reg_mod(rd); + &Inst::RxSBG { rd, ri, rn, .. } => { + collector.reg_reuse_def(rd, 1); + collector.reg_use(ri); collector.reg_use(rn); } &Inst::RxSBGTest { rd, rn, .. } => { @@ -590,12 +628,21 @@ fn s390x_get_operands VReg>(inst: &Inst, collector: &mut OperandC memarg_operands(mem, collector); } &Inst::AtomicCas32 { - rd, rn, ref mem, .. + rd, + ri, + rn, + ref mem, + .. } | &Inst::AtomicCas64 { - rd, rn, ref mem, .. + rd, + ri, + rn, + ref mem, + .. 
} => { - collector.reg_mod(rd); + collector.reg_reuse_def(rd, 1); + collector.reg_use(ri); collector.reg_use(rn); memarg_operands(mem, collector); } @@ -681,28 +728,34 @@ fn s390x_get_operands VReg>(inst: &Inst, collector: &mut OperandC | &Inst::Mov64UImm32Shifted { rd, .. } => { collector.reg_def(rd); } - &Inst::CMov32 { rd, rm, .. } | &Inst::CMov64 { rd, rm, .. } => { - collector.reg_mod(rd); + &Inst::CMov32 { rd, ri, rm, .. } | &Inst::CMov64 { rd, ri, rm, .. } => { + collector.reg_reuse_def(rd, 1); + collector.reg_use(ri); collector.reg_use(rm); } - &Inst::CMov32SImm16 { rd, .. } | &Inst::CMov64SImm16 { rd, .. } => { - collector.reg_mod(rd); + &Inst::CMov32SImm16 { rd, ri, .. } | &Inst::CMov64SImm16 { rd, ri, .. } => { + collector.reg_reuse_def(rd, 1); + collector.reg_use(ri); } - &Inst::Insert64UImm16Shifted { rd, .. } | &Inst::Insert64UImm32Shifted { rd, .. } => { - collector.reg_mod(rd); + &Inst::Insert64UImm16Shifted { rd, ri, .. } + | &Inst::Insert64UImm32Shifted { rd, ri, .. } => { + collector.reg_reuse_def(rd, 1); + collector.reg_use(ri); } &Inst::LoadAR { rd, .. } => { collector.reg_def(rd); } - &Inst::InsertAR { rd, .. } => { - collector.reg_mod(rd); + &Inst::InsertAR { rd, ri, .. } => { + collector.reg_reuse_def(rd, 1); + collector.reg_use(ri); } &Inst::FpuMove32 { rd, rn } | &Inst::FpuMove64 { rd, rn } => { collector.reg_def(rd); collector.reg_use(rn); } - &Inst::FpuCMov32 { rd, rm, .. } | &Inst::FpuCMov64 { rd, rm, .. } => { - collector.reg_mod(rd); + &Inst::FpuCMov32 { rd, ri, rm, .. } | &Inst::FpuCMov64 { rd, ri, rm, .. } => { + collector.reg_reuse_def(rd, 1); + collector.reg_use(ri); collector.reg_use(rm); } &Inst::FpuRR { rd, rn, .. } => { @@ -858,8 +911,9 @@ fn s390x_get_operands VReg>(inst: &Inst, collector: &mut OperandC collector.reg_def(rd); collector.reg_use(rn); } - &Inst::VecCMov { rd, rm, .. } => { - collector.reg_mod(rd); + &Inst::VecCMov { rd, ri, rm, .. 
} => { + collector.reg_reuse_def(rd, 1); + collector.reg_use(ri); collector.reg_use(rm); } &Inst::MovToVec128 { rd, rn, rm } => { @@ -880,8 +934,11 @@ fn s390x_get_operands VReg>(inst: &Inst, collector: &mut OperandC &Inst::VecImmReplicate { rd, .. } => { collector.reg_def(rd); } - &Inst::VecLoadLane { rd, ref mem, .. } => { - collector.reg_mod(rd); + &Inst::VecLoadLane { + rd, ri, ref mem, .. + } => { + collector.reg_reuse_def(rd, 1); + collector.reg_use(ri); memarg_operands(mem, collector); } &Inst::VecLoadLaneUndef { rd, ref mem, .. } => { @@ -900,14 +957,22 @@ fn s390x_get_operands VReg>(inst: &Inst, collector: &mut OperandC collector.reg_use(rd); memarg_operands(mem, collector); } - &Inst::VecLoadLaneRev { rd, ref mem, .. } => { - collector.reg_mod(rd); + &Inst::VecLoadLaneRev { + rd, ri, ref mem, .. + } => { + collector.reg_reuse_def(rd, 1); + collector.reg_use(ri); memarg_operands(mem, collector); } &Inst::VecInsertLane { - rd, rn, lane_reg, .. + rd, + ri, + rn, + lane_reg, + .. } => { - collector.reg_mod(rd); + collector.reg_reuse_def(rd, 1); + collector.reg_use(ri); collector.reg_use(rn); collector.reg_use(lane_reg); } @@ -925,8 +990,9 @@ fn s390x_get_operands VReg>(inst: &Inst, collector: &mut OperandC collector.reg_use(rn); collector.reg_use(lane_reg); } - &Inst::VecInsertLaneImm { rd, .. } => { - collector.reg_def(rd); + &Inst::VecInsertLaneImm { rd, ri, .. } => { + collector.reg_reuse_def(rd, 1); + collector.reg_use(ri); } &Inst::VecReplicateLane { rd, rn, .. 
} => { collector.reg_def(rd); @@ -1470,54 +1536,47 @@ impl Inst { let rd = pretty_print_reg_mod(rd, ri, allocs); format!("{} {}, {}", op, rd, imm.bits) } - &Inst::SMulWide { rn, rm } => { + &Inst::SMulWide { rd, rn, rm } => { let op = "mgrk"; let rn = pretty_print_reg(rn, allocs); let rm = pretty_print_reg(rm, allocs); - let rd = pretty_print_reg(gpr(0), allocs); - let _r1 = allocs.next(gpr(1)); + let rd = pretty_print_regpair(rd.to_regpair(), allocs); format!("{} {}, {}, {}", op, rd, rn, rm) } - &Inst::UMulWide { rn } => { + &Inst::UMulWide { rd, ri, rn } => { let op = "mlgr"; let rn = pretty_print_reg(rn, allocs); - let rd = pretty_print_reg(gpr(0), allocs); - let _r1 = allocs.next(gpr(1)); + let rd = pretty_print_regpair_mod_lo(rd, ri, allocs); format!("{} {}, {}", op, rd, rn) } - &Inst::SDivMod32 { rn, .. } => { + &Inst::SDivMod32 { rd, ri, rn } => { let op = "dsgfr"; let rn = pretty_print_reg(rn, allocs); - let rd = pretty_print_reg(gpr(0), allocs); - let _r1 = allocs.next(gpr(1)); + let rd = pretty_print_regpair_mod_lo(rd, ri, allocs); format!("{} {}, {}", op, rd, rn) } - &Inst::SDivMod64 { rn, .. } => { + &Inst::SDivMod64 { rd, ri, rn } => { let op = "dsgr"; let rn = pretty_print_reg(rn, allocs); - let rd = pretty_print_reg(gpr(0), allocs); - let _r1 = allocs.next(gpr(1)); + let rd = pretty_print_regpair_mod_lo(rd, ri, allocs); format!("{} {}, {}", op, rd, rn) } - &Inst::UDivMod32 { rn, .. } => { + &Inst::UDivMod32 { rd, ri, rn } => { let op = "dlr"; let rn = pretty_print_reg(rn, allocs); - let rd = pretty_print_reg(gpr(0), allocs); - let _r1 = allocs.next(gpr(1)); + let rd = pretty_print_regpair_mod(rd, ri, allocs); format!("{} {}, {}", op, rd, rn) } - &Inst::UDivMod64 { rn, .. 
} => { + &Inst::UDivMod64 { rd, ri, rn } => { let op = "dlgr"; let rn = pretty_print_reg(rn, allocs); - let rd = pretty_print_reg(gpr(0), allocs); - let _r1 = allocs.next(gpr(1)); + let rd = pretty_print_regpair_mod(rd, ri, allocs); format!("{} {}, {}", op, rd, rn) } - &Inst::Flogr { rn } => { + &Inst::Flogr { rd, rn } => { let op = "flogr"; let rn = pretty_print_reg(rn, allocs); - let rd = pretty_print_reg(gpr(0), allocs); - let _r1 = allocs.next(gpr(1)); + let rd = pretty_print_regpair(rd.to_regpair(), allocs); format!("{} {}, {}", op, rd, rn) } &Inst::ShiftRR { @@ -1549,6 +1608,7 @@ impl Inst { &Inst::RxSBG { op, rd, + ri, rn, start_bit, end_bit, @@ -1560,7 +1620,7 @@ impl Inst { RxSBGOp::Or => "rosbg", RxSBGOp::Xor => "rxsbg", }; - let rd = pretty_print_reg(rd.to_reg(), allocs); + let rd = pretty_print_reg_mod(rd, ri, allocs); let rn = pretty_print_reg(rn, allocs); format!( "{} {}, {}, {}, {}, {}", @@ -1769,14 +1829,25 @@ impl Inst { let mem = mem.pretty_print_default(); format!("{}{} {}, {}, {}", mem_str, op, rd, rn, mem) } - &Inst::AtomicCas32 { rd, rn, ref mem } | &Inst::AtomicCas64 { rd, rn, ref mem } => { + &Inst::AtomicCas32 { + rd, + ri, + rn, + ref mem, + } + | &Inst::AtomicCas64 { + rd, + ri, + rn, + ref mem, + } => { let (opcode_rs, opcode_rsy) = match self { &Inst::AtomicCas32 { .. } => (Some("cs"), Some("csy")), &Inst::AtomicCas64 { .. 
} => (None, Some("csg")), _ => unreachable!(), }; - let rd = pretty_print_reg(rd.to_reg(), allocs); + let rd = pretty_print_reg_mod(rd, ri, allocs); let rn = pretty_print_reg(rn, allocs); let mem = mem.with_allocs(allocs); let (mem_str, mem) = mem_finalize_for_show( @@ -2047,8 +2118,8 @@ impl Inst { }; format!("{} {}, {}", op, rd, imm.bits) } - &Inst::Insert64UImm16Shifted { rd, ref imm } => { - let rd = pretty_print_reg(rd.to_reg(), allocs); + &Inst::Insert64UImm16Shifted { rd, ri, ref imm } => { + let rd = pretty_print_reg_mod(rd, ri, allocs); let op = match imm.shift { 0 => "iill", 1 => "iilh", @@ -2058,8 +2129,8 @@ impl Inst { }; format!("{} {}, {}", op, rd, imm.bits) } - &Inst::Insert64UImm32Shifted { rd, ref imm } => { - let rd = pretty_print_reg(rd.to_reg(), allocs); + &Inst::Insert64UImm32Shifted { rd, ri, ref imm } => { + let rd = pretty_print_reg_mod(rd, ri, allocs); let op = match imm.shift { 0 => "iilf", 1 => "iihf", @@ -2067,29 +2138,43 @@ impl Inst { }; format!("{} {}, {}", op, rd, imm.bits) } - &Inst::LoadAR { rd, ar } | &Inst::InsertAR { rd, ar } => { + &Inst::LoadAR { rd, ar } => { let rd = pretty_print_reg(rd.to_reg(), allocs); format!("ear {}, %a{}", rd, ar) } - &Inst::CMov32 { rd, cond, rm } => { - let rd = pretty_print_reg(rd.to_reg(), allocs); + &Inst::InsertAR { rd, ri, ar } => { + let rd = pretty_print_reg_mod(rd, ri, allocs); + format!("ear {}, %a{}", rd, ar) + } + &Inst::CMov32 { rd, cond, ri, rm } => { + let rd = pretty_print_reg_mod(rd, ri, allocs); let rm = pretty_print_reg(rm, allocs); let cond = cond.pretty_print_default(); format!("locr{} {}, {}", cond, rd, rm) } - &Inst::CMov64 { rd, cond, rm } => { - let rd = pretty_print_reg(rd.to_reg(), allocs); + &Inst::CMov64 { rd, cond, ri, rm } => { + let rd = pretty_print_reg_mod(rd, ri, allocs); let rm = pretty_print_reg(rm, allocs); let cond = cond.pretty_print_default(); format!("locgr{} {}, {}", cond, rd, rm) } - &Inst::CMov32SImm16 { rd, cond, ref imm } => { - let rd = 
pretty_print_reg(rd.to_reg(), allocs); + &Inst::CMov32SImm16 { + rd, + cond, + ri, + ref imm, + } => { + let rd = pretty_print_reg_mod(rd, ri, allocs); let cond = cond.pretty_print_default(); format!("lochi{} {}, {}", cond, rd, imm) } - &Inst::CMov64SImm16 { rd, cond, ref imm } => { - let rd = pretty_print_reg(rd.to_reg(), allocs); + &Inst::CMov64SImm16 { + rd, + cond, + ri, + ref imm, + } => { + let rd = pretty_print_reg_mod(rd, ri, allocs); let cond = cond.pretty_print_default(); format!("locghi{} {}, {}", cond, rd, imm) } @@ -2111,8 +2196,9 @@ impl Inst { format!("vlr {}, {}", rd, rn) } } - &Inst::FpuCMov32 { rd, cond, rm } => { + &Inst::FpuCMov32 { rd, cond, ri, rm } => { let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs); + let _ri = allocs.next(ri); let (rm, rm_fpr) = pretty_print_fpr(rm, allocs); if rd_fpr.is_some() && rm_fpr.is_some() { let cond = cond.invert().pretty_print_default(); @@ -2122,8 +2208,9 @@ impl Inst { format!("j{} 10 ; vlr {}, {}", cond, rd, rm) } } - &Inst::FpuCMov64 { rd, cond, rm } => { + &Inst::FpuCMov64 { rd, cond, ri, rm } => { let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs); + let _ri = allocs.next(ri); let (rm, rm_fpr) = pretty_print_fpr(rm, allocs); if rd_fpr.is_some() && rm_fpr.is_some() { let cond = cond.invert().pretty_print_default(); @@ -2753,8 +2840,8 @@ impl Inst { let rn = pretty_print_reg(rn, allocs); format!("vlr {}, {}", rd, rn) } - &Inst::VecCMov { rd, cond, rm } => { - let rd = pretty_print_reg(rd.to_reg(), allocs); + &Inst::VecCMov { rd, cond, ri, rm } => { + let rd = pretty_print_reg_mod(rd, ri, allocs); let rm = pretty_print_reg(rm, allocs); let cond = cond.invert().pretty_print_default(); format!("j{} 10 ; vlr {}, {}", cond, rd, rm) @@ -2830,16 +2917,46 @@ impl Inst { &Inst::VecLoadLane { size, rd, + ri, ref mem, lane_imm, } | &Inst::VecLoadLaneRev { size, rd, + ri, ref mem, lane_imm, + } => { + let opcode_vrx = match (self, size) { + (&Inst::VecLoadLane { .. 
}, 8) => "vleb", + (&Inst::VecLoadLane { .. }, 16) => "vleh", + (&Inst::VecLoadLane { .. }, 32) => "vlef", + (&Inst::VecLoadLane { .. }, 64) => "vleg", + (&Inst::VecLoadLaneRev { .. }, 16) => "vlebrh", + (&Inst::VecLoadLaneRev { .. }, 32) => "vlebrf", + (&Inst::VecLoadLaneRev { .. }, 64) => "vlebrg", + _ => unreachable!(), + }; + + let (rd, _) = pretty_print_fpr(rd.to_reg(), allocs); + let _ri = allocs.next(ri); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: true, + have_d20: false, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: true, + }, + ); + let mem = mem.pretty_print_default(); + format!("{}{} {}, {}, {}", mem_str, opcode_vrx, rd, mem, lane_imm) } - | &Inst::VecLoadLaneUndef { + &Inst::VecLoadLaneUndef { size, rd, ref mem, @@ -2852,13 +2969,6 @@ impl Inst { lane_imm, } => { let (opcode_vrx, opcode_rx, opcode_rxy) = match (self, size) { - (&Inst::VecLoadLane { .. }, 8) => ("vleb", None, None), - (&Inst::VecLoadLane { .. }, 16) => ("vleh", None, None), - (&Inst::VecLoadLane { .. }, 32) => ("vlef", None, None), - (&Inst::VecLoadLane { .. }, 64) => ("vleg", None, None), - (&Inst::VecLoadLaneRev { .. }, 16) => ("vlebrh", None, None), - (&Inst::VecLoadLaneRev { .. }, 32) => ("vlebrf", None, None), - (&Inst::VecLoadLaneRev { .. }, 64) => ("vlebrg", None, None), (&Inst::VecLoadLaneUndef { .. }, 8) => ("vleb", None, None), (&Inst::VecLoadLaneUndef { .. }, 16) => ("vleh", None, None), (&Inst::VecLoadLaneUndef { .. 
}, 32) => ("vlef", Some("le"), Some("ley")), @@ -2969,6 +3079,7 @@ impl Inst { &Inst::VecInsertLane { size, rd, + ri, rn, lane_imm, lane_reg, @@ -2980,7 +3091,7 @@ impl Inst { 64 => "vlvgg", _ => unreachable!(), }; - let rd = pretty_print_reg(rd.to_reg(), allocs); + let rd = pretty_print_reg_mod(rd, ri, allocs); let rn = pretty_print_reg(rn, allocs); let lane_reg = if lane_reg != zero_reg() { format!("({})", pretty_print_reg(lane_reg, allocs)) @@ -3048,6 +3159,7 @@ impl Inst { &Inst::VecInsertLaneImm { size, rd, + ri, imm, lane_imm, } => { @@ -3058,7 +3170,7 @@ impl Inst { 64 => "vleig", _ => unreachable!(), }; - let rd = pretty_print_reg(rd.to_reg(), allocs); + let rd = pretty_print_reg_mod(rd, ri, allocs); format!("{} {}, {}, {}", op, rd, imm, lane_imm) } &Inst::VecReplicateLane { diff --git a/cranelift/codegen/src/isa/s390x/inst/regs.rs b/cranelift/codegen/src/isa/s390x/inst/regs.rs index 16cd7a64d3..a5736c6655 100644 --- a/cranelift/codegen/src/isa/s390x/inst/regs.rs +++ b/cranelift/codegen/src/isa/s390x/inst/regs.rs @@ -5,6 +5,7 @@ use regalloc2::MachineEnv; use regalloc2::PReg; use regalloc2::VReg; +use crate::isa::s390x::inst::{RegPair, WritableRegPair}; use crate::machinst::*; use crate::settings; @@ -178,6 +179,24 @@ pub fn pretty_print_reg(reg: Reg, allocs: &mut AllocationConsumer<'_>) -> String show_reg(reg) } +pub fn pretty_print_regpair(pair: RegPair, allocs: &mut AllocationConsumer<'_>) -> String { + let hi = allocs.next(pair.hi); + let lo = allocs.next(pair.lo); + if let Some(hi_reg) = hi.to_real_reg() { + if let Some(lo_reg) = lo.to_real_reg() { + assert!( + hi_reg.hw_enc() + 1 == lo_reg.hw_enc(), + "Invalid regpair: {} {}", + show_reg(hi), + show_reg(lo) + ); + return show_reg(hi); + } + } + + format!("{}/{}", show_reg(hi), show_reg(lo)) +} + pub fn pretty_print_reg_mod( rd: Writable, ri: Reg, @@ -192,6 +211,48 @@ pub fn pretty_print_reg_mod( } } +pub fn pretty_print_regpair_mod( + rd: WritableRegPair, + ri: RegPair, + allocs: &mut 
AllocationConsumer<'_>, +) -> String { + let rd_hi = allocs.next(rd.hi.to_reg()); + let rd_lo = allocs.next(rd.lo.to_reg()); + let ri_hi = allocs.next(ri.hi); + let ri_lo = allocs.next(ri.lo); + if rd_hi == ri_hi { + show_reg(rd_hi) + } else { + format!( + "{}/{}<-{}/{}", + show_reg(rd_hi), + show_reg(rd_lo), + show_reg(ri_hi), + show_reg(ri_lo) + ) + } +} + +pub fn pretty_print_regpair_mod_lo( + rd: WritableRegPair, + ri: Reg, + allocs: &mut AllocationConsumer<'_>, +) -> String { + let rd_hi = allocs.next(rd.hi.to_reg()); + let rd_lo = allocs.next(rd.lo.to_reg()); + let ri = allocs.next(ri); + if rd_lo == ri { + show_reg(rd_hi) + } else { + format!( + "{}/{}<-_/{}", + show_reg(rd_hi), + show_reg(rd_lo), + show_reg(ri), + ) + } +} + pub fn pretty_print_fpr(reg: Reg, allocs: &mut AllocationConsumer<'_>) -> (String, Option) { let reg = allocs.next(reg); (show_reg(reg), maybe_show_fpr(reg)) diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle index e541efb21f..c66c7dec69 100644 --- a/cranelift/codegen/src/isa/s390x/lower.isle +++ b/cranelift/codegen/src/isa/s390x/lower.isle @@ -528,8 +528,8 @@ ;; Load up the dividend, by loading the input (possibly zero- ;; extended) input into the low half of the register pair, ;; and setting the high half to zero. - (ext_x RegPair (put_in_regpair_lo_zext32 x - (imm_regpair_hi (ty_ext32 ty) 0 (uninitialized_regpair)))) + (ext_x RegPair (regpair (imm (ty_ext32 ty) 0) + (put_in_reg_zext32 x))) ;; Load up the divisor, zero-extended if necessary. (ext_y Reg (put_in_reg_zext32 y)) (ext_ty Type (ty_ext32 ty)) @@ -546,8 +546,8 @@ ;; the high half of the result register pair instead. 
(rule (lower (has_type (fits_in_64 ty) (urem x y))) (let ((DZcheck bool (zero_divisor_check_needed y)) - (ext_x RegPair (put_in_regpair_lo_zext32 x - (imm_regpair_hi ty 0 (uninitialized_regpair)))) + (ext_x RegPair (regpair (imm (ty_ext32 ty) 0) + (put_in_reg_zext32 x))) (ext_y Reg (put_in_reg_zext32 y)) (ext_ty Type (ty_ext32 ty)) (_ Reg (maybe_trap_if_zero_divisor DZcheck ext_ty ext_y)) @@ -600,9 +600,8 @@ ;; explicit division-by-zero and/or integer-overflow checks. (DZcheck bool (zero_divisor_check_needed y)) (OFcheck bool (div_overflow_check_needed y)) - ;; Load up the dividend (sign-extended to 64-bit) into the low - ;; half of a register pair (the high half remains uninitialized). - (ext_x RegPair (put_in_regpair_lo_sext64 x (uninitialized_regpair))) + ;; Load up the dividend (sign-extended to 64-bit) + (ext_x Reg (put_in_reg_sext64 x)) ;; Load up the divisor (sign-extended if necessary). (ext_y Reg (put_in_reg_sext32 y)) (ext_ty Type (ty_ext32 ty)) @@ -621,11 +620,11 @@ (rule (lower (has_type (fits_in_64 ty) (srem x y))) (let ((DZcheck bool (zero_divisor_check_needed y)) (OFcheck bool (div_overflow_check_needed y)) - (ext_x RegPair (put_in_regpair_lo_sext64 x (uninitialized_regpair))) + (ext_x Reg (put_in_reg_sext64 x)) (ext_y Reg (put_in_reg_sext32 y)) (ext_ty Type (ty_ext32 ty)) (_ Reg (maybe_trap_if_zero_divisor DZcheck ext_ty ext_y)) - (checked_x RegPair (maybe_avoid_srem_overflow OFcheck ext_ty ext_x ext_y)) + (checked_x Reg (maybe_avoid_srem_overflow OFcheck ext_ty ext_x ext_y)) (pair RegPair (sdivmod ext_ty checked_x ext_y))) (copy_reg ty (regpair_hi pair)))) @@ -659,12 +658,11 @@ ;; if ((divisor ^ INT_MAX) & dividend) == -1 { trap } ;; ;; instead, using a single conditional trap instruction. 
-(decl maybe_trap_if_sdiv_overflow (bool Type Type RegPair Reg) Reg) +(decl maybe_trap_if_sdiv_overflow (bool Type Type Reg Reg) Reg) (rule (maybe_trap_if_sdiv_overflow $false ext_ty _ _ _) (invalid_reg)) (rule (maybe_trap_if_sdiv_overflow $true ext_ty ty x y) (let ((int_max Reg (imm ext_ty (int_max ty))) - (reg Reg (and_reg ext_ty (xor_reg ext_ty int_max - (regpair_lo x)) y))) + (reg Reg (and_reg ext_ty (xor_reg ext_ty int_max x) y))) (icmps_simm16_and_trap ext_ty reg -1 (intcc_as_cond (IntCC.Equal)) (trap_code_integer_overflow)))) @@ -688,12 +686,12 @@ ;; (We could in fact avoid executing the divide instruction ;; at all in this case, but that would require introducing ;; control flow.) -(decl maybe_avoid_srem_overflow (bool Type RegPair Reg) RegPair) +(decl maybe_avoid_srem_overflow (bool Type Reg Reg) Reg) (rule (maybe_avoid_srem_overflow $false _ x _) x) (rule (maybe_avoid_srem_overflow $true $I32 x _) x) (rule (maybe_avoid_srem_overflow $true $I64 x y) - (cmov_imm_regpair_lo $I64 (icmps_simm16 $I64 y -1) - (intcc_as_cond (IntCC.Equal)) 0 x)) + (with_flags_reg (icmps_simm16 $I64 y -1) + (cmov_imm $I64 (intcc_as_cond (IntCC.Equal)) 0 x))) ;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/codegen/src/isa/s390x/lower/isle.rs b/cranelift/codegen/src/isa/s390x/lower/isle.rs index 58f8bdba3e..16ff77a3dc 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle.rs +++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs @@ -8,7 +8,8 @@ use crate::ir::ExternalName; use crate::isa::s390x::abi::{S390xMachineDeps, REG_SAVE_AREA_SIZE}; use crate::isa::s390x::inst::{ gpr, stack_reg, writable_gpr, zero_reg, CallIndInfo, CallInfo, Cond, Inst as MInst, LaneOrder, - MemArg, MemArgPair, SymbolReloc, UImm12, UImm16Shifted, UImm32Shifted, + MemArg, MemArgPair, RegPair, SymbolReloc, UImm12, UImm16Shifted, UImm32Shifted, + WritableRegPair, }; use crate::isa::s390x::settings::Flags as IsaFlags; use crate::machinst::isle::*; @@ -842,6 
+843,36 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> fn preg_stack(&mut self) -> PReg { stack_reg().to_real_reg().unwrap().into() } + + #[inline] + fn writable_regpair(&mut self, hi: WritableReg, lo: WritableReg) -> WritableRegPair { + WritableRegPair { hi, lo } + } + + #[inline] + fn writable_regpair_hi(&mut self, w: WritableRegPair) -> WritableReg { + w.hi + } + + #[inline] + fn writable_regpair_lo(&mut self, w: WritableRegPair) -> WritableReg { + w.lo + } + + #[inline] + fn regpair(&mut self, hi: Reg, lo: Reg) -> RegPair { + RegPair { hi, lo } + } + + #[inline] + fn regpair_hi(&mut self, w: RegPair) -> Reg { + w.hi + } + + #[inline] + fn regpair_lo(&mut self, w: RegPair) -> Reg { + w.lo + } } /// Lane order to be used for a given calling convention. diff --git a/cranelift/codegen/src/machinst/reg.rs b/cranelift/codegen/src/machinst/reg.rs index 5c4bd494a3..27fb64a7ab 100644 --- a/cranelift/codegen/src/machinst/reg.rs +++ b/cranelift/codegen/src/machinst/reg.rs @@ -391,18 +391,6 @@ impl<'a, F: Fn(VReg) -> VReg> OperandCollector<'a, F> { } } - /// Add a register use+def, or "modify", where the reg must stay - /// in the same register on the input and output side of the - /// instruction. - pub fn reg_mod(&mut self, reg: Writable) { - self.add_operand(Operand::new( - reg.to_reg().into(), - regalloc2::OperandConstraint::Reg, - regalloc2::OperandKind::Mod, - regalloc2::OperandPos::Early, - )); - } - /// Add a register clobber set. This is a set of registers that /// are written by the instruction, so must be reserved (not used) /// for the whole instruction, but are not used afterward. 
diff --git a/cranelift/filetests/filetests/isa/s390x/arithmetic.clif b/cranelift/filetests/filetests/isa/s390x/arithmetic.clif index 0e649d6dfb..103e5bde5b 100644 --- a/cranelift/filetests/filetests/isa/s390x/arithmetic.clif +++ b/cranelift/filetests/filetests/isa/s390x/arithmetic.clif @@ -701,24 +701,26 @@ block0(v0: i128, v1: i128): return v2 } -; stmg %r13, %r15, 104(%r15) +; stmg %r6, %r15, 48(%r15) ; block0: +; lgr %r6, %r2 ; vl %v0, 0(%r3) ; vl %v1, 0(%r4) ; lgdr %r5, %f0 -; vlgvg %r3, %v0, 1 -; lgdr %r4, %f1 -; vlgvg %r1, %v1, 1 -; lgr %r13, %r1 -; mlgr %r0, %r3 -; msgr %r3, %r4 -; lgr %r4, %r13 -; msgr %r5, %r4 -; agr %r3, %r0 -; agr %r5, %r3 -; vlvgp %v5, %r5, %r1 -; vst %v5, 0(%r2) -; lmg %r13, %r15, 104(%r15) +; vlgvg %r4, %v0, 1 +; lgdr %r8, %f1 +; vlgvg %r10, %v1, 1 +; lgr %r3, %r4 +; mlgr %r2, %r10 +; lgr %r9, %r2 +; msgr %r4, %r8 +; msgrkc %r2, %r5, %r10 +; agr %r4, %r9 +; agr %r2, %r4 +; vlvgp %v6, %r2, %r3 +; lgr %r2, %r6 +; vst %v6, 0(%r2) +; lmg %r6, %r15, 48(%r15) ; br %r14 function %imul_i64(i64, i64) -> i64 { @@ -934,9 +936,10 @@ block0(v0: i64, v1: i64): } ; block0: -; lgr %r1, %r3 -; mlgr %r0, %r2 -; lgr %r2, %r0 +; lgr %r4, %r2 +; lgr %r2, %r3 +; lgr %r3, %r4 +; mlgr %r2, %r2 ; br %r14 function %umulhi_i32(i32, i32) -> i32 { @@ -985,8 +988,7 @@ block0(v0: i64, v1: i64): } ; block0: -; mgrk %r0, %r2, %r3 -; lgr %r2, %r0 +; mgrk %r2, %r2, %r3 ; br %r14 function %smulhi_i32(i32, i32) -> i32 { @@ -1035,14 +1037,15 @@ block0(v0: i64, v1: i64): } ; block0: -; lgr %r1, %r2 ; llihf %r4, 2147483647 ; iilf %r4, 4294967295 -; xgrk %r2, %r4, %r1 -; ngrk %r4, %r2, %r3 +; xgrk %r5, %r4, %r2 +; ngrk %r4, %r5, %r3 ; cgite %r4, -1 -; dsgr %r0, %r3 -; lgr %r2, %r1 +; lgr %r4, %r3 +; lgr %r3, %r2 +; dsgr %r2, %r4 +; lgr %r2, %r3 ; br %r14 function %sdiv_i64_imm(i64) -> i64 { @@ -1053,10 +1056,10 @@ block0(v0: i64): } ; block0: -; lgr %r1, %r2 -; lghi %r2, 2 -; dsgr %r0, %r2 -; lgr %r2, %r1 +; lgr %r3, %r2 +; lghi %r5, 2 +; dsgr %r2, %r5 +; lgr %r2, %r3 ; br 
%r14 function %sdiv_i32(i32, i32) -> i32 { @@ -1066,13 +1069,15 @@ block0(v0: i32, v1: i32): } ; block0: -; lgfr %r1, %r2 +; lgr %r5, %r3 +; lgfr %r3, %r2 ; iilf %r4, 2147483647 -; xrk %r5, %r4, %r1 -; nrk %r4, %r5, %r3 -; cite %r4, -1 -; dsgfr %r0, %r3 -; lgr %r2, %r1 +; xrk %r2, %r4, %r3 +; lgr %r4, %r5 +; nrk %r5, %r2, %r4 +; cite %r5, -1 +; dsgfr %r2, %r4 +; lgr %r2, %r3 ; br %r14 function %sdiv_i32_imm(i32) -> i32 { @@ -1083,10 +1088,10 @@ block0(v0: i32): } ; block0: -; lgfr %r1, %r2 -; lhi %r2, 2 -; dsgfr %r0, %r2 -; lgr %r2, %r1 +; lgfr %r3, %r2 +; lhi %r4, 2 +; dsgfr %r2, %r4 +; lgr %r2, %r3 ; br %r14 function %sdiv_i16(i16, i16) -> i16 { @@ -1096,14 +1101,16 @@ block0(v0: i16, v1: i16): } ; block0: -; lghr %r1, %r2 -; lhr %r3, %r3 -; lhi %r5, 32767 -; xrk %r4, %r5, %r1 -; nrk %r5, %r4, %r3 -; cite %r5, -1 -; dsgfr %r0, %r3 -; lgr %r2, %r1 +; lghr %r2, %r2 +; lgr %r5, %r2 +; lhr %r4, %r3 +; lhi %r2, 32767 +; lgr %r3, %r5 +; xrk %r5, %r2, %r3 +; nrk %r2, %r5, %r4 +; cite %r2, -1 +; dsgfr %r2, %r4 +; lgr %r2, %r3 ; br %r14 function %sdiv_i16_imm(i16) -> i16 { @@ -1114,10 +1121,10 @@ block0(v0: i16): } ; block0: -; lghr %r1, %r2 -; lhi %r2, 2 -; dsgfr %r0, %r2 -; lgr %r2, %r1 +; lghr %r3, %r2 +; lhi %r4, 2 +; dsgfr %r2, %r4 +; lgr %r2, %r3 ; br %r14 function %sdiv_i8(i8, i8) -> i8 { @@ -1127,14 +1134,16 @@ block0(v0: i8, v1: i8): } ; block0: -; lgbr %r1, %r2 -; lbr %r3, %r3 -; lhi %r5, 127 -; xrk %r4, %r5, %r1 -; nrk %r5, %r4, %r3 -; cite %r5, -1 -; dsgfr %r0, %r3 -; lgr %r2, %r1 +; lgbr %r2, %r2 +; lgr %r5, %r2 +; lbr %r4, %r3 +; lhi %r2, 127 +; lgr %r3, %r5 +; xrk %r5, %r2, %r3 +; nrk %r2, %r5, %r4 +; cite %r2, -1 +; dsgfr %r2, %r4 +; lgr %r2, %r3 ; br %r14 function %sdiv_i8_imm(i8) -> i8 { @@ -1145,10 +1154,10 @@ block0(v0: i8): } ; block0: -; lgbr %r1, %r2 -; lhi %r2, 2 -; dsgfr %r0, %r2 -; lgr %r2, %r1 +; lgbr %r3, %r2 +; lhi %r4, 2 +; dsgfr %r2, %r4 +; lgr %r2, %r3 ; br %r14 function %udiv_i64(i64, i64) -> i64 { @@ -1158,10 +1167,11 @@ block0(v0: i64, 
v1: i64): } ; block0: -; lghi %r0, 0 -; lgr %r1, %r2 -; dlgr %r0, %r3 -; lgr %r2, %r1 +; lgr %r5, %r3 +; lgr %r3, %r2 +; lghi %r2, 0 +; dlgr %r2, %r5 +; lgr %r2, %r3 ; br %r14 function %udiv_i64_imm(i64) -> i64 { @@ -1172,11 +1182,11 @@ block0(v0: i64): } ; block0: -; lghi %r0, 0 -; lgr %r1, %r2 -; lghi %r3, 2 -; dlgr %r0, %r3 -; lgr %r2, %r1 +; lgr %r3, %r2 +; lghi %r2, 0 +; lghi %r4, 2 +; dlgr %r2, %r4 +; lgr %r2, %r3 ; br %r14 function %udiv_i32(i32, i32) -> i32 { @@ -1186,10 +1196,11 @@ block0(v0: i32, v1: i32): } ; block0: -; lhi %r0, 0 -; lgr %r1, %r2 -; dlr %r0, %r3 -; lgr %r2, %r1 +; lgr %r5, %r3 +; lgr %r3, %r2 +; lhi %r2, 0 +; dlr %r2, %r5 +; lgr %r2, %r3 ; br %r14 function %udiv_i32_imm(i32) -> i32 { @@ -1200,11 +1211,11 @@ block0(v0: i32): } ; block0: -; lhi %r0, 0 -; lgr %r1, %r2 -; lhi %r3, 2 -; dlr %r0, %r3 -; lgr %r2, %r1 +; lgr %r3, %r2 +; lhi %r2, 0 +; lhi %r4, 2 +; dlr %r2, %r4 +; lgr %r2, %r3 ; br %r14 function %udiv_i16(i16, i16) -> i16 { @@ -1214,11 +1225,15 @@ block0(v0: i16, v1: i16): } ; block0: -; lhi %r0, 0 -; llhr %r1, %r2 -; llhr %r4, %r3 -; dlr %r0, %r4 -; lgr %r2, %r1 +; lgr %r4, %r3 +; lhi %r3, 0 +; lgr %r5, %r3 +; llhr %r3, %r2 +; lgr %r2, %r4 +; llhr %r4, %r2 +; lgr %r2, %r5 +; dlr %r2, %r4 +; lgr %r2, %r3 ; br %r14 function %udiv_i16_imm(i16) -> i16 { @@ -1229,11 +1244,13 @@ block0(v0: i16): } ; block0: -; lhi %r0, 0 -; llhr %r1, %r2 -; lhi %r3, 2 -; dlr %r0, %r3 -; lgr %r2, %r1 +; lhi %r5, 0 +; lgr %r4, %r5 +; llhr %r3, %r2 +; lhi %r5, 2 +; lgr %r2, %r4 +; dlr %r2, %r5 +; lgr %r2, %r3 ; br %r14 function %udiv_i8(i8, i8) -> i8 { @@ -1243,11 +1260,15 @@ block0(v0: i8, v1: i8): } ; block0: -; lhi %r0, 0 -; llcr %r1, %r2 -; llcr %r4, %r3 -; dlr %r0, %r4 -; lgr %r2, %r1 +; lgr %r4, %r3 +; lhi %r3, 0 +; lgr %r5, %r3 +; llcr %r3, %r2 +; lgr %r2, %r4 +; llcr %r4, %r2 +; lgr %r2, %r5 +; dlr %r2, %r4 +; lgr %r2, %r3 ; br %r14 function %udiv_i8_imm(i8) -> i8 { @@ -1258,11 +1279,13 @@ block0(v0: i8): } ; block0: -; lhi %r0, 0 -; llcr %r1, 
%r2 -; lhi %r3, 2 -; dlr %r0, %r3 -; lgr %r2, %r1 +; lhi %r5, 0 +; lgr %r4, %r5 +; llcr %r3, %r2 +; lhi %r5, 2 +; lgr %r2, %r4 +; dlr %r2, %r5 +; lgr %r2, %r3 ; br %r14 function %srem_i64(i64, i64) -> i64 { @@ -1272,11 +1295,11 @@ block0(v0: i64, v1: i64): } ; block0: -; lgr %r1, %r2 ; cghi %r3, -1 -; locghie %r1, 0 -; dsgr %r0, %r3 -; lgr %r2, %r0 +; lgr %r4, %r3 +; lgr %r3, %r2 +; locghie %r3, 0 +; dsgr %r2, %r4 ; br %r14 function %srem_i32(i32, i32) -> i32 { @@ -1286,9 +1309,9 @@ block0(v0: i32, v1: i32): } ; block0: -; lgfr %r1, %r2 -; dsgfr %r0, %r3 -; lgr %r2, %r0 +; lgr %r4, %r3 +; lgfr %r3, %r2 +; dsgfr %r2, %r4 ; br %r14 function %srem_i16(i16, i16) -> i16 { @@ -1298,10 +1321,10 @@ block0(v0: i16, v1: i16): } ; block0: -; lghr %r1, %r2 -; lhr %r3, %r3 -; dsgfr %r0, %r3 -; lgr %r2, %r0 +; lgr %r4, %r3 +; lghr %r3, %r2 +; lhr %r4, %r4 +; dsgfr %r2, %r4 ; br %r14 function %srem_i8(i8, i8) -> i8 { @@ -1311,10 +1334,10 @@ block0(v0: i8, v1: i8): } ; block0: -; lgbr %r1, %r2 -; lbr %r3, %r3 -; dsgfr %r0, %r3 -; lgr %r2, %r0 +; lgr %r4, %r3 +; lgbr %r3, %r2 +; lbr %r4, %r4 +; dsgfr %r2, %r4 ; br %r14 function %urem_i64(i64, i64) -> i64 { @@ -1324,10 +1347,10 @@ block0(v0: i64, v1: i64): } ; block0: -; lghi %r0, 0 -; lgr %r1, %r2 -; dlgr %r0, %r3 -; lgr %r2, %r0 +; lgr %r5, %r3 +; lgr %r3, %r2 +; lghi %r2, 0 +; dlgr %r2, %r5 ; br %r14 function %urem_i32(i32, i32) -> i32 { @@ -1337,10 +1360,10 @@ block0(v0: i32, v1: i32): } ; block0: -; lhi %r0, 0 -; lgr %r1, %r2 -; dlr %r0, %r3 -; lgr %r2, %r0 +; lgr %r5, %r3 +; lgr %r3, %r2 +; lhi %r2, 0 +; dlr %r2, %r5 ; br %r14 function %urem_i16(i16, i16) -> i16 { @@ -1350,11 +1373,14 @@ block0(v0: i16, v1: i16): } ; block0: -; lhi %r0, 0 -; llhr %r1, %r2 -; llhr %r4, %r3 -; dlr %r0, %r4 -; lgr %r2, %r0 +; lgr %r4, %r3 +; lhi %r3, 0 +; lgr %r5, %r3 +; llhr %r3, %r2 +; lgr %r2, %r4 +; llhr %r4, %r2 +; lgr %r2, %r5 +; dlr %r2, %r4 ; br %r14 function %urem_i8(i8, i8) -> i8 { @@ -1364,10 +1390,13 @@ block0(v0: i8, v1: i8): } ; 
block0: -; lhi %r0, 0 -; llcr %r1, %r2 -; llcr %r4, %r3 -; dlr %r0, %r4 -; lgr %r2, %r0 +; lgr %r4, %r3 +; lhi %r3, 0 +; lgr %r5, %r3 +; llcr %r3, %r2 +; lgr %r2, %r4 +; llcr %r4, %r2 +; lgr %r2, %r5 +; dlr %r2, %r4 ; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif b/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif index 1bdeea3ac1..6f1341f7a8 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif @@ -12,10 +12,10 @@ block0(v0: i64, v1: i64, v2: i64): } ; block0: -; lrvgr %r2, %r2 -; lrvgr %r5, %r3 -; csg %r2, %r5, 0(%r4) -; lrvgr %r2, %r2 +; lrvgr %r5, %r2 +; lrvgr %r2, %r3 +; csg %r5, %r2, 0(%r4) +; lrvgr %r2, %r5 ; br %r14 function %atomic_cas_i32(i32, i32, i64) -> i32 { @@ -25,10 +25,10 @@ block0(v0: i32, v1: i32, v2: i64): } ; block0: -; lrvr %r2, %r2 -; lrvr %r5, %r3 -; cs %r2, %r5, 0(%r4) -; lrvr %r2, %r2 +; lrvr %r5, %r2 +; lrvr %r2, %r3 +; cs %r5, %r2, 0(%r4) +; lrvr %r2, %r5 ; br %r14 function %atomic_cas_i16(i64, i16, i16, i64) -> i16 { diff --git a/cranelift/filetests/filetests/isa/s390x/bitops.clif b/cranelift/filetests/filetests/isa/s390x/bitops.clif index 2ea8f3b5a5..c8b4a18886 100644 --- a/cranelift/filetests/filetests/isa/s390x/bitops.clif +++ b/cranelift/filetests/filetests/isa/s390x/bitops.clif @@ -187,8 +187,7 @@ block0(v0: i64): } ; block0: -; flogr %r0, %r2 -; lgr %r2, %r0 +; flogr %r2, %r2 ; br %r14 function %clz_i32(i32) -> i32 { @@ -199,8 +198,8 @@ block0(v0: i32): ; block0: ; llgfr %r5, %r2 -; flogr %r0, %r5 -; ahik %r2, %r0, -32 +; flogr %r2, %r5 +; ahi %r2, -32 ; br %r14 function %clz_i16(i16) -> i16 { @@ -211,8 +210,8 @@ block0(v0: i16): ; block0: ; llghr %r5, %r2 -; flogr %r0, %r5 -; ahik %r2, %r0, -48 +; flogr %r2, %r5 +; ahi %r2, -48 ; br %r14 function %clz_i8(i8) -> i8 { @@ -223,8 +222,8 @@ block0(v0: i8): ; block0: ; llgcr %r5, %r2 -; flogr %r0, %r5 -; ahik %r2, %r0, -56 +; flogr %r2, %r5 +; 
ahi %r2, -56 ; br %r14 function %cls_i128(i128) -> i128 { @@ -260,8 +259,8 @@ block0(v0: i64): ; block0: ; srag %r5, %r2, 63 ; xgrk %r3, %r2, %r5 -; flogr %r0, %r3 -; aghik %r2, %r0, -1 +; flogr %r2, %r3 +; aghi %r2, -1 ; br %r14 function %cls_i32(i32) -> i32 { @@ -274,8 +273,8 @@ block0(v0: i32): ; lgfr %r5, %r2 ; srag %r3, %r5, 63 ; xgr %r5, %r3 -; flogr %r0, %r5 -; ahik %r2, %r0, -33 +; flogr %r2, %r5 +; ahi %r2, -33 ; br %r14 function %cls_i16(i16) -> i16 { @@ -288,8 +287,8 @@ block0(v0: i16): ; lghr %r5, %r2 ; srag %r3, %r5, 63 ; xgr %r5, %r3 -; flogr %r0, %r5 -; ahik %r2, %r0, -49 +; flogr %r2, %r5 +; ahi %r2, -49 ; br %r14 function %cls_i8(i8) -> i8 { @@ -302,8 +301,8 @@ block0(v0: i8): ; lgbr %r5, %r2 ; srag %r3, %r5, 63 ; xgr %r5, %r3 -; flogr %r0, %r5 -; ahik %r2, %r0, -57 +; flogr %r2, %r5 +; ahi %r2, -57 ; br %r14 function %ctz_i128(i128) -> i128 { @@ -334,10 +333,11 @@ block0(v0: i64): ; block0: ; lcgr %r5, %r2 ; ngrk %r3, %r2, %r5 -; flogr %r0, %r3 -; locghie %r0, -1 -; lghi %r3, 63 -; sgrk %r2, %r3, %r0 +; flogr %r2, %r3 +; lgr %r4, %r2 +; locghie %r4, -1 +; lghi %r2, 63 +; sgr %r2, %r4 ; br %r14 function %ctz_i32(i32) -> i32 { @@ -351,9 +351,9 @@ block0(v0: i32): ; oihl %r5, 1 ; lcgr %r3, %r5 ; ngr %r5, %r3 -; flogr %r0, %r5 -; lhi %r4, 63 -; srk %r2, %r4, %r0 +; flogr %r2, %r5 +; lhi %r3, 63 +; srk %r2, %r3, %r2 ; br %r14 function %ctz_i16(i16) -> i16 { @@ -367,9 +367,9 @@ block0(v0: i16): ; oilh %r5, 1 ; lcgr %r3, %r5 ; ngr %r5, %r3 -; flogr %r0, %r5 -; lhi %r4, 63 -; srk %r2, %r4, %r0 +; flogr %r2, %r5 +; lhi %r3, 63 +; srk %r2, %r3, %r2 ; br %r14 function %ctz_i8(i8) -> i8 { @@ -383,9 +383,9 @@ block0(v0: i8): ; oill %r5, 256 ; lcgr %r3, %r5 ; ngr %r5, %r3 -; flogr %r0, %r5 -; lhi %r4, 63 -; srk %r2, %r4, %r0 +; flogr %r2, %r5 +; lhi %r3, 63 +; srk %r2, %r3, %r2 ; br %r14 function %popcnt_i128(i128) -> i128 { diff --git a/cranelift/filetests/filetests/isa/s390x/condops.clif b/cranelift/filetests/filetests/isa/s390x/condops.clif index 
d097e44e5d..3a95b79a5b 100644 --- a/cranelift/filetests/filetests/isa/s390x/condops.clif +++ b/cranelift/filetests/filetests/isa/s390x/condops.clif @@ -52,9 +52,9 @@ block0(v0: i32, v1: i8x16, v2: i8x16): } ; block0: -; vlr %v16, %v24 ; clfi %r2, 42 -; vlr %v24, %v25 -; jne 10 ; vlr %v24, %v16 +; vlr %v6, %v25 +; jne 10 ; vlr %v6, %v24 +; vlr %v24, %v6 ; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/conversions.clif b/cranelift/filetests/filetests/isa/s390x/conversions.clif index 2159294f5b..938744a5aa 100644 --- a/cranelift/filetests/filetests/isa/s390x/conversions.clif +++ b/cranelift/filetests/filetests/isa/s390x/conversions.clif @@ -8,9 +8,9 @@ block0(v0: i64): } ; block0: -; vgbm %v4, 0 -; vlvgg %v4, %r3, 1 -; vst %v4, 0(%r2) +; vgbm %v5, 0 +; vlvgg %v5, %r3, 1 +; vst %v5, 0(%r2) ; br %r14 function %uextend_i32_i128(i32) -> i128 { @@ -20,9 +20,9 @@ block0(v0: i32): } ; block0: -; vgbm %v4, 0 -; vlvgf %v4, %r3, 3 -; vst %v4, 0(%r2) +; vgbm %v5, 0 +; vlvgf %v5, %r3, 3 +; vst %v5, 0(%r2) ; br %r14 function %uextend_i32_i64(i32) -> i64 { @@ -42,9 +42,9 @@ block0(v0: i16): } ; block0: -; vgbm %v4, 0 -; vlvgh %v4, %r3, 7 -; vst %v4, 0(%r2) +; vgbm %v5, 0 +; vlvgh %v5, %r3, 7 +; vst %v5, 0(%r2) ; br %r14 function %uextend_i16_i64(i16) -> i64 { @@ -74,9 +74,9 @@ block0(v0: i8): } ; block0: -; vgbm %v4, 0 -; vlvgb %v4, %r3, 15 -; vst %v4, 0(%r2) +; vgbm %v5, 0 +; vlvgb %v5, %r3, 15 +; vst %v5, 0(%r2) ; br %r14 function %uextend_i8_i64(i8) -> i64 { @@ -336,8 +336,8 @@ block0(v0: i128): ; vceqgs %v7, %v0, %v5 ; lghi %r3, 0 ; locghine %r3, -1 -; vlvgp %v20, %r3, %r3 -; vst %v20, 0(%r2) +; vlvgp %v21, %r3, %r3 +; vst %v21, 0(%r2) ; br %r14 function %bmask_i128_i64(i128) -> i64 { @@ -406,8 +406,8 @@ block0(v0: i64, v1: i64): ; cghi %r4, 0 ; lghi %r4, 0 ; locghilh %r4, -1 -; vlvgp %v17, %r4, %r4 -; vst %v17, 0(%r2) +; vlvgp %v18, %r4, %r4 +; vst %v18, 0(%r2) ; br %r14 function %bmask_i64_i64(i64, i64) -> i64 { @@ -468,8 +468,8 @@ block0(v0: i32, v1: i32): ; 
chi %r4, 0 ; lghi %r4, 0 ; locghilh %r4, -1 -; vlvgp %v17, %r4, %r4 -; vst %v17, 0(%r2) +; vlvgp %v18, %r4, %r4 +; vst %v18, 0(%r2) ; br %r14 function %bmask_i32_i64(i32, i32) -> i64 { @@ -531,8 +531,8 @@ block0(v0: i16, v1: i16): ; chi %r3, 0 ; lghi %r3, 0 ; locghilh %r3, -1 -; vlvgp %v19, %r3, %r3 -; vst %v19, 0(%r2) +; vlvgp %v20, %r3, %r3 +; vst %v20, 0(%r2) ; br %r14 function %bmask_i16_i64(i16, i16) -> i64 { @@ -598,8 +598,8 @@ block0(v0: i8, v1: i8): ; chi %r3, 0 ; lghi %r3, 0 ; locghilh %r3, -1 -; vlvgp %v19, %r3, %r3 -; vst %v19, 0(%r2) +; vlvgp %v20, %r3, %r3 +; vst %v20, 0(%r2) ; br %r14 function %bmask_i8_i64(i8, i8) -> i64 { @@ -665,8 +665,8 @@ block0(v0: i8, v1: i8): ; chi %r3, 0 ; lghi %r3, 0 ; locghilh %r3, -1 -; vlvgp %v19, %r3, %r3 -; vst %v19, 0(%r2) +; vlvgp %v20, %r3, %r3 +; vst %v20, 0(%r2) ; br %r14 function %bmask_i8_i64(i8, i8) -> i64 { diff --git a/cranelift/filetests/filetests/isa/s390x/div-traps.clif b/cranelift/filetests/filetests/isa/s390x/div-traps.clif index 1c7a2b4f44..328983590f 100644 --- a/cranelift/filetests/filetests/isa/s390x/div-traps.clif +++ b/cranelift/filetests/filetests/isa/s390x/div-traps.clif @@ -13,15 +13,18 @@ block0(v0: i64, v1: i64): } ; block0: -; lgr %r1, %r2 ; cgite %r3, 0 ; llihf %r4, 2147483647 ; iilf %r4, 4294967295 -; xgr %r4, %r1 -; ngrk %r5, %r4, %r3 -; cgite %r5, -1 -; dsgr %r0, %r3 -; lgr %r2, %r1 +; xgr %r4, %r2 +; lgr %r5, %r2 +; ngr %r4, %r3 +; lgr %r2, %r3 +; cgite %r4, -1 +; lgr %r4, %r2 +; lgr %r3, %r5 +; dsgr %r2, %r4 +; lgr %r2, %r3 ; br %r14 function %sdiv_i64_imm(i64) -> i64 { @@ -32,10 +35,10 @@ block0(v0: i64): } ; block0: -; lgr %r1, %r2 -; lghi %r2, 2 -; dsgr %r0, %r2 -; lgr %r2, %r1 +; lgr %r3, %r2 +; lghi %r5, 2 +; dsgr %r2, %r5 +; lgr %r2, %r3 ; br %r14 function %sdiv_i32(i32, i32) -> i32 { @@ -45,14 +48,18 @@ block0(v0: i32, v1: i32): } ; block0: -; lgfr %r1, %r2 +; lgfr %r2, %r2 ; cite %r3, 0 -; iilf %r4, 2147483647 -; xrk %r2, %r4, %r1 -; nrk %r4, %r2, %r3 -; cite %r4, -1 -; dsgfr 
%r0, %r3 -; lgr %r2, %r1 +; iilf %r5, 2147483647 +; lgr %r4, %r2 +; xr %r5, %r4 +; nr %r5, %r3 +; lgr %r4, %r3 +; cite %r5, -1 +; lgr %r3, %r2 +; lgr %r2, %r4 +; dsgfr %r2, %r2 +; lgr %r2, %r3 ; br %r14 function %sdiv_i32_imm(i32) -> i32 { @@ -63,10 +70,10 @@ block0(v0: i32): } ; block0: -; lgfr %r1, %r2 -; lhi %r2, 2 -; dsgfr %r0, %r2 -; lgr %r2, %r1 +; lgfr %r3, %r2 +; lhi %r4, 2 +; dsgfr %r2, %r4 +; lgr %r2, %r3 ; br %r14 function %sdiv_i16(i16, i16) -> i16 { @@ -76,15 +83,16 @@ block0(v0: i16, v1: i16): } ; block0: -; lghr %r1, %r2 -; lhr %r3, %r3 -; cite %r3, 0 -; lhi %r2, 32767 -; xrk %r4, %r2, %r1 -; nrk %r2, %r4, %r3 -; cite %r2, -1 -; dsgfr %r0, %r3 -; lgr %r2, %r1 +; lghr %r2, %r2 +; lhr %r4, %r3 +; cite %r4, 0 +; lhi %r5, 32767 +; lgr %r3, %r2 +; xr %r5, %r3 +; nr %r5, %r4 +; cite %r5, -1 +; dsgfr %r2, %r4 +; lgr %r2, %r3 ; br %r14 function %sdiv_i16_imm(i16) -> i16 { @@ -95,10 +103,10 @@ block0(v0: i16): } ; block0: -; lghr %r1, %r2 -; lhi %r2, 2 -; dsgfr %r0, %r2 -; lgr %r2, %r1 +; lghr %r3, %r2 +; lhi %r4, 2 +; dsgfr %r2, %r4 +; lgr %r2, %r3 ; br %r14 function %sdiv_i8(i8, i8) -> i8 { @@ -108,15 +116,16 @@ block0(v0: i8, v1: i8): } ; block0: -; lgbr %r1, %r2 -; lbr %r3, %r3 -; cite %r3, 0 -; lhi %r2, 127 -; xrk %r4, %r2, %r1 -; nrk %r2, %r4, %r3 -; cite %r2, -1 -; dsgfr %r0, %r3 -; lgr %r2, %r1 +; lgbr %r2, %r2 +; lbr %r4, %r3 +; cite %r4, 0 +; lhi %r5, 127 +; lgr %r3, %r2 +; xr %r5, %r3 +; nr %r5, %r4 +; cite %r5, -1 +; dsgfr %r2, %r4 +; lgr %r2, %r3 ; br %r14 function %sdiv_i8_imm(i8) -> i8 { @@ -127,10 +136,10 @@ block0(v0: i8): } ; block0: -; lgbr %r1, %r2 -; lhi %r2, 2 -; dsgfr %r0, %r2 -; lgr %r2, %r1 +; lgbr %r3, %r2 +; lhi %r4, 2 +; dsgfr %r2, %r4 +; lgr %r2, %r3 ; br %r14 function %udiv_i64(i64, i64) -> i64 { @@ -140,11 +149,14 @@ block0(v0: i64, v1: i64): } ; block0: -; lghi %r0, 0 -; lgr %r1, %r2 +; lgr %r4, %r2 +; lghi %r2, 0 ; cgite %r3, 0 -; dlgr %r0, %r3 -; lgr %r2, %r1 +; lgr %r5, %r3 +; lgr %r3, %r4 +; lgr %r4, %r5 +; dlgr %r2, %r4 +; 
lgr %r2, %r3 ; br %r14 function %udiv_i64_imm(i64) -> i64 { @@ -155,11 +167,11 @@ block0(v0: i64): } ; block0: -; lghi %r0, 0 -; lgr %r1, %r2 -; lghi %r3, 2 -; dlgr %r0, %r3 -; lgr %r2, %r1 +; lgr %r3, %r2 +; lghi %r2, 0 +; lghi %r4, 2 +; dlgr %r2, %r4 +; lgr %r2, %r3 ; br %r14 function %udiv_i32(i32, i32) -> i32 { @@ -169,11 +181,14 @@ block0(v0: i32, v1: i32): } ; block0: -; lhi %r0, 0 -; lgr %r1, %r2 +; lgr %r4, %r2 +; lhi %r2, 0 ; cite %r3, 0 -; dlr %r0, %r3 -; lgr %r2, %r1 +; lgr %r5, %r3 +; lgr %r3, %r4 +; lgr %r4, %r5 +; dlr %r2, %r4 +; lgr %r2, %r3 ; br %r14 function %udiv_i32_imm(i32) -> i32 { @@ -184,11 +199,11 @@ block0(v0: i32): } ; block0: -; lhi %r0, 0 -; lgr %r1, %r2 -; lhi %r3, 2 -; dlr %r0, %r3 -; lgr %r2, %r1 +; lgr %r3, %r2 +; lhi %r2, 0 +; lhi %r4, 2 +; dlr %r2, %r4 +; lgr %r2, %r3 ; br %r14 function %udiv_i16(i16, i16) -> i16 { @@ -198,12 +213,16 @@ block0(v0: i16, v1: i16): } ; block0: -; lhi %r0, 0 -; llhr %r1, %r2 -; llhr %r4, %r3 +; lgr %r4, %r3 +; lhi %r3, 0 +; lgr %r5, %r3 +; llhr %r3, %r2 +; lgr %r2, %r4 +; llhr %r4, %r2 ; cite %r4, 0 -; dlr %r0, %r4 -; lgr %r2, %r1 +; lgr %r2, %r5 +; dlr %r2, %r4 +; lgr %r2, %r3 ; br %r14 function %udiv_i16_imm(i16) -> i16 { @@ -214,11 +233,13 @@ block0(v0: i16): } ; block0: -; lhi %r0, 0 -; llhr %r1, %r2 -; lhi %r3, 2 -; dlr %r0, %r3 -; lgr %r2, %r1 +; lhi %r5, 0 +; lgr %r4, %r5 +; llhr %r3, %r2 +; lhi %r5, 2 +; lgr %r2, %r4 +; dlr %r2, %r5 +; lgr %r2, %r3 ; br %r14 function %udiv_i8(i8, i8) -> i8 { @@ -228,12 +249,16 @@ block0(v0: i8, v1: i8): } ; block0: -; lhi %r0, 0 -; llcr %r1, %r2 -; llcr %r4, %r3 +; lgr %r4, %r3 +; lhi %r3, 0 +; lgr %r5, %r3 +; llcr %r3, %r2 +; lgr %r2, %r4 +; llcr %r4, %r2 ; cite %r4, 0 -; dlr %r0, %r4 -; lgr %r2, %r1 +; lgr %r2, %r5 +; dlr %r2, %r4 +; lgr %r2, %r3 ; br %r14 function %udiv_i8_imm(i8) -> i8 { @@ -244,11 +269,13 @@ block0(v0: i8): } ; block0: -; lhi %r0, 0 -; llcr %r1, %r2 -; lhi %r3, 2 -; dlr %r0, %r3 -; lgr %r2, %r1 +; lhi %r5, 0 +; lgr %r4, %r5 +; llcr %r3, 
%r2 +; lhi %r5, 2 +; lgr %r2, %r4 +; dlr %r2, %r5 +; lgr %r2, %r3 ; br %r14 function %srem_i64(i64, i64) -> i64 { @@ -258,12 +285,12 @@ block0(v0: i64, v1: i64): } ; block0: -; lgr %r1, %r2 ; cgite %r3, 0 ; cghi %r3, -1 -; locghie %r1, 0 -; dsgr %r0, %r3 -; lgr %r2, %r0 +; lgr %r5, %r3 +; lgr %r3, %r2 +; locghie %r3, 0 +; dsgr %r2, %r5 ; br %r14 function %srem_i32(i32, i32) -> i32 { @@ -273,10 +300,10 @@ block0(v0: i32, v1: i32): } ; block0: -; lgfr %r1, %r2 -; cite %r3, 0 -; dsgfr %r0, %r3 -; lgr %r2, %r0 +; lgr %r4, %r3 +; lgfr %r3, %r2 +; cite %r4, 0 +; dsgfr %r2, %r4 ; br %r14 function %srem_i16(i16, i16) -> i16 { @@ -286,11 +313,12 @@ block0(v0: i16, v1: i16): } ; block0: -; lghr %r1, %r2 -; lhr %r3, %r3 -; cite %r3, 0 -; dsgfr %r0, %r3 -; lgr %r2, %r0 +; lghr %r2, %r2 +; lgr %r5, %r2 +; lhr %r4, %r3 +; cite %r4, 0 +; lgr %r3, %r5 +; dsgfr %r2, %r4 ; br %r14 function %srem_i8(i8, i8) -> i8 { @@ -300,11 +328,12 @@ block0(v0: i8, v1: i8): } ; block0: -; lgbr %r1, %r2 -; lbr %r3, %r3 -; cite %r3, 0 -; dsgfr %r0, %r3 -; lgr %r2, %r0 +; lgbr %r2, %r2 +; lgr %r5, %r2 +; lbr %r4, %r3 +; cite %r4, 0 +; lgr %r3, %r5 +; dsgfr %r2, %r4 ; br %r14 function %urem_i64(i64, i64) -> i64 { @@ -314,11 +343,13 @@ block0(v0: i64, v1: i64): } ; block0: -; lghi %r0, 0 -; lgr %r1, %r2 +; lgr %r4, %r2 +; lghi %r2, 0 ; cgite %r3, 0 -; dlgr %r0, %r3 -; lgr %r2, %r0 +; lgr %r5, %r3 +; lgr %r3, %r4 +; lgr %r4, %r5 +; dlgr %r2, %r4 ; br %r14 function %urem_i32(i32, i32) -> i32 { @@ -328,11 +359,13 @@ block0(v0: i32, v1: i32): } ; block0: -; lhi %r0, 0 -; lgr %r1, %r2 +; lgr %r4, %r2 +; lhi %r2, 0 ; cite %r3, 0 -; dlr %r0, %r3 -; lgr %r2, %r0 +; lgr %r5, %r3 +; lgr %r3, %r4 +; lgr %r4, %r5 +; dlr %r2, %r4 ; br %r14 function %urem_i16(i16, i16) -> i16 { @@ -342,12 +375,15 @@ block0(v0: i16, v1: i16): } ; block0: -; lhi %r0, 0 -; llhr %r1, %r2 -; llhr %r4, %r3 +; lgr %r4, %r3 +; lhi %r3, 0 +; lgr %r5, %r3 +; llhr %r3, %r2 +; lgr %r2, %r4 +; llhr %r4, %r2 ; cite %r4, 0 -; dlr %r0, %r4 -; lgr 
%r2, %r0 +; lgr %r2, %r5 +; dlr %r2, %r4 ; br %r14 function %urem_i8(i8, i8) -> i8 { @@ -357,11 +393,14 @@ block0(v0: i8, v1: i8): } ; block0: -; lhi %r0, 0 -; llcr %r1, %r2 -; llcr %r4, %r3 +; lgr %r4, %r3 +; lhi %r3, 0 +; lgr %r5, %r3 +; llcr %r3, %r2 +; lgr %r2, %r4 +; llcr %r4, %r2 ; cite %r4, 0 -; dlr %r0, %r4 -; lgr %r2, %r0 +; lgr %r2, %r5 +; dlr %r2, %r4 ; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/floating-point-arch13.clif b/cranelift/filetests/filetests/isa/s390x/floating-point-arch13.clif index 81fe456fb9..e94238e6c9 100644 --- a/cranelift/filetests/filetests/isa/s390x/floating-point-arch13.clif +++ b/cranelift/filetests/filetests/isa/s390x/floating-point-arch13.clif @@ -503,9 +503,8 @@ block0(v0: f32): ; block0: ; wclfeb %f3, %f0, 0, 5 -; vlgvf %r3, %v3, 0 -; lgr %r2, %r3 -; clfi %r3, 256 +; vlgvf %r2, %v3, 0 +; clfi %r2, 256 ; lochih %r2, 255 ; br %r14 @@ -517,14 +516,12 @@ block0(v0: f32): ; block0: ; wcfeb %f3, %f0, 0, 5 -; vlgvf %r3, %v3, 0 +; vlgvf %r2, %v3, 0 ; cebr %f0, %f0 -; lochio %r3, 0 -; lgr %r4, %r3 -; chi %r3, 127 -; lochih %r4, 127 -; lgr %r2, %r4 -; chi %r4, -128 +; lochio %r2, 0 +; chi %r2, 127 +; lochih %r2, 127 +; chi %r2, -128 ; lochil %r2, -128 ; br %r14 @@ -536,9 +533,8 @@ block0(v0: f32): ; block0: ; wclfeb %f3, %f0, 0, 5 -; vlgvf %r3, %v3, 0 -; lgr %r2, %r3 -; clfi %r3, 65535 +; vlgvf %r2, %v3, 0 +; clfi %r2, 65535 ; lochih %r2, -1 ; br %r14 @@ -550,14 +546,12 @@ block0(v0: f32): ; block0: ; wcfeb %f3, %f0, 0, 5 -; vlgvf %r3, %v3, 0 +; vlgvf %r2, %v3, 0 ; cebr %f0, %f0 -; lochio %r3, 0 -; lgr %r4, %r3 -; chi %r3, 32767 -; lochih %r4, 32767 -; lgr %r2, %r4 -; chi %r4, -32768 +; lochio %r2, 0 +; chi %r2, 32767 +; lochih %r2, 32767 +; chi %r2, -32768 ; lochil %r2, -32768 ; br %r14 @@ -619,9 +613,8 @@ block0(v0: f64): ; block0: ; wclgdb %f3, %f0, 0, 5 -; lgdr %r3, %f3 -; lgr %r2, %r3 -; clgfi %r3, 256 +; lgdr %r2, %f3 +; clgfi %r2, 256 ; locghih %r2, 255 ; br %r14 @@ -633,14 +626,12 @@ block0(v0: f64): ; block0: ; 
wcgdb %f3, %f0, 0, 5 -; lgdr %r3, %f3 +; lgdr %r2, %f3 ; cdbr %f0, %f0 -; locghio %r3, 0 -; lgr %r4, %r3 -; cghi %r3, 127 -; locghih %r4, 127 -; lgr %r2, %r4 -; cghi %r4, -128 +; locghio %r2, 0 +; cghi %r2, 127 +; locghih %r2, 127 +; cghi %r2, -128 ; locghil %r2, -128 ; br %r14 @@ -652,9 +643,8 @@ block0(v0: f64): ; block0: ; wclgdb %f3, %f0, 0, 5 -; lgdr %r3, %f3 -; lgr %r2, %r3 -; clgfi %r3, 65535 +; lgdr %r2, %f3 +; clgfi %r2, 65535 ; locghih %r2, -1 ; br %r14 @@ -666,14 +656,12 @@ block0(v0: f64): ; block0: ; wcgdb %f3, %f0, 0, 5 -; lgdr %r3, %f3 +; lgdr %r2, %f3 ; cdbr %f0, %f0 -; locghio %r3, 0 -; lgr %r4, %r3 -; cghi %r3, 32767 -; locghih %r4, 32767 -; lgr %r2, %r4 -; cghi %r4, -32768 +; locghio %r2, 0 +; cghi %r2, 32767 +; locghih %r2, 32767 +; cghi %r2, -32768 ; locghil %r2, -32768 ; br %r14 @@ -702,12 +690,12 @@ block0(v0: f64): ; lgdr %r2, %f3 ; cdbr %f0, %f0 ; locghio %r2, 0 -; lgfi %r5, 2147483647 +; lgfi %r4, 2147483647 +; cgr %r2, %r4 +; locgrh %r2, %r4 +; lgfi %r5, -2147483648 ; cgr %r2, %r5 -; locgrh %r2, %r5 -; lgfi %r3, -2147483648 -; cgr %r2, %r3 -; locgrl %r2, %r3 +; locgrl %r2, %r5 ; br %r14 function %fcvt_to_uint_sat_f64_i64(f64) -> i64 { diff --git a/cranelift/filetests/filetests/isa/s390x/floating-point.clif b/cranelift/filetests/filetests/isa/s390x/floating-point.clif index 9e692cbf0a..1bfbf090bc 100644 --- a/cranelift/filetests/filetests/isa/s390x/floating-point.clif +++ b/cranelift/filetests/filetests/isa/s390x/floating-point.clif @@ -927,9 +927,8 @@ block0(v0: f32): ; block0: ; ldebr %f3, %f0 ; wclgdb %f5, %f3, 0, 5 -; lgdr %r5, %f5 -; lgr %r2, %r5 -; clgfi %r5, 256 +; lgdr %r2, %f5 +; clgfi %r2, 256 ; locghih %r2, 255 ; br %r14 @@ -942,14 +941,12 @@ block0(v0: f32): ; block0: ; ldebr %f3, %f0 ; wcgdb %f5, %f3, 0, 5 -; lgdr %r5, %f5 +; lgdr %r2, %f5 ; cebr %f0, %f0 -; locghio %r5, 0 -; lgr %r4, %r5 -; cghi %r5, 127 -; locghih %r4, 127 -; lgr %r2, %r4 -; cghi %r4, -128 +; locghio %r2, 0 +; cghi %r2, 127 +; locghih %r2, 127 +; cghi %r2, 
-128 ; locghil %r2, -128 ; br %r14 @@ -962,9 +959,8 @@ block0(v0: f32): ; block0: ; ldebr %f3, %f0 ; wclgdb %f5, %f3, 0, 5 -; lgdr %r5, %f5 -; lgr %r2, %r5 -; clgfi %r5, 65535 +; lgdr %r2, %f5 +; clgfi %r2, 65535 ; locghih %r2, -1 ; br %r14 @@ -977,14 +973,12 @@ block0(v0: f32): ; block0: ; ldebr %f3, %f0 ; wcgdb %f5, %f3, 0, 5 -; lgdr %r5, %f5 +; lgdr %r2, %f5 ; cebr %f0, %f0 -; locghio %r5, 0 -; lgr %r4, %r5 -; cghi %r5, 32767 -; locghih %r4, 32767 -; lgr %r2, %r4 -; cghi %r4, -32768 +; locghio %r2, 0 +; cghi %r2, 32767 +; locghih %r2, 32767 +; cghi %r2, -32768 ; locghil %r2, -32768 ; br %r14 @@ -1018,9 +1012,9 @@ block0(v0: f32): ; lgfi %r3, 2147483647 ; cgr %r2, %r3 ; locgrh %r2, %r3 -; lgfi %r5, -2147483648 -; cgr %r2, %r5 -; locgrl %r2, %r5 +; lgfi %r3, -2147483648 +; cgr %r2, %r3 +; locgrl %r2, %r3 ; br %r14 function %fcvt_to_uint_sat_f32_i64(f32) -> i64 { @@ -1057,9 +1051,8 @@ block0(v0: f64): ; block0: ; wclgdb %f3, %f0, 0, 5 -; lgdr %r3, %f3 -; lgr %r2, %r3 -; clgfi %r3, 256 +; lgdr %r2, %f3 +; clgfi %r2, 256 ; locghih %r2, 255 ; br %r14 @@ -1071,14 +1064,12 @@ block0(v0: f64): ; block0: ; wcgdb %f3, %f0, 0, 5 -; lgdr %r3, %f3 +; lgdr %r2, %f3 ; cdbr %f0, %f0 -; locghio %r3, 0 -; lgr %r4, %r3 -; cghi %r3, 127 -; locghih %r4, 127 -; lgr %r2, %r4 -; cghi %r4, -128 +; locghio %r2, 0 +; cghi %r2, 127 +; locghih %r2, 127 +; cghi %r2, -128 ; locghil %r2, -128 ; br %r14 @@ -1090,9 +1081,8 @@ block0(v0: f64): ; block0: ; wclgdb %f3, %f0, 0, 5 -; lgdr %r3, %f3 -; lgr %r2, %r3 -; clgfi %r3, 65535 +; lgdr %r2, %f3 +; clgfi %r2, 65535 ; locghih %r2, -1 ; br %r14 @@ -1104,14 +1094,12 @@ block0(v0: f64): ; block0: ; wcgdb %f3, %f0, 0, 5 -; lgdr %r3, %f3 +; lgdr %r2, %f3 ; cdbr %f0, %f0 -; locghio %r3, 0 -; lgr %r4, %r3 -; cghi %r3, 32767 -; locghih %r4, 32767 -; lgr %r2, %r4 -; cghi %r4, -32768 +; locghio %r2, 0 +; cghi %r2, 32767 +; locghih %r2, 32767 +; cghi %r2, -32768 ; locghil %r2, -32768 ; br %r14 @@ -1140,12 +1128,12 @@ block0(v0: f64): ; lgdr %r2, %f3 ; cdbr 
%f0, %f0 ; locghio %r2, 0 -; lgfi %r5, 2147483647 +; lgfi %r4, 2147483647 +; cgr %r2, %r4 +; locgrh %r2, %r4 +; lgfi %r5, -2147483648 ; cgr %r2, %r5 -; locgrh %r2, %r5 -; lgfi %r3, -2147483648 -; cgr %r2, %r3 -; locgrl %r2, %r3 +; locgrl %r2, %r5 ; br %r14 function %fcvt_to_uint_sat_f64_i64(f64) -> i64 { diff --git a/cranelift/filetests/filetests/isa/s390x/icmp.clif b/cranelift/filetests/filetests/isa/s390x/icmp.clif index a1b8146f26..3cff7438f3 100644 --- a/cranelift/filetests/filetests/isa/s390x/icmp.clif +++ b/cranelift/filetests/filetests/isa/s390x/icmp.clif @@ -459,8 +459,8 @@ block0(v0: i64, v1: i64): } ; block0: -; llgh %r3, 0(%r3) -; clgr %r2, %r3 +; llgh %r4, 0(%r3) +; clgr %r2, %r4 ; lhi %r2, 0 ; lochil %r2, 1 ; br %r14 @@ -554,8 +554,8 @@ block0(v0: i32, v1: i64): } ; block0: -; llh %r3, 0(%r3) -; clr %r2, %r3 +; llh %r4, 0(%r3) +; clr %r2, %r4 ; lhi %r2, 0 ; lochil %r2, 1 ; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/tls_elf.clif b/cranelift/filetests/filetests/isa/s390x/tls_elf.clif index e0f598a96f..e23fc14d04 100644 --- a/cranelift/filetests/filetests/isa/s390x/tls_elf.clif +++ b/cranelift/filetests/filetests/isa/s390x/tls_elf.clif @@ -18,9 +18,9 @@ block0(v0: i32): ; bras %r1, 12 ; data userextname0@tlsgd ; lg %r2, 0(%r1) ; brasl %r14, %ElfTlsGetOffset:tls_gdcall:userextname0 ; ear %r3, %a0 -; sllg %r4, %r3, 32 -; ear %r4, %a1 -; agr %r2, %r4 +; sllg %r5, %r3, 32 +; ear %r5, %a1 +; agr %r2, %r5 ; lmg %r12, %r15, 256(%r15) ; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/vec-arithmetic.clif b/cranelift/filetests/filetests/isa/s390x/vec-arithmetic.clif index f029299f7e..c0a9e9aa46 100644 --- a/cranelift/filetests/filetests/isa/s390x/vec-arithmetic.clif +++ b/cranelift/filetests/filetests/isa/s390x/vec-arithmetic.clif @@ -696,14 +696,15 @@ block0(v0: i64x2, v1: i64x2): } ; block0: -; vlgvg %r2, %v24, 0 -; vlgvg %r1, %v25, 0 -; mlgr %r0, %r2 -; lgr %r3, %r0 -; vlgvg %r2, %v24, 1 -; vlgvg %r1, %v25, 1 -; mlgr %r0, %r2 -; 
vlvgp %v24, %r3, %r0 +; vlgvg %r3, %v24, 0 +; vlgvg %r4, %v25, 0 +; mlgr %r2, %r4 +; lgr %r4, %r2 +; vlgvg %r3, %v24, 1 +; vlgvg %r5, %v25, 1 +; mlgr %r2, %r5 +; lgr %r3, %r2 +; vlvgp %v24, %r4, %r3 ; br %r14 function %umulhi_i32x4(i32x4, i32x4) -> i32x4 { @@ -745,13 +746,13 @@ block0(v0: i64x2, v1: i64x2): ; block0: ; vlgvg %r2, %v24, 0 ; vlgvg %r4, %v25, 0 -; mgrk %r0, %r2, %r4 -; lgr %r2, %r0 -; vlgvg %r5, %v24, 1 -; vlgvg %r3, %v25, 1 -; mgrk %r0, %r5, %r3 +; mgrk %r2, %r2, %r4 ; lgr %r5, %r2 -; vlvgp %v24, %r5, %r0 +; vlgvg %r4, %v24, 1 +; vlgvg %r2, %v25, 1 +; mgrk %r2, %r4, %r2 +; lgr %r3, %r2 +; vlvgp %v24, %r5, %r3 ; br %r14 function %smulhi_i32x4(i32x4, i32x4) -> i32x4 {