x64: port select to ISLE (#3682)
* x64: port `select` using an FP comparison to ISLE This change includes quite a few interlocking parts, required mainly by the current x64 conventions in ISLE: - it adds a way to emit a `cmove` with multiple OR-ing conditions; because x64 ISLE cannot currently safely emit a comparison followed by several jumps, this adds `MachInst::CmoveOr` and `MachInst::XmmCmoveOr` macro instructions. Unfortunately, these macro instructions hide the multi-instruction sequence in `lower.isle` - to properly keep track of what instructions consume and produce flags, @cfallin added a way to pass around variants of `ConsumesFlags` and `ProducesFlags`--these changes affect all backends - then, to lower the `fcmp + select` CLIF, this change adds several `cmove*_from_values` helpers that perform all of the awkward conversions between `Value`, `ValueReg`, `Reg`, and `Gpr/Xmm`; one upside is that now these lowerings have much-improved documentation explaining why the various `FloatCC` and `CC` choices are made the way they are. Co-authored-by: Chris Fallin <chris@cfallin.org>
This commit is contained in:
@@ -1461,36 +1461,41 @@
|
|||||||
(writable_reg_to_reg dst)))
|
(writable_reg_to_reg dst)))
|
||||||
|
|
||||||
;; Helper for emitting `adds` instructions.
|
;; Helper for emitting `adds` instructions.
|
||||||
(decl add_with_flags (Type Reg Reg) ProducesFlags)
|
(decl add_with_flags_paired (Type Reg Reg) ProducesFlags)
|
||||||
(rule (add_with_flags ty src1 src2)
|
(rule (add_with_flags_paired ty src1 src2)
|
||||||
(let ((dst WritableReg (temp_writable_reg $I64)))
|
(let ((dst WritableReg (temp_writable_reg $I64)))
|
||||||
(ProducesFlags.ProducesFlags (MInst.AluRRR (ALUOp.AddS) (operand_size ty) dst src1 src2)
|
(ProducesFlags.ProducesFlagsReturnsResultWithConsumer
|
||||||
|
(MInst.AluRRR (ALUOp.AddS) (operand_size ty) dst src1 src2)
|
||||||
(writable_reg_to_reg dst))))
|
(writable_reg_to_reg dst))))
|
||||||
|
|
||||||
;; Helper for emitting `adc` instructions.
|
;; Helper for emitting `adc` instructions.
|
||||||
(decl adc (Type Reg Reg) ConsumesFlags)
|
(decl adc_paired (Type Reg Reg) ConsumesFlags)
|
||||||
(rule (adc ty src1 src2)
|
(rule (adc_paired ty src1 src2)
|
||||||
(let ((dst WritableReg (temp_writable_reg $I64)))
|
(let ((dst WritableReg (temp_writable_reg $I64)))
|
||||||
(ConsumesFlags.ConsumesFlags (MInst.AluRRR (ALUOp.Adc) (operand_size ty) dst src1 src2)
|
(ConsumesFlags.ConsumesFlagsReturnsResultWithProducer
|
||||||
|
(MInst.AluRRR (ALUOp.Adc) (operand_size ty) dst src1 src2)
|
||||||
(writable_reg_to_reg dst))))
|
(writable_reg_to_reg dst))))
|
||||||
|
|
||||||
;; Helper for emitting `subs` instructions.
|
;; Helper for emitting `subs` instructions.
|
||||||
(decl sub_with_flags (Type Reg Reg) ProducesFlags)
|
(decl sub_with_flags_paired (Type Reg Reg) ProducesFlags)
|
||||||
(rule (sub_with_flags ty src1 src2)
|
(rule (sub_with_flags_paired ty src1 src2)
|
||||||
(let ((dst WritableReg (temp_writable_reg $I64)))
|
(let ((dst WritableReg (temp_writable_reg $I64)))
|
||||||
(ProducesFlags.ProducesFlags (MInst.AluRRR (ALUOp.SubS) (operand_size ty) dst src1 src2)
|
(ProducesFlags.ProducesFlagsReturnsResultWithConsumer
|
||||||
|
(MInst.AluRRR (ALUOp.SubS) (operand_size ty) dst src1 src2)
|
||||||
(writable_reg_to_reg dst))))
|
(writable_reg_to_reg dst))))
|
||||||
|
|
||||||
(decl cmp64_imm (Reg Imm12) ProducesFlags)
|
(decl cmp64_imm (Reg Imm12) ProducesFlags)
|
||||||
(rule (cmp64_imm src1 src2)
|
(rule (cmp64_imm src1 src2)
|
||||||
(ProducesFlags.ProducesFlags (MInst.AluRRImm12 (ALUOp.SubS) (OperandSize.Size64) (writable_zero_reg) src1 src2)
|
(ProducesFlags.ProducesFlagsSideEffect
|
||||||
(zero_reg)))
|
(MInst.AluRRImm12 (ALUOp.SubS) (OperandSize.Size64) (writable_zero_reg)
|
||||||
|
src1 src2)))
|
||||||
|
|
||||||
;; Helper for emitting `sbc` instructions.
|
;; Helper for emitting `sbc` instructions.
|
||||||
(decl sbc (Type Reg Reg) ConsumesFlags)
|
(decl sbc_paired (Type Reg Reg) ConsumesFlags)
|
||||||
(rule (sbc ty src1 src2)
|
(rule (sbc_paired ty src1 src2)
|
||||||
(let ((dst WritableReg (temp_writable_reg $I64)))
|
(let ((dst WritableReg (temp_writable_reg $I64)))
|
||||||
(ConsumesFlags.ConsumesFlags (MInst.AluRRR (ALUOp.Sbc) (operand_size ty) dst src1 src2)
|
(ConsumesFlags.ConsumesFlagsReturnsResultWithProducer
|
||||||
|
(MInst.AluRRR (ALUOp.Sbc) (operand_size ty) dst src1 src2)
|
||||||
(writable_reg_to_reg dst))))
|
(writable_reg_to_reg dst))))
|
||||||
|
|
||||||
;; Helper for emitting `MInst.VecMisc` instructions.
|
;; Helper for emitting `MInst.VecMisc` instructions.
|
||||||
@@ -1581,12 +1586,12 @@
|
|||||||
;; which must be paired with `with_flags*` helpers.
|
;; which must be paired with `with_flags*` helpers.
|
||||||
(decl tst_imm (Type Reg ImmLogic) ProducesFlags)
|
(decl tst_imm (Type Reg ImmLogic) ProducesFlags)
|
||||||
(rule (tst_imm ty reg imm)
|
(rule (tst_imm ty reg imm)
|
||||||
(ProducesFlags.ProducesFlags (MInst.AluRRImmLogic (ALUOp.AndS)
|
(ProducesFlags.ProducesFlagsSideEffect
|
||||||
|
(MInst.AluRRImmLogic (ALUOp.AndS)
|
||||||
(operand_size ty)
|
(operand_size ty)
|
||||||
(writable_zero_reg)
|
(writable_zero_reg)
|
||||||
reg
|
reg
|
||||||
imm)
|
imm)))
|
||||||
(invalid_reg)))
|
|
||||||
|
|
||||||
;; Helper for generating a `CSel` instruction.
|
;; Helper for generating a `CSel` instruction.
|
||||||
;;
|
;;
|
||||||
@@ -1596,7 +1601,8 @@
|
|||||||
(decl csel (Cond Reg Reg) ConsumesFlags)
|
(decl csel (Cond Reg Reg) ConsumesFlags)
|
||||||
(rule (csel cond if_true if_false)
|
(rule (csel cond if_true if_false)
|
||||||
(let ((dst WritableReg (temp_writable_reg $I64)))
|
(let ((dst WritableReg (temp_writable_reg $I64)))
|
||||||
(ConsumesFlags.ConsumesFlags (MInst.CSel dst cond if_true if_false)
|
(ConsumesFlags.ConsumesFlagsReturnsReg
|
||||||
|
(MInst.CSel dst cond if_true if_false)
|
||||||
(writable_reg_to_reg dst))))
|
(writable_reg_to_reg dst))))
|
||||||
|
|
||||||
;; Helpers for generating `add` instructions.
|
;; Helpers for generating `add` instructions.
|
||||||
|
|||||||
@@ -91,8 +91,8 @@
|
|||||||
;; the actual addition is `adds` followed by `adc` which comprises the
|
;; the actual addition is `adds` followed by `adc` which comprises the
|
||||||
;; low/high bits of the result
|
;; low/high bits of the result
|
||||||
(with_flags
|
(with_flags
|
||||||
(add_with_flags $I64 x_lo y_lo)
|
(add_with_flags_paired $I64 x_lo y_lo)
|
||||||
(adc $I64 x_hi y_hi))))
|
(adc_paired $I64 x_hi y_hi))))
|
||||||
|
|
||||||
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
@@ -142,8 +142,8 @@
|
|||||||
;; the actual subtraction is `subs` followed by `sbc` which comprises
|
;; the actual subtraction is `subs` followed by `sbc` which comprises
|
||||||
;; the low/high bits of the result
|
;; the low/high bits of the result
|
||||||
(with_flags
|
(with_flags
|
||||||
(sub_with_flags $I64 x_lo y_lo)
|
(sub_with_flags_paired $I64 x_lo y_lo)
|
||||||
(sbc $I64 x_hi y_hi))))
|
(sbc_paired $I64 x_hi y_hi))))
|
||||||
|
|
||||||
;;;; Rules for `uadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `uadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
@@ -708,10 +708,11 @@
|
|||||||
inv_amt))
|
inv_amt))
|
||||||
(maybe_hi Reg (orr $I64 hi_lshift lo_rshift))
|
(maybe_hi Reg (orr $I64 hi_lshift lo_rshift))
|
||||||
)
|
)
|
||||||
(with_flags_2
|
(with_flags
|
||||||
(tst_imm $I64 amt (u64_into_imm_logic $I64 64))
|
(tst_imm $I64 amt (u64_into_imm_logic $I64 64))
|
||||||
|
(consumes_flags_concat
|
||||||
(csel (Cond.Ne) (zero_reg) lo_lshift)
|
(csel (Cond.Ne) (zero_reg) lo_lshift)
|
||||||
(csel (Cond.Ne) lo_lshift maybe_hi))))
|
(csel (Cond.Ne) lo_lshift maybe_hi)))))
|
||||||
|
|
||||||
;; Shift for vector types.
|
;; Shift for vector types.
|
||||||
(rule (lower (has_type (vec128 ty) (ishl x y)))
|
(rule (lower (has_type (vec128 ty) (ishl x y)))
|
||||||
@@ -805,10 +806,11 @@
|
|||||||
inv_amt))
|
inv_amt))
|
||||||
(maybe_lo Reg (orr $I64 lo_rshift hi_lshift))
|
(maybe_lo Reg (orr $I64 lo_rshift hi_lshift))
|
||||||
)
|
)
|
||||||
(with_flags_2
|
(with_flags
|
||||||
(tst_imm $I64 amt (u64_into_imm_logic $I64 64))
|
(tst_imm $I64 amt (u64_into_imm_logic $I64 64))
|
||||||
|
(consumes_flags_concat
|
||||||
(csel (Cond.Ne) hi_rshift maybe_lo)
|
(csel (Cond.Ne) hi_rshift maybe_lo)
|
||||||
(csel (Cond.Ne) (zero_reg) hi_rshift))))
|
(csel (Cond.Ne) (zero_reg) hi_rshift)))))
|
||||||
|
|
||||||
;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
@@ -858,10 +860,11 @@
|
|||||||
(hi_sign Reg (asr_imm $I64 src_hi (imm_shift_from_u8 63)))
|
(hi_sign Reg (asr_imm $I64 src_hi (imm_shift_from_u8 63)))
|
||||||
(maybe_lo Reg (orr $I64 lo_rshift hi_lshift))
|
(maybe_lo Reg (orr $I64 lo_rshift hi_lshift))
|
||||||
)
|
)
|
||||||
(with_flags_2
|
(with_flags
|
||||||
(tst_imm $I64 amt (u64_into_imm_logic $I64 64))
|
(tst_imm $I64 amt (u64_into_imm_logic $I64 64))
|
||||||
|
(consumes_flags_concat
|
||||||
(csel (Cond.Ne) hi_rshift maybe_lo)
|
(csel (Cond.Ne) hi_rshift maybe_lo)
|
||||||
(csel (Cond.Ne) hi_sign hi_rshift))))
|
(csel (Cond.Ne) hi_sign hi_rshift)))))
|
||||||
|
|
||||||
;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
@@ -1123,7 +1126,7 @@
|
|||||||
(sign_eq_eon Reg (eon $I64 hi lo))
|
(sign_eq_eon Reg (eon $I64 hi lo))
|
||||||
(sign_eq Reg (lsr_imm $I64 sign_eq_eon (imm_shift_from_u8 63)))
|
(sign_eq Reg (lsr_imm $I64 sign_eq_eon (imm_shift_from_u8 63)))
|
||||||
(lo_sign_bits Reg (madd64 lo_cls sign_eq sign_eq))
|
(lo_sign_bits Reg (madd64 lo_cls sign_eq sign_eq))
|
||||||
(maybe_lo Reg (with_flags_1
|
(maybe_lo Reg (with_flags_reg
|
||||||
(cmp64_imm hi_cls (u8_into_imm12 63))
|
(cmp64_imm hi_cls (u8_into_imm12 63))
|
||||||
(csel (Cond.Eq) lo_sign_bits (zero_reg))
|
(csel (Cond.Eq) lo_sign_bits (zero_reg))
|
||||||
))
|
))
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
src/clif.isle 9ea75a6f790b5c03
|
src/clif.isle 9ea75a6f790b5c03
|
||||||
src/prelude.isle 73285cd431346d53
|
src/prelude.isle 980b300b3ec3e338
|
||||||
src/isa/aarch64/inst.isle 4c176462894836e5
|
src/isa/aarch64/inst.isle a7f3572a5cf2f201
|
||||||
src/isa/aarch64/lower.isle aff657984bf30686
|
src/isa/aarch64/lower.isle 534c135b5f535f33
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -1427,9 +1427,8 @@
|
|||||||
;; Helper for emitting `MInst.RxSBGTest` instructions.
|
;; Helper for emitting `MInst.RxSBGTest` instructions.
|
||||||
(decl rxsbg_test (RxSBGOp Reg Reg u8 u8 i8) ProducesFlags)
|
(decl rxsbg_test (RxSBGOp Reg Reg u8 u8 i8) ProducesFlags)
|
||||||
(rule (rxsbg_test op src1 src2 start_bit end_bit rotate_amt)
|
(rule (rxsbg_test op src1 src2 start_bit end_bit rotate_amt)
|
||||||
(ProducesFlags.ProducesFlags (MInst.RxSBGTest op src1 src2
|
(ProducesFlags.ProducesFlagsSideEffect
|
||||||
start_bit end_bit rotate_amt)
|
(MInst.RxSBGTest op src1 src2 start_bit end_bit rotate_amt)))
|
||||||
(invalid_reg)))
|
|
||||||
|
|
||||||
;; Helper for emitting `MInst.UnaryRR` instructions.
|
;; Helper for emitting `MInst.UnaryRR` instructions.
|
||||||
(decl unary_rr (Type UnaryOp Reg) Reg)
|
(decl unary_rr (Type UnaryOp Reg) Reg)
|
||||||
@@ -1441,32 +1440,27 @@
|
|||||||
;; Helper for emitting `MInst.CmpRR` instructions.
|
;; Helper for emitting `MInst.CmpRR` instructions.
|
||||||
(decl cmp_rr (CmpOp Reg Reg) ProducesFlags)
|
(decl cmp_rr (CmpOp Reg Reg) ProducesFlags)
|
||||||
(rule (cmp_rr op src1 src2)
|
(rule (cmp_rr op src1 src2)
|
||||||
(ProducesFlags.ProducesFlags (MInst.CmpRR op src1 src2)
|
(ProducesFlags.ProducesFlagsSideEffect (MInst.CmpRR op src1 src2)))
|
||||||
(invalid_reg)))
|
|
||||||
|
|
||||||
;; Helper for emitting `MInst.CmpRX` instructions.
|
;; Helper for emitting `MInst.CmpRX` instructions.
|
||||||
(decl cmp_rx (CmpOp Reg MemArg) ProducesFlags)
|
(decl cmp_rx (CmpOp Reg MemArg) ProducesFlags)
|
||||||
(rule (cmp_rx op src mem)
|
(rule (cmp_rx op src mem)
|
||||||
(ProducesFlags.ProducesFlags (MInst.CmpRX op src mem)
|
(ProducesFlags.ProducesFlagsSideEffect (MInst.CmpRX op src mem)))
|
||||||
(invalid_reg)))
|
|
||||||
|
|
||||||
;; Helper for emitting `MInst.CmpRSImm16` instructions.
|
;; Helper for emitting `MInst.CmpRSImm16` instructions.
|
||||||
(decl cmp_rsimm16 (CmpOp Reg i16) ProducesFlags)
|
(decl cmp_rsimm16 (CmpOp Reg i16) ProducesFlags)
|
||||||
(rule (cmp_rsimm16 op src imm)
|
(rule (cmp_rsimm16 op src imm)
|
||||||
(ProducesFlags.ProducesFlags (MInst.CmpRSImm16 op src imm)
|
(ProducesFlags.ProducesFlagsSideEffect (MInst.CmpRSImm16 op src imm)))
|
||||||
(invalid_reg)))
|
|
||||||
|
|
||||||
;; Helper for emitting `MInst.CmpRSImm32` instructions.
|
;; Helper for emitting `MInst.CmpRSImm32` instructions.
|
||||||
(decl cmp_rsimm32 (CmpOp Reg i32) ProducesFlags)
|
(decl cmp_rsimm32 (CmpOp Reg i32) ProducesFlags)
|
||||||
(rule (cmp_rsimm32 op src imm)
|
(rule (cmp_rsimm32 op src imm)
|
||||||
(ProducesFlags.ProducesFlags (MInst.CmpRSImm32 op src imm)
|
(ProducesFlags.ProducesFlagsSideEffect (MInst.CmpRSImm32 op src imm)))
|
||||||
(invalid_reg)))
|
|
||||||
|
|
||||||
;; Helper for emitting `MInst.CmpRUImm32` instructions.
|
;; Helper for emitting `MInst.CmpRUImm32` instructions.
|
||||||
(decl cmp_ruimm32 (CmpOp Reg u32) ProducesFlags)
|
(decl cmp_ruimm32 (CmpOp Reg u32) ProducesFlags)
|
||||||
(rule (cmp_ruimm32 op src imm)
|
(rule (cmp_ruimm32 op src imm)
|
||||||
(ProducesFlags.ProducesFlags (MInst.CmpRUImm32 op src imm)
|
(ProducesFlags.ProducesFlagsSideEffect (MInst.CmpRUImm32 op src imm)))
|
||||||
(invalid_reg)))
|
|
||||||
|
|
||||||
;; Helper for emitting `MInst.AtomicRmw` instructions.
|
;; Helper for emitting `MInst.AtomicRmw` instructions.
|
||||||
(decl atomic_rmw_impl (Type ALUOp Reg MemArg) Reg)
|
(decl atomic_rmw_impl (Type ALUOp Reg MemArg) Reg)
|
||||||
@@ -1615,20 +1609,18 @@
|
|||||||
;; Helper for emitting `MInst.FpuCmp32` instructions.
|
;; Helper for emitting `MInst.FpuCmp32` instructions.
|
||||||
(decl fpu_cmp32 (Reg Reg) ProducesFlags)
|
(decl fpu_cmp32 (Reg Reg) ProducesFlags)
|
||||||
(rule (fpu_cmp32 src1 src2)
|
(rule (fpu_cmp32 src1 src2)
|
||||||
(ProducesFlags.ProducesFlags (MInst.FpuCmp32 src1 src2)
|
(ProducesFlags.ProducesFlagsSideEffect (MInst.FpuCmp32 src1 src2)))
|
||||||
(invalid_reg)))
|
|
||||||
|
|
||||||
;; Helper for emitting `MInst.FpuCmp64` instructions.
|
;; Helper for emitting `MInst.FpuCmp64` instructions.
|
||||||
(decl fpu_cmp64 (Reg Reg) ProducesFlags)
|
(decl fpu_cmp64 (Reg Reg) ProducesFlags)
|
||||||
(rule (fpu_cmp64 src1 src2)
|
(rule (fpu_cmp64 src1 src2)
|
||||||
(ProducesFlags.ProducesFlags (MInst.FpuCmp64 src1 src2)
|
(ProducesFlags.ProducesFlagsSideEffect (MInst.FpuCmp64 src1 src2)))
|
||||||
(invalid_reg)))
|
|
||||||
|
|
||||||
;; Helper for emitting `MInst.FpuToInt` instructions.
|
;; Helper for emitting `MInst.FpuToInt` instructions.
|
||||||
(decl fpu_to_int (Type FpuToIntOp Reg) ProducesFlags)
|
(decl fpu_to_int (Type FpuToIntOp Reg) ProducesFlags)
|
||||||
(rule (fpu_to_int ty op src)
|
(rule (fpu_to_int ty op src)
|
||||||
(let ((dst WritableReg (temp_writable_reg ty)))
|
(let ((dst WritableReg (temp_writable_reg ty)))
|
||||||
(ProducesFlags.ProducesFlags (MInst.FpuToInt op dst src)
|
(ProducesFlags.ProducesFlagsReturnsReg (MInst.FpuToInt op dst src)
|
||||||
(writable_reg_to_reg dst))))
|
(writable_reg_to_reg dst))))
|
||||||
|
|
||||||
;; Helper for emitting `MInst.IntToFpu` instructions.
|
;; Helper for emitting `MInst.IntToFpu` instructions.
|
||||||
@@ -1751,7 +1743,7 @@
|
|||||||
|
|
||||||
;; Emit a `ProducesFlags` instruction when the flags are not actually needed.
|
;; Emit a `ProducesFlags` instruction when the flags are not actually needed.
|
||||||
(decl drop_flags (ProducesFlags) Reg)
|
(decl drop_flags (ProducesFlags) Reg)
|
||||||
(rule (drop_flags (ProducesFlags.ProducesFlags inst result))
|
(rule (drop_flags (ProducesFlags.ProducesFlagsReturnsReg inst result))
|
||||||
(let ((_ Unit (emit inst)))
|
(let ((_ Unit (emit inst)))
|
||||||
result))
|
result))
|
||||||
|
|
||||||
@@ -1834,10 +1826,10 @@
|
|||||||
|
|
||||||
;; Push instructions to break out of the loop if condition is met.
|
;; Push instructions to break out of the loop if condition is met.
|
||||||
(decl push_break_if (VecMInstBuilder ProducesFlags Cond) Reg)
|
(decl push_break_if (VecMInstBuilder ProducesFlags Cond) Reg)
|
||||||
(rule (push_break_if ib (ProducesFlags.ProducesFlags inst result) cond)
|
(rule (push_break_if ib (ProducesFlags.ProducesFlagsSideEffect inst) cond)
|
||||||
(let ((_1 Unit (inst_builder_push ib inst))
|
(let ((_1 Unit (inst_builder_push ib inst))
|
||||||
(_2 Unit (inst_builder_push ib (MInst.CondBreak cond))))
|
(_2 Unit (inst_builder_push ib (MInst.CondBreak cond))))
|
||||||
result))
|
(invalid_reg)))
|
||||||
|
|
||||||
;; Emit a `MInst.Loop` instruction holding a loop body instruction sequence.
|
;; Emit a `MInst.Loop` instruction holding a loop body instruction sequence.
|
||||||
(decl emit_loop (VecMInstBuilder Cond) Unit)
|
(decl emit_loop (VecMInstBuilder Cond) Unit)
|
||||||
@@ -2215,10 +2207,10 @@
|
|||||||
;; Conditionally move immediate value into destination register. (Non-SSA form.)
|
;; Conditionally move immediate value into destination register. (Non-SSA form.)
|
||||||
(decl emit_cmov_imm (Type WritableReg Cond i16) ConsumesFlags)
|
(decl emit_cmov_imm (Type WritableReg Cond i16) ConsumesFlags)
|
||||||
(rule (emit_cmov_imm (gpr32_ty _ty) dst cond imm)
|
(rule (emit_cmov_imm (gpr32_ty _ty) dst cond imm)
|
||||||
(ConsumesFlags.ConsumesFlags (MInst.CMov32SImm16 dst cond imm)
|
(ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CMov32SImm16 dst cond imm)
|
||||||
(writable_reg_to_reg dst)))
|
(writable_reg_to_reg dst)))
|
||||||
(rule (emit_cmov_imm (gpr64_ty _ty) dst cond imm)
|
(rule (emit_cmov_imm (gpr64_ty _ty) dst cond imm)
|
||||||
(ConsumesFlags.ConsumesFlags (MInst.CMov64SImm16 dst cond imm)
|
(ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CMov64SImm16 dst cond imm)
|
||||||
(writable_reg_to_reg dst)))
|
(writable_reg_to_reg dst)))
|
||||||
|
|
||||||
;; Conditionally select between immediate and source register.
|
;; Conditionally select between immediate and source register.
|
||||||
@@ -2233,7 +2225,7 @@
|
|||||||
(rule (cmov_imm_regpair_lo ty producer cond imm src)
|
(rule (cmov_imm_regpair_lo ty producer cond imm src)
|
||||||
(let ((dst WritableRegPair (copy_writable_regpair src))
|
(let ((dst WritableRegPair (copy_writable_regpair src))
|
||||||
(consumer ConsumesFlags (emit_cmov_imm ty (writable_regpair_lo dst) cond imm))
|
(consumer ConsumesFlags (emit_cmov_imm ty (writable_regpair_lo dst) cond imm))
|
||||||
(_ Reg (with_flags_1 producer consumer)))
|
(_ Reg (with_flags_reg producer consumer)))
|
||||||
(writable_regpair_to_regpair dst)))
|
(writable_regpair_to_regpair dst)))
|
||||||
|
|
||||||
;; Conditionally modify the high word of a register pair.
|
;; Conditionally modify the high word of a register pair.
|
||||||
@@ -2242,22 +2234,22 @@
|
|||||||
(rule (cmov_imm_regpair_hi ty producer cond imm src)
|
(rule (cmov_imm_regpair_hi ty producer cond imm src)
|
||||||
(let ((dst WritableRegPair (copy_writable_regpair src))
|
(let ((dst WritableRegPair (copy_writable_regpair src))
|
||||||
(consumer ConsumesFlags (emit_cmov_imm ty (writable_regpair_hi dst) cond imm))
|
(consumer ConsumesFlags (emit_cmov_imm ty (writable_regpair_hi dst) cond imm))
|
||||||
(_ Reg (with_flags_1 producer consumer)))
|
(_ Reg (with_flags_reg producer consumer)))
|
||||||
(writable_regpair_to_regpair dst)))
|
(writable_regpair_to_regpair dst)))
|
||||||
|
|
||||||
;; Conditionally select between two source registers. (Non-SSA form.)
|
;; Conditionally select between two source registers. (Non-SSA form.)
|
||||||
(decl emit_cmov_reg (Type WritableReg Cond Reg) ConsumesFlags)
|
(decl emit_cmov_reg (Type WritableReg Cond Reg) ConsumesFlags)
|
||||||
(rule (emit_cmov_reg (gpr32_ty _ty) dst cond src)
|
(rule (emit_cmov_reg (gpr32_ty _ty) dst cond src)
|
||||||
(ConsumesFlags.ConsumesFlags (MInst.CMov32 dst cond src)
|
(ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CMov32 dst cond src)
|
||||||
(writable_reg_to_reg dst)))
|
(writable_reg_to_reg dst)))
|
||||||
(rule (emit_cmov_reg (gpr64_ty _ty) dst cond src)
|
(rule (emit_cmov_reg (gpr64_ty _ty) dst cond src)
|
||||||
(ConsumesFlags.ConsumesFlags (MInst.CMov64 dst cond src)
|
(ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CMov64 dst cond src)
|
||||||
(writable_reg_to_reg dst)))
|
(writable_reg_to_reg dst)))
|
||||||
(rule (emit_cmov_reg $F32 dst cond src)
|
(rule (emit_cmov_reg $F32 dst cond src)
|
||||||
(ConsumesFlags.ConsumesFlags (MInst.FpuCMov32 dst cond src)
|
(ConsumesFlags.ConsumesFlagsReturnsReg (MInst.FpuCMov32 dst cond src)
|
||||||
(writable_reg_to_reg dst)))
|
(writable_reg_to_reg dst)))
|
||||||
(rule (emit_cmov_reg $F64 dst cond src)
|
(rule (emit_cmov_reg $F64 dst cond src)
|
||||||
(ConsumesFlags.ConsumesFlags (MInst.FpuCMov64 dst cond src)
|
(ConsumesFlags.ConsumesFlagsReturnsReg (MInst.FpuCMov64 dst cond src)
|
||||||
(writable_reg_to_reg dst)))
|
(writable_reg_to_reg dst)))
|
||||||
|
|
||||||
;; Conditionally select between two source registers.
|
;; Conditionally select between two source registers.
|
||||||
@@ -2270,10 +2262,14 @@
|
|||||||
;; Helpers for generating conditional traps ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;; Helpers for generating conditional traps ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
(decl trap_if (ProducesFlags Cond TrapCode) Reg)
|
(decl trap_if (ProducesFlags Cond TrapCode) Reg)
|
||||||
(rule (trap_if (ProducesFlags.ProducesFlags inst result) cond trap_code)
|
(rule (trap_if (ProducesFlags.ProducesFlagsReturnsReg inst result) cond trap_code)
|
||||||
(let ((_1 Unit (emit inst))
|
(let ((_1 Unit (emit inst))
|
||||||
(_2 Unit (emit (MInst.TrapIf cond trap_code))))
|
(_2 Unit (emit (MInst.TrapIf cond trap_code))))
|
||||||
result))
|
result))
|
||||||
|
(rule (trap_if (ProducesFlags.ProducesFlagsSideEffect inst) cond trap_code)
|
||||||
|
(let ((_1 Unit (emit inst))
|
||||||
|
(_2 Unit (emit (MInst.TrapIf cond trap_code))))
|
||||||
|
(invalid_reg)))
|
||||||
|
|
||||||
(decl icmps_reg_and_trap (Type Reg Reg Cond TrapCode) Reg)
|
(decl icmps_reg_and_trap (Type Reg Reg Cond TrapCode) Reg)
|
||||||
(rule (icmps_reg_and_trap ty src1 src2 cond trap_code)
|
(rule (icmps_reg_and_trap ty src1 src2 cond trap_code)
|
||||||
@@ -2332,9 +2328,9 @@
|
|||||||
;; instruction in between the producer and consumer. (This use is only valid
|
;; instruction in between the producer and consumer. (This use is only valid
|
||||||
;; if that unrelated instruction does not modify the condition code.)
|
;; if that unrelated instruction does not modify the condition code.)
|
||||||
(decl emit_producer (ProducesFlags) Unit)
|
(decl emit_producer (ProducesFlags) Unit)
|
||||||
(rule (emit_producer (ProducesFlags.ProducesFlags insn _)) (emit insn))
|
(rule (emit_producer (ProducesFlags.ProducesFlagsSideEffect insn)) (emit insn))
|
||||||
(decl emit_consumer (ConsumesFlags) Unit)
|
(decl emit_consumer (ConsumesFlags) Unit)
|
||||||
(rule (emit_consumer (ConsumesFlags.ConsumesFlags insn _)) (emit insn))
|
(rule (emit_consumer (ConsumesFlags.ConsumesFlagsReturnsReg insn _)) (emit insn))
|
||||||
|
|
||||||
;; Use a boolean condition to select between two registers.
|
;; Use a boolean condition to select between two registers.
|
||||||
(decl select_bool_reg (Type ProducesBool Reg Reg) Reg)
|
(decl select_bool_reg (Type ProducesBool Reg Reg) Reg)
|
||||||
|
|||||||
@@ -1102,7 +1102,7 @@
|
|||||||
;; result expected by Cranelift semantics. The only exception
|
;; result expected by Cranelift semantics. The only exception
|
||||||
;; is the case where the input was a NaN. We explicitly check
|
;; is the case where the input was a NaN. We explicitly check
|
||||||
;; for that and force the output to 0 in that case.
|
;; for that and force the output to 0 in that case.
|
||||||
(sat Reg (with_flags_1 (fcmp_reg src_ty src src)
|
(sat Reg (with_flags_reg (fcmp_reg src_ty src src)
|
||||||
(cmov_imm dst_ty
|
(cmov_imm dst_ty
|
||||||
(floatcc_as_cond (FloatCC.Unordered)) 0 dst))))
|
(floatcc_as_cond (FloatCC.Unordered)) 0 dst))))
|
||||||
(value_reg sat)))
|
(value_reg sat)))
|
||||||
@@ -1119,7 +1119,7 @@
|
|||||||
;; result expected by Cranelift semantics. The only exception
|
;; result expected by Cranelift semantics. The only exception
|
||||||
;; is the case where the input was a NaN. We explicitly check
|
;; is the case where the input was a NaN. We explicitly check
|
||||||
;; for that and force the output to 0 in that case.
|
;; for that and force the output to 0 in that case.
|
||||||
(sat Reg (with_flags_1 (fcmp_reg src_ty src src)
|
(sat Reg (with_flags_reg (fcmp_reg src_ty src src)
|
||||||
(cmov_imm dst_ty
|
(cmov_imm dst_ty
|
||||||
(floatcc_as_cond (FloatCC.Unordered)) 0 dst))))
|
(floatcc_as_cond (FloatCC.Unordered)) 0 dst))))
|
||||||
(value_reg sat)))
|
(value_reg sat)))
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
src/clif.isle 9ea75a6f790b5c03
|
src/clif.isle 9ea75a6f790b5c03
|
||||||
src/prelude.isle 73285cd431346d53
|
src/prelude.isle 980b300b3ec3e338
|
||||||
src/isa/s390x/inst.isle 87a2d7c0c69d0324
|
src/isa/s390x/inst.isle b0f53fcf0cdadde1
|
||||||
src/isa/s390x/lower.isle 3c124e26bc411983
|
src/isa/s390x/lower.isle 59264a7442cf6e1c
|
||||||
|
|||||||
1688
cranelift/codegen/src/isa/s390x/lower/isle/generated_code.rs
generated
1688
cranelift/codegen/src/isa/s390x/lower/isle/generated_code.rs
generated
File diff suppressed because it is too large
Load Diff
@@ -149,15 +149,41 @@
|
|||||||
(Setcc (cc CC)
|
(Setcc (cc CC)
|
||||||
(dst WritableGpr))
|
(dst WritableGpr))
|
||||||
|
|
||||||
;; Integer conditional move.
|
;; =========================================
|
||||||
;;
|
;; Conditional moves.
|
||||||
;; Overwrites the destination register.
|
|
||||||
|
;; GPR conditional move; overwrites the destination register.
|
||||||
(Cmove (size OperandSize)
|
(Cmove (size OperandSize)
|
||||||
(cc CC)
|
(cc CC)
|
||||||
(consequent GprMem)
|
(consequent GprMem)
|
||||||
(alternative Gpr)
|
(alternative Gpr)
|
||||||
(dst WritableGpr))
|
(dst WritableGpr))
|
||||||
|
|
||||||
|
;; GPR conditional move with the `OR` of two conditions; overwrites
|
||||||
|
;; the destination register.
|
||||||
|
(CmoveOr (size OperandSize)
|
||||||
|
(cc1 CC)
|
||||||
|
(cc2 CC)
|
||||||
|
(consequent GprMem)
|
||||||
|
(alternative Gpr)
|
||||||
|
(dst WritableGpr))
|
||||||
|
|
||||||
|
;; XMM conditional move; overwrites the destination register.
|
||||||
|
(XmmCmove (size OperandSize)
|
||||||
|
(cc CC)
|
||||||
|
(consequent XmmMem)
|
||||||
|
(alternative Xmm)
|
||||||
|
(dst WritableXmm))
|
||||||
|
|
||||||
|
;; XMM conditional move with the `OR` of two conditions; overwrites
|
||||||
|
;; the destination register.
|
||||||
|
(XmmCmoveOr (size OperandSize)
|
||||||
|
(cc1 CC)
|
||||||
|
(cc2 CC)
|
||||||
|
(consequent XmmMem)
|
||||||
|
(alternative Xmm)
|
||||||
|
(dst WritableXmm))
|
||||||
|
|
||||||
;; =========================================
|
;; =========================================
|
||||||
;; Stack manipulation.
|
;; Stack manipulation.
|
||||||
|
|
||||||
@@ -275,14 +301,6 @@
|
|||||||
(lhs Xmm)
|
(lhs Xmm)
|
||||||
(rhs_dst WritableXmm))
|
(rhs_dst WritableXmm))
|
||||||
|
|
||||||
;; XMM (scalar) conditional move.
|
|
||||||
;;
|
|
||||||
;; Overwrites the destination register if cc is set.
|
|
||||||
(XmmCmove (size OperandSize)
|
|
||||||
(cc CC)
|
|
||||||
(src XmmMem)
|
|
||||||
(dst WritableXmm))
|
|
||||||
|
|
||||||
;; Float comparisons/tests: cmp (b w l q) (reg addr imm) reg.
|
;; Float comparisons/tests: cmp (b w l q) (reg addr imm) reg.
|
||||||
(XmmCmpRmR (op SseOpcode)
|
(XmmCmpRmR (op SseOpcode)
|
||||||
(src XmmMem)
|
(src XmmMem)
|
||||||
@@ -1027,6 +1045,17 @@
|
|||||||
(decl xmm0 () WritableXmm)
|
(decl xmm0 () WritableXmm)
|
||||||
(extern constructor xmm0 xmm0)
|
(extern constructor xmm0 xmm0)
|
||||||
|
|
||||||
|
;;;; Helpers for determining the register class of a value type ;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(decl is_xmm_type (Type) Type)
|
||||||
|
(extern extractor is_xmm_type is_xmm_type)
|
||||||
|
|
||||||
|
(decl is_gpr_type (Type) Type)
|
||||||
|
(extern extractor is_gpr_type is_gpr_type)
|
||||||
|
|
||||||
|
(decl is_single_register_type (Type) Type)
|
||||||
|
(extern extractor is_single_register_type is_single_register_type)
|
||||||
|
|
||||||
;;;; Helpers for Querying Enabled ISA Extensions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Helpers for Querying Enabled ISA Extensions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
(decl avx512vl_enabled () Type)
|
(decl avx512vl_enabled () Type)
|
||||||
@@ -1256,10 +1285,11 @@
|
|||||||
src2))
|
src2))
|
||||||
|
|
||||||
;; Helper for creating `add` instructions whose flags are also used.
|
;; Helper for creating `add` instructions whose flags are also used.
|
||||||
(decl add_with_flags (Type Gpr GprMemImm) ProducesFlags)
|
(decl add_with_flags_paired (Type Gpr GprMemImm) ProducesFlags)
|
||||||
(rule (add_with_flags ty src1 src2)
|
(rule (add_with_flags_paired ty src1 src2)
|
||||||
(let ((dst WritableGpr (temp_writable_gpr)))
|
(let ((dst WritableGpr (temp_writable_gpr)))
|
||||||
(ProducesFlags.ProducesFlags (MInst.AluRmiR (operand_size_of_type_32_64 ty)
|
(ProducesFlags.ProducesFlagsReturnsResultWithConsumer
|
||||||
|
(MInst.AluRmiR (operand_size_of_type_32_64 ty)
|
||||||
(AluRmiROpcode.Add)
|
(AluRmiROpcode.Add)
|
||||||
src1
|
src1
|
||||||
src2
|
src2
|
||||||
@@ -1267,10 +1297,11 @@
|
|||||||
(gpr_to_reg (writable_gpr_to_gpr dst)))))
|
(gpr_to_reg (writable_gpr_to_gpr dst)))))
|
||||||
|
|
||||||
;; Helper for creating `adc` instructions.
|
;; Helper for creating `adc` instructions.
|
||||||
(decl adc (Type Gpr GprMemImm) ConsumesFlags)
|
(decl adc_paired (Type Gpr GprMemImm) ConsumesFlags)
|
||||||
(rule (adc ty src1 src2)
|
(rule (adc_paired ty src1 src2)
|
||||||
(let ((dst WritableGpr (temp_writable_gpr)))
|
(let ((dst WritableGpr (temp_writable_gpr)))
|
||||||
(ConsumesFlags.ConsumesFlags (MInst.AluRmiR (operand_size_of_type_32_64 ty)
|
(ConsumesFlags.ConsumesFlagsReturnsResultWithProducer
|
||||||
|
(MInst.AluRmiR (operand_size_of_type_32_64 ty)
|
||||||
(AluRmiROpcode.Adc)
|
(AluRmiROpcode.Adc)
|
||||||
src1
|
src1
|
||||||
src2
|
src2
|
||||||
@@ -1286,10 +1317,11 @@
|
|||||||
src2))
|
src2))
|
||||||
|
|
||||||
;; Helper for creating `sub` instructions whose flags are also used.
|
;; Helper for creating `sub` instructions whose flags are also used.
|
||||||
(decl sub_with_flags (Type Gpr GprMemImm) ProducesFlags)
|
(decl sub_with_flags_paired (Type Gpr GprMemImm) ProducesFlags)
|
||||||
(rule (sub_with_flags ty src1 src2)
|
(rule (sub_with_flags_paired ty src1 src2)
|
||||||
(let ((dst WritableGpr (temp_writable_gpr)))
|
(let ((dst WritableGpr (temp_writable_gpr)))
|
||||||
(ProducesFlags.ProducesFlags (MInst.AluRmiR (operand_size_of_type_32_64 ty)
|
(ProducesFlags.ProducesFlagsReturnsResultWithConsumer
|
||||||
|
(MInst.AluRmiR (operand_size_of_type_32_64 ty)
|
||||||
(AluRmiROpcode.Sub)
|
(AluRmiROpcode.Sub)
|
||||||
src1
|
src1
|
||||||
src2
|
src2
|
||||||
@@ -1297,10 +1329,11 @@
|
|||||||
(gpr_to_reg (writable_gpr_to_gpr dst)))))
|
(gpr_to_reg (writable_gpr_to_gpr dst)))))
|
||||||
|
|
||||||
;; Helper for creating `sbb` instructions.
|
;; Helper for creating `sbb` instructions.
|
||||||
(decl sbb (Type Gpr GprMemImm) ConsumesFlags)
|
(decl sbb_paired (Type Gpr GprMemImm) ConsumesFlags)
|
||||||
(rule (sbb ty src1 src2)
|
(rule (sbb_paired ty src1 src2)
|
||||||
(let ((dst WritableGpr (temp_writable_gpr)))
|
(let ((dst WritableGpr (temp_writable_gpr)))
|
||||||
(ConsumesFlags.ConsumesFlags (MInst.AluRmiR (operand_size_of_type_32_64 ty)
|
(ConsumesFlags.ConsumesFlagsReturnsResultWithProducer
|
||||||
|
(MInst.AluRmiR (operand_size_of_type_32_64 ty)
|
||||||
(AluRmiROpcode.Sbb)
|
(AluRmiROpcode.Sbb)
|
||||||
src1
|
src1
|
||||||
src2
|
src2
|
||||||
@@ -1456,30 +1489,129 @@
|
|||||||
;; Helper for creating `MInst.CmpRmiR` instructions.
|
;; Helper for creating `MInst.CmpRmiR` instructions.
|
||||||
(decl cmp_rmi_r (OperandSize CmpOpcode GprMemImm Gpr) ProducesFlags)
|
(decl cmp_rmi_r (OperandSize CmpOpcode GprMemImm Gpr) ProducesFlags)
|
||||||
(rule (cmp_rmi_r size opcode src1 src2)
|
(rule (cmp_rmi_r size opcode src1 src2)
|
||||||
(ProducesFlags.ProducesFlags (MInst.CmpRmiR size
|
(ProducesFlags.ProducesFlagsSideEffect
|
||||||
|
(MInst.CmpRmiR size
|
||||||
opcode
|
opcode
|
||||||
src1
|
src1
|
||||||
src2)
|
src2)))
|
||||||
(invalid_reg)))
|
|
||||||
|
|
||||||
;; Helper for creating `cmp` instructions.
|
;; Helper for creating `cmp` instructions.
|
||||||
(decl cmp (OperandSize GprMemImm Gpr) ProducesFlags)
|
(decl cmp (OperandSize GprMemImm Gpr) ProducesFlags)
|
||||||
(rule (cmp size src1 src2)
|
(rule (cmp size src1 src2)
|
||||||
(cmp_rmi_r size (CmpOpcode.Cmp) src1 src2))
|
(cmp_rmi_r size (CmpOpcode.Cmp) src1 src2))
|
||||||
|
|
||||||
|
;; Helper for creating `MInst.XmmCmpRmR` instructions.
|
||||||
|
(decl xmm_cmp_rm_r (SseOpcode XmmMem Xmm) ProducesFlags)
|
||||||
|
(rule (xmm_cmp_rm_r opcode src1 src2)
|
||||||
|
(ProducesFlags.ProducesFlagsSideEffect
|
||||||
|
(MInst.XmmCmpRmR opcode src1 src2)))
|
||||||
|
|
||||||
|
;; Helper for creating `fpcmp` instructions (cannot use `fcmp` as it is taken by
|
||||||
|
;; `clif.isle`).
|
||||||
|
(decl fpcmp (Value Value) ProducesFlags)
|
||||||
|
(rule (fpcmp src1 @ (value_type $F32) src2)
|
||||||
|
(xmm_cmp_rm_r (SseOpcode.Ucomiss) (put_in_xmm_mem src1) (put_in_xmm src2)))
|
||||||
|
(rule (fpcmp src1 @ (value_type $F64) src2)
|
||||||
|
(xmm_cmp_rm_r (SseOpcode.Ucomisd) (put_in_xmm_mem src1) (put_in_xmm src2)))
|
||||||
|
|
||||||
;; Helper for creating `test` instructions.
|
;; Helper for creating `test` instructions.
|
||||||
(decl test (OperandSize GprMemImm Gpr) ProducesFlags)
|
(decl test (OperandSize GprMemImm Gpr) ProducesFlags)
|
||||||
(rule (test size src1 src2)
|
(rule (test size src1 src2)
|
||||||
(cmp_rmi_r size (CmpOpcode.Test) src1 src2))
|
(cmp_rmi_r size (CmpOpcode.Test) src1 src2))
|
||||||
|
|
||||||
;; Helper for creating `MInst.Cmove` instructions.
|
;; Helper for creating `cmove` instructions. Note that these instructions do not
|
||||||
|
;; always result in a single emitted x86 instruction; e.g., XmmCmove uses jumps
|
||||||
|
;; to conditionally move the selected value into an XMM register.
|
||||||
(decl cmove (Type CC GprMem Gpr) ConsumesFlags)
|
(decl cmove (Type CC GprMem Gpr) ConsumesFlags)
|
||||||
(rule (cmove ty cc consequent alternative)
|
(rule (cmove ty cc consequent alternative)
|
||||||
(let ((dst WritableGpr (temp_writable_gpr))
|
(let ((dst WritableGpr (temp_writable_gpr))
|
||||||
(size OperandSize (operand_size_of_type_32_64 ty)))
|
(size OperandSize (operand_size_of_type_32_64 ty)))
|
||||||
(ConsumesFlags.ConsumesFlags (MInst.Cmove size cc consequent alternative dst)
|
(ConsumesFlags.ConsumesFlagsReturnsReg
|
||||||
|
(MInst.Cmove size cc consequent alternative dst)
|
||||||
(gpr_to_reg (writable_gpr_to_gpr dst)))))
|
(gpr_to_reg (writable_gpr_to_gpr dst)))))
|
||||||
|
|
||||||
|
(decl cmove_xmm (Type CC XmmMem Xmm) ConsumesFlags)
|
||||||
|
(rule (cmove_xmm ty cc consequent alternative)
|
||||||
|
(let ((dst WritableXmm (temp_writable_xmm))
|
||||||
|
(size OperandSize (operand_size_of_type_32_64 ty)))
|
||||||
|
(ConsumesFlags.ConsumesFlagsReturnsReg
|
||||||
|
(MInst.XmmCmove size cc consequent alternative dst)
|
||||||
|
(xmm_to_reg (writable_xmm_to_xmm dst)))))
|
||||||
|
|
||||||
|
;; Helper for creating `cmove` instructions directly from values. This allows us
|
||||||
|
;; to special-case the `I128` types and default to the `cmove` helper otherwise.
|
||||||
|
;; It also eliminates some `put_in_reg*` boilerplate in the lowering ISLE code.
|
||||||
|
(decl cmove_from_values (Type CC Value Value) ConsumesFlags)
|
||||||
|
(rule (cmove_from_values $I128 cc consequent alternative)
|
||||||
|
(let ((cons ValueRegs (put_in_regs consequent))
|
||||||
|
(alt ValueRegs (put_in_regs alternative))
|
||||||
|
(dst1 WritableGpr (temp_writable_gpr))
|
||||||
|
(dst2 WritableGpr (temp_writable_gpr))
|
||||||
|
(size OperandSize (OperandSize.Size64))
|
||||||
|
(lower_cmove MInst (MInst.Cmove
|
||||||
|
size cc
|
||||||
|
(gpr_to_gpr_mem (value_regs_get_gpr cons 0))
|
||||||
|
(value_regs_get_gpr alt 0) dst1))
|
||||||
|
(upper_cmove MInst (MInst.Cmove
|
||||||
|
size cc
|
||||||
|
(gpr_to_gpr_mem (value_regs_get_gpr cons 1))
|
||||||
|
(value_regs_get_gpr alt 1) dst2)))
|
||||||
|
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
|
||||||
|
lower_cmove
|
||||||
|
upper_cmove
|
||||||
|
(value_regs
|
||||||
|
(gpr_to_reg (writable_gpr_to_gpr dst1))
|
||||||
|
(gpr_to_reg (writable_gpr_to_gpr dst2))))))
|
||||||
|
|
||||||
|
(rule (cmove_from_values (is_gpr_type (is_single_register_type ty)) cc consequent alternative)
|
||||||
|
(cmove ty cc (put_in_gpr_mem consequent) (put_in_gpr alternative)))
|
||||||
|
|
||||||
|
(rule (cmove_from_values (is_xmm_type (is_single_register_type ty)) cc consequent alternative)
|
||||||
|
(cmove_xmm ty cc (put_in_xmm_mem consequent) (put_in_xmm alternative)))
|
||||||
|
|
||||||
|
;; Helper for creating `cmove` instructions with the logical OR of multiple
|
||||||
|
;; flags. Note that these instructions will always result in more than one
|
||||||
|
;; emitted x86 instruction.
|
||||||
|
(decl cmove_or (Type CC CC GprMem Gpr) ConsumesFlags)
|
||||||
|
(rule (cmove_or ty cc1 cc2 consequent alternative)
|
||||||
|
(let ((dst WritableGpr (temp_writable_gpr))
|
||||||
|
(size OperandSize (operand_size_of_type_32_64 ty)))
|
||||||
|
(ConsumesFlags.ConsumesFlagsReturnsReg
|
||||||
|
(MInst.CmoveOr size cc1 cc2 consequent alternative dst)
|
||||||
|
(gpr_to_reg (writable_gpr_to_gpr dst)))))
|
||||||
|
|
||||||
|
(decl cmove_or_xmm (Type CC CC XmmMem Xmm) ConsumesFlags)
|
||||||
|
(rule (cmove_or_xmm ty cc1 cc2 consequent alternative)
|
||||||
|
(let ((dst WritableXmm (temp_writable_xmm))
|
||||||
|
(size OperandSize (operand_size_of_type_32_64 ty)))
|
||||||
|
(ConsumesFlags.ConsumesFlagsReturnsReg
|
||||||
|
(MInst.XmmCmoveOr size cc1 cc2 consequent alternative dst)
|
||||||
|
(xmm_to_reg (writable_xmm_to_xmm dst)))))
|
||||||
|
|
||||||
|
;; Helper for creating `cmove_or` instructions directly from values. This allows
|
||||||
|
;; us to special-case the `I128` types and default to the `cmove_or` helper
|
||||||
|
;; otherwise.
|
||||||
|
(decl cmove_or_from_values (Type CC CC Value Value) ConsumesFlags)
|
||||||
|
(rule (cmove_or_from_values $I128 cc1 cc2 consequent alternative)
|
||||||
|
(let ((cons ValueRegs (put_in_regs consequent))
|
||||||
|
(alt ValueRegs (put_in_regs alternative))
|
||||||
|
(dst1 WritableGpr (temp_writable_gpr))
|
||||||
|
(dst2 WritableGpr (temp_writable_gpr))
|
||||||
|
(size OperandSize (OperandSize.Size64))
|
||||||
|
(lower_cmove MInst (MInst.CmoveOr size cc1 cc2 (gpr_to_gpr_mem (value_regs_get_gpr cons 0)) (value_regs_get_gpr alt 0) dst1))
|
||||||
|
(upper_cmove MInst (MInst.CmoveOr size cc1 cc2 (gpr_to_gpr_mem (value_regs_get_gpr cons 1)) (value_regs_get_gpr alt 1) dst2)))
|
||||||
|
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
|
||||||
|
lower_cmove
|
||||||
|
upper_cmove
|
||||||
|
(value_regs (gpr_to_reg (writable_gpr_to_gpr dst1))
|
||||||
|
(gpr_to_reg (writable_gpr_to_gpr dst2))))))
|
||||||
|
|
||||||
|
(rule (cmove_or_from_values (is_gpr_type (is_single_register_type ty)) cc1 cc2 consequent alternative)
|
||||||
|
(cmove_or ty cc1 cc2 (put_in_gpr_mem consequent) (put_in_gpr alternative)))
|
||||||
|
|
||||||
|
(rule (cmove_or_from_values (is_xmm_type (is_single_register_type ty)) cc1 cc2 consequent alternative)
|
||||||
|
(cmove_or_xmm ty cc1 cc2 (put_in_xmm_mem consequent) (put_in_xmm alternative)))
|
||||||
|
|
||||||
;; Helper for creating `MInst.MovzxRmR` instructions.
|
;; Helper for creating `MInst.MovzxRmR` instructions.
|
||||||
(decl movzx (Type ExtMode GprMem) Gpr)
|
(decl movzx (Type ExtMode GprMem) Gpr)
|
||||||
(rule (movzx ty mode src)
|
(rule (movzx ty mode src)
|
||||||
|
|||||||
@@ -1064,9 +1064,9 @@ pub(crate) fn emit(
|
|||||||
cc,
|
cc,
|
||||||
consequent,
|
consequent,
|
||||||
alternative,
|
alternative,
|
||||||
dst: reg_g,
|
dst,
|
||||||
} => {
|
} => {
|
||||||
debug_assert_eq!(*alternative, reg_g.to_reg());
|
debug_assert_eq!(*alternative, dst.to_reg());
|
||||||
let rex_flags = RexFlags::from(*size);
|
let rex_flags = RexFlags::from(*size);
|
||||||
let prefix = match size {
|
let prefix = match size {
|
||||||
OperandSize::Size16 => LegacyPrefixes::_66,
|
OperandSize::Size16 => LegacyPrefixes::_66,
|
||||||
@@ -1076,14 +1076,14 @@ pub(crate) fn emit(
|
|||||||
};
|
};
|
||||||
let opcode = 0x0F40 + cc.get_enc() as u32;
|
let opcode = 0x0F40 + cc.get_enc() as u32;
|
||||||
match consequent.clone().to_reg_mem() {
|
match consequent.clone().to_reg_mem() {
|
||||||
RegMem::Reg { reg: reg_e } => {
|
RegMem::Reg { reg } => {
|
||||||
emit_std_reg_reg(
|
emit_std_reg_reg(
|
||||||
sink,
|
sink,
|
||||||
prefix,
|
prefix,
|
||||||
opcode,
|
opcode,
|
||||||
2,
|
2,
|
||||||
reg_g.to_reg().to_reg(),
|
dst.to_reg().to_reg(),
|
||||||
reg_e,
|
reg,
|
||||||
rex_flags,
|
rex_flags,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -1096,7 +1096,7 @@ pub(crate) fn emit(
|
|||||||
prefix,
|
prefix,
|
||||||
opcode,
|
opcode,
|
||||||
2,
|
2,
|
||||||
reg_g.to_reg().to_reg(),
|
dst.to_reg().to_reg(),
|
||||||
addr,
|
addr,
|
||||||
rex_flags,
|
rex_flags,
|
||||||
);
|
);
|
||||||
@@ -1104,7 +1104,42 @@ pub(crate) fn emit(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Inst::XmmCmove { size, cc, src, dst } => {
|
Inst::CmoveOr {
|
||||||
|
size,
|
||||||
|
cc1,
|
||||||
|
cc2,
|
||||||
|
consequent,
|
||||||
|
alternative,
|
||||||
|
dst,
|
||||||
|
} => {
|
||||||
|
let first_cmove = Inst::Cmove {
|
||||||
|
cc: *cc1,
|
||||||
|
size: *size,
|
||||||
|
consequent: consequent.clone(),
|
||||||
|
alternative: alternative.clone(),
|
||||||
|
dst: dst.clone(),
|
||||||
|
};
|
||||||
|
first_cmove.emit(sink, info, state);
|
||||||
|
|
||||||
|
let second_cmove = Inst::Cmove {
|
||||||
|
cc: *cc2,
|
||||||
|
size: *size,
|
||||||
|
consequent: consequent.clone(),
|
||||||
|
alternative: alternative.clone(),
|
||||||
|
dst: dst.clone(),
|
||||||
|
};
|
||||||
|
second_cmove.emit(sink, info, state);
|
||||||
|
}
|
||||||
|
|
||||||
|
Inst::XmmCmove {
|
||||||
|
size,
|
||||||
|
cc,
|
||||||
|
consequent,
|
||||||
|
alternative,
|
||||||
|
dst,
|
||||||
|
} => {
|
||||||
|
debug_assert_eq!(*alternative, dst.to_reg());
|
||||||
|
|
||||||
// Lowering of the Select IR opcode when the input is an fcmp relies on the fact that
|
// Lowering of the Select IR opcode when the input is an fcmp relies on the fact that
|
||||||
// this doesn't clobber flags. Make sure to not do so here.
|
// this doesn't clobber flags. Make sure to not do so here.
|
||||||
let next = sink.get_label();
|
let next = sink.get_label();
|
||||||
@@ -1117,12 +1152,46 @@ pub(crate) fn emit(
|
|||||||
} else {
|
} else {
|
||||||
SseOpcode::Movss
|
SseOpcode::Movss
|
||||||
};
|
};
|
||||||
let inst = Inst::xmm_unary_rm_r(op, src.clone().to_reg_mem(), dst.to_writable_reg());
|
let inst =
|
||||||
|
Inst::xmm_unary_rm_r(op, consequent.clone().to_reg_mem(), dst.to_writable_reg());
|
||||||
inst.emit(sink, info, state);
|
inst.emit(sink, info, state);
|
||||||
|
|
||||||
sink.bind_label(next);
|
sink.bind_label(next);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Inst::XmmCmoveOr {
|
||||||
|
size,
|
||||||
|
cc1,
|
||||||
|
cc2,
|
||||||
|
consequent,
|
||||||
|
alternative,
|
||||||
|
dst,
|
||||||
|
} => {
|
||||||
|
// Conditional XMM move taken when `cc1` OR `cc2` holds: `dst` must already
|
||||||
|
// hold `alternative` (asserted below) and is overwritten with `consequent`
|
||||||
|
// by a jump-guarded `movss`/`movsd` for each condition.
|
||||||
|
debug_assert_eq!(*alternative, dst.to_reg());
|
||||||
|
|
||||||
|
let op = if *size == OperandSize::Size64 {
|
||||||
|
SseOpcode::Movsd
|
||||||
|
} else {
|
||||||
|
SseOpcode::Movss
|
||||||
|
};
|
||||||
|
let second_test = sink.get_label();
|
||||||
|
let next_instruction = sink.get_label();
|
||||||
|
|
||||||
|
// Jump to second test if `cc1` is *not* set. This must target `second_test`, not `next_instruction`: skipping the `cc2` check would reduce the OR to `cc1` alone.
|
||||||
|
one_way_jmp(sink, cc1.invert(), second_test);
|
||||||
|
let inst =
|
||||||
|
Inst::xmm_unary_rm_r(op, consequent.clone().to_reg_mem(), dst.to_writable_reg());
|
||||||
|
inst.emit(sink, info, state);
|
||||||
|
sink.bind_label(second_test);
|
||||||
|
|
||||||
|
// Jump to next instruction if `cc2` is *not* set. This test runs after the first move, so it relies on that move leaving the flags untouched.
|
||||||
|
one_way_jmp(sink, cc2.invert(), next_instruction);
|
||||||
|
let inst =
|
||||||
|
Inst::xmm_unary_rm_r(op, consequent.clone().to_reg_mem(), dst.to_writable_reg());
|
||||||
|
inst.emit(sink, info, state);
|
||||||
|
sink.bind_label(next_instruction);
|
||||||
|
}
|
||||||
|
|
||||||
Inst::Push64 { src } => {
|
Inst::Push64 { src } => {
|
||||||
if info.flags.enable_probestack() {
|
if info.flags.enable_probestack() {
|
||||||
sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow);
|
sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow);
|
||||||
|
|||||||
@@ -52,6 +52,7 @@ impl Inst {
|
|||||||
| Inst::CallUnknown { .. }
|
| Inst::CallUnknown { .. }
|
||||||
| Inst::CheckedDivOrRemSeq { .. }
|
| Inst::CheckedDivOrRemSeq { .. }
|
||||||
| Inst::Cmove { .. }
|
| Inst::Cmove { .. }
|
||||||
|
| Inst::CmoveOr { .. }
|
||||||
| Inst::CmpRmiR { .. }
|
| Inst::CmpRmiR { .. }
|
||||||
| Inst::CvtFloatToSintSeq { .. }
|
| Inst::CvtFloatToSintSeq { .. }
|
||||||
| Inst::CvtFloatToUintSeq { .. }
|
| Inst::CvtFloatToUintSeq { .. }
|
||||||
@@ -88,6 +89,7 @@ impl Inst {
|
|||||||
| Inst::Ud2 { .. }
|
| Inst::Ud2 { .. }
|
||||||
| Inst::VirtualSPOffsetAdj { .. }
|
| Inst::VirtualSPOffsetAdj { .. }
|
||||||
| Inst::XmmCmove { .. }
|
| Inst::XmmCmove { .. }
|
||||||
|
| Inst::XmmCmoveOr { .. }
|
||||||
| Inst::XmmCmpRmR { .. }
|
| Inst::XmmCmpRmR { .. }
|
||||||
| Inst::XmmLoadConst { .. }
|
| Inst::XmmLoadConst { .. }
|
||||||
| Inst::XmmMinMaxSeq { .. }
|
| Inst::XmmMinMaxSeq { .. }
|
||||||
@@ -629,7 +631,13 @@ impl Inst {
|
|||||||
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
|
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
|
||||||
let src = XmmMem::new(src).unwrap();
|
let src = XmmMem::new(src).unwrap();
|
||||||
let dst = WritableXmm::from_writable_reg(dst).unwrap();
|
let dst = WritableXmm::from_writable_reg(dst).unwrap();
|
||||||
Inst::XmmCmove { size, cc, src, dst }
|
Inst::XmmCmove {
|
||||||
|
size,
|
||||||
|
cc,
|
||||||
|
consequent: src,
|
||||||
|
alternative: dst.to_reg(),
|
||||||
|
dst,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn push64(src: RegMemImm) -> Inst {
|
pub(crate) fn push64(src: RegMemImm) -> Inst {
|
||||||
@@ -898,6 +906,12 @@ impl Inst {
|
|||||||
alternative,
|
alternative,
|
||||||
dst,
|
dst,
|
||||||
..
|
..
|
||||||
|
}
|
||||||
|
| Inst::CmoveOr {
|
||||||
|
size,
|
||||||
|
alternative,
|
||||||
|
dst,
|
||||||
|
..
|
||||||
} => {
|
} => {
|
||||||
if *alternative != dst.to_reg() {
|
if *alternative != dst.to_reg() {
|
||||||
debug_assert!(alternative.is_virtual());
|
debug_assert!(alternative.is_virtual());
|
||||||
@@ -910,6 +924,23 @@ impl Inst {
|
|||||||
}
|
}
|
||||||
insts.push(self);
|
insts.push(self);
|
||||||
}
|
}
|
||||||
|
Inst::XmmCmove {
|
||||||
|
alternative, dst, ..
|
||||||
|
}
|
||||||
|
| Inst::XmmCmoveOr {
|
||||||
|
alternative, dst, ..
|
||||||
|
} => {
|
||||||
|
if *alternative != dst.to_reg() {
|
||||||
|
debug_assert!(alternative.is_virtual());
|
||||||
|
insts.push(Self::gen_move(
|
||||||
|
dst.to_writable_reg(),
|
||||||
|
alternative.to_reg(),
|
||||||
|
types::F32X4,
|
||||||
|
));
|
||||||
|
*alternative = dst.to_reg();
|
||||||
|
}
|
||||||
|
insts.push(self);
|
||||||
|
}
|
||||||
Inst::Not { src, dst, .. } | Inst::Neg { src, dst, .. } => {
|
Inst::Not { src, dst, .. } | Inst::Neg { src, dst, .. } => {
|
||||||
if *src != dst.to_reg() {
|
if *src != dst.to_reg() {
|
||||||
debug_assert!(src.is_virtual());
|
debug_assert!(src.is_virtual());
|
||||||
@@ -1588,7 +1619,34 @@ impl PrettyPrint for Inst {
|
|||||||
show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes())
|
show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes())
|
||||||
),
|
),
|
||||||
|
|
||||||
Inst::XmmCmove { size, cc, src, dst } => {
|
Inst::CmoveOr {
|
||||||
|
size,
|
||||||
|
cc1,
|
||||||
|
cc2,
|
||||||
|
consequent: src,
|
||||||
|
alternative: _,
|
||||||
|
dst,
|
||||||
|
} => {
|
||||||
|
let src = src.show_rru_sized(mb_rru, size.to_bytes());
|
||||||
|
let dst = show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes());
|
||||||
|
format!(
|
||||||
|
"{} {}, {}; {} {}, {}",
|
||||||
|
ljustify(format!("cmov{}{}", cc1.to_string(), suffix_bwlq(*size))),
|
||||||
|
src,
|
||||||
|
dst,
|
||||||
|
ljustify(format!("cmov{}{}", cc2.to_string(), suffix_bwlq(*size))),
|
||||||
|
src,
|
||||||
|
dst,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
Inst::XmmCmove {
|
||||||
|
size,
|
||||||
|
cc,
|
||||||
|
consequent: src,
|
||||||
|
dst,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
format!(
|
format!(
|
||||||
"j{} $next; mov{} {}, {}; $next: ",
|
"j{} $next; mov{} {}, {}; $next: ",
|
||||||
cc.invert().to_string(),
|
cc.invert().to_string(),
|
||||||
@@ -1602,6 +1660,34 @@ impl PrettyPrint for Inst {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Inst::XmmCmoveOr {
|
||||||
|
size,
|
||||||
|
cc1,
|
||||||
|
cc2,
|
||||||
|
consequent: src,
|
||||||
|
dst,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
|
let suffix = if *size == OperandSize::Size64 {
|
||||||
|
"sd"
|
||||||
|
} else {
|
||||||
|
"ss"
|
||||||
|
};
|
||||||
|
let src = src.show_rru_sized(mb_rru, size.to_bytes());
|
||||||
|
let dst = show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes());
|
||||||
|
format!(
|
||||||
|
"j{} $check; mov{} {}, {}; $check: j{} $next; mov{} {}, {}; $next",
|
||||||
|
cc1.invert().to_string(),
|
||||||
|
suffix,
|
||||||
|
src,
|
||||||
|
dst,
|
||||||
|
cc2.invert().to_string(),
|
||||||
|
suffix,
|
||||||
|
src,
|
||||||
|
dst,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
Inst::Push64 { src } => {
|
Inst::Push64 { src } => {
|
||||||
format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru))
|
format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru))
|
||||||
}
|
}
|
||||||
@@ -2000,11 +2086,25 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
|||||||
consequent: src,
|
consequent: src,
|
||||||
dst,
|
dst,
|
||||||
..
|
..
|
||||||
|
}
|
||||||
|
| Inst::CmoveOr {
|
||||||
|
consequent: src,
|
||||||
|
dst,
|
||||||
|
..
|
||||||
} => {
|
} => {
|
||||||
src.get_regs_as_uses(collector);
|
src.get_regs_as_uses(collector);
|
||||||
collector.add_mod(dst.to_writable_reg());
|
collector.add_mod(dst.to_writable_reg());
|
||||||
}
|
}
|
||||||
Inst::XmmCmove { src, dst, .. } => {
|
Inst::XmmCmove {
|
||||||
|
consequent: src,
|
||||||
|
dst,
|
||||||
|
..
|
||||||
|
}
|
||||||
|
| Inst::XmmCmoveOr {
|
||||||
|
consequent: src,
|
||||||
|
dst,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
src.get_regs_as_uses(collector);
|
src.get_regs_as_uses(collector);
|
||||||
collector.add_mod(dst.to_writable_reg());
|
collector.add_mod(dst.to_writable_reg());
|
||||||
}
|
}
|
||||||
@@ -2454,18 +2554,32 @@ pub(crate) fn x64_map_regs<RM: RegMapper>(inst: &mut Inst, mapper: &RM) {
|
|||||||
ref mut dst,
|
ref mut dst,
|
||||||
ref mut alternative,
|
ref mut alternative,
|
||||||
..
|
..
|
||||||
|
}
|
||||||
|
| Inst::CmoveOr {
|
||||||
|
consequent: ref mut src,
|
||||||
|
ref mut dst,
|
||||||
|
ref mut alternative,
|
||||||
|
..
|
||||||
} => {
|
} => {
|
||||||
src.map_uses(mapper);
|
src.map_uses(mapper);
|
||||||
dst.map_mod(mapper);
|
dst.map_mod(mapper);
|
||||||
*alternative = dst.to_reg();
|
*alternative = dst.to_reg();
|
||||||
}
|
}
|
||||||
Inst::XmmCmove {
|
Inst::XmmCmove {
|
||||||
ref mut src,
|
consequent: ref mut src,
|
||||||
ref mut dst,
|
ref mut dst,
|
||||||
|
ref mut alternative,
|
||||||
|
..
|
||||||
|
}
|
||||||
|
| Inst::XmmCmoveOr {
|
||||||
|
consequent: ref mut src,
|
||||||
|
ref mut dst,
|
||||||
|
ref mut alternative,
|
||||||
..
|
..
|
||||||
} => {
|
} => {
|
||||||
src.map_uses(mapper);
|
src.map_uses(mapper);
|
||||||
dst.map_mod(mapper);
|
dst.map_mod(mapper);
|
||||||
|
*alternative = dst.to_reg();
|
||||||
}
|
}
|
||||||
Inst::Push64 { ref mut src } => src.map_uses(mapper),
|
Inst::Push64 { ref mut src } => src.map_uses(mapper),
|
||||||
Inst::Pop64 { ref mut dst } => {
|
Inst::Pop64 { ref mut dst } => {
|
||||||
|
|||||||
@@ -124,8 +124,8 @@
|
|||||||
(y_lo Gpr (value_regs_get_gpr y_regs 0))
|
(y_lo Gpr (value_regs_get_gpr y_regs 0))
|
||||||
(y_hi Gpr (value_regs_get_gpr y_regs 1)))
|
(y_hi Gpr (value_regs_get_gpr y_regs 1)))
|
||||||
;; Do an add followed by an add-with-carry.
|
;; Do an add followed by an add-with-carry.
|
||||||
(with_flags (add_with_flags $I64 x_lo (gpr_to_gpr_mem_imm y_lo))
|
(with_flags (add_with_flags_paired $I64 x_lo (gpr_to_gpr_mem_imm y_lo))
|
||||||
(adc $I64 x_hi (gpr_to_gpr_mem_imm y_hi))))))
|
(adc_paired $I64 x_hi (gpr_to_gpr_mem_imm y_hi))))))
|
||||||
|
|
||||||
;;;; Rules for `sadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `sadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
@@ -225,8 +225,8 @@
|
|||||||
(let ((y_regs ValueRegs (put_in_regs y))
|
(let ((y_regs ValueRegs (put_in_regs y))
|
||||||
(y_lo Gpr (value_regs_get_gpr y_regs 0))
|
(y_lo Gpr (value_regs_get_gpr y_regs 0))
|
||||||
(y_hi Gpr (value_regs_get_gpr y_regs 1)))
|
(y_hi Gpr (value_regs_get_gpr y_regs 1)))
|
||||||
(with_flags (add_with_flags $I64 y_lo x)
|
(with_flags (add_with_flags_paired $I64 y_lo x)
|
||||||
(adc $I64 y_hi (gpr_mem_imm_new (RegMemImm.Imm 0))))))
|
(adc_paired $I64 y_hi (gpr_mem_imm_new (RegMemImm.Imm 0))))))
|
||||||
|
|
||||||
;; Otherwise, put the immediate into a register.
|
;; Otherwise, put the immediate into a register.
|
||||||
(rule (lower (has_type $I128 (iadd_imm y (u64_from_imm64 x))))
|
(rule (lower (has_type $I128 (iadd_imm y (u64_from_imm64 x))))
|
||||||
@@ -234,8 +234,8 @@
|
|||||||
(y_lo Gpr (value_regs_get_gpr y_regs 0))
|
(y_lo Gpr (value_regs_get_gpr y_regs 0))
|
||||||
(y_hi Gpr (value_regs_get_gpr y_regs 1))
|
(y_hi Gpr (value_regs_get_gpr y_regs 1))
|
||||||
(x_lo Gpr (gpr_new (imm $I64 x))))
|
(x_lo Gpr (gpr_new (imm $I64 x))))
|
||||||
(with_flags (add_with_flags $I64 y_lo (gpr_to_gpr_mem_imm x_lo))
|
(with_flags (add_with_flags_paired $I64 y_lo (gpr_to_gpr_mem_imm x_lo))
|
||||||
(adc $I64 y_hi (gpr_mem_imm_new (RegMemImm.Imm 0))))))
|
(adc_paired $I64 y_hi (gpr_mem_imm_new (RegMemImm.Imm 0))))))
|
||||||
|
|
||||||
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
@@ -293,8 +293,8 @@
|
|||||||
(y_lo Gpr (value_regs_get_gpr y_regs 0))
|
(y_lo Gpr (value_regs_get_gpr y_regs 0))
|
||||||
(y_hi Gpr (value_regs_get_gpr y_regs 1)))
|
(y_hi Gpr (value_regs_get_gpr y_regs 1)))
|
||||||
;; Do a sub followed by a sub-with-borrow.
|
;; Do a sub followed by a sub-with-borrow.
|
||||||
(with_flags (sub_with_flags $I64 x_lo (gpr_to_gpr_mem_imm y_lo))
|
(with_flags (sub_with_flags_paired $I64 x_lo (gpr_to_gpr_mem_imm y_lo))
|
||||||
(sbb $I64 x_hi (gpr_to_gpr_mem_imm y_hi))))))
|
(sbb_paired $I64 x_hi (gpr_to_gpr_mem_imm y_hi))))))
|
||||||
|
|
||||||
;;;; Rules for `ssub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `ssub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
@@ -562,7 +562,7 @@
|
|||||||
(gpr_to_gpr_mem_imm amt)))))
|
(gpr_to_gpr_mem_imm amt)))))
|
||||||
(zero Gpr (gpr_new (imm $I64 0)))
|
(zero Gpr (gpr_new (imm $I64 0)))
|
||||||
;; Nullify the carry if we are shifting in by a multiple of 128.
|
;; Nullify the carry if we are shifting in by a multiple of 128.
|
||||||
(carry_ Gpr (gpr_new (with_flags_1 (test (OperandSize.Size64)
|
(carry_ Gpr (gpr_new (with_flags_reg (test (OperandSize.Size64)
|
||||||
(gpr_mem_imm_new (RegMemImm.Imm 127))
|
(gpr_mem_imm_new (RegMemImm.Imm 127))
|
||||||
amt)
|
amt)
|
||||||
(cmove $I64
|
(cmove $I64
|
||||||
@@ -574,11 +574,10 @@
|
|||||||
;; Combine the two shifted halves. However, if we are shifting by >= 64
|
;; Combine the two shifted halves. However, if we are shifting by >= 64
|
||||||
;; (modulo 128), then the low bits are zero and the high bits are our
|
;; (modulo 128), then the low bits are zero and the high bits are our
|
||||||
;; low bits.
|
;; low bits.
|
||||||
(with_flags_2 (test (OperandSize.Size64)
|
(with_flags (test (OperandSize.Size64) (gpr_mem_imm_new (RegMemImm.Imm 64)) amt)
|
||||||
(gpr_mem_imm_new (RegMemImm.Imm 64))
|
(consumes_flags_concat
|
||||||
amt)
|
|
||||||
(cmove $I64 (CC.Z) (gpr_to_gpr_mem lo_shifted) zero)
|
(cmove $I64 (CC.Z) (gpr_to_gpr_mem lo_shifted) zero)
|
||||||
(cmove $I64 (CC.Z) (gpr_to_gpr_mem hi_shifted_) lo_shifted))))
|
(cmove $I64 (CC.Z) (gpr_to_gpr_mem hi_shifted_) lo_shifted)))))
|
||||||
|
|
||||||
(rule (lower (has_type $I128 (ishl src amt)))
|
(rule (lower (has_type $I128 (ishl src amt)))
|
||||||
;; NB: Only the low bits of `amt` matter since we logically mask the shift
|
;; NB: Only the low bits of `amt` matter since we logically mask the shift
|
||||||
@@ -674,23 +673,17 @@
|
|||||||
(gpr_new (imm $I64 64))
|
(gpr_new (imm $I64 64))
|
||||||
(gpr_to_gpr_mem_imm amt)))))
|
(gpr_to_gpr_mem_imm amt)))))
|
||||||
;; Nullify the carry if we are shifting by a multiple of 128.
|
;; Nullify the carry if we are shifting by a multiple of 128.
|
||||||
(carry_ Gpr (gpr_new (with_flags_1 (test (OperandSize.Size64)
|
(carry_ Gpr (gpr_new (with_flags_reg (test (OperandSize.Size64) (gpr_mem_imm_new (RegMemImm.Imm 127)) amt)
|
||||||
(gpr_mem_imm_new (RegMemImm.Imm 127))
|
(cmove $I64 (CC.Z) (gpr_to_gpr_mem (gpr_new (imm $I64 0))) carry))))
|
||||||
amt)
|
|
||||||
(cmove $I64
|
|
||||||
(CC.Z)
|
|
||||||
(gpr_to_gpr_mem (gpr_new (imm $I64 0)))
|
|
||||||
carry))))
|
|
||||||
;; Add the carry bits into the lo.
|
;; Add the carry bits into the lo.
|
||||||
(lo_shifted_ Gpr (or $I64 carry_ (gpr_to_gpr_mem_imm lo_shifted))))
|
(lo_shifted_ Gpr (or $I64 carry_ (gpr_to_gpr_mem_imm lo_shifted))))
|
||||||
;; Combine the two shifted halves. However, if we are shifting by >= 64
|
;; Combine the two shifted halves. However, if we are shifting by >= 64
|
||||||
;; (modulo 128), then the hi bits are zero and the lo bits are what
|
;; (modulo 128), then the hi bits are zero and the lo bits are what
|
||||||
;; would otherwise be our hi bits.
|
;; would otherwise be our hi bits.
|
||||||
(with_flags_2 (test (OperandSize.Size64)
|
(with_flags (test (OperandSize.Size64) (gpr_mem_imm_new (RegMemImm.Imm 64)) amt)
|
||||||
(gpr_mem_imm_new (RegMemImm.Imm 64))
|
(consumes_flags_concat
|
||||||
amt)
|
|
||||||
(cmove $I64 (CC.Z) (gpr_to_gpr_mem lo_shifted_) hi_shifted)
|
(cmove $I64 (CC.Z) (gpr_to_gpr_mem lo_shifted_) hi_shifted)
|
||||||
(cmove $I64 (CC.Z) (gpr_to_gpr_mem hi_shifted) (gpr_new (imm $I64 0))))))
|
(cmove $I64 (CC.Z) (gpr_to_gpr_mem hi_shifted) (gpr_new (imm $I64 0)))))))
|
||||||
|
|
||||||
(rule (lower (has_type $I128 (ushr src amt)))
|
(rule (lower (has_type $I128 (ushr src amt)))
|
||||||
;; NB: Only the low bits of `amt` matter since we logically mask the shift
|
;; NB: Only the low bits of `amt` matter since we logically mask the shift
|
||||||
@@ -787,13 +780,8 @@
|
|||||||
(gpr_new (imm $I64 64))
|
(gpr_new (imm $I64 64))
|
||||||
(gpr_to_gpr_mem_imm amt)))))
|
(gpr_to_gpr_mem_imm amt)))))
|
||||||
;; Nullify the carry if we are shifting by a multiple of 128.
|
;; Nullify the carry if we are shifting by a multiple of 128.
|
||||||
(carry_ Gpr (gpr_new (with_flags_1 (test (OperandSize.Size64)
|
(carry_ Gpr (gpr_new (with_flags_reg (test (OperandSize.Size64) (gpr_mem_imm_new (RegMemImm.Imm 127)) amt)
|
||||||
(gpr_mem_imm_new (RegMemImm.Imm 127))
|
(cmove $I64 (CC.Z) (gpr_to_gpr_mem (gpr_new (imm $I64 0))) carry))))
|
||||||
amt)
|
|
||||||
(cmove $I64
|
|
||||||
(CC.Z)
|
|
||||||
(gpr_to_gpr_mem (gpr_new (imm $I64 0)))
|
|
||||||
carry))))
|
|
||||||
;; Add the carry into the low half.
|
;; Add the carry into the low half.
|
||||||
(lo_shifted_ Gpr (or $I64 lo_shifted (gpr_to_gpr_mem_imm carry_)))
|
(lo_shifted_ Gpr (or $I64 lo_shifted (gpr_to_gpr_mem_imm carry_)))
|
||||||
;; Get all sign bits.
|
;; Get all sign bits.
|
||||||
@@ -801,11 +789,10 @@
|
|||||||
;; Combine the two shifted halves. However, if we are shifting by >= 64
|
;; Combine the two shifted halves. However, if we are shifting by >= 64
|
||||||
;; (modulo 128), then the hi bits are all sign bits and the lo bits are
|
;; (modulo 128), then the hi bits are all sign bits and the lo bits are
|
||||||
;; what would otherwise be our hi bits.
|
;; what would otherwise be our hi bits.
|
||||||
(with_flags_2 (test (OperandSize.Size64)
|
(with_flags (test (OperandSize.Size64) (gpr_mem_imm_new (RegMemImm.Imm 64)) amt)
|
||||||
(gpr_mem_imm_new (RegMemImm.Imm 64))
|
(consumes_flags_concat
|
||||||
amt)
|
|
||||||
(cmove $I64 (CC.Z) (gpr_to_gpr_mem lo_shifted_) hi_shifted)
|
(cmove $I64 (CC.Z) (gpr_to_gpr_mem lo_shifted_) hi_shifted)
|
||||||
(cmove $I64 (CC.Z) (gpr_to_gpr_mem hi_shifted) sign_bits))))
|
(cmove $I64 (CC.Z) (gpr_to_gpr_mem hi_shifted) sign_bits)))))
|
||||||
|
|
||||||
(rule (lower (has_type $I128 (sshr src amt)))
|
(rule (lower (has_type $I128 (sshr src amt)))
|
||||||
;; NB: Only the low bits of `amt` matter since we logically mask the shift
|
;; NB: Only the low bits of `amt` matter since we logically mask the shift
|
||||||
@@ -1468,7 +1455,7 @@
|
|||||||
(let ((x_reg Gpr (put_in_gpr x))
|
(let ((x_reg Gpr (put_in_gpr x))
|
||||||
(y_reg Gpr (put_in_gpr y))
|
(y_reg Gpr (put_in_gpr y))
|
||||||
(size OperandSize (raw_operand_size_of_type ty)))
|
(size OperandSize (raw_operand_size_of_type ty)))
|
||||||
(value_reg (with_flags_1 (cmp size (gpr_to_gpr_mem_imm x_reg) y_reg)
|
(value_reg (with_flags_reg (cmp size (gpr_to_gpr_mem_imm x_reg) y_reg)
|
||||||
(cmove ty cc (gpr_to_gpr_mem y_reg) x_reg)))))
|
(cmove ty cc (gpr_to_gpr_mem y_reg) x_reg)))))
|
||||||
|
|
||||||
(rule (lower (has_type (fits_in_64 ty) (umin x y)))
|
(rule (lower (has_type (fits_in_64 ty) (umin x y)))
|
||||||
@@ -1536,3 +1523,90 @@
|
|||||||
|
|
||||||
(rule (lower (resumable_trap code))
|
(rule (lower (resumable_trap code))
|
||||||
(safepoint (ud2 code)))
|
(safepoint (ud2 code)))
|
||||||
|
|
||||||
|
;;;; Rules for `select` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;; CLIF `select` instructions receive a testable argument (i.e. boolean or
|
||||||
|
;; integer) that determines which of the other two arguments is selected as
|
||||||
|
;; output. Since Cranelift booleans are typically generated by a comparison, the
|
||||||
|
;; lowerings in this section "look upwards in the tree" to emit the proper
|
||||||
|
;; sequence of "selection" instructions.
|
||||||
|
;;
|
||||||
|
;; The following rules--for selecting on a floating-point comparison--emit a
|
||||||
|
;; `UCOMIS*` instruction and then a conditional move, `cmove`. Note that for
|
||||||
|
;; values contained in XMM registers, `cmove` and `cmove_or` may in fact emit a
|
||||||
|
;; jump sequence, not `CMOV`. The `cmove` instruction operates on the flags set
|
||||||
|
;; by `UCOMIS*`; the key to understanding these is the UCOMIS* documentation
|
||||||
|
;; (see Intel's Software Developer's Manual, volume 2, chapter 4):
|
||||||
|
;; - unordered assigns Z = 1, P = 1, C = 1
|
||||||
|
;; - greater than assigns Z = 0, P = 0, C = 0
|
||||||
|
;; - less than assigns Z = 0, P = 0, C = 1
|
||||||
|
;; - equal assigns Z = 1, P = 0, C = 0
|
||||||
|
;;
|
||||||
|
;; Note that prefixing the flag with `N` means "not," so that `CC.P -> P = 1`
|
||||||
|
;; and `CC.NP -> P = 0`. Also, x86 uses mnemonics for certain combinations of
|
||||||
|
;; flags; e.g.:
|
||||||
|
;; - `CC.B -> C = 1` (below)
|
||||||
|
;; - `CC.NB -> C = 0` (not below)
|
||||||
|
;; - `CC.BE -> C = 1 OR Z = 1` (below or equal)
|
||||||
|
;; - `CC.NBE -> C = 0 AND Z = 0` (not below or equal)
|
||||||
|
|
||||||
|
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.Ordered) a b)) x y)))
|
||||||
|
(with_flags (fpcmp b a) (cmove_from_values ty (CC.NP) x y)))
|
||||||
|
|
||||||
|
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.Unordered) a b)) x y)))
|
||||||
|
(with_flags (fpcmp b a) (cmove_from_values ty (CC.P) x y)))
|
||||||
|
|
||||||
|
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.GreaterThan) a b)) x y)))
|
||||||
|
(with_flags (fpcmp b a) (cmove_from_values ty (CC.NBE) x y)))
|
||||||
|
|
||||||
|
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.GreaterThanOrEqual) a b)) x y)))
|
||||||
|
(with_flags (fpcmp b a) (cmove_from_values ty (CC.NB) x y)))
|
||||||
|
|
||||||
|
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.UnorderedOrLessThan) a b)) x y)))
|
||||||
|
(with_flags (fpcmp b a) (cmove_from_values ty (CC.B) x y)))
|
||||||
|
|
||||||
|
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.UnorderedOrLessThanOrEqual) a b)) x y)))
|
||||||
|
(with_flags (fpcmp b a) (cmove_from_values ty (CC.BE) x y)))
|
||||||
|
|
||||||
|
;; Certain FloatCC variants are implemented by flipping the operands of the
|
||||||
|
;; comparison (e.g., "greater than" is lowered the same as "less than" but the
|
||||||
|
;; comparison is reversed). This allows us to use a single flag for the `cmove`,
|
||||||
|
;; which involves fewer instructions than `cmove_or`.
|
||||||
|
;;
|
||||||
|
;; But why flip at all, you may ask? Can't we just use `CC.B` (i.e., below) for
|
||||||
|
;; `FloatCC.LessThan`? Recall that in these floating-point lowerings, values may
|
||||||
|
;; be unordered and we want to express that `FloatCC.LessThan` is `LT`,
|
||||||
|
;; not `LT | UNO`. By flipping the operands AND inverting the comparison (e.g.,
|
||||||
|
;; to `CC.NBE`), we also avoid these unordered cases.
|
||||||
|
|
||||||
|
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.LessThan) a b)) x y)))
|
||||||
|
(with_flags (fpcmp a b) (cmove_from_values ty (CC.NBE) x y)))
|
||||||
|
|
||||||
|
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.LessThanOrEqual) a b)) x y)))
|
||||||
|
(with_flags (fpcmp a b) (cmove_from_values ty (CC.NB) x y)))
|
||||||
|
|
||||||
|
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.UnorderedOrGreaterThan) a b)) x y)))
|
||||||
|
(with_flags (fpcmp a b) (cmove_from_values ty (CC.B) x y)))
|
||||||
|
|
||||||
|
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) a b)) x y)))
|
||||||
|
(with_flags (fpcmp a b) (cmove_from_values ty (CC.BE) x y)))
|
||||||
|
|
||||||
|
;; `FloatCC.Equal` and `FloatCC.NotEqual` can only be implemented with multiple
|
||||||
|
;; flag checks. Recall from the flag assignment chart above that equality, e.g.,
|
||||||
|
;; will assign `Z = 1`. But so does an unordered comparison: `Z = 1, P = 1, C =
|
||||||
|
;; 1`. In order to avoid semantics like `EQ | UNO` for equality, we must ensure
|
||||||
|
;; that the values are actually ordered, checking that `P = 0` (note that the
|
||||||
|
;; `C` flag is irrelevant here). Since we cannot find a single instruction that
|
||||||
|
;; implements a `Z = 1 AND P = 0` check, we invert the flag checks (i.e., `Z = 1
|
||||||
|
;; AND P = 0` becomes `Z = 0 OR P = 1`) and also flip the select operands, `x`
|
||||||
|
;; and `y`. The same argument applies to `FloatCC.NotEqual`.
|
||||||
|
;;
|
||||||
|
;; More details about the CLIF semantics for `fcmp` are available at
|
||||||
|
;; https://docs.rs/cranelift-codegen/latest/cranelift_codegen/ir/trait.InstBuilder.html#method.fcmp.
|
||||||
|
|
||||||
|
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.Equal) a b)) x y)))
|
||||||
|
(with_flags (fpcmp a b) (cmove_or_from_values ty (CC.NZ) (CC.P) y x)))
|
||||||
|
|
||||||
|
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.NotEqual) a b)) x y)))
|
||||||
|
(with_flags (fpcmp a b) (cmove_or_from_values ty (CC.NZ) (CC.P) x y)))
|
||||||
|
|||||||
@@ -530,6 +530,7 @@ enum FcmpSpec {
|
|||||||
/// This is useful in contexts where it is hard/inefficient to produce a single instruction (or
|
/// This is useful in contexts where it is hard/inefficient to produce a single instruction (or
|
||||||
/// sequence of instructions) that check for an "AND" combination of condition codes; see for
|
/// sequence of instructions) that check for an "AND" combination of condition codes; see for
|
||||||
/// instance lowering of Select.
|
/// instance lowering of Select.
|
||||||
|
#[allow(dead_code)]
|
||||||
InvertEqual,
|
InvertEqual,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4252,80 +4253,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
|
|
||||||
Opcode::Select => {
|
Opcode::Select => {
|
||||||
let flag_input = inputs[0];
|
let flag_input = inputs[0];
|
||||||
if let Some(fcmp) = matches_input(ctx, flag_input, Opcode::Fcmp) {
|
if let Some(_) = matches_input(ctx, flag_input, Opcode::Fcmp) {
|
||||||
let cond_code = ctx.data(fcmp).fp_cond_code().unwrap();
|
implemented_in_isle(ctx);
|
||||||
|
|
||||||
// For equal, we flip the operands, because we can't test a conjunction of
|
|
||||||
// CPU flags with a single cmove; see InvertedEqualOrConditions doc comment.
|
|
||||||
let (lhs_input, rhs_input) = match cond_code {
|
|
||||||
FloatCC::Equal => (inputs[2], inputs[1]),
|
|
||||||
_ => (inputs[1], inputs[2]),
|
|
||||||
};
|
|
||||||
|
|
||||||
let ty = ctx.output_ty(insn, 0);
|
|
||||||
let rhs = put_input_in_regs(ctx, rhs_input);
|
|
||||||
let dst = get_output_reg(ctx, outputs[0]);
|
|
||||||
let lhs = put_input_in_regs(ctx, lhs_input);
|
|
||||||
|
|
||||||
// We request inversion of Equal to NotEqual here: taking LHS if equal would mean
|
|
||||||
// take it if both CC::NP and CC::Z are set, the conjunction of which can't be
|
|
||||||
// modeled with a single cmov instruction. Instead, we'll swap LHS and RHS in the
|
|
||||||
// select operation, and invert the equal to a not-equal here.
|
|
||||||
let fcmp_results = emit_fcmp(ctx, fcmp, cond_code, FcmpSpec::InvertEqual);
|
|
||||||
|
|
||||||
if let FcmpCondResult::InvertedEqualOrConditions(_, _) = &fcmp_results {
|
|
||||||
// Keep this sync'd with the lowering of the select inputs above.
|
|
||||||
assert_eq!(cond_code, FloatCC::Equal);
|
|
||||||
}
|
|
||||||
|
|
||||||
emit_moves(ctx, dst, rhs, ty);
|
|
||||||
|
|
||||||
let operand_size = if ty == types::F64 {
|
|
||||||
OperandSize::Size64
|
|
||||||
} else {
|
|
||||||
OperandSize::Size32
|
|
||||||
};
|
|
||||||
match fcmp_results {
|
|
||||||
FcmpCondResult::Condition(cc) => {
|
|
||||||
if is_int_or_ref_ty(ty) || ty == types::I128 || ty == types::B128 {
|
|
||||||
let size = ty.bytes() as u8;
|
|
||||||
emit_cmoves(ctx, size, cc, lhs, dst);
|
|
||||||
} else {
|
|
||||||
ctx.emit(Inst::xmm_cmove(
|
|
||||||
operand_size,
|
|
||||||
cc,
|
|
||||||
RegMem::reg(lhs.only_reg().unwrap()),
|
|
||||||
dst.only_reg().unwrap(),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
FcmpCondResult::AndConditions(_, _) => {
|
|
||||||
unreachable!(
|
|
||||||
"can't AND with select; see above comment about inverting equal"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
FcmpCondResult::InvertedEqualOrConditions(cc1, cc2)
|
|
||||||
| FcmpCondResult::OrConditions(cc1, cc2) => {
|
|
||||||
if is_int_or_ref_ty(ty) || ty == types::I128 {
|
|
||||||
let size = ty.bytes() as u8;
|
|
||||||
emit_cmoves(ctx, size, cc1, lhs.clone(), dst);
|
|
||||||
emit_cmoves(ctx, size, cc2, lhs, dst);
|
|
||||||
} else {
|
|
||||||
ctx.emit(Inst::xmm_cmove(
|
|
||||||
operand_size,
|
|
||||||
cc1,
|
|
||||||
RegMem::reg(lhs.only_reg().unwrap()),
|
|
||||||
dst.only_reg().unwrap(),
|
|
||||||
));
|
|
||||||
ctx.emit(Inst::xmm_cmove(
|
|
||||||
operand_size,
|
|
||||||
cc2,
|
|
||||||
RegMem::reg(lhs.only_reg().unwrap()),
|
|
||||||
dst.only_reg().unwrap(),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
let ty = ty.unwrap();
|
let ty = ty.unwrap();
|
||||||
|
|
||||||
|
|||||||
@@ -6,11 +6,11 @@ use generated_code::MInst;
|
|||||||
use regalloc::Writable;
|
use regalloc::Writable;
|
||||||
|
|
||||||
// Types that the generated ISLE code uses via `use super::*`.
|
// Types that the generated ISLE code uses via `use super::*`.
|
||||||
use super::{is_mergeable_load, lower_to_amode, Reg};
|
use super::{is_int_or_ref_ty, is_mergeable_load, lower_to_amode, Reg};
|
||||||
use crate::{
|
use crate::{
|
||||||
ir::{
|
ir::{
|
||||||
immediates::*, types::*, Inst, InstructionData, Opcode, TrapCode, Value, ValueLabel,
|
condcodes::FloatCC, immediates::*, types::*, Inst, InstructionData, Opcode, TrapCode,
|
||||||
ValueList,
|
Value, ValueLabel, ValueList,
|
||||||
},
|
},
|
||||||
isa::{
|
isa::{
|
||||||
settings::Flags,
|
settings::Flags,
|
||||||
@@ -440,6 +440,32 @@ where
|
|||||||
fn imm8_to_imm8_gpr(&mut self, imm: u8) -> Imm8Gpr {
|
fn imm8_to_imm8_gpr(&mut self, imm: u8) -> Imm8Gpr {
|
||||||
Imm8Gpr::new(Imm8Reg::Imm8 { imm }).unwrap()
|
Imm8Gpr::new(Imm8Reg::Imm8 { imm }).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn is_gpr_type(&mut self, ty: Type) -> Option<Type> {
|
||||||
|
if is_int_or_ref_ty(ty) || ty == I128 || ty == B128 {
|
||||||
|
Some(ty)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn is_xmm_type(&mut self, ty: Type) -> Option<Type> {
|
||||||
|
if ty == F32 || ty == F64 || (ty.is_vector() && ty.bits() == 128) {
|
||||||
|
Some(ty)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn is_single_register_type(&mut self, ty: Type) -> Option<Type> {
|
||||||
|
if ty != I128 {
|
||||||
|
Some(ty)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Since x64 doesn't have 8x16 shifts and we must use a 16x8 shift instead, we
|
// Since x64 doesn't have 8x16 shifts and we must use a 16x8 shift instead, we
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
src/clif.isle 9ea75a6f790b5c03
|
src/clif.isle 9ea75a6f790b5c03
|
||||||
src/prelude.isle 73285cd431346d53
|
src/prelude.isle 980b300b3ec3e338
|
||||||
src/isa/x64/inst.isle 301db31d5f1118ae
|
src/isa/x64/inst.isle ac88a0ae153ed210
|
||||||
src/isa/x64/lower.isle cdc94aec26c0bc5b
|
src/isa/x64/lower.isle 1ebdd4469355e2cf
|
||||||
|
|||||||
1695
cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs
generated
1695
cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs
generated
File diff suppressed because it is too large
Load Diff
@@ -324,47 +324,79 @@
|
|||||||
|
|
||||||
;; Newtype wrapper around `MInst` for instructions that are used for their
|
;; Newtype wrapper around `MInst` for instructions that are used for their
|
||||||
;; effect on flags.
|
;; effect on flags.
|
||||||
(type ProducesFlags (enum (ProducesFlags (inst MInst) (result Reg))))
|
;;
|
||||||
|
;; Variant determines how result is given when combined with a
|
||||||
|
;; ConsumesFlags. See `with_flags` below for more.
|
||||||
|
(type ProducesFlags (enum
|
||||||
|
(ProducesFlagsSideEffect (inst MInst))
|
||||||
|
;; Not directly combinable with a ConsumesFlags;
|
||||||
|
;; used in s390x and unwrapped directly by `trapif`.
|
||||||
|
(ProducesFlagsReturnsReg (inst MInst) (result Reg))
|
||||||
|
(ProducesFlagsReturnsResultWithConsumer (inst MInst) (result Reg))))
|
||||||
|
|
||||||
;; Newtype wrapper around `MInst` for instructions that consume flags.
|
;; Newtype wrapper around `MInst` for instructions that consume flags.
|
||||||
(type ConsumesFlags (enum (ConsumesFlags (inst MInst) (result Reg))))
|
;;
|
||||||
|
;; Variant determines how result is given when combined with a
|
||||||
|
;; ProducesFlags. See `with_flags` below for more.
|
||||||
|
(type ConsumesFlags (enum
|
||||||
|
(ConsumesFlagsReturnsResultWithProducer (inst MInst) (result Reg))
|
||||||
|
(ConsumesFlagsReturnsReg (inst MInst) (result Reg))
|
||||||
|
(ConsumesFlagsTwiceReturnsValueRegs (inst1 MInst)
|
||||||
|
(inst2 MInst)
|
||||||
|
(result ValueRegs))))
|
||||||
|
|
||||||
|
|
||||||
|
;; Helper for combining two flags-consumer instructions that return a
|
||||||
|
;; single Reg, giving a ConsumesFlags that returns both values in a
|
||||||
|
;; ValueRegs.
|
||||||
|
(decl consumes_flags_concat (ConsumesFlags ConsumesFlags) ConsumesFlags)
|
||||||
|
(rule (consumes_flags_concat (ConsumesFlags.ConsumesFlagsReturnsReg inst1 reg1)
|
||||||
|
(ConsumesFlags.ConsumesFlagsReturnsReg inst2 reg2))
|
||||||
|
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
|
||||||
|
inst1
|
||||||
|
inst2
|
||||||
|
(value_regs reg1 reg2)))
|
||||||
|
|
||||||
;; Combine flags-producing and -consuming instructions together, ensuring that
|
;; Combine flags-producing and -consuming instructions together, ensuring that
|
||||||
;; they are emitted back-to-back and no other instructions can be emitted
|
;; they are emitted back-to-back and no other instructions can be emitted
|
||||||
;; between them and potentially clobber the flags.
|
;; between them and potentially clobber the flags.
|
||||||
;;
|
;;
|
||||||
;; Returns a `ValueRegs` where the first register is the result of the
|
;; Returns a `ValueRegs` according to the specific combination of ProducesFlags and ConsumesFlags modes:
|
||||||
;; `ProducesFlags` instruction and the second is the result of the
|
;; - SideEffect + ReturnsReg --> ValueRegs with one Reg from consumer
|
||||||
;; `ConsumesFlags` instruction.
|
;; - SideEffect + TwiceReturnsValueRegs --> ValueRegs as given from consumer
|
||||||
|
;; - ReturnsResultWithConsumer + ReturnsResultWithProducer --> ValueRegs with low part from producer, high part from consumer
|
||||||
|
;;
|
||||||
|
;; See `with_flags_reg` below for a variant that extracts out just the lower Reg.
|
||||||
(decl with_flags (ProducesFlags ConsumesFlags) ValueRegs)
|
(decl with_flags (ProducesFlags ConsumesFlags) ValueRegs)
|
||||||
(rule (with_flags (ProducesFlags.ProducesFlags producer_inst producer_result)
|
|
||||||
(ConsumesFlags.ConsumesFlags consumer_inst consumer_result))
|
(rule (with_flags (ProducesFlags.ProducesFlagsReturnsResultWithConsumer producer_inst producer_result)
|
||||||
|
(ConsumesFlags.ConsumesFlagsReturnsResultWithProducer consumer_inst consumer_result))
|
||||||
(let ((_x Unit (emit producer_inst))
|
(let ((_x Unit (emit producer_inst))
|
||||||
(_y Unit (emit consumer_inst)))
|
(_y Unit (emit consumer_inst)))
|
||||||
(value_regs producer_result consumer_result)))
|
(value_regs producer_result consumer_result)))
|
||||||
|
|
||||||
;; Like `with_flags` but returns only the result of the consumer operation.
|
(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst)
|
||||||
(decl with_flags_1 (ProducesFlags ConsumesFlags) Reg)
|
(ConsumesFlags.ConsumesFlagsReturnsReg consumer_inst consumer_result))
|
||||||
(rule (with_flags_1 (ProducesFlags.ProducesFlags producer_inst _producer_result)
|
|
||||||
(ConsumesFlags.ConsumesFlags consumer_inst consumer_result))
|
|
||||||
(let ((_x Unit (emit producer_inst))
|
(let ((_x Unit (emit producer_inst))
|
||||||
(_y Unit (emit consumer_inst)))
|
(_y Unit (emit consumer_inst)))
|
||||||
consumer_result))
|
(value_reg consumer_result)))
|
||||||
|
|
||||||
;; Like `with_flags` but allows two consumers of the same flags. The result is a
|
(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst)
|
||||||
;; `ValueRegs` containing the first consumer's result and then the second
|
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consumer_inst_1
|
||||||
;; consumer's result.
|
consumer_inst_2
|
||||||
(decl with_flags_2 (ProducesFlags ConsumesFlags ConsumesFlags) ValueRegs)
|
consumer_result))
|
||||||
(rule (with_flags_2 (ProducesFlags.ProducesFlags producer_inst _producer_result)
|
|
||||||
(ConsumesFlags.ConsumesFlags consumer_inst_1 consumer_result_1)
|
|
||||||
(ConsumesFlags.ConsumesFlags consumer_inst_2 consumer_result_2))
|
|
||||||
(let ((_x Unit (emit producer_inst))
|
(let ((_x Unit (emit producer_inst))
|
||||||
;; Note that the order of emission here is swapped, as this seems
|
;; Note that the order of emission here is swapped, as this seems
|
||||||
;; to generate better register allocation for now with fewer
|
;; to generate better register allocation for now with fewer
|
||||||
;; `mov` instructions.
|
;; `mov` instructions.
|
||||||
(_y Unit (emit consumer_inst_2))
|
(_y Unit (emit consumer_inst_2))
|
||||||
(_z Unit (emit consumer_inst_1)))
|
(_z Unit (emit consumer_inst_1)))
|
||||||
(value_regs consumer_result_1 consumer_result_2)))
|
consumer_result))
|
||||||
|
|
||||||
|
(decl with_flags_reg (ProducesFlags ConsumesFlags) Reg)
|
||||||
|
(rule (with_flags_reg p c)
|
||||||
|
(let ((v ValueRegs (with_flags p c)))
|
||||||
|
(value_regs_get v 0)))
|
||||||
|
|
||||||
;;;; Helpers for Working with TrapCode ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Helpers for Working with TrapCode ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ block0(v0: f64, v1: i64):
|
|||||||
; Entry block: 0
|
; Entry block: 0
|
||||||
; Block 0:
|
; Block 0:
|
||||||
; (original IR block: block0)
|
; (original IR block: block0)
|
||||||
; (instruction range: 0 .. 17)
|
; (instruction range: 0 .. 16)
|
||||||
; Inst 0: pushq %rbp
|
; Inst 0: pushq %rbp
|
||||||
; Inst 1: movq %rsp, %rbp
|
; Inst 1: movq %rsp, %rbp
|
||||||
; Inst 2: movsd 0(%rdi), %xmm1
|
; Inst 2: movsd 0(%rdi), %xmm1
|
||||||
@@ -52,14 +52,12 @@ block0(v0: f64, v1: i64):
|
|||||||
; Inst 5: setz %sil
|
; Inst 5: setz %sil
|
||||||
; Inst 6: andl %edi, %esi
|
; Inst 6: andl %edi, %esi
|
||||||
; Inst 7: andq $1, %rsi
|
; Inst 7: andq $1, %rsi
|
||||||
; Inst 8: ucomisd %xmm1, %xmm0
|
; Inst 8: ucomisd %xmm0, %xmm1
|
||||||
; Inst 9: movaps %xmm0, %xmm1
|
; Inst 9: movaps %xmm0, %xmm1
|
||||||
; Inst 10: jnp $next; movsd %xmm0, %xmm1; $next:
|
; Inst 10: jz $check; movsd %xmm0, %xmm1; $check: jnp $next; movsd %xmm0, %xmm1; $next
|
||||||
; Inst 11: jz $next; movsd %xmm0, %xmm1; $next:
|
; Inst 11: movq %rsi, %rax
|
||||||
; Inst 12: movq %rsi, %rax
|
; Inst 12: movaps %xmm1, %xmm0
|
||||||
; Inst 13: movaps %xmm1, %xmm0
|
; Inst 13: movq %rbp, %rsp
|
||||||
; Inst 14: movq %rbp, %rsp
|
; Inst 14: popq %rbp
|
||||||
; Inst 15: popq %rbp
|
; Inst 15: ret
|
||||||
; Inst 16: ret
|
|
||||||
; }}
|
; }}
|
||||||
|
|
||||||
|
|||||||
80
cranelift/filetests/filetests/runtests/select.clif
Normal file
80
cranelift/filetests/filetests/runtests/select.clif
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
test interpret
|
||||||
|
test run
|
||||||
|
target x86_64
|
||||||
|
|
||||||
|
function %select_eq_f32(f32, f32) -> i32 {
|
||||||
|
block0(v0: f32, v1: f32):
|
||||||
|
v2 = fcmp eq v0, v1
|
||||||
|
v3 = iconst.i32 1
|
||||||
|
v4 = iconst.i32 0
|
||||||
|
v5 = select v2, v3, v4
|
||||||
|
return v5
|
||||||
|
}
|
||||||
|
; run: %select_eq_f32(0x42.42, 0x42.42) == 1
|
||||||
|
; run: %select_eq_f32(0x42.42, 0.0) == 0
|
||||||
|
; run: %select_eq_f32(0x42.42, NaN) == 0
|
||||||
|
|
||||||
|
function %select_ne_f64(f64, f64) -> i32 {
|
||||||
|
block0(v0: f64, v1: f64):
|
||||||
|
v2 = fcmp ne v0, v1
|
||||||
|
v3 = iconst.i32 1
|
||||||
|
v4 = iconst.i32 0
|
||||||
|
v5 = select v2, v3, v4
|
||||||
|
return v5
|
||||||
|
}
|
||||||
|
; run: %select_ne_f64(0x42.42, 0x42.42) == 0
|
||||||
|
; run: %select_ne_f64(0x42.42, 0.0) == 1
|
||||||
|
; run: %select_ne_f64(NaN, NaN) == 1
|
||||||
|
|
||||||
|
function %select_gt_f64(f64, f64) -> b1 {
|
||||||
|
block0(v0: f64, v1: f64):
|
||||||
|
v2 = fcmp gt v0, v1
|
||||||
|
v3 = bconst.b1 true
|
||||||
|
v4 = bconst.b1 false
|
||||||
|
v5 = select v2, v3, v4
|
||||||
|
return v5
|
||||||
|
}
|
||||||
|
; run: %select_gt_f64(0x42.42, 0.0) == true
|
||||||
|
; run: %select_gt_f64(0.0, 0.0) == false
|
||||||
|
; run: %select_gt_f64(0x0.0, 0x42.42) == false
|
||||||
|
; run: %select_gt_f64(NaN, 0x42.42) == false
|
||||||
|
|
||||||
|
function %select_ge_f64(f64, f64) -> i64 {
|
||||||
|
block0(v0: f64, v1: f64):
|
||||||
|
v2 = fcmp ge v0, v1
|
||||||
|
v3 = iconst.i64 1
|
||||||
|
v4 = iconst.i64 0
|
||||||
|
v5 = select v2, v3, v4
|
||||||
|
return v5
|
||||||
|
}
|
||||||
|
; run: %select_ge_f64(0x42.42, 0.0) == 1
|
||||||
|
; run: %select_ge_f64(0.0, 0.0) == 1
|
||||||
|
; run: %select_ge_f64(0x0.0, 0x42.42) == 0
|
||||||
|
; run: %select_ge_f64(0x0.0, NaN) == 0
|
||||||
|
|
||||||
|
function %select_le_f32(f32, f32) -> f32 {
|
||||||
|
block0(v0: f32, v1: f32):
|
||||||
|
v2 = fcmp le v0, v1
|
||||||
|
v3 = f32const 0x1.0
|
||||||
|
v4 = f32const 0x0.0
|
||||||
|
v5 = select v2, v3, v4
|
||||||
|
return v5
|
||||||
|
}
|
||||||
|
; runx: %select_le_f32(0x42.42, 0.0) == 0x0.0
|
||||||
|
; run: %select_le_f32(0.0, 0.0) == 0x1.0
|
||||||
|
; run: %select_le_f32(0x0.0, 0x42.42) == 0x1.0
|
||||||
|
; run: %select_le_f32(0x0.0, NaN) == 0x0.0
|
||||||
|
|
||||||
|
function %select_uno_f32(f32, f32) -> i8 {
|
||||||
|
block0(v0: f32, v1: f32):
|
||||||
|
v2 = fcmp uno v0, v1
|
||||||
|
v3 = iconst.i8 1
|
||||||
|
v4 = iconst.i8 0
|
||||||
|
v5 = select v2, v3, v4
|
||||||
|
return v5
|
||||||
|
}
|
||||||
|
; run: %select_uno_f32(0x42.42, 0.0) == 0
|
||||||
|
; run: %select_uno_f32(0.0, 0.0) == 0
|
||||||
|
; run: %select_uno_f32(0x0.0, 0x42.42) == 0
|
||||||
|
; run: %select_uno_f32(0x0.0, NaN) == 1
|
||||||
|
; run: %select_uno_f32(-NaN, 0x42.42) == 1
|
||||||
@@ -460,7 +460,7 @@ impl<'a> Codegen<'a> {
|
|||||||
};
|
};
|
||||||
let valuename = self.value_binder(&value, /* is_ref = */ true, ty);
|
let valuename = self.value_binder(&value, /* is_ref = */ true, ty);
|
||||||
let fieldname = &self.typeenv.syms[field.name.index()];
|
let fieldname = &self.typeenv.syms[field.name.index()];
|
||||||
self.define_val(&value, ctx, /* is_ref = */ false, field.ty);
|
self.define_val(&value, ctx, /* is_ref = */ true, field.ty);
|
||||||
format!("{}: {}", fieldname, valuename)
|
format!("{}: {}", fieldname, valuename)
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>()
|
.collect::<Vec<_>>()
|
||||||
|
|||||||
Reference in New Issue
Block a user