x64: port select to ISLE (#3682)

* x64: port `select` using an FP comparison to ISLE

This change includes quite a few interlocking parts, required mainly by
the current x64 conventions in ISLE:
 - it adds a way to emit a `cmove` with multiple OR-ing conditions;
   because x64 ISLE cannot currently safely emit a comparison followed
   by several jumps, this adds `MachInst::CmoveOr` and
   `MachInst::XmmCmoveOr` macro instructions. Unfortunately, these macro
   instructions hide the multi-instruction sequence in `lower.isle`
 - to properly keep track of what instructions consume and produce
   flags, @cfallin added a way to pass around variants of
   `ConsumesFlags` and `ProducesFlags`--these changes affect all
   backends
 - then, to lower the `fcmp + select` CLIF, this change adds several
   `cmove*_from_values` helpers that perform all of the awkward
   conversions between `Value`, `ValueReg`, `Reg`, and `Gpr/Xmm`; one
   upside is that now these lowerings have much-improved documentation
   explaining why the various `FloatCC` and `CC` choices are made the
   way they are.

Co-authored-by: Chris Fallin <chris@cfallin.org>
This commit is contained in:
Andrew Brown
2022-02-23 10:03:16 -08:00
committed by GitHub
parent 5a5e401a9c
commit f87c61176a
20 changed files with 3163 additions and 2272 deletions

View File

@@ -1461,36 +1461,41 @@
(writable_reg_to_reg dst))) (writable_reg_to_reg dst)))
;; Helper for emitting `adds` instructions. ;; Helper for emitting `adds` instructions.
(decl add_with_flags (Type Reg Reg) ProducesFlags) (decl add_with_flags_paired (Type Reg Reg) ProducesFlags)
(rule (add_with_flags ty src1 src2) (rule (add_with_flags_paired ty src1 src2)
(let ((dst WritableReg (temp_writable_reg $I64))) (let ((dst WritableReg (temp_writable_reg $I64)))
(ProducesFlags.ProducesFlags (MInst.AluRRR (ALUOp.AddS) (operand_size ty) dst src1 src2) (ProducesFlags.ProducesFlagsReturnsResultWithConsumer
(MInst.AluRRR (ALUOp.AddS) (operand_size ty) dst src1 src2)
(writable_reg_to_reg dst)))) (writable_reg_to_reg dst))))
;; Helper for emitting `adc` instructions. ;; Helper for emitting `adc` instructions.
(decl adc (Type Reg Reg) ConsumesFlags) (decl adc_paired (Type Reg Reg) ConsumesFlags)
(rule (adc ty src1 src2) (rule (adc_paired ty src1 src2)
(let ((dst WritableReg (temp_writable_reg $I64))) (let ((dst WritableReg (temp_writable_reg $I64)))
(ConsumesFlags.ConsumesFlags (MInst.AluRRR (ALUOp.Adc) (operand_size ty) dst src1 src2) (ConsumesFlags.ConsumesFlagsReturnsResultWithProducer
(MInst.AluRRR (ALUOp.Adc) (operand_size ty) dst src1 src2)
(writable_reg_to_reg dst)))) (writable_reg_to_reg dst))))
;; Helper for emitting `subs` instructions. ;; Helper for emitting `subs` instructions.
(decl sub_with_flags (Type Reg Reg) ProducesFlags) (decl sub_with_flags_paired (Type Reg Reg) ProducesFlags)
(rule (sub_with_flags ty src1 src2) (rule (sub_with_flags_paired ty src1 src2)
(let ((dst WritableReg (temp_writable_reg $I64))) (let ((dst WritableReg (temp_writable_reg $I64)))
(ProducesFlags.ProducesFlags (MInst.AluRRR (ALUOp.SubS) (operand_size ty) dst src1 src2) (ProducesFlags.ProducesFlagsReturnsResultWithConsumer
(MInst.AluRRR (ALUOp.SubS) (operand_size ty) dst src1 src2)
(writable_reg_to_reg dst)))) (writable_reg_to_reg dst))))
(decl cmp64_imm (Reg Imm12) ProducesFlags) (decl cmp64_imm (Reg Imm12) ProducesFlags)
(rule (cmp64_imm src1 src2) (rule (cmp64_imm src1 src2)
(ProducesFlags.ProducesFlags (MInst.AluRRImm12 (ALUOp.SubS) (OperandSize.Size64) (writable_zero_reg) src1 src2) (ProducesFlags.ProducesFlagsSideEffect
(zero_reg))) (MInst.AluRRImm12 (ALUOp.SubS) (OperandSize.Size64) (writable_zero_reg)
src1 src2)))
;; Helper for emitting `sbc` instructions. ;; Helper for emitting `sbc` instructions.
(decl sbc (Type Reg Reg) ConsumesFlags) (decl sbc_paired (Type Reg Reg) ConsumesFlags)
(rule (sbc ty src1 src2) (rule (sbc_paired ty src1 src2)
(let ((dst WritableReg (temp_writable_reg $I64))) (let ((dst WritableReg (temp_writable_reg $I64)))
(ConsumesFlags.ConsumesFlags (MInst.AluRRR (ALUOp.Sbc) (operand_size ty) dst src1 src2) (ConsumesFlags.ConsumesFlagsReturnsResultWithProducer
(MInst.AluRRR (ALUOp.Sbc) (operand_size ty) dst src1 src2)
(writable_reg_to_reg dst)))) (writable_reg_to_reg dst))))
;; Helper for emitting `MInst.VecMisc` instructions. ;; Helper for emitting `MInst.VecMisc` instructions.
@@ -1581,12 +1586,12 @@
;; which must be paired with `with_flags*` helpers. ;; which must be paired with `with_flags*` helpers.
(decl tst_imm (Type Reg ImmLogic) ProducesFlags) (decl tst_imm (Type Reg ImmLogic) ProducesFlags)
(rule (tst_imm ty reg imm) (rule (tst_imm ty reg imm)
(ProducesFlags.ProducesFlags (MInst.AluRRImmLogic (ALUOp.AndS) (ProducesFlags.ProducesFlagsSideEffect
(MInst.AluRRImmLogic (ALUOp.AndS)
(operand_size ty) (operand_size ty)
(writable_zero_reg) (writable_zero_reg)
reg reg
imm) imm)))
(invalid_reg)))
;; Helper for generating a `CSel` instruction. ;; Helper for generating a `CSel` instruction.
;; ;;
@@ -1596,7 +1601,8 @@
(decl csel (Cond Reg Reg) ConsumesFlags) (decl csel (Cond Reg Reg) ConsumesFlags)
(rule (csel cond if_true if_false) (rule (csel cond if_true if_false)
(let ((dst WritableReg (temp_writable_reg $I64))) (let ((dst WritableReg (temp_writable_reg $I64)))
(ConsumesFlags.ConsumesFlags (MInst.CSel dst cond if_true if_false) (ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.CSel dst cond if_true if_false)
(writable_reg_to_reg dst)))) (writable_reg_to_reg dst))))
;; Helpers for generating `add` instructions. ;; Helpers for generating `add` instructions.

View File

@@ -91,8 +91,8 @@
;; the actual addition is `adds` followed by `adc` which comprises the ;; the actual addition is `adds` followed by `adc` which comprises the
;; low/high bits of the result ;; low/high bits of the result
(with_flags (with_flags
(add_with_flags $I64 x_lo y_lo) (add_with_flags_paired $I64 x_lo y_lo)
(adc $I64 x_hi y_hi)))) (adc_paired $I64 x_hi y_hi))))
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -142,8 +142,8 @@
;; the actual subtraction is `subs` followed by `sbc` which comprises ;; the actual subtraction is `subs` followed by `sbc` which comprises
;; the low/high bits of the result ;; the low/high bits of the result
(with_flags (with_flags
(sub_with_flags $I64 x_lo y_lo) (sub_with_flags_paired $I64 x_lo y_lo)
(sbc $I64 x_hi y_hi)))) (sbc_paired $I64 x_hi y_hi))))
;;;; Rules for `uadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `uadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -708,10 +708,11 @@
inv_amt)) inv_amt))
(maybe_hi Reg (orr $I64 hi_lshift lo_rshift)) (maybe_hi Reg (orr $I64 hi_lshift lo_rshift))
) )
(with_flags_2 (with_flags
(tst_imm $I64 amt (u64_into_imm_logic $I64 64)) (tst_imm $I64 amt (u64_into_imm_logic $I64 64))
(consumes_flags_concat
(csel (Cond.Ne) (zero_reg) lo_lshift) (csel (Cond.Ne) (zero_reg) lo_lshift)
(csel (Cond.Ne) lo_lshift maybe_hi)))) (csel (Cond.Ne) lo_lshift maybe_hi)))))
;; Shift for vector types. ;; Shift for vector types.
(rule (lower (has_type (vec128 ty) (ishl x y))) (rule (lower (has_type (vec128 ty) (ishl x y)))
@@ -805,10 +806,11 @@
inv_amt)) inv_amt))
(maybe_lo Reg (orr $I64 lo_rshift hi_lshift)) (maybe_lo Reg (orr $I64 lo_rshift hi_lshift))
) )
(with_flags_2 (with_flags
(tst_imm $I64 amt (u64_into_imm_logic $I64 64)) (tst_imm $I64 amt (u64_into_imm_logic $I64 64))
(consumes_flags_concat
(csel (Cond.Ne) hi_rshift maybe_lo) (csel (Cond.Ne) hi_rshift maybe_lo)
(csel (Cond.Ne) (zero_reg) hi_rshift)))) (csel (Cond.Ne) (zero_reg) hi_rshift)))))
;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -858,10 +860,11 @@
(hi_sign Reg (asr_imm $I64 src_hi (imm_shift_from_u8 63))) (hi_sign Reg (asr_imm $I64 src_hi (imm_shift_from_u8 63)))
(maybe_lo Reg (orr $I64 lo_rshift hi_lshift)) (maybe_lo Reg (orr $I64 lo_rshift hi_lshift))
) )
(with_flags_2 (with_flags
(tst_imm $I64 amt (u64_into_imm_logic $I64 64)) (tst_imm $I64 amt (u64_into_imm_logic $I64 64))
(consumes_flags_concat
(csel (Cond.Ne) hi_rshift maybe_lo) (csel (Cond.Ne) hi_rshift maybe_lo)
(csel (Cond.Ne) hi_sign hi_rshift)))) (csel (Cond.Ne) hi_sign hi_rshift)))))
;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1123,7 +1126,7 @@
(sign_eq_eon Reg (eon $I64 hi lo)) (sign_eq_eon Reg (eon $I64 hi lo))
(sign_eq Reg (lsr_imm $I64 sign_eq_eon (imm_shift_from_u8 63))) (sign_eq Reg (lsr_imm $I64 sign_eq_eon (imm_shift_from_u8 63)))
(lo_sign_bits Reg (madd64 lo_cls sign_eq sign_eq)) (lo_sign_bits Reg (madd64 lo_cls sign_eq sign_eq))
(maybe_lo Reg (with_flags_1 (maybe_lo Reg (with_flags_reg
(cmp64_imm hi_cls (u8_into_imm12 63)) (cmp64_imm hi_cls (u8_into_imm12 63))
(csel (Cond.Eq) lo_sign_bits (zero_reg)) (csel (Cond.Eq) lo_sign_bits (zero_reg))
)) ))

View File

@@ -1,4 +1,4 @@
src/clif.isle 9ea75a6f790b5c03 src/clif.isle 9ea75a6f790b5c03
src/prelude.isle 73285cd431346d53 src/prelude.isle 980b300b3ec3e338
src/isa/aarch64/inst.isle 4c176462894836e5 src/isa/aarch64/inst.isle a7f3572a5cf2f201
src/isa/aarch64/lower.isle aff657984bf30686 src/isa/aarch64/lower.isle 534c135b5f535f33

File diff suppressed because it is too large Load Diff

View File

@@ -1427,9 +1427,8 @@
;; Helper for emitting `MInst.RxSBGTest` instructions. ;; Helper for emitting `MInst.RxSBGTest` instructions.
(decl rxsbg_test (RxSBGOp Reg Reg u8 u8 i8) ProducesFlags) (decl rxsbg_test (RxSBGOp Reg Reg u8 u8 i8) ProducesFlags)
(rule (rxsbg_test op src1 src2 start_bit end_bit rotate_amt) (rule (rxsbg_test op src1 src2 start_bit end_bit rotate_amt)
(ProducesFlags.ProducesFlags (MInst.RxSBGTest op src1 src2 (ProducesFlags.ProducesFlagsSideEffect
start_bit end_bit rotate_amt) (MInst.RxSBGTest op src1 src2 start_bit end_bit rotate_amt)))
(invalid_reg)))
;; Helper for emitting `MInst.UnaryRR` instructions. ;; Helper for emitting `MInst.UnaryRR` instructions.
(decl unary_rr (Type UnaryOp Reg) Reg) (decl unary_rr (Type UnaryOp Reg) Reg)
@@ -1441,32 +1440,27 @@
;; Helper for emitting `MInst.CmpRR` instructions. ;; Helper for emitting `MInst.CmpRR` instructions.
(decl cmp_rr (CmpOp Reg Reg) ProducesFlags) (decl cmp_rr (CmpOp Reg Reg) ProducesFlags)
(rule (cmp_rr op src1 src2) (rule (cmp_rr op src1 src2)
(ProducesFlags.ProducesFlags (MInst.CmpRR op src1 src2) (ProducesFlags.ProducesFlagsSideEffect (MInst.CmpRR op src1 src2)))
(invalid_reg)))
;; Helper for emitting `MInst.CmpRX` instructions. ;; Helper for emitting `MInst.CmpRX` instructions.
(decl cmp_rx (CmpOp Reg MemArg) ProducesFlags) (decl cmp_rx (CmpOp Reg MemArg) ProducesFlags)
(rule (cmp_rx op src mem) (rule (cmp_rx op src mem)
(ProducesFlags.ProducesFlags (MInst.CmpRX op src mem) (ProducesFlags.ProducesFlagsSideEffect (MInst.CmpRX op src mem)))
(invalid_reg)))
;; Helper for emitting `MInst.CmpRSImm16` instructions. ;; Helper for emitting `MInst.CmpRSImm16` instructions.
(decl cmp_rsimm16 (CmpOp Reg i16) ProducesFlags) (decl cmp_rsimm16 (CmpOp Reg i16) ProducesFlags)
(rule (cmp_rsimm16 op src imm) (rule (cmp_rsimm16 op src imm)
(ProducesFlags.ProducesFlags (MInst.CmpRSImm16 op src imm) (ProducesFlags.ProducesFlagsSideEffect (MInst.CmpRSImm16 op src imm)))
(invalid_reg)))
;; Helper for emitting `MInst.CmpRSImm32` instructions. ;; Helper for emitting `MInst.CmpRSImm32` instructions.
(decl cmp_rsimm32 (CmpOp Reg i32) ProducesFlags) (decl cmp_rsimm32 (CmpOp Reg i32) ProducesFlags)
(rule (cmp_rsimm32 op src imm) (rule (cmp_rsimm32 op src imm)
(ProducesFlags.ProducesFlags (MInst.CmpRSImm32 op src imm) (ProducesFlags.ProducesFlagsSideEffect (MInst.CmpRSImm32 op src imm)))
(invalid_reg)))
;; Helper for emitting `MInst.CmpRUImm32` instructions. ;; Helper for emitting `MInst.CmpRUImm32` instructions.
(decl cmp_ruimm32 (CmpOp Reg u32) ProducesFlags) (decl cmp_ruimm32 (CmpOp Reg u32) ProducesFlags)
(rule (cmp_ruimm32 op src imm) (rule (cmp_ruimm32 op src imm)
(ProducesFlags.ProducesFlags (MInst.CmpRUImm32 op src imm) (ProducesFlags.ProducesFlagsSideEffect (MInst.CmpRUImm32 op src imm)))
(invalid_reg)))
;; Helper for emitting `MInst.AtomicRmw` instructions. ;; Helper for emitting `MInst.AtomicRmw` instructions.
(decl atomic_rmw_impl (Type ALUOp Reg MemArg) Reg) (decl atomic_rmw_impl (Type ALUOp Reg MemArg) Reg)
@@ -1615,20 +1609,18 @@
;; Helper for emitting `MInst.FpuCmp32` instructions. ;; Helper for emitting `MInst.FpuCmp32` instructions.
(decl fpu_cmp32 (Reg Reg) ProducesFlags) (decl fpu_cmp32 (Reg Reg) ProducesFlags)
(rule (fpu_cmp32 src1 src2) (rule (fpu_cmp32 src1 src2)
(ProducesFlags.ProducesFlags (MInst.FpuCmp32 src1 src2) (ProducesFlags.ProducesFlagsSideEffect (MInst.FpuCmp32 src1 src2)))
(invalid_reg)))
;; Helper for emitting `MInst.FpuCmp64` instructions. ;; Helper for emitting `MInst.FpuCmp64` instructions.
(decl fpu_cmp64 (Reg Reg) ProducesFlags) (decl fpu_cmp64 (Reg Reg) ProducesFlags)
(rule (fpu_cmp64 src1 src2) (rule (fpu_cmp64 src1 src2)
(ProducesFlags.ProducesFlags (MInst.FpuCmp64 src1 src2) (ProducesFlags.ProducesFlagsSideEffect (MInst.FpuCmp64 src1 src2)))
(invalid_reg)))
;; Helper for emitting `MInst.FpuToInt` instructions. ;; Helper for emitting `MInst.FpuToInt` instructions.
(decl fpu_to_int (Type FpuToIntOp Reg) ProducesFlags) (decl fpu_to_int (Type FpuToIntOp Reg) ProducesFlags)
(rule (fpu_to_int ty op src) (rule (fpu_to_int ty op src)
(let ((dst WritableReg (temp_writable_reg ty))) (let ((dst WritableReg (temp_writable_reg ty)))
(ProducesFlags.ProducesFlags (MInst.FpuToInt op dst src) (ProducesFlags.ProducesFlagsReturnsReg (MInst.FpuToInt op dst src)
(writable_reg_to_reg dst)))) (writable_reg_to_reg dst))))
;; Helper for emitting `MInst.IntToFpu` instructions. ;; Helper for emitting `MInst.IntToFpu` instructions.
@@ -1751,7 +1743,7 @@
;; Emit a `ProducesFlags` instruction when the flags are not actually needed. ;; Emit a `ProducesFlags` instruction when the flags are not actually needed.
(decl drop_flags (ProducesFlags) Reg) (decl drop_flags (ProducesFlags) Reg)
(rule (drop_flags (ProducesFlags.ProducesFlags inst result)) (rule (drop_flags (ProducesFlags.ProducesFlagsReturnsReg inst result))
(let ((_ Unit (emit inst))) (let ((_ Unit (emit inst)))
result)) result))
@@ -1834,10 +1826,10 @@
;; Push instructions to break out of the loop if condition is met. ;; Push instructions to break out of the loop if condition is met.
(decl push_break_if (VecMInstBuilder ProducesFlags Cond) Reg) (decl push_break_if (VecMInstBuilder ProducesFlags Cond) Reg)
(rule (push_break_if ib (ProducesFlags.ProducesFlags inst result) cond) (rule (push_break_if ib (ProducesFlags.ProducesFlagsSideEffect inst) cond)
(let ((_1 Unit (inst_builder_push ib inst)) (let ((_1 Unit (inst_builder_push ib inst))
(_2 Unit (inst_builder_push ib (MInst.CondBreak cond)))) (_2 Unit (inst_builder_push ib (MInst.CondBreak cond))))
result)) (invalid_reg)))
;; Emit a `MInst.Loop` instruction holding a loop body instruction sequence. ;; Emit a `MInst.Loop` instruction holding a loop body instruction sequence.
(decl emit_loop (VecMInstBuilder Cond) Unit) (decl emit_loop (VecMInstBuilder Cond) Unit)
@@ -2215,10 +2207,10 @@
;; Conditionally move immediate value into destination register. (Non-SSA form.) ;; Conditionally move immediate value into destination register. (Non-SSA form.)
(decl emit_cmov_imm (Type WritableReg Cond i16) ConsumesFlags) (decl emit_cmov_imm (Type WritableReg Cond i16) ConsumesFlags)
(rule (emit_cmov_imm (gpr32_ty _ty) dst cond imm) (rule (emit_cmov_imm (gpr32_ty _ty) dst cond imm)
(ConsumesFlags.ConsumesFlags (MInst.CMov32SImm16 dst cond imm) (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CMov32SImm16 dst cond imm)
(writable_reg_to_reg dst))) (writable_reg_to_reg dst)))
(rule (emit_cmov_imm (gpr64_ty _ty) dst cond imm) (rule (emit_cmov_imm (gpr64_ty _ty) dst cond imm)
(ConsumesFlags.ConsumesFlags (MInst.CMov64SImm16 dst cond imm) (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CMov64SImm16 dst cond imm)
(writable_reg_to_reg dst))) (writable_reg_to_reg dst)))
;; Conditionally select between immediate and source register. ;; Conditionally select between immediate and source register.
@@ -2233,7 +2225,7 @@
(rule (cmov_imm_regpair_lo ty producer cond imm src) (rule (cmov_imm_regpair_lo ty producer cond imm src)
(let ((dst WritableRegPair (copy_writable_regpair src)) (let ((dst WritableRegPair (copy_writable_regpair src))
(consumer ConsumesFlags (emit_cmov_imm ty (writable_regpair_lo dst) cond imm)) (consumer ConsumesFlags (emit_cmov_imm ty (writable_regpair_lo dst) cond imm))
(_ Reg (with_flags_1 producer consumer))) (_ Reg (with_flags_reg producer consumer)))
(writable_regpair_to_regpair dst))) (writable_regpair_to_regpair dst)))
;; Conditionally modify the high word of a register pair. ;; Conditionally modify the high word of a register pair.
@@ -2242,22 +2234,22 @@
(rule (cmov_imm_regpair_hi ty producer cond imm src) (rule (cmov_imm_regpair_hi ty producer cond imm src)
(let ((dst WritableRegPair (copy_writable_regpair src)) (let ((dst WritableRegPair (copy_writable_regpair src))
(consumer ConsumesFlags (emit_cmov_imm ty (writable_regpair_hi dst) cond imm)) (consumer ConsumesFlags (emit_cmov_imm ty (writable_regpair_hi dst) cond imm))
(_ Reg (with_flags_1 producer consumer))) (_ Reg (with_flags_reg producer consumer)))
(writable_regpair_to_regpair dst))) (writable_regpair_to_regpair dst)))
;; Conditionally select between two source registers. (Non-SSA form.) ;; Conditionally select between two source registers. (Non-SSA form.)
(decl emit_cmov_reg (Type WritableReg Cond Reg) ConsumesFlags) (decl emit_cmov_reg (Type WritableReg Cond Reg) ConsumesFlags)
(rule (emit_cmov_reg (gpr32_ty _ty) dst cond src) (rule (emit_cmov_reg (gpr32_ty _ty) dst cond src)
(ConsumesFlags.ConsumesFlags (MInst.CMov32 dst cond src) (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CMov32 dst cond src)
(writable_reg_to_reg dst))) (writable_reg_to_reg dst)))
(rule (emit_cmov_reg (gpr64_ty _ty) dst cond src) (rule (emit_cmov_reg (gpr64_ty _ty) dst cond src)
(ConsumesFlags.ConsumesFlags (MInst.CMov64 dst cond src) (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CMov64 dst cond src)
(writable_reg_to_reg dst))) (writable_reg_to_reg dst)))
(rule (emit_cmov_reg $F32 dst cond src) (rule (emit_cmov_reg $F32 dst cond src)
(ConsumesFlags.ConsumesFlags (MInst.FpuCMov32 dst cond src) (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.FpuCMov32 dst cond src)
(writable_reg_to_reg dst))) (writable_reg_to_reg dst)))
(rule (emit_cmov_reg $F64 dst cond src) (rule (emit_cmov_reg $F64 dst cond src)
(ConsumesFlags.ConsumesFlags (MInst.FpuCMov64 dst cond src) (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.FpuCMov64 dst cond src)
(writable_reg_to_reg dst))) (writable_reg_to_reg dst)))
;; Conditionally select between two source registers. ;; Conditionally select between two source registers.
@@ -2270,10 +2262,14 @@
;; Helpers for generating conditional traps ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Helpers for generating conditional traps ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl trap_if (ProducesFlags Cond TrapCode) Reg) (decl trap_if (ProducesFlags Cond TrapCode) Reg)
(rule (trap_if (ProducesFlags.ProducesFlags inst result) cond trap_code) (rule (trap_if (ProducesFlags.ProducesFlagsReturnsReg inst result) cond trap_code)
(let ((_1 Unit (emit inst)) (let ((_1 Unit (emit inst))
(_2 Unit (emit (MInst.TrapIf cond trap_code)))) (_2 Unit (emit (MInst.TrapIf cond trap_code))))
result)) result))
(rule (trap_if (ProducesFlags.ProducesFlagsSideEffect inst) cond trap_code)
(let ((_1 Unit (emit inst))
(_2 Unit (emit (MInst.TrapIf cond trap_code))))
(invalid_reg)))
(decl icmps_reg_and_trap (Type Reg Reg Cond TrapCode) Reg) (decl icmps_reg_and_trap (Type Reg Reg Cond TrapCode) Reg)
(rule (icmps_reg_and_trap ty src1 src2 cond trap_code) (rule (icmps_reg_and_trap ty src1 src2 cond trap_code)
@@ -2332,9 +2328,9 @@
;; instruction in between the producer and consumer. (This use is only valid ;; instruction in between the producer and consumer. (This use is only valid
;; if that unrelated instruction does not modify the condition code.) ;; if that unrelated instruction does not modify the condition code.)
(decl emit_producer (ProducesFlags) Unit) (decl emit_producer (ProducesFlags) Unit)
(rule (emit_producer (ProducesFlags.ProducesFlags insn _)) (emit insn)) (rule (emit_producer (ProducesFlags.ProducesFlagsSideEffect insn)) (emit insn))
(decl emit_consumer (ConsumesFlags) Unit) (decl emit_consumer (ConsumesFlags) Unit)
(rule (emit_consumer (ConsumesFlags.ConsumesFlags insn _)) (emit insn)) (rule (emit_consumer (ConsumesFlags.ConsumesFlagsReturnsReg insn _)) (emit insn))
;; Use a boolean condition to select between two registers. ;; Use a boolean condition to select between two registers.
(decl select_bool_reg (Type ProducesBool Reg Reg) Reg) (decl select_bool_reg (Type ProducesBool Reg Reg) Reg)

View File

@@ -1102,7 +1102,7 @@
;; result expected by Cranelift semantics. The only exception ;; result expected by Cranelift semantics. The only exception
;; is the case where the input was a NaN. We explicitly check ;; is the case where the input was a NaN. We explicitly check
;; for that and force the output to 0 in that case. ;; for that and force the output to 0 in that case.
(sat Reg (with_flags_1 (fcmp_reg src_ty src src) (sat Reg (with_flags_reg (fcmp_reg src_ty src src)
(cmov_imm dst_ty (cmov_imm dst_ty
(floatcc_as_cond (FloatCC.Unordered)) 0 dst)))) (floatcc_as_cond (FloatCC.Unordered)) 0 dst))))
(value_reg sat))) (value_reg sat)))
@@ -1119,7 +1119,7 @@
;; result expected by Cranelift semantics. The only exception ;; result expected by Cranelift semantics. The only exception
;; is the case where the input was a NaN. We explicitly check ;; is the case where the input was a NaN. We explicitly check
;; for that and force the output to 0 in that case. ;; for that and force the output to 0 in that case.
(sat Reg (with_flags_1 (fcmp_reg src_ty src src) (sat Reg (with_flags_reg (fcmp_reg src_ty src src)
(cmov_imm dst_ty (cmov_imm dst_ty
(floatcc_as_cond (FloatCC.Unordered)) 0 dst)))) (floatcc_as_cond (FloatCC.Unordered)) 0 dst))))
(value_reg sat))) (value_reg sat)))

View File

@@ -1,4 +1,4 @@
src/clif.isle 9ea75a6f790b5c03 src/clif.isle 9ea75a6f790b5c03
src/prelude.isle 73285cd431346d53 src/prelude.isle 980b300b3ec3e338
src/isa/s390x/inst.isle 87a2d7c0c69d0324 src/isa/s390x/inst.isle b0f53fcf0cdadde1
src/isa/s390x/lower.isle 3c124e26bc411983 src/isa/s390x/lower.isle 59264a7442cf6e1c

File diff suppressed because it is too large Load Diff

View File

@@ -149,15 +149,41 @@
(Setcc (cc CC) (Setcc (cc CC)
(dst WritableGpr)) (dst WritableGpr))
;; Integer conditional move. ;; =========================================
;; ;; Conditional moves.
;; Overwrites the destination register.
;; GPR conditional move; overwrites the destination register.
(Cmove (size OperandSize) (Cmove (size OperandSize)
(cc CC) (cc CC)
(consequent GprMem) (consequent GprMem)
(alternative Gpr) (alternative Gpr)
(dst WritableGpr)) (dst WritableGpr))
;; GPR conditional move with the `OR` of two conditions; overwrites
;; the destination register.
(CmoveOr (size OperandSize)
(cc1 CC)
(cc2 CC)
(consequent GprMem)
(alternative Gpr)
(dst WritableGpr))
;; XMM conditional move; overwrites the destination register.
(XmmCmove (size OperandSize)
(cc CC)
(consequent XmmMem)
(alternative Xmm)
(dst WritableXmm))
;; XMM conditional move with the `OR` of two conditions; overwrites
;; the destination register.
(XmmCmoveOr (size OperandSize)
(cc1 CC)
(cc2 CC)
(consequent XmmMem)
(alternative Xmm)
(dst WritableXmm))
;; ========================================= ;; =========================================
;; Stack manipulation. ;; Stack manipulation.
@@ -275,14 +301,6 @@
(lhs Xmm) (lhs Xmm)
(rhs_dst WritableXmm)) (rhs_dst WritableXmm))
;; XMM (scalar) conditional move.
;;
;; Overwrites the destination register if cc is set.
(XmmCmove (size OperandSize)
(cc CC)
(src XmmMem)
(dst WritableXmm))
;; Float comparisons/tests: cmp (b w l q) (reg addr imm) reg. ;; Float comparisons/tests: cmp (b w l q) (reg addr imm) reg.
(XmmCmpRmR (op SseOpcode) (XmmCmpRmR (op SseOpcode)
(src XmmMem) (src XmmMem)
@@ -1027,6 +1045,17 @@
(decl xmm0 () WritableXmm) (decl xmm0 () WritableXmm)
(extern constructor xmm0 xmm0) (extern constructor xmm0 xmm0)
;;;; Helpers for determining the register class of a value type ;;;;;;;;;;;;;;;;
(decl is_xmm_type (Type) Type)
(extern extractor is_xmm_type is_xmm_type)
(decl is_gpr_type (Type) Type)
(extern extractor is_gpr_type is_gpr_type)
(decl is_single_register_type (Type) Type)
(extern extractor is_single_register_type is_single_register_type)
;;;; Helpers for Querying Enabled ISA Extensions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Helpers for Querying Enabled ISA Extensions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl avx512vl_enabled () Type) (decl avx512vl_enabled () Type)
@@ -1256,10 +1285,11 @@
src2)) src2))
;; Helper for creating `add` instructions whose flags are also used. ;; Helper for creating `add` instructions whose flags are also used.
(decl add_with_flags (Type Gpr GprMemImm) ProducesFlags) (decl add_with_flags_paired (Type Gpr GprMemImm) ProducesFlags)
(rule (add_with_flags ty src1 src2) (rule (add_with_flags_paired ty src1 src2)
(let ((dst WritableGpr (temp_writable_gpr))) (let ((dst WritableGpr (temp_writable_gpr)))
(ProducesFlags.ProducesFlags (MInst.AluRmiR (operand_size_of_type_32_64 ty) (ProducesFlags.ProducesFlagsReturnsResultWithConsumer
(MInst.AluRmiR (operand_size_of_type_32_64 ty)
(AluRmiROpcode.Add) (AluRmiROpcode.Add)
src1 src1
src2 src2
@@ -1267,10 +1297,11 @@
(gpr_to_reg (writable_gpr_to_gpr dst))))) (gpr_to_reg (writable_gpr_to_gpr dst)))))
;; Helper for creating `adc` instructions. ;; Helper for creating `adc` instructions.
(decl adc (Type Gpr GprMemImm) ConsumesFlags) (decl adc_paired (Type Gpr GprMemImm) ConsumesFlags)
(rule (adc ty src1 src2) (rule (adc_paired ty src1 src2)
(let ((dst WritableGpr (temp_writable_gpr))) (let ((dst WritableGpr (temp_writable_gpr)))
(ConsumesFlags.ConsumesFlags (MInst.AluRmiR (operand_size_of_type_32_64 ty) (ConsumesFlags.ConsumesFlagsReturnsResultWithProducer
(MInst.AluRmiR (operand_size_of_type_32_64 ty)
(AluRmiROpcode.Adc) (AluRmiROpcode.Adc)
src1 src1
src2 src2
@@ -1286,10 +1317,11 @@
src2)) src2))
;; Helper for creating `sub` instructions whose flags are also used. ;; Helper for creating `sub` instructions whose flags are also used.
(decl sub_with_flags (Type Gpr GprMemImm) ProducesFlags) (decl sub_with_flags_paired (Type Gpr GprMemImm) ProducesFlags)
(rule (sub_with_flags ty src1 src2) (rule (sub_with_flags_paired ty src1 src2)
(let ((dst WritableGpr (temp_writable_gpr))) (let ((dst WritableGpr (temp_writable_gpr)))
(ProducesFlags.ProducesFlags (MInst.AluRmiR (operand_size_of_type_32_64 ty) (ProducesFlags.ProducesFlagsReturnsResultWithConsumer
(MInst.AluRmiR (operand_size_of_type_32_64 ty)
(AluRmiROpcode.Sub) (AluRmiROpcode.Sub)
src1 src1
src2 src2
@@ -1297,10 +1329,11 @@
(gpr_to_reg (writable_gpr_to_gpr dst))))) (gpr_to_reg (writable_gpr_to_gpr dst)))))
;; Helper for creating `sbb` instructions. ;; Helper for creating `sbb` instructions.
(decl sbb (Type Gpr GprMemImm) ConsumesFlags) (decl sbb_paired (Type Gpr GprMemImm) ConsumesFlags)
(rule (sbb ty src1 src2) (rule (sbb_paired ty src1 src2)
(let ((dst WritableGpr (temp_writable_gpr))) (let ((dst WritableGpr (temp_writable_gpr)))
(ConsumesFlags.ConsumesFlags (MInst.AluRmiR (operand_size_of_type_32_64 ty) (ConsumesFlags.ConsumesFlagsReturnsResultWithProducer
(MInst.AluRmiR (operand_size_of_type_32_64 ty)
(AluRmiROpcode.Sbb) (AluRmiROpcode.Sbb)
src1 src1
src2 src2
@@ -1456,30 +1489,129 @@
;; Helper for creating `MInst.CmpRmiR` instructions. ;; Helper for creating `MInst.CmpRmiR` instructions.
(decl cmp_rmi_r (OperandSize CmpOpcode GprMemImm Gpr) ProducesFlags) (decl cmp_rmi_r (OperandSize CmpOpcode GprMemImm Gpr) ProducesFlags)
(rule (cmp_rmi_r size opcode src1 src2) (rule (cmp_rmi_r size opcode src1 src2)
(ProducesFlags.ProducesFlags (MInst.CmpRmiR size (ProducesFlags.ProducesFlagsSideEffect
(MInst.CmpRmiR size
opcode opcode
src1 src1
src2) src2)))
(invalid_reg)))
;; Helper for creating `cmp` instructions. ;; Helper for creating `cmp` instructions.
(decl cmp (OperandSize GprMemImm Gpr) ProducesFlags) (decl cmp (OperandSize GprMemImm Gpr) ProducesFlags)
(rule (cmp size src1 src2) (rule (cmp size src1 src2)
(cmp_rmi_r size (CmpOpcode.Cmp) src1 src2)) (cmp_rmi_r size (CmpOpcode.Cmp) src1 src2))
;; Helper for creating `MInst.XmmCmpRmR` instructions.
(decl xmm_cmp_rm_r (SseOpcode XmmMem Xmm) ProducesFlags)
(rule (xmm_cmp_rm_r opcode src1 src2)
(ProducesFlags.ProducesFlagsSideEffect
(MInst.XmmCmpRmR opcode src1 src2)))
;; Helper for creating `fpcmp` instructions (cannot use `fcmp` as it is taken by
;; `clif.isle`).
(decl fpcmp (Value Value) ProducesFlags)
(rule (fpcmp src1 @ (value_type $F32) src2)
(xmm_cmp_rm_r (SseOpcode.Ucomiss) (put_in_xmm_mem src1) (put_in_xmm src2)))
(rule (fpcmp src1 @ (value_type $F64) src2)
(xmm_cmp_rm_r (SseOpcode.Ucomisd) (put_in_xmm_mem src1) (put_in_xmm src2)))
;; Helper for creating `test` instructions. ;; Helper for creating `test` instructions.
(decl test (OperandSize GprMemImm Gpr) ProducesFlags) (decl test (OperandSize GprMemImm Gpr) ProducesFlags)
(rule (test size src1 src2) (rule (test size src1 src2)
(cmp_rmi_r size (CmpOpcode.Test) src1 src2)) (cmp_rmi_r size (CmpOpcode.Test) src1 src2))
;; Helper for creating `MInst.Cmove` instructions. ;; Helper for creating `cmove` instructions. Note that these instructions do not
;; always result in a single emitted x86 instruction; e.g., XmmCmove uses jumps
;; to conditionally move the selected value into an XMM register.
(decl cmove (Type CC GprMem Gpr) ConsumesFlags) (decl cmove (Type CC GprMem Gpr) ConsumesFlags)
(rule (cmove ty cc consequent alternative) (rule (cmove ty cc consequent alternative)
(let ((dst WritableGpr (temp_writable_gpr)) (let ((dst WritableGpr (temp_writable_gpr))
(size OperandSize (operand_size_of_type_32_64 ty))) (size OperandSize (operand_size_of_type_32_64 ty)))
(ConsumesFlags.ConsumesFlags (MInst.Cmove size cc consequent alternative dst) (ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.Cmove size cc consequent alternative dst)
(gpr_to_reg (writable_gpr_to_gpr dst))))) (gpr_to_reg (writable_gpr_to_gpr dst)))))
(decl cmove_xmm (Type CC XmmMem Xmm) ConsumesFlags)
(rule (cmove_xmm ty cc consequent alternative)
(let ((dst WritableXmm (temp_writable_xmm))
(size OperandSize (operand_size_of_type_32_64 ty)))
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.XmmCmove size cc consequent alternative dst)
(xmm_to_reg (writable_xmm_to_xmm dst)))))
;; Helper for creating `cmove` instructions directly from values. This allows us
;; to special-case the `I128` types and default to the `cmove` helper otherwise.
;; It also eliminates some `put_in_reg*` boilerplate in the lowering ISLE code.
(decl cmove_from_values (Type CC Value Value) ConsumesFlags)
(rule (cmove_from_values $I128 cc consequent alternative)
(let ((cons ValueRegs (put_in_regs consequent))
(alt ValueRegs (put_in_regs alternative))
(dst1 WritableGpr (temp_writable_gpr))
(dst2 WritableGpr (temp_writable_gpr))
(size OperandSize (OperandSize.Size64))
(lower_cmove MInst (MInst.Cmove
size cc
(gpr_to_gpr_mem (value_regs_get_gpr cons 0))
(value_regs_get_gpr alt 0) dst1))
(upper_cmove MInst (MInst.Cmove
size cc
(gpr_to_gpr_mem (value_regs_get_gpr cons 1))
(value_regs_get_gpr alt 1) dst2)))
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
lower_cmove
upper_cmove
(value_regs
(gpr_to_reg (writable_gpr_to_gpr dst1))
(gpr_to_reg (writable_gpr_to_gpr dst2))))))
(rule (cmove_from_values (is_gpr_type (is_single_register_type ty)) cc consequent alternative)
(cmove ty cc (put_in_gpr_mem consequent) (put_in_gpr alternative)))
(rule (cmove_from_values (is_xmm_type (is_single_register_type ty)) cc consequent alternative)
(cmove_xmm ty cc (put_in_xmm_mem consequent) (put_in_xmm alternative)))
;; Helper for creating `cmove` instructions with the logical OR of multiple
;; flags. Note that these instructions will always result in more than one
;; emitted x86 instruction.
(decl cmove_or (Type CC CC GprMem Gpr) ConsumesFlags)
(rule (cmove_or ty cc1 cc2 consequent alternative)
(let ((dst WritableGpr (temp_writable_gpr))
(size OperandSize (operand_size_of_type_32_64 ty)))
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.CmoveOr size cc1 cc2 consequent alternative dst)
(gpr_to_reg (writable_gpr_to_gpr dst)))))
(decl cmove_or_xmm (Type CC CC XmmMem Xmm) ConsumesFlags)
(rule (cmove_or_xmm ty cc1 cc2 consequent alternative)
(let ((dst WritableXmm (temp_writable_xmm))
(size OperandSize (operand_size_of_type_32_64 ty)))
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.XmmCmoveOr size cc1 cc2 consequent alternative dst)
(xmm_to_reg (writable_xmm_to_xmm dst)))))
;; Helper for creating `cmove_or` instructions directly from values. This allows
;; us to special-case the `I128` types and default to the `cmove_or` helper
;; otherwise.
(decl cmove_or_from_values (Type CC CC Value Value) ConsumesFlags)
(rule (cmove_or_from_values $I128 cc1 cc2 consequent alternative)
(let ((cons ValueRegs (put_in_regs consequent))
(alt ValueRegs (put_in_regs alternative))
(dst1 WritableGpr (temp_writable_gpr))
(dst2 WritableGpr (temp_writable_gpr))
(size OperandSize (OperandSize.Size64))
(lower_cmove MInst (MInst.CmoveOr size cc1 cc2 (gpr_to_gpr_mem (value_regs_get_gpr cons 0)) (value_regs_get_gpr alt 0) dst1))
(upper_cmove MInst (MInst.CmoveOr size cc1 cc2 (gpr_to_gpr_mem (value_regs_get_gpr cons 1)) (value_regs_get_gpr alt 1) dst2)))
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
lower_cmove
upper_cmove
(value_regs (gpr_to_reg (writable_gpr_to_gpr dst1))
(gpr_to_reg (writable_gpr_to_gpr dst2))))))
(rule (cmove_or_from_values (is_gpr_type (is_single_register_type ty)) cc1 cc2 consequent alternative)
(cmove_or ty cc1 cc2 (put_in_gpr_mem consequent) (put_in_gpr alternative)))
(rule (cmove_or_from_values (is_xmm_type (is_single_register_type ty)) cc1 cc2 consequent alternative)
(cmove_or_xmm ty cc1 cc2 (put_in_xmm_mem consequent) (put_in_xmm alternative)))
;; Helper for creating `MInst.MovzxRmR` instructions. ;; Helper for creating `MInst.MovzxRmR` instructions.
(decl movzx (Type ExtMode GprMem) Gpr) (decl movzx (Type ExtMode GprMem) Gpr)
(rule (movzx ty mode src) (rule (movzx ty mode src)

View File

@@ -1064,9 +1064,9 @@ pub(crate) fn emit(
cc, cc,
consequent, consequent,
alternative, alternative,
dst: reg_g, dst,
} => { } => {
debug_assert_eq!(*alternative, reg_g.to_reg()); debug_assert_eq!(*alternative, dst.to_reg());
let rex_flags = RexFlags::from(*size); let rex_flags = RexFlags::from(*size);
let prefix = match size { let prefix = match size {
OperandSize::Size16 => LegacyPrefixes::_66, OperandSize::Size16 => LegacyPrefixes::_66,
@@ -1076,14 +1076,14 @@ pub(crate) fn emit(
}; };
let opcode = 0x0F40 + cc.get_enc() as u32; let opcode = 0x0F40 + cc.get_enc() as u32;
match consequent.clone().to_reg_mem() { match consequent.clone().to_reg_mem() {
RegMem::Reg { reg: reg_e } => { RegMem::Reg { reg } => {
emit_std_reg_reg( emit_std_reg_reg(
sink, sink,
prefix, prefix,
opcode, opcode,
2, 2,
reg_g.to_reg().to_reg(), dst.to_reg().to_reg(),
reg_e, reg,
rex_flags, rex_flags,
); );
} }
@@ -1096,7 +1096,7 @@ pub(crate) fn emit(
prefix, prefix,
opcode, opcode,
2, 2,
reg_g.to_reg().to_reg(), dst.to_reg().to_reg(),
addr, addr,
rex_flags, rex_flags,
); );
@@ -1104,7 +1104,42 @@ pub(crate) fn emit(
} }
} }
Inst::XmmCmove { size, cc, src, dst } => { Inst::CmoveOr {
size,
cc1,
cc2,
consequent,
alternative,
dst,
} => {
let first_cmove = Inst::Cmove {
cc: *cc1,
size: *size,
consequent: consequent.clone(),
alternative: alternative.clone(),
dst: dst.clone(),
};
first_cmove.emit(sink, info, state);
let second_cmove = Inst::Cmove {
cc: *cc2,
size: *size,
consequent: consequent.clone(),
alternative: alternative.clone(),
dst: dst.clone(),
};
second_cmove.emit(sink, info, state);
}
Inst::XmmCmove {
size,
cc,
consequent,
alternative,
dst,
} => {
debug_assert_eq!(*alternative, dst.to_reg());
// Lowering of the Select IR opcode when the input is an fcmp relies on the fact that // Lowering of the Select IR opcode when the input is an fcmp relies on the fact that
// this doesn't clobber flags. Make sure to not do so here. // this doesn't clobber flags. Make sure to not do so here.
let next = sink.get_label(); let next = sink.get_label();
@@ -1117,12 +1152,46 @@ pub(crate) fn emit(
} else { } else {
SseOpcode::Movss SseOpcode::Movss
}; };
let inst = Inst::xmm_unary_rm_r(op, src.clone().to_reg_mem(), dst.to_writable_reg()); let inst =
Inst::xmm_unary_rm_r(op, consequent.clone().to_reg_mem(), dst.to_writable_reg());
inst.emit(sink, info, state); inst.emit(sink, info, state);
sink.bind_label(next); sink.bind_label(next);
} }
Inst::XmmCmoveOr {
    size,
    cc1,
    cc2,
    consequent,
    alternative,
    dst,
} => {
    // Conditional XMM move taken when `cc1` OR `cc2` holds. There is no
    // `CMOV` for XMM registers, so this is emitted as two guarded moves:
    //
    //       j<!cc1> second_test
    //       mov     consequent, dst
    //   second_test:
    //       j<!cc2> next_instruction
    //       mov     consequent, dst
    //   next_instruction:
    //
    // `dst` already holds `alternative` on entry (asserted below), so it is
    // left untouched when neither condition holds. Both jumps read the same
    // flags, so nothing emitted between them may modify the flags.
    debug_assert_eq!(*alternative, dst.to_reg());

    // Scalar move opcode matching the operand size.
    let op = if *size == OperandSize::Size64 {
        SseOpcode::Movsd
    } else {
        SseOpcode::Movss
    };
    let second_test = sink.get_label();
    let next_instruction = sink.get_label();

    // Jump to second test if `cc1` is *not* set. This must target
    // `second_test`, not `next_instruction`: skipping the second test would
    // turn the OR of the two conditions into a check of `cc1` alone.
    one_way_jmp(sink, cc1.invert(), second_test);
    let inst =
        Inst::xmm_unary_rm_r(op, consequent.clone().to_reg_mem(), dst.to_writable_reg());
    inst.emit(sink, info, state);

    sink.bind_label(second_test);
    // Jump to next instruction if `cc2` is *not* set.
    one_way_jmp(sink, cc2.invert(), next_instruction);
    let inst =
        Inst::xmm_unary_rm_r(op, consequent.clone().to_reg_mem(), dst.to_writable_reg());
    inst.emit(sink, info, state);

    sink.bind_label(next_instruction);
}
Inst::Push64 { src } => { Inst::Push64 { src } => {
if info.flags.enable_probestack() { if info.flags.enable_probestack() {
sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow); sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow);

View File

@@ -52,6 +52,7 @@ impl Inst {
| Inst::CallUnknown { .. } | Inst::CallUnknown { .. }
| Inst::CheckedDivOrRemSeq { .. } | Inst::CheckedDivOrRemSeq { .. }
| Inst::Cmove { .. } | Inst::Cmove { .. }
| Inst::CmoveOr { .. }
| Inst::CmpRmiR { .. } | Inst::CmpRmiR { .. }
| Inst::CvtFloatToSintSeq { .. } | Inst::CvtFloatToSintSeq { .. }
| Inst::CvtFloatToUintSeq { .. } | Inst::CvtFloatToUintSeq { .. }
@@ -88,6 +89,7 @@ impl Inst {
| Inst::Ud2 { .. } | Inst::Ud2 { .. }
| Inst::VirtualSPOffsetAdj { .. } | Inst::VirtualSPOffsetAdj { .. }
| Inst::XmmCmove { .. } | Inst::XmmCmove { .. }
| Inst::XmmCmoveOr { .. }
| Inst::XmmCmpRmR { .. } | Inst::XmmCmpRmR { .. }
| Inst::XmmLoadConst { .. } | Inst::XmmLoadConst { .. }
| Inst::XmmMinMaxSeq { .. } | Inst::XmmMinMaxSeq { .. }
@@ -629,7 +631,13 @@ impl Inst {
debug_assert!(dst.to_reg().get_class() == RegClass::V128); debug_assert!(dst.to_reg().get_class() == RegClass::V128);
let src = XmmMem::new(src).unwrap(); let src = XmmMem::new(src).unwrap();
let dst = WritableXmm::from_writable_reg(dst).unwrap(); let dst = WritableXmm::from_writable_reg(dst).unwrap();
Inst::XmmCmove { size, cc, src, dst } Inst::XmmCmove {
size,
cc,
consequent: src,
alternative: dst.to_reg(),
dst,
}
} }
pub(crate) fn push64(src: RegMemImm) -> Inst { pub(crate) fn push64(src: RegMemImm) -> Inst {
@@ -898,6 +906,12 @@ impl Inst {
alternative, alternative,
dst, dst,
.. ..
}
| Inst::CmoveOr {
size,
alternative,
dst,
..
} => { } => {
if *alternative != dst.to_reg() { if *alternative != dst.to_reg() {
debug_assert!(alternative.is_virtual()); debug_assert!(alternative.is_virtual());
@@ -910,6 +924,23 @@ impl Inst {
} }
insts.push(self); insts.push(self);
} }
// XMM conditional moves modify `dst` in place, so `dst` must already hold
// `alternative` when the instruction executes. If the two registers differ,
// insert a move of `alternative` into `dst` ahead of the instruction and then
// rewrite `alternative` to name `dst`.
Inst::XmmCmove {
alternative, dst, ..
}
| Inst::XmmCmoveOr {
alternative, dst, ..
} => {
if *alternative != dst.to_reg() {
debug_assert!(alternative.is_virtual());
// NOTE(review): the move is typed `F32X4` regardless of the operand
// size; presumably so that the whole XMM register is copied — confirm.
insts.push(Self::gen_move(
dst.to_writable_reg(),
alternative.to_reg(),
types::F32X4,
));
*alternative = dst.to_reg();
}
insts.push(self);
}
Inst::Not { src, dst, .. } | Inst::Neg { src, dst, .. } => { Inst::Not { src, dst, .. } | Inst::Neg { src, dst, .. } => {
if *src != dst.to_reg() { if *src != dst.to_reg() {
debug_assert!(src.is_virtual()); debug_assert!(src.is_virtual());
@@ -1588,7 +1619,34 @@ impl PrettyPrint for Inst {
show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes()) show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes())
), ),
Inst::XmmCmove { size, cc, src, dst } => { Inst::CmoveOr {
size,
cc1,
cc2,
consequent: src,
alternative: _,
dst,
} => {
let src = src.show_rru_sized(mb_rru, size.to_bytes());
let dst = show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes());
format!(
"{} {}, {}; {} {}, {}",
ljustify(format!("cmov{}{}", cc1.to_string(), suffix_bwlq(*size))),
src,
dst,
ljustify(format!("cmov{}{}", cc2.to_string(), suffix_bwlq(*size))),
src,
dst,
)
}
Inst::XmmCmove {
size,
cc,
consequent: src,
dst,
..
} => {
format!( format!(
"j{} $next; mov{} {}, {}; $next: ", "j{} $next; mov{} {}, {}; $next: ",
cc.invert().to_string(), cc.invert().to_string(),
@@ -1602,6 +1660,34 @@ impl PrettyPrint for Inst {
) )
} }
Inst::XmmCmoveOr {
    size,
    cc1,
    cc2,
    consequent,
    dst,
    ..
} => {
    // Rendered as the two-jump sequence the emitter produces: each guarded
    // move is skipped by a jump on the inverted condition code.
    let mov_suffix = match *size {
        OperandSize::Size64 => "sd",
        _ => "ss",
    };
    let src_str = consequent.show_rru_sized(mb_rru, size.to_bytes());
    let dst_str = show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes());
    let skip_first = cc1.invert().to_string();
    let skip_second = cc2.invert().to_string();
    format!(
        "j{} $check; mov{} {}, {}; $check: j{} $next; mov{} {}, {}; $next",
        skip_first,
        mov_suffix,
        src_str,
        dst_str,
        skip_second,
        mov_suffix,
        src_str,
        dst_str,
    )
}
Inst::Push64 { src } => { Inst::Push64 { src } => {
format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru)) format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru))
} }
@@ -2000,11 +2086,25 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
consequent: src, consequent: src,
dst, dst,
.. ..
}
| Inst::CmoveOr {
consequent: src,
dst,
..
} => { } => {
src.get_regs_as_uses(collector); src.get_regs_as_uses(collector);
collector.add_mod(dst.to_writable_reg()); collector.add_mod(dst.to_writable_reg());
} }
Inst::XmmCmove { src, dst, .. } => { Inst::XmmCmove {
consequent: src,
dst,
..
}
| Inst::XmmCmoveOr {
consequent: src,
dst,
..
} => {
src.get_regs_as_uses(collector); src.get_regs_as_uses(collector);
collector.add_mod(dst.to_writable_reg()); collector.add_mod(dst.to_writable_reg());
} }
@@ -2454,18 +2554,32 @@ pub(crate) fn x64_map_regs<RM: RegMapper>(inst: &mut Inst, mapper: &RM) {
ref mut dst, ref mut dst,
ref mut alternative, ref mut alternative,
.. ..
}
| Inst::CmoveOr {
consequent: ref mut src,
ref mut dst,
ref mut alternative,
..
} => { } => {
src.map_uses(mapper); src.map_uses(mapper);
dst.map_mod(mapper); dst.map_mod(mapper);
*alternative = dst.to_reg(); *alternative = dst.to_reg();
} }
Inst::XmmCmove { Inst::XmmCmove {
ref mut src, consequent: ref mut src,
ref mut dst, ref mut dst,
ref mut alternative,
..
}
| Inst::XmmCmoveOr {
consequent: ref mut src,
ref mut dst,
ref mut alternative,
.. ..
} => { } => {
src.map_uses(mapper); src.map_uses(mapper);
dst.map_mod(mapper); dst.map_mod(mapper);
*alternative = dst.to_reg();
} }
Inst::Push64 { ref mut src } => src.map_uses(mapper), Inst::Push64 { ref mut src } => src.map_uses(mapper),
Inst::Pop64 { ref mut dst } => { Inst::Pop64 { ref mut dst } => {

View File

@@ -124,8 +124,8 @@
(y_lo Gpr (value_regs_get_gpr y_regs 0)) (y_lo Gpr (value_regs_get_gpr y_regs 0))
(y_hi Gpr (value_regs_get_gpr y_regs 1))) (y_hi Gpr (value_regs_get_gpr y_regs 1)))
;; Do an add followed by an add-with-carry. ;; Do an add followed by an add-with-carry.
(with_flags (add_with_flags $I64 x_lo (gpr_to_gpr_mem_imm y_lo)) (with_flags (add_with_flags_paired $I64 x_lo (gpr_to_gpr_mem_imm y_lo))
(adc $I64 x_hi (gpr_to_gpr_mem_imm y_hi)))))) (adc_paired $I64 x_hi (gpr_to_gpr_mem_imm y_hi))))))
;;;; Rules for `sadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `sadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -225,8 +225,8 @@
(let ((y_regs ValueRegs (put_in_regs y)) (let ((y_regs ValueRegs (put_in_regs y))
(y_lo Gpr (value_regs_get_gpr y_regs 0)) (y_lo Gpr (value_regs_get_gpr y_regs 0))
(y_hi Gpr (value_regs_get_gpr y_regs 1))) (y_hi Gpr (value_regs_get_gpr y_regs 1)))
(with_flags (add_with_flags $I64 y_lo x) (with_flags (add_with_flags_paired $I64 y_lo x)
(adc $I64 y_hi (gpr_mem_imm_new (RegMemImm.Imm 0)))))) (adc_paired $I64 y_hi (gpr_mem_imm_new (RegMemImm.Imm 0))))))
;; Otherwise, put the immediate into a register. ;; Otherwise, put the immediate into a register.
(rule (lower (has_type $I128 (iadd_imm y (u64_from_imm64 x)))) (rule (lower (has_type $I128 (iadd_imm y (u64_from_imm64 x))))
@@ -234,8 +234,8 @@
(y_lo Gpr (value_regs_get_gpr y_regs 0)) (y_lo Gpr (value_regs_get_gpr y_regs 0))
(y_hi Gpr (value_regs_get_gpr y_regs 1)) (y_hi Gpr (value_regs_get_gpr y_regs 1))
(x_lo Gpr (gpr_new (imm $I64 x)))) (x_lo Gpr (gpr_new (imm $I64 x))))
(with_flags (add_with_flags $I64 y_lo (gpr_to_gpr_mem_imm x_lo)) (with_flags (add_with_flags_paired $I64 y_lo (gpr_to_gpr_mem_imm x_lo))
(adc $I64 y_hi (gpr_mem_imm_new (RegMemImm.Imm 0)))))) (adc_paired $I64 y_hi (gpr_mem_imm_new (RegMemImm.Imm 0))))))
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -293,8 +293,8 @@
(y_lo Gpr (value_regs_get_gpr y_regs 0)) (y_lo Gpr (value_regs_get_gpr y_regs 0))
(y_hi Gpr (value_regs_get_gpr y_regs 1))) (y_hi Gpr (value_regs_get_gpr y_regs 1)))
;; Do a sub followed by an sub-with-borrow. ;; Do a sub followed by an sub-with-borrow.
(with_flags (sub_with_flags $I64 x_lo (gpr_to_gpr_mem_imm y_lo)) (with_flags (sub_with_flags_paired $I64 x_lo (gpr_to_gpr_mem_imm y_lo))
(sbb $I64 x_hi (gpr_to_gpr_mem_imm y_hi)))))) (sbb_paired $I64 x_hi (gpr_to_gpr_mem_imm y_hi))))))
;;;; Rules for `ssub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `ssub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -562,7 +562,7 @@
(gpr_to_gpr_mem_imm amt))))) (gpr_to_gpr_mem_imm amt)))))
(zero Gpr (gpr_new (imm $I64 0))) (zero Gpr (gpr_new (imm $I64 0)))
;; Nullify the carry if we are shifting in by a multiple of 128. ;; Nullify the carry if we are shifting in by a multiple of 128.
(carry_ Gpr (gpr_new (with_flags_1 (test (OperandSize.Size64) (carry_ Gpr (gpr_new (with_flags_reg (test (OperandSize.Size64)
(gpr_mem_imm_new (RegMemImm.Imm 127)) (gpr_mem_imm_new (RegMemImm.Imm 127))
amt) amt)
(cmove $I64 (cmove $I64
@@ -574,11 +574,10 @@
;; Combine the two shifted halves. However, if we are shifting by >= 64 ;; Combine the two shifted halves. However, if we are shifting by >= 64
;; (modulo 128), then the low bits are zero and the high bits are our ;; (modulo 128), then the low bits are zero and the high bits are our
;; low bits. ;; low bits.
(with_flags_2 (test (OperandSize.Size64) (with_flags (test (OperandSize.Size64) (gpr_mem_imm_new (RegMemImm.Imm 64)) amt)
(gpr_mem_imm_new (RegMemImm.Imm 64)) (consumes_flags_concat
amt)
(cmove $I64 (CC.Z) (gpr_to_gpr_mem lo_shifted) zero) (cmove $I64 (CC.Z) (gpr_to_gpr_mem lo_shifted) zero)
(cmove $I64 (CC.Z) (gpr_to_gpr_mem hi_shifted_) lo_shifted)))) (cmove $I64 (CC.Z) (gpr_to_gpr_mem hi_shifted_) lo_shifted)))))
(rule (lower (has_type $I128 (ishl src amt))) (rule (lower (has_type $I128 (ishl src amt)))
;; NB: Only the low bits of `amt` matter since we logically mask the shift ;; NB: Only the low bits of `amt` matter since we logically mask the shift
@@ -674,23 +673,17 @@
(gpr_new (imm $I64 64)) (gpr_new (imm $I64 64))
(gpr_to_gpr_mem_imm amt))))) (gpr_to_gpr_mem_imm amt)))))
;; Nullify the carry if we are shifting by a multiple of 128. ;; Nullify the carry if we are shifting by a multiple of 128.
(carry_ Gpr (gpr_new (with_flags_1 (test (OperandSize.Size64) (carry_ Gpr (gpr_new (with_flags_reg (test (OperandSize.Size64) (gpr_mem_imm_new (RegMemImm.Imm 127)) amt)
(gpr_mem_imm_new (RegMemImm.Imm 127)) (cmove $I64 (CC.Z) (gpr_to_gpr_mem (gpr_new (imm $I64 0))) carry))))
amt)
(cmove $I64
(CC.Z)
(gpr_to_gpr_mem (gpr_new (imm $I64 0)))
carry))))
;; Add the carry bits into the lo. ;; Add the carry bits into the lo.
(lo_shifted_ Gpr (or $I64 carry_ (gpr_to_gpr_mem_imm lo_shifted)))) (lo_shifted_ Gpr (or $I64 carry_ (gpr_to_gpr_mem_imm lo_shifted))))
;; Combine the two shifted halves. However, if we are shifting by >= 64 ;; Combine the two shifted halves. However, if we are shifting by >= 64
;; (modulo 128), then the hi bits are zero and the lo bits are what ;; (modulo 128), then the hi bits are zero and the lo bits are what
;; would otherwise be our hi bits. ;; would otherwise be our hi bits.
(with_flags_2 (test (OperandSize.Size64) (with_flags (test (OperandSize.Size64) (gpr_mem_imm_new (RegMemImm.Imm 64)) amt)
(gpr_mem_imm_new (RegMemImm.Imm 64)) (consumes_flags_concat
amt)
(cmove $I64 (CC.Z) (gpr_to_gpr_mem lo_shifted_) hi_shifted) (cmove $I64 (CC.Z) (gpr_to_gpr_mem lo_shifted_) hi_shifted)
(cmove $I64 (CC.Z) (gpr_to_gpr_mem hi_shifted) (gpr_new (imm $I64 0)))))) (cmove $I64 (CC.Z) (gpr_to_gpr_mem hi_shifted) (gpr_new (imm $I64 0)))))))
(rule (lower (has_type $I128 (ushr src amt))) (rule (lower (has_type $I128 (ushr src amt)))
;; NB: Only the low bits of `amt` matter since we logically mask the shift ;; NB: Only the low bits of `amt` matter since we logically mask the shift
@@ -787,13 +780,8 @@
(gpr_new (imm $I64 64)) (gpr_new (imm $I64 64))
(gpr_to_gpr_mem_imm amt))))) (gpr_to_gpr_mem_imm amt)))))
;; Nullify the carry if we are shifting by a multiple of 128. ;; Nullify the carry if we are shifting by a multiple of 128.
(carry_ Gpr (gpr_new (with_flags_1 (test (OperandSize.Size64) (carry_ Gpr (gpr_new (with_flags_reg (test (OperandSize.Size64) (gpr_mem_imm_new (RegMemImm.Imm 127)) amt)
(gpr_mem_imm_new (RegMemImm.Imm 127)) (cmove $I64 (CC.Z) (gpr_to_gpr_mem (gpr_new (imm $I64 0))) carry))))
amt)
(cmove $I64
(CC.Z)
(gpr_to_gpr_mem (gpr_new (imm $I64 0)))
carry))))
;; Add the carry into the low half. ;; Add the carry into the low half.
(lo_shifted_ Gpr (or $I64 lo_shifted (gpr_to_gpr_mem_imm carry_))) (lo_shifted_ Gpr (or $I64 lo_shifted (gpr_to_gpr_mem_imm carry_)))
;; Get all sign bits. ;; Get all sign bits.
@@ -801,11 +789,10 @@
;; Combine the two shifted halves. However, if we are shifting by >= 64 ;; Combine the two shifted halves. However, if we are shifting by >= 64
;; (modulo 128), then the hi bits are all sign bits and the lo bits are ;; (modulo 128), then the hi bits are all sign bits and the lo bits are
;; what would otherwise be our hi bits. ;; what would otherwise be our hi bits.
(with_flags_2 (test (OperandSize.Size64) (with_flags (test (OperandSize.Size64) (gpr_mem_imm_new (RegMemImm.Imm 64)) amt)
(gpr_mem_imm_new (RegMemImm.Imm 64)) (consumes_flags_concat
amt)
(cmove $I64 (CC.Z) (gpr_to_gpr_mem lo_shifted_) hi_shifted) (cmove $I64 (CC.Z) (gpr_to_gpr_mem lo_shifted_) hi_shifted)
(cmove $I64 (CC.Z) (gpr_to_gpr_mem hi_shifted) sign_bits)))) (cmove $I64 (CC.Z) (gpr_to_gpr_mem hi_shifted) sign_bits)))))
(rule (lower (has_type $I128 (sshr src amt))) (rule (lower (has_type $I128 (sshr src amt)))
;; NB: Only the low bits of `amt` matter since we logically mask the shift ;; NB: Only the low bits of `amt` matter since we logically mask the shift
@@ -1468,7 +1455,7 @@
(let ((x_reg Gpr (put_in_gpr x)) (let ((x_reg Gpr (put_in_gpr x))
(y_reg Gpr (put_in_gpr y)) (y_reg Gpr (put_in_gpr y))
(size OperandSize (raw_operand_size_of_type ty))) (size OperandSize (raw_operand_size_of_type ty)))
(value_reg (with_flags_1 (cmp size (gpr_to_gpr_mem_imm x_reg) y_reg) (value_reg (with_flags_reg (cmp size (gpr_to_gpr_mem_imm x_reg) y_reg)
(cmove ty cc (gpr_to_gpr_mem y_reg) x_reg))))) (cmove ty cc (gpr_to_gpr_mem y_reg) x_reg)))))
(rule (lower (has_type (fits_in_64 ty) (umin x y))) (rule (lower (has_type (fits_in_64 ty) (umin x y)))
@@ -1536,3 +1523,90 @@
(rule (lower (resumable_trap code)) (rule (lower (resumable_trap code))
(safepoint (ud2 code))) (safepoint (ud2 code)))
;;;; Rules for `select` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; CLIF `select` instructions receive a testable argument (i.e. boolean or
;; integer) that determines which of the other two arguments is selected as
;; output. Since Cranelift booleans are typically generated by a comparison, the
;; lowerings in this section "look upwards in the tree" to emit the proper
;; sequence of "selection" instructions.
;;
;; The following rules--for selecting on a floating-point comparison--emit a
;; `UCOMIS*` instruction and then a conditional move, `cmove`. Note that for
;; values contained in XMM registers, `cmove` and `cmove_or` may in fact emit a
;; jump sequence, not `CMOV`. The `cmove` instruction operates on the flags set
;; by `UCOMIS*`; the key to understanding these is the UCOMIS* documentation
;; (see Intel's Software Developer's Manual, volume 2, chapter 4):
;; - unordered assigns Z = 1, P = 1, C = 1
;; - greater than assigns Z = 0, P = 0, C = 0
;; - less than assigns Z = 0, P = 0, C = 1
;; - equal assigns Z = 1, P = 0, C = 0
;;
;; Note that prefixing the flag with `N` means "not," so that `CC.P -> P = 1`
;; and `CC.NP -> P = 0`. Also, x86 uses mnemonics for certain combinations of
;; flags; e.g.:
;; - `CC.B -> C = 1` (below)
;; - `CC.NB -> C = 0` (not below)
;; - `CC.BE -> C = 1 OR Z = 1` (below or equal)
;; - `CC.NBE -> C = 0 AND Z = 0` (not below or equal)
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.Ordered) a b)) x y)))
(with_flags (fpcmp b a) (cmove_from_values ty (CC.NP) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.Unordered) a b)) x y)))
(with_flags (fpcmp b a) (cmove_from_values ty (CC.P) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.GreaterThan) a b)) x y)))
(with_flags (fpcmp b a) (cmove_from_values ty (CC.NBE) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.GreaterThanOrEqual) a b)) x y)))
(with_flags (fpcmp b a) (cmove_from_values ty (CC.NB) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.UnorderedOrLessThan) a b)) x y)))
(with_flags (fpcmp b a) (cmove_from_values ty (CC.B) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.UnorderedOrLessThanOrEqual) a b)) x y)))
(with_flags (fpcmp b a) (cmove_from_values ty (CC.BE) x y)))
;; Certain FloatCC variants are implemented by flipping the operands of the
;; comparison (e.g., "greater than" is lowered the same as "less than" but the
;; comparison is reversed). This allows us to use a single flag for the `cmove`,
;; which involves fewer instructions than `cmove_or`.
;;
;; But why flip at all, you may ask? Can't we just use `CC.B` (i.e., below) for
;; `FloatCC.LessThan`? Recall that in these floating-point lowerings, values may
;; be unordered, and we want to express that `FloatCC.LessThan` is `LT`,
;; not `LT | UNO`. By flipping the operands AND inverting the comparison (e.g.,
;; to `CC.NBE`), we also avoid these unordered cases.
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.LessThan) a b)) x y)))
(with_flags (fpcmp a b) (cmove_from_values ty (CC.NBE) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.LessThanOrEqual) a b)) x y)))
(with_flags (fpcmp a b) (cmove_from_values ty (CC.NB) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.UnorderedOrGreaterThan) a b)) x y)))
(with_flags (fpcmp a b) (cmove_from_values ty (CC.B) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) a b)) x y)))
(with_flags (fpcmp a b) (cmove_from_values ty (CC.BE) x y)))
;; `FloatCC.Equal` and `FloatCC.NotEqual` can only be implemented with multiple
;; flag checks. Recall from the flag assignment chart above that equality, e.g.,
;; will assign `Z = 1`. But so does an unordered comparison: `Z = 1, P = 1, C =
;; 1`. In order to avoid semantics like `EQ | UNO` for equality, we must ensure
;; that the values are actually ordered, checking that `P = 0` (note that the
;; `C` flag is irrelevant here). Since we cannot find a single instruction that
;; implements a `Z = 1 AND P = 0` check, we invert the flag checks (i.e., `Z = 1
;; AND P = 0` becomes `Z = 0 OR P = 1`) and also flip the select operands, `x`
;; and `y`. The same argument applies to `FloatCC.NotEqual`.
;;
;; More details about the CLIF semantics for `fcmp` are available at
;; https://docs.rs/cranelift-codegen/latest/cranelift_codegen/ir/trait.InstBuilder.html#method.fcmp.
;; `FloatCC.Equal`: the select operands are passed swapped (`y x`), so `y` is
;; the value moved in when `Z = 0 OR P = 1` (not equal, or unordered);
;; otherwise the result stays `x`.
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.Equal) a b)) x y)))
(with_flags (fpcmp a b) (cmove_or_from_values ty (CC.NZ) (CC.P) y x)))
;; `FloatCC.NotEqual`: the select operands keep their order, so `x` is the value
;; moved in when `Z = 0 OR P = 1`; otherwise the result stays `y`.
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.NotEqual) a b)) x y)))
(with_flags (fpcmp a b) (cmove_or_from_values ty (CC.NZ) (CC.P) x y)))

View File

@@ -530,6 +530,7 @@ enum FcmpSpec {
/// This is useful in contexts where it is hard/inefficient to produce a single instruction (or /// This is useful in contexts where it is hard/inefficient to produce a single instruction (or
/// sequence of instructions) that check for an "AND" combination of condition codes; see for /// sequence of instructions) that check for an "AND" combination of condition codes; see for
/// instance lowering of Select. /// instance lowering of Select.
#[allow(dead_code)]
InvertEqual, InvertEqual,
} }
@@ -4252,80 +4253,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Select => { Opcode::Select => {
let flag_input = inputs[0]; let flag_input = inputs[0];
if let Some(fcmp) = matches_input(ctx, flag_input, Opcode::Fcmp) { if let Some(_) = matches_input(ctx, flag_input, Opcode::Fcmp) {
let cond_code = ctx.data(fcmp).fp_cond_code().unwrap(); implemented_in_isle(ctx);
// For equal, we flip the operands, because we can't test a conjunction of
// CPU flags with a single cmove; see InvertedEqualOrConditions doc comment.
let (lhs_input, rhs_input) = match cond_code {
FloatCC::Equal => (inputs[2], inputs[1]),
_ => (inputs[1], inputs[2]),
};
let ty = ctx.output_ty(insn, 0);
let rhs = put_input_in_regs(ctx, rhs_input);
let dst = get_output_reg(ctx, outputs[0]);
let lhs = put_input_in_regs(ctx, lhs_input);
// We request inversion of Equal to NotEqual here: taking LHS if equal would mean
// take it if both CC::NP and CC::Z are set, the conjunction of which can't be
// modeled with a single cmov instruction. Instead, we'll swap LHS and RHS in the
// select operation, and invert the equal to a not-equal here.
let fcmp_results = emit_fcmp(ctx, fcmp, cond_code, FcmpSpec::InvertEqual);
if let FcmpCondResult::InvertedEqualOrConditions(_, _) = &fcmp_results {
// Keep this sync'd with the lowering of the select inputs above.
assert_eq!(cond_code, FloatCC::Equal);
}
emit_moves(ctx, dst, rhs, ty);
let operand_size = if ty == types::F64 {
OperandSize::Size64
} else {
OperandSize::Size32
};
match fcmp_results {
FcmpCondResult::Condition(cc) => {
if is_int_or_ref_ty(ty) || ty == types::I128 || ty == types::B128 {
let size = ty.bytes() as u8;
emit_cmoves(ctx, size, cc, lhs, dst);
} else {
ctx.emit(Inst::xmm_cmove(
operand_size,
cc,
RegMem::reg(lhs.only_reg().unwrap()),
dst.only_reg().unwrap(),
));
}
}
FcmpCondResult::AndConditions(_, _) => {
unreachable!(
"can't AND with select; see above comment about inverting equal"
);
}
FcmpCondResult::InvertedEqualOrConditions(cc1, cc2)
| FcmpCondResult::OrConditions(cc1, cc2) => {
if is_int_or_ref_ty(ty) || ty == types::I128 {
let size = ty.bytes() as u8;
emit_cmoves(ctx, size, cc1, lhs.clone(), dst);
emit_cmoves(ctx, size, cc2, lhs, dst);
} else {
ctx.emit(Inst::xmm_cmove(
operand_size,
cc1,
RegMem::reg(lhs.only_reg().unwrap()),
dst.only_reg().unwrap(),
));
ctx.emit(Inst::xmm_cmove(
operand_size,
cc2,
RegMem::reg(lhs.only_reg().unwrap()),
dst.only_reg().unwrap(),
));
}
}
}
} else { } else {
let ty = ty.unwrap(); let ty = ty.unwrap();

View File

@@ -6,11 +6,11 @@ use generated_code::MInst;
use regalloc::Writable; use regalloc::Writable;
// Types that the generated ISLE code uses via `use super::*`. // Types that the generated ISLE code uses via `use super::*`.
use super::{is_mergeable_load, lower_to_amode, Reg}; use super::{is_int_or_ref_ty, is_mergeable_load, lower_to_amode, Reg};
use crate::{ use crate::{
ir::{ ir::{
immediates::*, types::*, Inst, InstructionData, Opcode, TrapCode, Value, ValueLabel, condcodes::FloatCC, immediates::*, types::*, Inst, InstructionData, Opcode, TrapCode,
ValueList, Value, ValueLabel, ValueList,
}, },
isa::{ isa::{
settings::Flags, settings::Flags,
@@ -440,6 +440,32 @@ where
fn imm8_to_imm8_gpr(&mut self, imm: u8) -> Imm8Gpr { fn imm8_to_imm8_gpr(&mut self, imm: u8) -> Imm8Gpr {
Imm8Gpr::new(Imm8Reg::Imm8 { imm }).unwrap() Imm8Gpr::new(Imm8Reg::Imm8 { imm }).unwrap()
} }
/// Extractor helper: hands `ty` back when it is an integer or reference type
/// (per `is_int_or_ref_ty`), `I128`, or `B128`; yields `None` for any other
/// type.
fn is_gpr_type(&mut self, ty: Type) -> Option<Type> {
    let accepted = is_int_or_ref_ty(ty) || ty == I128 || ty == B128;
    if !accepted {
        return None;
    }
    Some(ty)
}
/// Extractor helper: hands `ty` back when it is `F32`, `F64`, or a vector type
/// exactly 128 bits wide; yields `None` for any other type.
#[inline]
fn is_xmm_type(&mut self, ty: Type) -> Option<Type> {
    let is_scalar_float = ty == F32 || ty == F64;
    let accepted = is_scalar_float || (ty.is_vector() && ty.bits() == 128);
    if !accepted {
        return None;
    }
    Some(ty)
}
/// Extractor helper: hands `ty` back for every type except `I128`, for which
/// it yields `None`.
///
/// NOTE(review): `is_gpr_type` also accepts `B128`, which is not excluded
/// here. If `B128` is held in two registers like `I128`, it would match the
/// single-register rules that combine both extractors — confirm.
#[inline]
fn is_single_register_type(&mut self, ty: Type) -> Option<Type> {
    if ty == I128 {
        return None;
    }
    Some(ty)
}
} }
// Since x64 doesn't have 8x16 shifts and we must use a 16x8 shift instead, we // Since x64 doesn't have 8x16 shifts and we must use a 16x8 shift instead, we

View File

@@ -1,4 +1,4 @@
src/clif.isle 9ea75a6f790b5c03 src/clif.isle 9ea75a6f790b5c03
src/prelude.isle 73285cd431346d53 src/prelude.isle 980b300b3ec3e338
src/isa/x64/inst.isle 301db31d5f1118ae src/isa/x64/inst.isle ac88a0ae153ed210
src/isa/x64/lower.isle cdc94aec26c0bc5b src/isa/x64/lower.isle 1ebdd4469355e2cf

File diff suppressed because it is too large Load Diff

View File

@@ -324,47 +324,79 @@
;; Newtype wrapper around `MInst` for instructions that are used for their ;; Newtype wrapper around `MInst` for instructions that are used for their
;; effect on flags. ;; effect on flags.
(type ProducesFlags (enum (ProducesFlags (inst MInst) (result Reg)))) ;;
;; Variant determines how result is given when combined with a
;; ConsumesFlags. See `with_flags` below for more.
(type ProducesFlags (enum
(ProducesFlagsSideEffect (inst MInst))
;; Not directly combinable with a ConsumesFlags;
;; used in s390x and unwrapped directly by `trapif`.
(ProducesFlagsReturnsReg (inst MInst) (result Reg))
(ProducesFlagsReturnsResultWithConsumer (inst MInst) (result Reg))))
;; Newtype wrapper around `MInst` for instructions that consume flags. ;; Newtype wrapper around `MInst` for instructions that consume flags.
(type ConsumesFlags (enum (ConsumesFlags (inst MInst) (result Reg)))) ;;
;; Variant determines how result is given when combined with a
;; ProducesFlags. See `with_flags` below for more.
(type ConsumesFlags (enum
(ConsumesFlagsReturnsResultWithProducer (inst MInst) (result Reg))
(ConsumesFlagsReturnsReg (inst MInst) (result Reg))
(ConsumesFlagsTwiceReturnsValueRegs (inst1 MInst)
(inst2 MInst)
(result ValueRegs))))
;; Helper for combining two flags-consumer instructions that return a
;; single Reg, giving a ConsumesFlags that returns both values in a
;; ValueRegs.
;;
;; The first argument's instruction and register become `inst1` and the first
;; register of the resulting `ValueRegs`; the second argument's become `inst2`
;; and the second register. The rule shown here only matches when both
;; arguments are the `ConsumesFlagsReturnsReg` variant.
(decl consumes_flags_concat (ConsumesFlags ConsumesFlags) ConsumesFlags)
(rule (consumes_flags_concat (ConsumesFlags.ConsumesFlagsReturnsReg inst1 reg1)
(ConsumesFlags.ConsumesFlagsReturnsReg inst2 reg2))
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
inst1
inst2
(value_regs reg1 reg2)))
;; Combine flags-producing and -consuming instructions together, ensuring that ;; Combine flags-producing and -consuming instructions together, ensuring that
;; they are emitted back-to-back and no other instructions can be emitted ;; they are emitted back-to-back and no other instructions can be emitted
;; between them and potentially clobber the flags. ;; between them and potentially clobber the flags.
;; ;;
;; Returns a `ValueRegs` where the first register is the result of the ;; Returns a `ValueRegs` according to the specific combination of ProducesFlags and ConsumesFlags modes:
;; `ProducesFlags` instruction and the second is the result of the ;; - SideEffect + ReturnsReg --> ValueReg with one Reg from consumer
;; `ConsumesFlags` instruction. ;; - SideEffect + ReturnsValueRegs --> ValueReg as given from consumer
;; - ReturnsResultWithProducer + ReturnsResultWithConsumer --> ValueReg with low part from producer, high part from consumer
;;
;; See `with_flags_reg` below for a variant that extracts out just the lower Reg.
(decl with_flags (ProducesFlags ConsumesFlags) ValueRegs) (decl with_flags (ProducesFlags ConsumesFlags) ValueRegs)
(rule (with_flags (ProducesFlags.ProducesFlags producer_inst producer_result)
(ConsumesFlags.ConsumesFlags consumer_inst consumer_result)) (rule (with_flags (ProducesFlags.ProducesFlagsReturnsResultWithConsumer producer_inst producer_result)
(ConsumesFlags.ConsumesFlagsReturnsResultWithProducer consumer_inst consumer_result))
(let ((_x Unit (emit producer_inst)) (let ((_x Unit (emit producer_inst))
(_y Unit (emit consumer_inst))) (_y Unit (emit consumer_inst)))
(value_regs producer_result consumer_result))) (value_regs producer_result consumer_result)))
;; Like `with_flags` but returns only the result of the consumer operation. (rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst)
(decl with_flags_1 (ProducesFlags ConsumesFlags) Reg) (ConsumesFlags.ConsumesFlagsReturnsReg consumer_inst consumer_result))
(rule (with_flags_1 (ProducesFlags.ProducesFlags producer_inst _producer_result)
(ConsumesFlags.ConsumesFlags consumer_inst consumer_result))
(let ((_x Unit (emit producer_inst)) (let ((_x Unit (emit producer_inst))
(_y Unit (emit consumer_inst))) (_y Unit (emit consumer_inst)))
consumer_result)) (value_reg consumer_result)))
;; Like `with_flags` but allows two consumers of the same flags. The result is a (rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst)
;; `ValueRegs` containing the first consumer's result and then the second (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consumer_inst_1
;; consumer's result. consumer_inst_2
(decl with_flags_2 (ProducesFlags ConsumesFlags ConsumesFlags) ValueRegs) consumer_result))
(rule (with_flags_2 (ProducesFlags.ProducesFlags producer_inst _producer_result)
(ConsumesFlags.ConsumesFlags consumer_inst_1 consumer_result_1)
(ConsumesFlags.ConsumesFlags consumer_inst_2 consumer_result_2))
(let ((_x Unit (emit producer_inst)) (let ((_x Unit (emit producer_inst))
;; Note that the order of emission here is swapped, as this seems ;; Note that the order of emission here is swapped, as this seems
;; to generate better register allocation for now with fewer ;; to generate better register allocation for now with fewer
;; `mov` instructions. ;; `mov` instructions.
(_y Unit (emit consumer_inst_2)) (_y Unit (emit consumer_inst_2))
(_z Unit (emit consumer_inst_1))) (_z Unit (emit consumer_inst_1)))
(value_regs consumer_result_1 consumer_result_2))) consumer_result))
;; Like `with_flags`, but returns only register 0 of the resulting `ValueRegs`.
;;
;; Which instruction that register comes from depends on the variants being
;; combined: for a side-effect-only producer paired with a `ReturnsReg`
;; consumer it is the consumer's result; for the paired
;; `ReturnsResultWithConsumer` / `ReturnsResultWithProducer` variants it is the
;; producer's result.
(decl with_flags_reg (ProducesFlags ConsumesFlags) Reg)
(rule (with_flags_reg p c)
(let ((v ValueRegs (with_flags p c)))
(value_regs_get v 0)))
;;;; Helpers for Working with TrapCode ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Helpers for Working with TrapCode ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View File

@@ -43,7 +43,7 @@ block0(v0: f64, v1: i64):
; Entry block: 0 ; Entry block: 0
; Block 0: ; Block 0:
; (original IR block: block0) ; (original IR block: block0)
; (instruction range: 0 .. 17) ; (instruction range: 0 .. 16)
; Inst 0: pushq %rbp ; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp ; Inst 1: movq %rsp, %rbp
; Inst 2: movsd 0(%rdi), %xmm1 ; Inst 2: movsd 0(%rdi), %xmm1
@@ -52,14 +52,12 @@ block0(v0: f64, v1: i64):
; Inst 5: setz %sil ; Inst 5: setz %sil
; Inst 6: andl %edi, %esi ; Inst 6: andl %edi, %esi
; Inst 7: andq $1, %rsi ; Inst 7: andq $1, %rsi
; Inst 8: ucomisd %xmm1, %xmm0 ; Inst 8: ucomisd %xmm0, %xmm1
; Inst 9: movaps %xmm0, %xmm1 ; Inst 9: movaps %xmm0, %xmm1
; Inst 10: jnp $next; movsd %xmm0, %xmm1; $next: ; Inst 10: jz $check; movsd %xmm0, %xmm1; $check: jnp $next; movsd %xmm0, %xmm1; $next
; Inst 11: jz $next; movsd %xmm0, %xmm1; $next: ; Inst 11: movq %rsi, %rax
; Inst 12: movq %rsi, %rax ; Inst 12: movaps %xmm1, %xmm0
; Inst 13: movaps %xmm1, %xmm0 ; Inst 13: movq %rbp, %rsp
; Inst 14: movq %rbp, %rsp ; Inst 14: popq %rbp
; Inst 15: popq %rbp ; Inst 15: ret
; Inst 16: ret
; }} ; }}

View File

@@ -0,0 +1,80 @@
test interpret
test run
target x86_64
; Checks `select` driven by `fcmp eq` on f32 operands: yields the i32 constant
; 1 when the comparison holds and 0 otherwise. The cases below cover equal
; operands, unequal operands, and a NaN operand (for which 0 is expected).
function %select_eq_f32(f32, f32) -> i32 {
block0(v0: f32, v1: f32):
v2 = fcmp eq v0, v1
v3 = iconst.i32 1
v4 = iconst.i32 0
v5 = select v2, v3, v4
return v5
}
; run: %select_eq_f32(0x42.42, 0x42.42) == 1
; run: %select_eq_f32(0x42.42, 0.0) == 0
; run: %select_eq_f32(0x42.42, NaN) == 0
; Checks `select` driven by `fcmp ne` on f64 operands: yields the i32 constant
; 1 when the comparison holds and 0 otherwise. The cases below cover equal
; operands, unequal operands, and two NaN operands (for which 1 is expected).
function %select_ne_f64(f64, f64) -> i32 {
block0(v0: f64, v1: f64):
v2 = fcmp ne v0, v1
v3 = iconst.i32 1
v4 = iconst.i32 0
v5 = select v2, v3, v4
return v5
}
; run: %select_ne_f64(0x42.42, 0x42.42) == 0
; run: %select_ne_f64(0x42.42, 0.0) == 1
; run: %select_ne_f64(NaN, NaN) == 1
; Checks `select` driven by `fcmp gt` on f64 operands, selecting between the
; b1 constants true and false. The cases below cover greater-than, equal,
; less-than, and a NaN first operand (for which false is expected).
function %select_gt_f64(f64, f64) -> b1 {
block0(v0: f64, v1: f64):
v2 = fcmp gt v0, v1
v3 = bconst.b1 true
v4 = bconst.b1 false
v5 = select v2, v3, v4
return v5
}
; run: %select_gt_f64(0x42.42, 0.0) == true
; run: %select_gt_f64(0.0, 0.0) == false
; run: %select_gt_f64(0x0.0, 0x42.42) == false
; run: %select_gt_f64(NaN, 0x42.42) == false
; Checks `select` driven by `fcmp ge` on f64 operands: yields the i64 constant
; 1 when the comparison holds and 0 otherwise. The cases below cover
; greater-than, equal, less-than, and a NaN second operand (for which 0 is
; expected).
function %select_ge_f64(f64, f64) -> i64 {
block0(v0: f64, v1: f64):
v2 = fcmp ge v0, v1
v3 = iconst.i64 1
v4 = iconst.i64 0
v5 = select v2, v3, v4
return v5
}
; run: %select_ge_f64(0x42.42, 0.0) == 1
; run: %select_ge_f64(0.0, 0.0) == 1
; run: %select_ge_f64(0x0.0, 0x42.42) == 0
; run: %select_ge_f64(0x0.0, NaN) == 0
; Checks `select` driven by `fcmp le` on f32 operands, selecting between the
; f32 constants 0x1.0 (comparison holds) and 0x0.0 (comparison fails). Unlike
; the integer-result tests in this file, the selected values here are floats.
; The cases below cover greater-than, equal, less-than, and a NaN second
; operand (for which 0x0.0 is expected).
function %select_le_f32(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fcmp le v0, v1
v3 = f32const 0x1.0
v4 = f32const 0x0.0
v5 = select v2, v3, v4
return v5
}
; The first case was previously spelled `runx:`, which is not a recognized
; directive, so the greater-than case was silently never executed.
; run: %select_le_f32(0x42.42, 0.0) == 0x0.0
; run: %select_le_f32(0.0, 0.0) == 0x1.0
; run: %select_le_f32(0x0.0, 0x42.42) == 0x1.0
; run: %select_le_f32(0x0.0, NaN) == 0x0.0
; Checks `select` driven by `fcmp uno` (unordered) on f32 operands: yields the
; i8 constant 1 when the comparison holds and 0 otherwise. The cases below
; expect 0 for every pair of non-NaN operands and 1 when either operand is NaN
; (including a negative NaN as the first operand).
function %select_uno_f32(f32, f32) -> i8 {
block0(v0: f32, v1: f32):
v2 = fcmp uno v0, v1
v3 = iconst.i8 1
v4 = iconst.i8 0
v5 = select v2, v3, v4
return v5
}
; run: %select_uno_f32(0x42.42, 0.0) == 0
; run: %select_uno_f32(0.0, 0.0) == 0
; run: %select_uno_f32(0x0.0, 0x42.42) == 0
; run: %select_uno_f32(0x0.0, NaN) == 1
; run: %select_uno_f32(-NaN, 0x42.42) == 1

View File

@@ -460,7 +460,7 @@ impl<'a> Codegen<'a> {
}; };
let valuename = self.value_binder(&value, /* is_ref = */ true, ty); let valuename = self.value_binder(&value, /* is_ref = */ true, ty);
let fieldname = &self.typeenv.syms[field.name.index()]; let fieldname = &self.typeenv.syms[field.name.index()];
self.define_val(&value, ctx, /* is_ref = */ false, field.ty); self.define_val(&value, ctx, /* is_ref = */ true, field.ty);
format!("{}: {}", fieldname, valuename) format!("{}: {}", fieldname, valuename)
}) })
.collect::<Vec<_>>() .collect::<Vec<_>>()