diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 69d59f20b1..2c50844dd6 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -380,6 +380,16 @@ (TrapIf (cc CC) (trap_code TrapCode)) + ;; Traps if both of the condition codes are set. + (TrapIfAnd (cc1 CC) + (cc2 CC) + (trap_code TrapCode)) + + ;; Traps if either of the condition codes is set. + (TrapIfOr (cc1 CC) + (cc2 CC) + (trap_code TrapCode)) + ;; A debug trap. (Hlt) @@ -3002,6 +3012,209 @@ (rule (x64_xor_mem ty addr val) (alu_rm ty (AluRmiROpcode.Xor) addr val)) +;; Trap if the condition code supplied is set. +(decl trap_if (CC TrapCode) ConsumesFlags) +(rule (trap_if cc tc) + (ConsumesFlags.ConsumesFlagsSideEffect (MInst.TrapIf cc tc))) + +;; Trap if both of the condition codes supplied are set. +(decl trap_if_and (CC CC TrapCode) ConsumesFlags) +(rule (trap_if_and cc1 cc2 tc) + (ConsumesFlags.ConsumesFlagsSideEffect (MInst.TrapIfAnd cc1 cc2 tc))) + +;; Trap if either of the condition codes supplied is set. 
+(decl trap_if_or (CC CC TrapCode) ConsumesFlags) +(rule (trap_if_or cc1 cc2 tc) + (ConsumesFlags.ConsumesFlagsSideEffect (MInst.TrapIfOr cc1 cc2 tc))) + +(decl trap_if_icmp (IcmpCondResult TrapCode) SideEffectNoResult) +(rule (trap_if_icmp (IcmpCondResult.Condition producer cc) tc) + (with_flags_side_effect producer (trap_if cc tc))) + +(decl trap_if_fcmp (FcmpCondResult TrapCode) SideEffectNoResult) +(rule (trap_if_fcmp (FcmpCondResult.Condition producer cc) tc) + (with_flags_side_effect producer (trap_if cc tc))) +(rule (trap_if_fcmp (FcmpCondResult.AndCondition producer cc1 cc2) tc) + (with_flags_side_effect producer (trap_if_and cc1 cc2 tc))) +(rule (trap_if_fcmp (FcmpCondResult.OrCondition producer cc1 cc2) tc) + (with_flags_side_effect producer (trap_if_or cc1 cc2 tc))) + +;;;; Comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(type IcmpCondResult (enum (Condition (producer ProducesFlags) (cc CC)))) + +(decl icmp_cond_result (ProducesFlags CC) IcmpCondResult) +(rule (icmp_cond_result producer cc) (IcmpCondResult.Condition producer cc)) + +;; Lower an Icmp result into a boolean value in a register. +(decl lower_icmp_bool (IcmpCondResult) ValueRegs) +(rule (lower_icmp_bool (IcmpCondResult.Condition producer cc)) + (with_flags producer (x64_setcc cc))) + +(decl emit_cmp (IntCC Value Value) IcmpCondResult) + +;; For GPR-held values we only need to emit `CMP`; the consumer picks how to +;; use the flags. We rely on Cranelift's verification that `a` and `b` are of +;; the same type. Unfortunately for clarity, the registers are flipped (TODO). +(rule (emit_cmp cc a @ (value_type ty) b) + (let ((size OperandSize (raw_operand_size_of_type ty))) + (icmp_cond_result (x64_cmp size b a) cc))) + +;; For I128 values (held in two GPRs), the instruction sequences depend on what +;; kind of condition is tested. 
+(rule (emit_cmp (IntCC.Equal) a @ (value_type $I128) b) + (let ((a_lo Gpr (value_regs_get_gpr a 0)) + (a_hi Gpr (value_regs_get_gpr a 1)) + (b_lo Gpr (value_regs_get_gpr b 0)) + (b_hi Gpr (value_regs_get_gpr b 1)) + (cmp_lo Reg (with_flags_reg (x64_cmp (OperandSize.Size64) b_lo a_lo) (x64_setcc (CC.Z)))) + (cmp_hi Reg (with_flags_reg (x64_cmp (OperandSize.Size64) b_hi a_hi) (x64_setcc (CC.Z)))) + ;; At this point, `cmp_lo` and `cmp_hi` contain either 0 or 1 in the + ;; lowest 8 bits--`SETcc` guarantees this. The upper bits may be + ;; unchanged so we must compare against 1 below; this instruction + ;; combines `cmp_lo` and `cmp_hi` for that final comparison. + (cmp Reg (x64_and $I64 cmp_lo cmp_hi))) + ;; We must compare one more time against the immediate value 1 to + ;; check if both `cmp_lo` and `cmp_hi` are true. If `cmp AND 1 == 0` + ;; then the `ZF` will be set (see `TEST` definition); if either of + ;; the halves `AND`s to 0, they were not equal, therefore we `SETcc` + ;; with `NZ`. + (icmp_cond_result + (x64_test (OperandSize.Size64) (RegMemImm.Imm 1) cmp) + (CC.NZ)))) + +(rule (emit_cmp (IntCC.NotEqual) a @ (value_type $I128) b) + (let ((a_lo Gpr (value_regs_get_gpr a 0)) + (a_hi Gpr (value_regs_get_gpr a 1)) + (b_lo Gpr (value_regs_get_gpr b 0)) + (b_hi Gpr (value_regs_get_gpr b 1)) + (cmp_lo Reg (with_flags_reg (x64_cmp (OperandSize.Size64) b_lo a_lo) (x64_setcc (CC.NZ)))) + (cmp_hi Reg (with_flags_reg (x64_cmp (OperandSize.Size64) b_hi a_hi) (x64_setcc (CC.NZ)))) + ;; See comments for `IntCC.Equal`. 
+ (cmp Reg (x64_or $I64 cmp_lo cmp_hi))) + (icmp_cond_result + (x64_test (OperandSize.Size64) (RegMemImm.Imm 1) cmp) + (CC.NZ)))) + +;; Result = (a_hi <> b_hi) || +;; (a_hi == b_hi && a_lo <> b_lo) +(rule (emit_cmp cc a @ (value_type $I128) b) + (if (intcc_neq cc (IntCC.Equal))) + (if (intcc_neq cc (IntCC.NotEqual))) + (let ((a_lo Gpr (value_regs_get_gpr a 0)) + (a_hi Gpr (value_regs_get_gpr a 1)) + (b_lo Gpr (value_regs_get_gpr b 0)) + (b_hi Gpr (value_regs_get_gpr b 1)) + (cmp_hi ValueRegs (with_flags (x64_cmp (OperandSize.Size64) b_hi a_hi) + (consumes_flags_concat + (x64_setcc (intcc_without_eq cc)) + (x64_setcc (CC.Z))))) + (cc_hi Reg (value_regs_get cmp_hi 0)) + (eq_hi Reg (value_regs_get cmp_hi 1)) + + (cmp_lo Reg (with_flags_reg (x64_cmp (OperandSize.Size64) b_lo a_lo) + (x64_setcc (intcc_unsigned cc)))) + + (res_lo Reg (x64_and $I64 eq_hi cmp_lo)) + (res Reg (x64_or $I64 cc_hi res_lo))) + (icmp_cond_result + (x64_test (OperandSize.Size64) (RegMemImm.Imm 1) res) + (CC.NZ)))) + +(type FcmpCondResult + (enum + ;; The given condition code must be set. + (Condition (producer ProducesFlags) (cc CC)) + + ;; Both condition codes must be set. + (AndCondition (producer ProducesFlags) (cc1 CC) (cc2 CC)) + + ;; Either of the condition codes must be set. + (OrCondition (producer ProducesFlags) (cc1 CC) (cc2 CC)))) + +;; Lower a FcmpCondResult to a boolean value in a register. 
+(decl lower_fcmp_bool (FcmpCondResult) ValueRegs) + +(rule (lower_fcmp_bool (FcmpCondResult.Condition producer cc)) + (with_flags producer (x64_setcc cc))) + +(rule (lower_fcmp_bool (FcmpCondResult.AndCondition producer cc1 cc2)) + (let ((maybe ValueRegs (with_flags producer + (consumes_flags_concat + (x64_setcc cc1) + (x64_setcc cc2)))) + (maybe0 Gpr (value_regs_get_gpr maybe 0)) + (maybe1 Gpr (value_regs_get_gpr maybe 1))) + (value_reg (x64_and $I8 maybe0 maybe1)))) + +(rule (lower_fcmp_bool (FcmpCondResult.OrCondition producer cc1 cc2)) + (let ((maybe ValueRegs (with_flags producer + (consumes_flags_concat + (x64_setcc cc1) + (x64_setcc cc2)))) + (maybe0 Gpr (value_regs_get_gpr maybe 0)) + (maybe1 Gpr (value_regs_get_gpr maybe 1))) + (value_reg (x64_or $I8 maybe0 maybe1)))) + +;; CLIF's `fcmp` instruction always operates on XMM registers--both scalar and +;; vector. For the scalar versions, we use the flag-setting behavior of the +;; `UCOMIS*` instruction to `SETcc` a 0 or 1 in a GPR register. Note that CLIF's +;; `select` uses the same kind of flag-setting behavior but chooses values other +;; than 0 or 1. +;; +;; Checking the result of `UCOMIS*` is unfortunately difficult in some cases +;; because we do not have `SETcc` instructions that explicitly check +;; simultaneously for the condition (i.e., `eq`, `le`, `gt`, etc.) *and* +;; orderedness. Instead, we must check the flags multiple times. 
The UCOMIS* +;; documentation (see Intel's Software Developer's Manual, volume 2, chapter 4) +;; is helpful: +;; - unordered assigns Z = 1, P = 1, C = 1 +;; - greater than assigns Z = 0, P = 0, C = 0 +;; - less than assigns Z = 0, P = 0, C = 1 +;; - equal assigns Z = 1, P = 0, C = 0 +(decl emit_fcmp (FloatCC Value Value) FcmpCondResult) + +(rule (emit_fcmp (FloatCC.Equal) a @ (value_type (ty_scalar_float _)) b) + (FcmpCondResult.AndCondition (x64_ucomis b a) (CC.NP) (CC.Z))) + +(rule (emit_fcmp (FloatCC.NotEqual) a @ (value_type (ty_scalar_float _)) b) + (FcmpCondResult.OrCondition (x64_ucomis b a) (CC.P) (CC.NZ))) + +;; Some scalar lowerings correspond to one condition code. + +(rule (emit_fcmp (FloatCC.Ordered) a @ (value_type (ty_scalar_float ty)) b) + (FcmpCondResult.Condition (x64_ucomis b a) (CC.NP))) +(rule (emit_fcmp (FloatCC.Unordered) a @ (value_type (ty_scalar_float ty)) b) + (FcmpCondResult.Condition (x64_ucomis b a) (CC.P))) +(rule (emit_fcmp (FloatCC.OrderedNotEqual) a @ (value_type (ty_scalar_float ty)) b) + (FcmpCondResult.Condition (x64_ucomis b a) (CC.NZ))) +(rule (emit_fcmp (FloatCC.UnorderedOrEqual) a @ (value_type (ty_scalar_float ty)) b) + (FcmpCondResult.Condition (x64_ucomis b a) (CC.Z))) +(rule (emit_fcmp (FloatCC.GreaterThan) a @ (value_type (ty_scalar_float ty)) b) + (FcmpCondResult.Condition (x64_ucomis b a) (CC.NBE))) +(rule (emit_fcmp (FloatCC.GreaterThanOrEqual) a @ (value_type (ty_scalar_float ty)) b) + (FcmpCondResult.Condition (x64_ucomis b a) (CC.NB))) +(rule (emit_fcmp (FloatCC.UnorderedOrLessThan) a @ (value_type (ty_scalar_float ty)) b) + (FcmpCondResult.Condition (x64_ucomis b a) (CC.B))) +(rule (emit_fcmp (FloatCC.UnorderedOrLessThanOrEqual) a @ (value_type (ty_scalar_float ty)) b) + (FcmpCondResult.Condition (x64_ucomis b a) (CC.BE))) + +;; Other scalar lowerings are made possible by flipping the operands and +;; reversing the condition code. 
+ +(rule (emit_fcmp (FloatCC.LessThan) a @ (value_type (ty_scalar_float ty)) b) + ;; Same flags as `GreaterThan`. + (FcmpCondResult.Condition (x64_ucomis a b) (CC.NBE))) +(rule (emit_fcmp (FloatCC.LessThanOrEqual) a @ (value_type (ty_scalar_float ty)) b) + ;; Same flags as `GreaterThanOrEqual`. + (FcmpCondResult.Condition (x64_ucomis a b) (CC.NB))) +(rule (emit_fcmp (FloatCC.UnorderedOrGreaterThan) a @ (value_type (ty_scalar_float ty)) b) + ;; Same flags as `UnorderedOrLessThan`. + (FcmpCondResult.Condition (x64_ucomis a b) (CC.B))) +(rule (emit_fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) a @ (value_type (ty_scalar_float ty)) b) + ;; Same flags as `UnorderedOrLessThanOrEqual`. + (FcmpCondResult.Condition (x64_ucomis a b) (CC.BE))) + ;;;; Atomics ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (decl x64_mfence () SideEffectNoResult) diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 93f478ad77..9552993ee0 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1476,6 +1476,44 @@ pub(crate) fn emit( sink.bind_label(else_label); } + Inst::TrapIfAnd { + cc1, + cc2, + trap_code, + } => { + let else_label = sink.get_label(); + + // Jump over if either condition code is not set. + one_way_jmp(sink, cc1.invert(), else_label); + one_way_jmp(sink, cc2.invert(), else_label); + + // Trap! + let inst = Inst::trap(*trap_code); + inst.emit(&[], sink, info, state); + + sink.bind_label(else_label); + } + + Inst::TrapIfOr { + cc1, + cc2, + trap_code, + } => { + let trap_label = sink.get_label(); + let else_label = sink.get_label(); + + // trap immediately if cc1 is set, otherwise jump over the trap if cc2 is not. + one_way_jmp(sink, *cc1, trap_label); + one_way_jmp(sink, cc2.invert(), else_label); + + // Trap! 
+ sink.bind_label(trap_label); + let inst = Inst::trap(*trap_code); + inst.emit(&[], sink, info, state); + + sink.bind_label(else_label); + } + Inst::XmmUnaryRmR { op, src: src_e, diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index ab22b1a003..4ceda3198b 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -105,6 +105,8 @@ impl Inst { | Inst::ShiftR { .. } | Inst::SignExtendData { .. } | Inst::TrapIf { .. } + | Inst::TrapIfAnd { .. } + | Inst::TrapIfOr { .. } | Inst::Ud2 { .. } | Inst::VirtualSPOffsetAdj { .. } | Inst::XmmCmove { .. } @@ -1664,6 +1666,34 @@ impl PrettyPrint for Inst { format!("j{} ; ud2 {} ;", cc.invert().to_string(), trap_code) } + Inst::TrapIfAnd { + cc1, + cc2, + trap_code, + .. + } => { + format!( + "trap_if_and {}, {}, {}", + cc1.invert().to_string(), + cc2.invert().to_string(), + trap_code + ) + } + + Inst::TrapIfOr { + cc1, + cc2, + trap_code, + .. + } => { + format!( + "trap_if_or {}, {}, {}", + cc1.to_string(), + cc2.invert().to_string(), + trap_code + ) + } + Inst::LoadExtName { dst, name, offset, .. } => { @@ -2146,6 +2176,8 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol | Inst::JmpCond { .. } | Inst::Nop { .. } | Inst::TrapIf { .. } + | Inst::TrapIfAnd { .. } + | Inst::TrapIfOr { .. } | Inst::VirtualSPOffsetAdj { .. } | Inst::Hlt | Inst::Ud2 { .. 
} diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 5087e8c5e3..24e0672018 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -1452,6 +1452,24 @@ (rule (lower (trap code)) (side_effect (x64_ud2 code))) +;;;; Rules for `trapif` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The flags must not have been clobbered by any other instruction between the +;; iadd_ifcout and this instruction, as verified by the CLIF validator; so we +;; can simply use the flags here. +(rule (lower (trapif cc flags @ (iadd_ifcout _ _) tc)) + (side_effect + (trap_if_icmp (icmp_cond_result (flags_to_producesflags flags) cc) tc))) + +;; Verification ensures that the input is always a single-def ifcmp. +(rule (lower (trapif cc (ifcmp a b) tc)) + (side_effect (trap_if_icmp (emit_cmp cc a b) tc))) + +;;;; Rules for `trapff` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (trapff cc (ffcmp a b) tc)) + (side_effect (trap_if_fcmp (emit_fcmp cc a b) tc))) + ;;;; Rules for `resumable_trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (resumable_trap code)) @@ -1475,12 +1493,11 @@ ;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; For GPR-held values we only need to emit `CMP + SETCC`. We rely here on -;; Cranelift's verification that `a` and `b` are of the same type. -;; Unfortunately for clarity, the registers are flipped here (TODO). (rule (lower (icmp cc a @ (value_type (fits_in_64 ty)) b)) - (let ((size OperandSize (raw_operand_size_of_type ty))) - (with_flags (x64_cmp size b a) (x64_setcc cc)))) + (lower_icmp_bool (emit_cmp cc a b))) + +(rule (lower (icmp cc a @ (value_type $I128) b)) + (lower_icmp_bool (emit_cmp cc a b))) ;; For XMM-held values, we lower to `PCMP*` instructions, sometimes more than ;; one. 
To note: what is different here about the output values is that each @@ -1552,61 +1569,6 @@ ;; TODO: not used by WebAssembly translation ;; (rule (lower (icmp (IntCC.UnsignedLessThanOrEqual) a @ (value_type $I64X2) b)) -;; For I128 values (held in two GPRs), the instruction sequences depend on what -;; kind of condition is tested. -(rule (lower (icmp (IntCC.Equal) a @ (value_type $I128) b)) - (let ((a_lo Gpr (value_regs_get_gpr a 0)) - (a_hi Gpr (value_regs_get_gpr a 1)) - (b_lo Gpr (value_regs_get_gpr b 0)) - (b_hi Gpr (value_regs_get_gpr b 1)) - (cmp_lo Reg (with_flags_reg (x64_cmp (OperandSize.Size64) b_lo a_lo) (x64_setcc (CC.Z)))) - (cmp_hi Reg (with_flags_reg (x64_cmp (OperandSize.Size64) b_hi a_hi) (x64_setcc (CC.Z)))) - ;; At this point, `cmp_lo` and `cmp_hi` contain either 0 or 1 in the - ;; lowest 8 bits--`SETcc` guarantees this. The upper bits may be - ;; unchanged so we must compare against 1 below; this instruction - ;; combines `cmp_lo` and `cmp_hi` for that final comparison. - (cmp Reg (x64_and $I64 cmp_lo cmp_hi))) - ;; We must compare one more time against the immediate value 1 to - ;; check if both `cmp_lo` and `cmp_hi` are true. If `cmp AND 1 == 0` - ;; then the `ZF` will be set (see `TEST` definition); if either of - ;; the halves `AND`s to 0, they were not equal, therefore we `SETcc` - ;; with `NZ`. - (with_flags (x64_test (OperandSize.Size64) (RegMemImm.Imm 1) cmp) (x64_setcc (CC.NZ))))) - -(rule (lower (icmp (IntCC.NotEqual) a @ (value_type $I128) b)) - (let ((a_lo Gpr (value_regs_get_gpr a 0)) - (a_hi Gpr (value_regs_get_gpr a 1)) - (b_lo Gpr (value_regs_get_gpr b 0)) - (b_hi Gpr (value_regs_get_gpr b 1)) - (cmp_lo Reg (with_flags_reg (x64_cmp (OperandSize.Size64) b_lo a_lo) (x64_setcc (CC.NZ)))) - (cmp_hi Reg (with_flags_reg (x64_cmp (OperandSize.Size64) b_hi a_hi) (x64_setcc (CC.NZ)))) - ;; See comments for `IntCC.Equal`. 
- (cmp Reg (x64_or $I64 cmp_lo cmp_hi))) - (with_flags (x64_test (OperandSize.Size64) (RegMemImm.Imm 1) cmp) (x64_setcc (CC.NZ))))) - -;; Result = (a_hi <> b_hi) || -;; (a_hi == b_hi && a_lo <> b_lo) -(rule (lower (icmp cc a @ (value_type $I128) b)) - (if (intcc_neq cc (IntCC.Equal))) - (if (intcc_neq cc (IntCC.NotEqual))) - (let ((a_lo Gpr (value_regs_get_gpr a 0)) - (a_hi Gpr (value_regs_get_gpr a 1)) - (b_lo Gpr (value_regs_get_gpr b 0)) - (b_hi Gpr (value_regs_get_gpr b 1)) - (cmp_hi ValueRegs (with_flags (x64_cmp (OperandSize.Size64) b_hi a_hi) - (consumes_flags_concat - (x64_setcc (intcc_without_eq cc)) - (x64_setcc (CC.Z))))) - (cc_hi Reg (value_regs_get cmp_hi 0)) - (eq_hi Reg (value_regs_get cmp_hi 1)) - - (cmp_lo Reg (with_flags_reg (x64_cmp (OperandSize.Size64) b_lo a_lo) - (x64_setcc (intcc_unsigned cc)))) - - (res_lo Reg (x64_and $I64 eq_hi cmp_lo)) - (res Reg (x64_or $I64 cc_hi res_lo))) - (x64_and $I64 res (RegMemImm.Imm 1)))) - ;;;; Rules for `fcmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1627,58 +1589,8 @@ ;; - less than assigns Z = 0, P = 0, C = 1 ;; - equal assigns Z = 1, P = 0, C = 0 -(rule (lower (fcmp (FloatCC.Equal) a @ (value_type (ty_scalar_float ty)) b)) - (let ((maybe ValueRegs (with_flags (x64_ucomis b a) - (consumes_flags_concat - (x64_setcc (CC.NP)) - (x64_setcc (CC.Z))))) - (maybe_np Gpr (value_regs_get_gpr maybe 0)) - (maybe_z Gpr (value_regs_get_gpr maybe 1))) - (x64_and $I32 maybe_np maybe_z))) - -(rule (lower (fcmp (FloatCC.NotEqual) a @ (value_type (ty_scalar_float ty)) b)) - (let ((maybe ValueRegs (with_flags (x64_ucomis b a) - (consumes_flags_concat - (x64_setcc (CC.P)) - (x64_setcc (CC.NZ))))) - (maybe_p Gpr (value_regs_get_gpr maybe 0)) - (maybe_nz Gpr (value_regs_get_gpr maybe 1))) - (x64_or $I32 maybe_p maybe_nz))) - -;; Some scalar lowerings correspond to one condition code. 
- -(rule (lower (fcmp (FloatCC.Ordered) a @ (value_type (ty_scalar_float ty)) b)) - (with_flags (x64_ucomis b a) (x64_setcc (CC.NP)))) -(rule (lower (fcmp (FloatCC.Unordered) a @ (value_type (ty_scalar_float ty)) b)) - (with_flags (x64_ucomis b a) (x64_setcc (CC.P)))) -(rule (lower (fcmp (FloatCC.OrderedNotEqual) a @ (value_type (ty_scalar_float ty)) b)) - (with_flags (x64_ucomis b a) (x64_setcc (CC.NZ)))) -(rule (lower (fcmp (FloatCC.UnorderedOrEqual) a @ (value_type (ty_scalar_float ty)) b)) - (with_flags (x64_ucomis b a) (x64_setcc (CC.Z)))) -(rule (lower (fcmp (FloatCC.GreaterThan) a @ (value_type (ty_scalar_float ty)) b)) - (with_flags (x64_ucomis b a) (x64_setcc (CC.NBE)))) -(rule (lower (fcmp (FloatCC.GreaterThanOrEqual) a @ (value_type (ty_scalar_float ty)) b)) - (with_flags (x64_ucomis b a) (x64_setcc (CC.NB)))) -(rule (lower (fcmp (FloatCC.UnorderedOrLessThan) a @ (value_type (ty_scalar_float ty)) b)) - (with_flags (x64_ucomis b a) (x64_setcc (CC.B)))) -(rule (lower (fcmp (FloatCC.UnorderedOrLessThanOrEqual) a @ (value_type (ty_scalar_float ty)) b)) - (with_flags (x64_ucomis b a) (x64_setcc (CC.BE)))) - -;; Other scalar lowerings are made possible by flipping the operands and -;; reversing the condition code. - -(rule (lower (fcmp (FloatCC.LessThan) a @ (value_type (ty_scalar_float ty)) b)) - ;; Same flags as `GreaterThan`. - (with_flags (x64_ucomis a b) (x64_setcc (CC.NBE)))) -(rule (lower (fcmp (FloatCC.LessThanOrEqual) a @ (value_type (ty_scalar_float ty)) b)) - ;; Same flags as `GreaterThanOrEqual`. - (with_flags (x64_ucomis a b) (x64_setcc (CC.NB)))) -(rule (lower (fcmp (FloatCC.UnorderedOrGreaterThan) a @ (value_type (ty_scalar_float ty)) b)) - ;; Same flags as `UnorderedOrLessThan`. - (with_flags (x64_ucomis a b) (x64_setcc (CC.B)))) -(rule (lower (fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) a @ (value_type (ty_scalar_float ty)) b)) - ;; Same flags as `UnorderedOrLessThanOrEqual`. 
- (with_flags (x64_ucomis a b) (x64_setcc (CC.BE)))) +(rule (lower (fcmp cc a @ (value_type (ty_scalar_float ty)) b)) + (lower_fcmp_bool (emit_fcmp cc a b))) ;; For vector lowerings, we use `CMPP*` instructions with a 3-bit operand that ;; determines the comparison to make. Note that comparisons that succeed will diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index b62ad690a0..2fd5c2e8dd 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -926,65 +926,12 @@ fn lower_insn_to_regs>( | Opcode::FallthroughReturn | Opcode::Return | Opcode::Call - | Opcode::CallIndirect => { + | Opcode::CallIndirect + | Opcode::Trapif + | Opcode::Trapff => { implemented_in_isle(ctx); } - Opcode::Trapif | Opcode::Trapff => { - let trap_code = ctx.data(insn).trap_code().unwrap(); - - if matches_input(ctx, inputs[0], Opcode::IaddIfcout).is_some() { - let cond_code = ctx.data(insn).cond_code().unwrap(); - // The flags must not have been clobbered by any other instruction between the - // iadd_ifcout and this instruction, as verified by the CLIF validator; so we can - // simply use the flags here. - let cc = CC::from_intcc(cond_code); - - ctx.emit(Inst::TrapIf { trap_code, cc }); - } else if op == Opcode::Trapif { - let cond_code = ctx.data(insn).cond_code().unwrap(); - - // Verification ensures that the input is always a single-def ifcmp. - let ifcmp = matches_input(ctx, inputs[0], Opcode::Ifcmp).unwrap(); - let cond_code = emit_cmp(ctx, ifcmp, cond_code); - let cc = CC::from_intcc(cond_code); - - ctx.emit(Inst::TrapIf { trap_code, cc }); - } else { - let cond_code = ctx.data(insn).fp_cond_code().unwrap(); - - // Verification ensures that the input is always a single-def ffcmp. 
- let ffcmp = matches_input(ctx, inputs[0], Opcode::Ffcmp).unwrap(); - - match emit_fcmp(ctx, ffcmp, cond_code, FcmpSpec::Normal) { - FcmpCondResult::Condition(cc) => ctx.emit(Inst::TrapIf { trap_code, cc }), - FcmpCondResult::AndConditions(cc1, cc2) => { - // A bit unfortunate, but materialize the flags in their own register, and - // check against this. - let tmp = ctx.alloc_tmp(types::I32).only_reg().unwrap(); - let tmp2 = ctx.alloc_tmp(types::I32).only_reg().unwrap(); - ctx.emit(Inst::setcc(cc1, tmp)); - ctx.emit(Inst::setcc(cc2, tmp2)); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size32, - AluRmiROpcode::And, - RegMemImm::reg(tmp.to_reg()), - tmp2, - )); - ctx.emit(Inst::TrapIf { - trap_code, - cc: CC::NZ, - }); - } - FcmpCondResult::OrConditions(cc1, cc2) => { - ctx.emit(Inst::TrapIf { trap_code, cc: cc1 }); - ctx.emit(Inst::TrapIf { trap_code, cc: cc2 }); - } - FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(), - }; - }; - } - Opcode::FcvtFromSint => { let output_ty = ty.unwrap(); if !output_ty.is_vector() { diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 5f7b490dcb..f463f6e883 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -144,6 +144,11 @@ macro_rules! isle_prelude_methods { } } + #[inline] + fn mark_value_used(&mut self, val: Value) { + self.lower_ctx.increment_lowered_uses(val); + } + #[inline] fn put_in_reg(&mut self, val: Value) -> Reg { self.lower_ctx.put_value_in_regs(val).only_reg().unwrap() diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index 72c8d87245..ff7d84c8d7 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -141,6 +141,8 @@ pub trait LowerCtx { /// Resolves a particular input of an instruction to the `Value` that it is /// represented with. 
fn input_as_value(&self, ir_inst: Inst, idx: usize) -> Value; + /// Increment the reference count for the Value, ensuring that it gets lowered. + fn increment_lowered_uses(&mut self, val: Value); /// Put the `idx`th input into register(s) and return the assigned register. fn put_input_in_regs(&mut self, ir_inst: Inst, idx: usize) -> ValueRegs; /// Put the given value into register(s) and return the assigned register. @@ -1362,6 +1364,10 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { NonRegInput { inst, constant } } + fn increment_lowered_uses(&mut self, val: Value) { + self.value_lowered_uses[val] += 1 + } + fn put_input_in_regs(&mut self, ir_inst: Inst, idx: usize) -> ValueRegs { let val = self.f.dfg.inst_args(ir_inst)[idx]; self.put_value_in_regs(val) diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 733df9e48b..a6b9c2df56 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -147,6 +147,10 @@ (decl valid_reg () Reg) (extern extractor valid_reg valid_reg) +;; Mark this value as used, to ensure that it gets lowered. +(decl mark_value_used (Value) Unit) +(extern constructor mark_value_used mark_value_used) + ;; Put the given value into a register. ;; ;; Asserts that the value fits into a single register, and doesn't require @@ -563,6 +567,11 @@ ;; Variant determines how result is given when combined with a ;; ConsumesFlags. See `with_flags` below for more. (type ProducesFlags (enum + ;; For cases where the flags have been produced by another + ;; instruction, and we have out-of-band reasons to know + ;; that they won't be clobbered by the time we depend on + ;; them. + (AlreadyExistingFlags) (ProducesFlagsSideEffect (inst MInst)) ;; Not directly combinable with a ConsumesFlags; ;; used in s390x and unwrapped directly by `trapif`. @@ -574,6 +583,7 @@ ;; Variant determines how result is given when combined with a ;; ProducesFlags. See `with_flags` below for more. 
(type ConsumesFlags (enum + (ConsumesFlagsSideEffect (inst MInst)) (ConsumesFlagsReturnsResultWithProducer (inst MInst) (result Reg)) (ConsumesFlagsReturnsReg (inst MInst) (result Reg)) (ConsumesFlagsTwiceReturnsValueRegs (inst1 MInst) @@ -667,6 +677,30 @@ (let ((v ValueRegs (with_flags p c))) (value_regs_get v 0))) +;; Indicate that the current state of the flags register from the instruction +;; that produces this Value is relied on. +(decl flags_to_producesflags (Value) ProducesFlags) +(rule (flags_to_producesflags val) + (let ((_ Unit (mark_value_used val))) + (ProducesFlags.AlreadyExistingFlags))) + +;; Combine a flags-producing instruction and a flags-consuming instruction that +;; produces no results. +;; +;; This function handles only a ConsumesFlagsSideEffect consumer, paired with +;; either AlreadyExistingFlags or ProducesFlagsSideEffect. +(decl with_flags_side_effect (ProducesFlags ConsumesFlags) SideEffectNoResult) + +(rule (with_flags_side_effect + (ProducesFlags.AlreadyExistingFlags) + (ConsumesFlags.ConsumesFlagsSideEffect c)) + (SideEffectNoResult.Inst c)) + +(rule (with_flags_side_effect + (ProducesFlags.ProducesFlagsSideEffect p) + (ConsumesFlags.ConsumesFlagsSideEffect c)) + (SideEffectNoResult.Inst2 p c)) + ;;;; Helpers for Working with TrapCode ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (decl trap_code_division_by_zero () TrapCode) diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif index 838df0fee4..614e3f56f7 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -208,37 +208,41 @@ block0(v0: i128, v1: i128): ; setnz %r8b ; movq %r8, rsp(0 + virtual offset) ; cmpq %rcx, %rsi -; setl %r8b -; setz %r10b +; setl %r10b +; setz %r11b ; cmpq %rdx, %rdi -; setb %r11b -; andq %r10, %r11, %r10 -; orq %r8, %r10, %r8 -; andq %r8, $1, %r8 +; setb %r9b +; andq %r11, %r9, %r11 +; orq %r10, %r11, %r10 +; testq $1, %r10 +; setnz %r9b ; cmpq %rcx, %rsi ; setl %r10b ; setz 
%r11b ; cmpq %rdx, %rdi -; setbe %r13b -; andq %r11, %r13, %r11 +; setbe %r14b +; andq %r11, %r14, %r11 ; orq %r10, %r11, %r10 -; andq %r10, $1, %r10 +; testq $1, %r10 +; setnz %r10b ; cmpq %rcx, %rsi ; setnle %r11b -; setz %r14b -; cmpq %rdx, %rdi -; setnbe %r15b -; andq %r14, %r15, %r14 -; orq %r11, %r14, %r11 -; andq %r11, $1, %r11 -; cmpq %rcx, %rsi -; setnle %r12b ; setz %bl ; cmpq %rdx, %rdi -; setnb %r13b -; andq %rbx, %r13, %rbx -; orq %r12, %rbx, %r12 -; andq %r12, $1, %r12 +; setnbe %r12b +; andq %rbx, %r12, %rbx +; orq %r11, %rbx, %r11 +; testq $1, %r11 +; setnz %r11b +; cmpq %rcx, %rsi +; setnle %r14b +; setz %r15b +; cmpq %rdx, %rdi +; setnb %bl +; andq %r15, %rbx, %r15 +; orq %r14, %r15, %r14 +; testq $1, %r14 +; setnz %r12b ; cmpq %rcx, %rsi ; setb %r13b ; setz %r14b @@ -246,41 +250,45 @@ block0(v0: i128, v1: i128): ; setb %r15b ; andq %r14, %r15, %r14 ; orq %r13, %r14, %r13 -; andq %r13, $1, %r13 +; testq $1, %r13 +; setnz %r13b ; cmpq %rcx, %rsi -; setb %bl -; setz %r15b +; setb %r15b +; setz %bl ; cmpq %rdx, %rdi ; setbe %r14b -; andq %r15, %r14, %r15 -; orq %rbx, %r15, %rbx -; andq %rbx, $1, %rbx +; andq %rbx, %r14, %rbx +; orq %r15, %rbx, %r15 +; testq $1, %r15 +; setnz %r14b ; cmpq %rcx, %rsi -; setnbe %r14b -; setz %r15b +; setnbe %r15b +; setz %bl ; cmpq %rdx, %rdi -; setnbe %r9b -; andq %r15, %r9, %r15 -; orq %r14, %r15, %r14 -; andq %r14, $1, %r14 +; setnbe %r8b +; andq %rbx, %r8, %rbx +; orq %r15, %rbx, %r15 +; testq $1, %r15 +; setnz %r15b ; cmpq %rcx, %rsi -; setnbe %sil -; setz %cl +; setnbe %cl +; setz %sil ; cmpq %rdx, %rdi ; setnb %dl -; andq %rcx, %rdx, %rcx -; orq %rsi, %rcx, %rsi -; andq %rsi, $1, %rsi -; movq rsp(0 + virtual offset), %r9 -; andl %eax, %r9d, %eax -; andl %r8d, %r10d, %r8d +; andq %rsi, %rdx, %rsi +; orq %rcx, %rsi, %rcx +; testq $1, %rcx +; setnz %sil +; movq rsp(0 + virtual offset), %rdx +; andl %eax, %edx, %eax +; andl %r9d, %r10d, %r9d ; andl %r11d, %r12d, %r11d -; andl %r13d, %ebx, %r13d -; andl %r14d, %esi, 
%r14d -; andl %eax, %r8d, %eax +; andl %r13d, %r14d, %r13d +; andl %r15d, %esi, %r15d +; andl %eax, %r9d, %eax ; andl %r11d, %r13d, %r11d ; andl %eax, %r11d, %eax -; andl %eax, %r14d, %eax +; andl %eax, %r15d, %eax ; movq 16(%rsp), %rbx ; movq 24(%rsp), %r12 ; movq 32(%rsp), %r13 diff --git a/cranelift/filetests/filetests/isa/x64/traps.clif b/cranelift/filetests/filetests/isa/x64/traps.clif new file mode 100644 index 0000000000..9a923a9288 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/traps.clif @@ -0,0 +1,30 @@ +test compile precise-output +target x86_64 + +function %trap() { +block0: + trap user0 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; ud2 user0 + + +function %trap_iadd_ifcout(i64, i64) { +block0(v0: i64, v1: i64): + v2, v3 = iadd_ifcout v0, v1 + trapif of v3, user0 + return +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; addq %rdi, %rsi, %rdi +; jno ; ud2 user0 ; +; movq %rbp, %rsp +; popq %rbp +; ret +