x64: Migrate trapif and trapff to ISLE (#4545)

https://github.com/bytecodealliance/wasmtime/pull/4545
This commit is contained in:
Trevor Elliott
2022-08-01 11:24:11 -07:00
committed by GitHub
parent a47a82d2e5
commit 25782b527e
10 changed files with 438 additions and 213 deletions

View File

@@ -380,6 +380,16 @@
(TrapIf (cc CC) (TrapIf (cc CC)
(trap_code TrapCode)) (trap_code TrapCode))
;; Traps with `trap_code` only when both `cc1` and `cc2` are set.
(TrapIfAnd (cc1 CC)
(cc2 CC)
(trap_code TrapCode))
;; Traps with `trap_code` when at least one of `cc1` and `cc2` is set.
(TrapIfOr (cc1 CC)
(cc2 CC)
(trap_code TrapCode))
;; A debug trap. ;; A debug trap.
(Hlt) (Hlt)
@@ -3002,6 +3012,209 @@
(rule (x64_xor_mem ty addr val) (rule (x64_xor_mem ty addr val)
(alu_rm ty (AluRmiROpcode.Xor) addr val)) (alu_rm ty (AluRmiROpcode.Xor) addr val))
;; Build a flags consumer that traps with `code` when `cond` is set.
(decl trap_if (CC TrapCode) ConsumesFlags)
(rule (trap_if cond code)
      (let ((inst MInst (MInst.TrapIf cond code)))
        (ConsumesFlags.ConsumesFlagsSideEffect inst)))
;; Build a flags consumer that traps with `code` only when `first` and
;; `second` are both set.
(decl trap_if_and (CC CC TrapCode) ConsumesFlags)
(rule (trap_if_and first second code)
      (let ((inst MInst (MInst.TrapIfAnd first second code)))
        (ConsumesFlags.ConsumesFlagsSideEffect inst)))
;; Build a flags consumer that traps with `code` when at least one of
;; `first` and `second` is set.
(decl trap_if_or (CC CC TrapCode) ConsumesFlags)
(rule (trap_if_or first second code)
      (let ((inst MInst (MInst.TrapIfOr first second code)))
        (ConsumesFlags.ConsumesFlagsSideEffect inst)))
;; Trap with `code` when the condition carried by an integer-comparison
;; result is set.
(decl trap_if_icmp (IcmpCondResult TrapCode) SideEffectNoResult)
(rule (trap_if_icmp (IcmpCondResult.Condition flags cond) code)
      (let ((consumer ConsumesFlags (trap_if cond code)))
        (with_flags_side_effect flags consumer)))
;; Trap with `code` when a float-comparison result holds. Each shape of
;; `FcmpCondResult` is forwarded to the trap helper of the same shape.
(decl trap_if_fcmp (FcmpCondResult TrapCode) SideEffectNoResult)

;; A single condition code decides the trap.
(rule (trap_if_fcmp (FcmpCondResult.Condition flags cond) code)
      (with_flags_side_effect flags (trap_if cond code)))

;; Both condition codes have to be set.
(rule (trap_if_fcmp (FcmpCondResult.AndCondition flags first second) code)
      (with_flags_side_effect flags (trap_if_and first second code)))

;; One set condition code is enough.
(rule (trap_if_fcmp (FcmpCondResult.OrCondition flags first second) code)
      (with_flags_side_effect flags (trap_if_or first second code)))
;;;; Comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The outcome of lowering an integer comparison: the flags producer to emit,
;; plus the condition code that should be tested against those flags.
(type IcmpCondResult (enum (Condition (producer ProducesFlags) (cc CC))))
;; Convenience constructor for `IcmpCondResult.Condition`.
(decl icmp_cond_result (ProducesFlags CC) IcmpCondResult)
(rule (icmp_cond_result producer cc) (IcmpCondResult.Condition producer cc))
;; Turn the outcome of an integer comparison into a boolean held in a
;; register, by pairing the flags producer with a `SETcc` on its condition.
(decl lower_icmp_bool (IcmpCondResult) ValueRegs)
(rule (lower_icmp_bool (IcmpCondResult.Condition flags cond))
      (let ((set ConsumesFlags (x64_setcc cond)))
        (with_flags flags set)))
;; Build the comparison for an integer condition and return it, together with
;; the condition code to test, as an `IcmpCondResult`. How the condition is
;; consumed (e.g. `lower_icmp_bool` or `trap_if_icmp`) is up to the caller.
(decl emit_cmp (IntCC Value Value) IcmpCondResult)
;; For GPR-held values a single `CMP` is all that is needed here. We rely on
;; Cranelift's verification that `a` and `b` are of the same type.
;; Unfortunately for clarity, the registers are flipped here (TODO).
(rule (emit_cmp cc a @ (value_type ty) b)
(let ((size OperandSize (raw_operand_size_of_type ty)))
(icmp_cond_result (x64_cmp size b a) cc)))
;; For I128 values (held in two GPRs), the instruction sequences depend on what
;; kind of condition is tested.
;;
;; Equality: both the low and the high halves must compare equal.
(rule (emit_cmp (IntCC.Equal) a @ (value_type $I128) b)
(let ((a_lo Gpr (value_regs_get_gpr a 0))
(a_hi Gpr (value_regs_get_gpr a 1))
(b_lo Gpr (value_regs_get_gpr b 0))
(b_hi Gpr (value_regs_get_gpr b 1))
(cmp_lo Reg (with_flags_reg (x64_cmp (OperandSize.Size64) b_lo a_lo) (x64_setcc (CC.Z))))
(cmp_hi Reg (with_flags_reg (x64_cmp (OperandSize.Size64) b_hi a_hi) (x64_setcc (CC.Z))))
;; At this point, `cmp_lo` and `cmp_hi` contain either 0 or 1 in the
;; lowest 8 bits--`SETcc` guarantees this. The upper bits may be
;; unchanged so we must test against 1 below; this instruction
;; combines `cmp_lo` and `cmp_hi` for that final test.
(cmp Reg (x64_and $I64 cmp_lo cmp_hi)))
;; Test one more time against the immediate value 1 to check if both
;; `cmp_lo` and `cmp_hi` are true. If `cmp AND 1 == 0` then the `ZF`
;; will be set (see `TEST` definition); if either of the halves `AND`s
;; to 0, they were not equal. The condition handed back to the caller
;; is therefore `NZ`.
(icmp_cond_result
(x64_test (OperandSize.Size64) (RegMemImm.Imm 1) cmp)
(CC.NZ))))
;; Inequality of I128 values: it is enough for one of the halves to differ.
(rule (emit_cmp (IntCC.NotEqual) a @ (value_type $I128) b)
(let ((a_lo Gpr (value_regs_get_gpr a 0))
(a_hi Gpr (value_regs_get_gpr a 1))
(b_lo Gpr (value_regs_get_gpr b 0))
(b_hi Gpr (value_regs_get_gpr b 1))
(cmp_lo Reg (with_flags_reg (x64_cmp (OperandSize.Size64) b_lo a_lo) (x64_setcc (CC.NZ))))
(cmp_hi Reg (with_flags_reg (x64_cmp (OperandSize.Size64) b_hi a_hi) (x64_setcc (CC.NZ))))
;; Each `SETcc` result is 0 or 1 in the lowest 8 bits only, so the two
;; results are OR-ed together here and only bit 0 is tested below.
(cmp Reg (x64_or $I64 cmp_lo cmp_hi)))
;; `ZF` is clear exactly when bit 0 of `cmp` is set, so the condition
;; handed back to the caller is `NZ`.
(icmp_cond_result
(x64_test (OperandSize.Size64) (RegMemImm.Imm 1) cmp)
(CC.NZ))))
;; Remaining (ordering) conditions on I128 values. Writing `<>` for the
;; requested comparison:
;; Result = (a_hi <> b_hi) ||
;; (a_hi == b_hi && a_lo <> b_lo)
(rule (emit_cmp cc a @ (value_type $I128) b)
(if (intcc_neq cc (IntCC.Equal)))
(if (intcc_neq cc (IntCC.NotEqual)))
(let ((a_lo Gpr (value_regs_get_gpr a 0))
(a_hi Gpr (value_regs_get_gpr a 1))
(b_lo Gpr (value_regs_get_gpr b 0))
(b_hi Gpr (value_regs_get_gpr b 1))
;; A single `CMP` of the high halves feeds two `SETcc`s: the first
;; uses the condition returned by `intcc_without_eq` (presumably `cc`
;; with its "or equal" part dropped -- confirm against the helper),
;; the second records whether the high halves are equal.
(cmp_hi ValueRegs (with_flags (x64_cmp (OperandSize.Size64) b_hi a_hi)
(consumes_flags_concat
(x64_setcc (intcc_without_eq cc))
(x64_setcc (CC.Z)))))
(cc_hi Reg (value_regs_get cmp_hi 0))
(eq_hi Reg (value_regs_get cmp_hi 1))
;; The low halves are compared using the condition returned by
;; `intcc_unsigned` for `cc`.
(cmp_lo Reg (with_flags_reg (x64_cmp (OperandSize.Size64) b_lo a_lo)
(x64_setcc (intcc_unsigned cc))))
;; a_hi == b_hi && a_lo <> b_lo
(res_lo Reg (x64_and $I64 eq_hi cmp_lo))
;; a_hi <> b_hi || (the term above)
(res Reg (x64_or $I64 cc_hi res_lo)))
;; Only bit 0 of `res` is tested; the condition handed back to the
;; caller is `NZ`.
(icmp_cond_result
(x64_test (OperandSize.Size64) (RegMemImm.Imm 1) res)
(CC.NZ))))
;; The outcome of lowering a float comparison: the flags producer to emit,
;; plus one or two condition codes and how they combine.
(type FcmpCondResult
(enum
;; The given condition code must be set.
(Condition (producer ProducesFlags) (cc CC))
;; Both condition codes must be set.
(AndCondition (producer ProducesFlags) (cc1 CC) (cc2 CC))
;; Either of the condition codes must be set.
(OrCondition (producer ProducesFlags) (cc1 CC) (cc2 CC))))
;; Turn the outcome of a float comparison into a boolean held in a register.
(decl lower_fcmp_bool (FcmpCondResult) ValueRegs)

;; One condition code: a single `SETcc` on the produced flags.
(rule (lower_fcmp_bool (FcmpCondResult.Condition flags cond))
      (let ((set ConsumesFlags (x64_setcc cond)))
        (with_flags flags set)))

;; Two condition codes that must both be set: `SETcc` each one from the same
;; flags, then AND the two results.
(rule (lower_fcmp_bool (FcmpCondResult.AndCondition flags first second))
      (let ((sets ConsumesFlags (consumes_flags_concat
                                  (x64_setcc first)
                                  (x64_setcc second)))
            (bits ValueRegs (with_flags flags sets))
            (bit0 Gpr (value_regs_get_gpr bits 0))
            (bit1 Gpr (value_regs_get_gpr bits 1)))
        (value_reg (x64_and $I8 bit0 bit1))))

;; Two condition codes of which one suffices: `SETcc` each one from the same
;; flags, then OR the two results.
(rule (lower_fcmp_bool (FcmpCondResult.OrCondition flags first second))
      (let ((sets ConsumesFlags (consumes_flags_concat
                                  (x64_setcc first)
                                  (x64_setcc second)))
            (bits ValueRegs (with_flags flags sets))
            (bit0 Gpr (value_regs_get_gpr bits 0))
            (bit1 Gpr (value_regs_get_gpr bits 1)))
        (value_reg (x64_or $I8 bit0 bit1))))
;; CLIF's `fcmp` instruction always operates on XMM registers--both scalar and
;; vector. For the scalar versions, we use the flag-setting behavior of the
;; `UCOMIS*` instruction to `SETcc` a 0 or 1 in a GPR register. Note that CLIF's
;; `select` uses the same kind of flag-setting behavior but chooses values other
;; than 0 or 1.
;;
;; Checking the result of `UCOMIS*` is unfortunately difficult in some cases
;; because we do not have `SETcc` instructions that explicitly check
;; simultaneously for the condition (i.e., `eq`, `le`, `gt`, etc.) *and*
;; orderedness. Instead, we must check the flags multiple times. The UCOMIS*
;; documentation (see Intel's Software Developer's Manual, volume 2, chapter 4)
;; is helpful:
;; - unordered assigns Z = 1, P = 1, C = 1
;; - greater than assigns Z = 0, P = 0, C = 0
;; - less than assigns Z = 0, P = 0, C = 1
;; - equal assigns Z = 1, P = 0, C = 0
;; Build the `UCOMIS*` comparison for a scalar float condition and return it,
;; with the condition code(s) to test, as an `FcmpCondResult`.
(decl emit_fcmp (FloatCC Value Value) FcmpCondResult)

;; `Equal` and `NotEqual` need two condition codes each.
(rule (emit_fcmp (FloatCC.Equal) lhs @ (value_type (ty_scalar_float _)) rhs)
      (FcmpCondResult.AndCondition (x64_ucomis rhs lhs) (CC.NP) (CC.Z)))

(rule (emit_fcmp (FloatCC.NotEqual) lhs @ (value_type (ty_scalar_float _)) rhs)
      (FcmpCondResult.OrCondition (x64_ucomis rhs lhs) (CC.P) (CC.NZ)))

;; These conditions map directly onto a single condition code.
(rule (emit_fcmp (FloatCC.Ordered) lhs @ (value_type (ty_scalar_float _)) rhs)
      (FcmpCondResult.Condition (x64_ucomis rhs lhs) (CC.NP)))
(rule (emit_fcmp (FloatCC.Unordered) lhs @ (value_type (ty_scalar_float _)) rhs)
      (FcmpCondResult.Condition (x64_ucomis rhs lhs) (CC.P)))
(rule (emit_fcmp (FloatCC.OrderedNotEqual) lhs @ (value_type (ty_scalar_float _)) rhs)
      (FcmpCondResult.Condition (x64_ucomis rhs lhs) (CC.NZ)))
(rule (emit_fcmp (FloatCC.UnorderedOrEqual) lhs @ (value_type (ty_scalar_float _)) rhs)
      (FcmpCondResult.Condition (x64_ucomis rhs lhs) (CC.Z)))
(rule (emit_fcmp (FloatCC.GreaterThan) lhs @ (value_type (ty_scalar_float _)) rhs)
      (FcmpCondResult.Condition (x64_ucomis rhs lhs) (CC.NBE)))
(rule (emit_fcmp (FloatCC.GreaterThanOrEqual) lhs @ (value_type (ty_scalar_float _)) rhs)
      (FcmpCondResult.Condition (x64_ucomis rhs lhs) (CC.NB)))
(rule (emit_fcmp (FloatCC.UnorderedOrLessThan) lhs @ (value_type (ty_scalar_float _)) rhs)
      (FcmpCondResult.Condition (x64_ucomis rhs lhs) (CC.B)))
(rule (emit_fcmp (FloatCC.UnorderedOrLessThanOrEqual) lhs @ (value_type (ty_scalar_float _)) rhs)
      (FcmpCondResult.Condition (x64_ucomis rhs lhs) (CC.BE)))

;; The remaining conditions swap the `UCOMIS*` operands and reuse the
;; condition code of the mirrored comparison.
(rule (emit_fcmp (FloatCC.LessThan) lhs @ (value_type (ty_scalar_float _)) rhs)
      ;; Mirrors `GreaterThan`.
      (FcmpCondResult.Condition (x64_ucomis lhs rhs) (CC.NBE)))
(rule (emit_fcmp (FloatCC.LessThanOrEqual) lhs @ (value_type (ty_scalar_float _)) rhs)
      ;; Mirrors `GreaterThanOrEqual`.
      (FcmpCondResult.Condition (x64_ucomis lhs rhs) (CC.NB)))
(rule (emit_fcmp (FloatCC.UnorderedOrGreaterThan) lhs @ (value_type (ty_scalar_float _)) rhs)
      ;; Mirrors `UnorderedOrLessThan`.
      (FcmpCondResult.Condition (x64_ucomis lhs rhs) (CC.B)))
(rule (emit_fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) lhs @ (value_type (ty_scalar_float _)) rhs)
      ;; Mirrors `UnorderedOrLessThanOrEqual`.
      (FcmpCondResult.Condition (x64_ucomis lhs rhs) (CC.BE)))
;;;; Atomics ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Atomics ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl x64_mfence () SideEffectNoResult) (decl x64_mfence () SideEffectNoResult)

View File

@@ -1476,6 +1476,44 @@ pub(crate) fn emit(
sink.bind_label(else_label); sink.bind_label(else_label);
} }
Inst::TrapIfAnd {
    cc1,
    cc2,
    trap_code,
} => {
    // The trap fires only when both conditions hold, so a single failing
    // condition is enough to branch past the trap instruction.
    let skip_trap = sink.get_label();
    one_way_jmp(sink, cc1.invert(), skip_trap);
    one_way_jmp(sink, cc2.invert(), skip_trap);

    // Fallthrough: both conditions held.
    Inst::trap(*trap_code).emit(&[], sink, info, state);

    sink.bind_label(skip_trap);
}
Inst::TrapIfOr {
    cc1,
    cc2,
    trap_code,
} => {
    let do_trap = sink.get_label();
    let skip_trap = sink.get_label();

    // When cc1 holds, branch straight to the trap. Otherwise the trap is
    // reached by fallthrough only if cc2 holds; if it does not, skip it.
    one_way_jmp(sink, *cc1, do_trap);
    one_way_jmp(sink, cc2.invert(), skip_trap);

    sink.bind_label(do_trap);
    Inst::trap(*trap_code).emit(&[], sink, info, state);

    sink.bind_label(skip_trap);
}
Inst::XmmUnaryRmR { Inst::XmmUnaryRmR {
op, op,
src: src_e, src: src_e,

View File

@@ -105,6 +105,8 @@ impl Inst {
| Inst::ShiftR { .. } | Inst::ShiftR { .. }
| Inst::SignExtendData { .. } | Inst::SignExtendData { .. }
| Inst::TrapIf { .. } | Inst::TrapIf { .. }
| Inst::TrapIfAnd { .. }
| Inst::TrapIfOr { .. }
| Inst::Ud2 { .. } | Inst::Ud2 { .. }
| Inst::VirtualSPOffsetAdj { .. } | Inst::VirtualSPOffsetAdj { .. }
| Inst::XmmCmove { .. } | Inst::XmmCmove { .. }
@@ -1664,6 +1666,34 @@ impl PrettyPrint for Inst {
format!("j{} ; ud2 {} ;", cc.invert().to_string(), trap_code) format!("j{} ; ud2 {} ;", cc.invert().to_string(), trap_code)
} }
Inst::TrapIfAnd {
    cc1,
    cc2,
    trap_code,
    ..
} => {
    // Note: the operands printed are the *inverted* condition codes.
    let first = cc1.invert().to_string();
    let second = cc2.invert().to_string();
    format!("trap_if_and {}, {}, {}", first, second, trap_code)
}
Inst::TrapIfOr {
    cc1,
    cc2,
    trap_code,
    ..
} => {
    // Note: `cc1` is printed as-is, while `cc2` is printed inverted.
    let first = cc1.to_string();
    let second = cc2.invert().to_string();
    format!("trap_if_or {}, {}, {}", first, second, trap_code)
}
Inst::LoadExtName { Inst::LoadExtName {
dst, name, offset, .. dst, name, offset, ..
} => { } => {
@@ -2146,6 +2176,8 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
| Inst::JmpCond { .. } | Inst::JmpCond { .. }
| Inst::Nop { .. } | Inst::Nop { .. }
| Inst::TrapIf { .. } | Inst::TrapIf { .. }
| Inst::TrapIfAnd { .. }
| Inst::TrapIfOr { .. }
| Inst::VirtualSPOffsetAdj { .. } | Inst::VirtualSPOffsetAdj { .. }
| Inst::Hlt | Inst::Hlt
| Inst::Ud2 { .. } | Inst::Ud2 { .. }

View File

@@ -1452,6 +1452,24 @@
(rule (lower (trap code)) (rule (lower (trap code))
(side_effect (x64_ud2 code))) (side_effect (x64_ud2 code)))
;;;; Rules for `trapif` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `trapif` fed directly by `iadd_ifcout`: no comparison is emitted.
;; The flags must not have been clobbered by any other instruction between the
;; iadd_ifcout and this instruction, as verified by the CLIF validator; so we
;; can simply use the flags here. `flags_to_producesflags` stands in for the
;; flags producer of the comparison result.
(rule (lower (trapif cc flags @ (iadd_ifcout _ _) tc))
(side_effect
(trap_if_icmp (icmp_cond_result (flags_to_producesflags flags) cc) tc)))
;; `trapif` fed by `ifcmp`: emit the integer comparison, then trap on its
;; condition. Verification ensures that the input is always a single-def
;; ifcmp.
(rule (lower (trapif cond (ifcmp x y) code))
      (let ((cmp IcmpCondResult (emit_cmp cond x y)))
        (side_effect (trap_if_icmp cmp code))))
;;;; Rules for `trapff` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `trapff` fed by `ffcmp`: emit the float comparison, then trap on the
;; resulting condition(s).
(rule (lower (trapff cond (ffcmp x y) code))
      (let ((cmp FcmpCondResult (emit_fcmp cond x y)))
        (side_effect (trap_if_fcmp cmp code))))
;;;; Rules for `resumable_trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `resumable_trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (resumable_trap code)) (rule (lower (resumable_trap code))
@@ -1475,12 +1493,11 @@
;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; For GPR-held values we only need to emit `CMP + SETCC`. We rely here on
;; Cranelift's verification that `a` and `b` are of the same type.
;; Unfortunately for clarity, the registers are flipped here (TODO).
(rule (lower (icmp cc a @ (value_type (fits_in_64 ty)) b)) (rule (lower (icmp cc a @ (value_type (fits_in_64 ty)) b))
(let ((size OperandSize (raw_operand_size_of_type ty))) (lower_icmp_bool (emit_cmp cc a b)))
(with_flags (x64_cmp size b a) (x64_setcc cc))))
;; I128 comparisons go through `emit_cmp`; the resulting condition is then
;; materialized as a boolean by `lower_icmp_bool`.
(rule (lower (icmp cc a @ (value_type $I128) b))
(lower_icmp_bool (emit_cmp cc a b)))
;; For XMM-held values, we lower to `PCMP*` instructions, sometimes more than ;; For XMM-held values, we lower to `PCMP*` instructions, sometimes more than
;; one. To note: what is different here about the output values is that each ;; one. To note: what is different here about the output values is that each
@@ -1552,61 +1569,6 @@
;; TODO: not used by WebAssembly translation ;; TODO: not used by WebAssembly translation
;; (rule (lower (icmp (IntCC.UnsignedLessThanOrEqual) a @ (value_type $I64X2) b)) ;; (rule (lower (icmp (IntCC.UnsignedLessThanOrEqual) a @ (value_type $I64X2) b))
;; For I128 values (held in two GPRs), the instruction sequences depend on what
;; kind of condition is tested.
(rule (lower (icmp (IntCC.Equal) a @ (value_type $I128) b))
(let ((a_lo Gpr (value_regs_get_gpr a 0))
(a_hi Gpr (value_regs_get_gpr a 1))
(b_lo Gpr (value_regs_get_gpr b 0))
(b_hi Gpr (value_regs_get_gpr b 1))
(cmp_lo Reg (with_flags_reg (x64_cmp (OperandSize.Size64) b_lo a_lo) (x64_setcc (CC.Z))))
(cmp_hi Reg (with_flags_reg (x64_cmp (OperandSize.Size64) b_hi a_hi) (x64_setcc (CC.Z))))
;; At this point, `cmp_lo` and `cmp_hi` contain either 0 or 1 in the
;; lowest 8 bits--`SETcc` guarantees this. The upper bits may be
;; unchanged so we must compare against 1 below; this instruction
;; combines `cmp_lo` and `cmp_hi` for that final comparison.
(cmp Reg (x64_and $I64 cmp_lo cmp_hi)))
;; We must compare one more time against the immediate value 1 to
;; check if both `cmp_lo` and `cmp_hi` are true. If `cmp AND 1 == 0`
;; then the `ZF` will be set (see `TEST` definition); if either of
;; the halves `AND`s to 0, they were not equal, therefore we `SETcc`
;; with `NZ`.
(with_flags (x64_test (OperandSize.Size64) (RegMemImm.Imm 1) cmp) (x64_setcc (CC.NZ)))))
(rule (lower (icmp (IntCC.NotEqual) a @ (value_type $I128) b))
(let ((a_lo Gpr (value_regs_get_gpr a 0))
(a_hi Gpr (value_regs_get_gpr a 1))
(b_lo Gpr (value_regs_get_gpr b 0))
(b_hi Gpr (value_regs_get_gpr b 1))
(cmp_lo Reg (with_flags_reg (x64_cmp (OperandSize.Size64) b_lo a_lo) (x64_setcc (CC.NZ))))
(cmp_hi Reg (with_flags_reg (x64_cmp (OperandSize.Size64) b_hi a_hi) (x64_setcc (CC.NZ))))
;; See comments for `IntCC.Equal`.
(cmp Reg (x64_or $I64 cmp_lo cmp_hi)))
(with_flags (x64_test (OperandSize.Size64) (RegMemImm.Imm 1) cmp) (x64_setcc (CC.NZ)))))
;; Result = (a_hi <> b_hi) ||
;; (a_hi == b_hi && a_lo <> b_lo)
(rule (lower (icmp cc a @ (value_type $I128) b))
(if (intcc_neq cc (IntCC.Equal)))
(if (intcc_neq cc (IntCC.NotEqual)))
(let ((a_lo Gpr (value_regs_get_gpr a 0))
(a_hi Gpr (value_regs_get_gpr a 1))
(b_lo Gpr (value_regs_get_gpr b 0))
(b_hi Gpr (value_regs_get_gpr b 1))
(cmp_hi ValueRegs (with_flags (x64_cmp (OperandSize.Size64) b_hi a_hi)
(consumes_flags_concat
(x64_setcc (intcc_without_eq cc))
(x64_setcc (CC.Z)))))
(cc_hi Reg (value_regs_get cmp_hi 0))
(eq_hi Reg (value_regs_get cmp_hi 1))
(cmp_lo Reg (with_flags_reg (x64_cmp (OperandSize.Size64) b_lo a_lo)
(x64_setcc (intcc_unsigned cc))))
(res_lo Reg (x64_and $I64 eq_hi cmp_lo))
(res Reg (x64_or $I64 cc_hi res_lo)))
(x64_and $I64 res (RegMemImm.Imm 1))))
;;;; Rules for `fcmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `fcmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1627,58 +1589,8 @@
;; - less than assigns Z = 0, P = 0, C = 1 ;; - less than assigns Z = 0, P = 0, C = 1
;; - equal assigns Z = 1, P = 0, C = 0 ;; - equal assigns Z = 1, P = 0, C = 0
(rule (lower (fcmp (FloatCC.Equal) a @ (value_type (ty_scalar_float ty)) b)) (rule (lower (fcmp cc a @ (value_type (ty_scalar_float ty)) b))
(let ((maybe ValueRegs (with_flags (x64_ucomis b a) (lower_fcmp_bool (emit_fcmp cc a b)))
(consumes_flags_concat
(x64_setcc (CC.NP))
(x64_setcc (CC.Z)))))
(maybe_np Gpr (value_regs_get_gpr maybe 0))
(maybe_z Gpr (value_regs_get_gpr maybe 1)))
(x64_and $I32 maybe_np maybe_z)))
(rule (lower (fcmp (FloatCC.NotEqual) a @ (value_type (ty_scalar_float ty)) b))
(let ((maybe ValueRegs (with_flags (x64_ucomis b a)
(consumes_flags_concat
(x64_setcc (CC.P))
(x64_setcc (CC.NZ)))))
(maybe_p Gpr (value_regs_get_gpr maybe 0))
(maybe_nz Gpr (value_regs_get_gpr maybe 1)))
(x64_or $I32 maybe_p maybe_nz)))
;; Some scalar lowerings correspond to one condition code.
(rule (lower (fcmp (FloatCC.Ordered) a @ (value_type (ty_scalar_float ty)) b))
(with_flags (x64_ucomis b a) (x64_setcc (CC.NP))))
(rule (lower (fcmp (FloatCC.Unordered) a @ (value_type (ty_scalar_float ty)) b))
(with_flags (x64_ucomis b a) (x64_setcc (CC.P))))
(rule (lower (fcmp (FloatCC.OrderedNotEqual) a @ (value_type (ty_scalar_float ty)) b))
(with_flags (x64_ucomis b a) (x64_setcc (CC.NZ))))
(rule (lower (fcmp (FloatCC.UnorderedOrEqual) a @ (value_type (ty_scalar_float ty)) b))
(with_flags (x64_ucomis b a) (x64_setcc (CC.Z))))
(rule (lower (fcmp (FloatCC.GreaterThan) a @ (value_type (ty_scalar_float ty)) b))
(with_flags (x64_ucomis b a) (x64_setcc (CC.NBE))))
(rule (lower (fcmp (FloatCC.GreaterThanOrEqual) a @ (value_type (ty_scalar_float ty)) b))
(with_flags (x64_ucomis b a) (x64_setcc (CC.NB))))
(rule (lower (fcmp (FloatCC.UnorderedOrLessThan) a @ (value_type (ty_scalar_float ty)) b))
(with_flags (x64_ucomis b a) (x64_setcc (CC.B))))
(rule (lower (fcmp (FloatCC.UnorderedOrLessThanOrEqual) a @ (value_type (ty_scalar_float ty)) b))
(with_flags (x64_ucomis b a) (x64_setcc (CC.BE))))
;; Other scalar lowerings are made possible by flipping the operands and
;; reversing the condition code.
(rule (lower (fcmp (FloatCC.LessThan) a @ (value_type (ty_scalar_float ty)) b))
;; Same flags as `GreaterThan`.
(with_flags (x64_ucomis a b) (x64_setcc (CC.NBE))))
(rule (lower (fcmp (FloatCC.LessThanOrEqual) a @ (value_type (ty_scalar_float ty)) b))
;; Same flags as `GreaterThanOrEqual`.
(with_flags (x64_ucomis a b) (x64_setcc (CC.NB))))
(rule (lower (fcmp (FloatCC.UnorderedOrGreaterThan) a @ (value_type (ty_scalar_float ty)) b))
;; Same flags as `UnorderedOrLessThan`.
(with_flags (x64_ucomis a b) (x64_setcc (CC.B))))
(rule (lower (fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) a @ (value_type (ty_scalar_float ty)) b))
;; Same flags as `UnorderedOrLessThanOrEqual`.
(with_flags (x64_ucomis a b) (x64_setcc (CC.BE))))
;; For vector lowerings, we use `CMPP*` instructions with a 3-bit operand that ;; For vector lowerings, we use `CMPP*` instructions with a 3-bit operand that
;; determines the comparison to make. Note that comparisons that succeed will ;; determines the comparison to make. Note that comparisons that succeed will

View File

@@ -926,65 +926,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::FallthroughReturn | Opcode::FallthroughReturn
| Opcode::Return | Opcode::Return
| Opcode::Call | Opcode::Call
| Opcode::CallIndirect => { | Opcode::CallIndirect
| Opcode::Trapif
| Opcode::Trapff => {
implemented_in_isle(ctx); implemented_in_isle(ctx);
} }
Opcode::Trapif | Opcode::Trapff => {
let trap_code = ctx.data(insn).trap_code().unwrap();
if matches_input(ctx, inputs[0], Opcode::IaddIfcout).is_some() {
let cond_code = ctx.data(insn).cond_code().unwrap();
// The flags must not have been clobbered by any other instruction between the
// iadd_ifcout and this instruction, as verified by the CLIF validator; so we can
// simply use the flags here.
let cc = CC::from_intcc(cond_code);
ctx.emit(Inst::TrapIf { trap_code, cc });
} else if op == Opcode::Trapif {
let cond_code = ctx.data(insn).cond_code().unwrap();
// Verification ensures that the input is always a single-def ifcmp.
let ifcmp = matches_input(ctx, inputs[0], Opcode::Ifcmp).unwrap();
let cond_code = emit_cmp(ctx, ifcmp, cond_code);
let cc = CC::from_intcc(cond_code);
ctx.emit(Inst::TrapIf { trap_code, cc });
} else {
let cond_code = ctx.data(insn).fp_cond_code().unwrap();
// Verification ensures that the input is always a single-def ffcmp.
let ffcmp = matches_input(ctx, inputs[0], Opcode::Ffcmp).unwrap();
match emit_fcmp(ctx, ffcmp, cond_code, FcmpSpec::Normal) {
FcmpCondResult::Condition(cc) => ctx.emit(Inst::TrapIf { trap_code, cc }),
FcmpCondResult::AndConditions(cc1, cc2) => {
// A bit unfortunate, but materialize the flags in their own register, and
// check against this.
let tmp = ctx.alloc_tmp(types::I32).only_reg().unwrap();
let tmp2 = ctx.alloc_tmp(types::I32).only_reg().unwrap();
ctx.emit(Inst::setcc(cc1, tmp));
ctx.emit(Inst::setcc(cc2, tmp2));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size32,
AluRmiROpcode::And,
RegMemImm::reg(tmp.to_reg()),
tmp2,
));
ctx.emit(Inst::TrapIf {
trap_code,
cc: CC::NZ,
});
}
FcmpCondResult::OrConditions(cc1, cc2) => {
ctx.emit(Inst::TrapIf { trap_code, cc: cc1 });
ctx.emit(Inst::TrapIf { trap_code, cc: cc2 });
}
FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(),
};
};
}
Opcode::FcvtFromSint => { Opcode::FcvtFromSint => {
let output_ty = ty.unwrap(); let output_ty = ty.unwrap();
if !output_ty.is_vector() { if !output_ty.is_vector() {

View File

@@ -144,6 +144,11 @@ macro_rules! isle_prelude_methods {
} }
} }
// Backs the ISLE `mark_value_used` constructor: forwards to the lowering
// context, which bumps the lowered-use count of `val`.
#[inline]
fn mark_value_used(&mut self, val: Value) {
self.lower_ctx.increment_lowered_uses(val);
}
#[inline] #[inline]
fn put_in_reg(&mut self, val: Value) -> Reg { fn put_in_reg(&mut self, val: Value) -> Reg {
self.lower_ctx.put_value_in_regs(val).only_reg().unwrap() self.lower_ctx.put_value_in_regs(val).only_reg().unwrap()

View File

@@ -141,6 +141,8 @@ pub trait LowerCtx {
/// Resolves a particular input of an instruction to the `Value` that it is /// Resolves a particular input of an instruction to the `Value` that it is
/// represented with. /// represented with.
fn input_as_value(&self, ir_inst: Inst, idx: usize) -> Value; fn input_as_value(&self, ir_inst: Inst, idx: usize) -> Value;
/// Increment the lowered-use count for `val`, ensuring that the value gets
/// lowered.
fn increment_lowered_uses(&mut self, val: Value);
/// Put the `idx`th input into register(s) and return the assigned register. /// Put the `idx`th input into register(s) and return the assigned register.
fn put_input_in_regs(&mut self, ir_inst: Inst, idx: usize) -> ValueRegs<Reg>; fn put_input_in_regs(&mut self, ir_inst: Inst, idx: usize) -> ValueRegs<Reg>;
/// Put the given value into register(s) and return the assigned register. /// Put the given value into register(s) and return the assigned register.
@@ -1362,6 +1364,10 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
NonRegInput { inst, constant } NonRegInput { inst, constant }
} }
fn increment_lowered_uses(&mut self, val: Value) {
    // Record one more use of `val` in the per-value use counts.
    let uses = &mut self.value_lowered_uses[val];
    *uses += 1;
}
fn put_input_in_regs(&mut self, ir_inst: Inst, idx: usize) -> ValueRegs<Reg> { fn put_input_in_regs(&mut self, ir_inst: Inst, idx: usize) -> ValueRegs<Reg> {
let val = self.f.dfg.inst_args(ir_inst)[idx]; let val = self.f.dfg.inst_args(ir_inst)[idx];
self.put_value_in_regs(val) self.put_value_in_regs(val)

View File

@@ -147,6 +147,10 @@
(decl valid_reg () Reg) (decl valid_reg () Reg)
(extern extractor valid_reg valid_reg) (extern extractor valid_reg valid_reg)
;; Mark this value as used, to ensure that it gets lowered. This is an
;; external constructor called only for its side effect; it returns `Unit`.
(decl mark_value_used (Value) Unit)
(extern constructor mark_value_used mark_value_used)
;; Put the given value into a register. ;; Put the given value into a register.
;; ;;
;; Asserts that the value fits into a single register, and doesn't require ;; Asserts that the value fits into a single register, and doesn't require
@@ -563,6 +567,11 @@
;; Variant determines how result is given when combined with a ;; Variant determines how result is given when combined with a
;; ConsumesFlags. See `with_flags` below for more. ;; ConsumesFlags. See `with_flags` below for more.
(type ProducesFlags (enum (type ProducesFlags (enum
;; For cases where the flags have been produced by another
;; instruction, and we have out-of-band reasons to know
;; that they won't be clobbered by the time we depend on
;; them. This variant carries no instruction of its own.
(AlreadyExistingFlags)
(ProducesFlagsSideEffect (inst MInst)) (ProducesFlagsSideEffect (inst MInst))
;; Not directly combinable with a ConsumesFlags; ;; Not directly combinable with a ConsumesFlags;
;; used in s390x and unwrapped directly by `trapif`. ;; used in s390x and unwrapped directly by `trapif`.
@@ -574,6 +583,7 @@
;; Variant determines how result is given when combined with a ;; Variant determines how result is given when combined with a
;; ProducesFlags. See `with_flags` below for more. ;; ProducesFlags. See `with_flags` below for more.
(type ConsumesFlags (enum (type ConsumesFlags (enum
;; A flags consumer that carries an instruction but no result register.
(ConsumesFlagsSideEffect (inst MInst))
(ConsumesFlagsReturnsResultWithProducer (inst MInst) (result Reg)) (ConsumesFlagsReturnsResultWithProducer (inst MInst) (result Reg))
(ConsumesFlagsReturnsReg (inst MInst) (result Reg)) (ConsumesFlagsReturnsReg (inst MInst) (result Reg))
(ConsumesFlagsTwiceReturnsValueRegs (inst1 MInst) (ConsumesFlagsTwiceReturnsValueRegs (inst1 MInst)
@@ -667,6 +677,30 @@
(let ((v ValueRegs (with_flags p c))) (let ((v ValueRegs (with_flags p c)))
(value_regs_get v 0))) (value_regs_get v 0)))
;; Indicate that the current state of the flags register from the instruction
;; that produces this Value is relied on.
;;
;; The value is first marked as used (so that its producer gets lowered); the
;; result is `AlreadyExistingFlags`, which carries no instruction of its own.
(decl flags_to_producesflags (Value) ProducesFlags)
(rule (flags_to_producesflags val)
(let ((_ Unit (mark_value_used val)))
(ProducesFlags.AlreadyExistingFlags)))
;; Combine a flags-producing instruction and a flags-consuming instruction that
;; produces no results.
;;
;; This function handles the following cases only:
;; - AlreadyExistingFlags + ConsumesFlagsSideEffect
;; - ProducesFlagsSideEffect + ConsumesFlagsSideEffect
(decl with_flags_side_effect (ProducesFlags ConsumesFlags) SideEffectNoResult)
;; The flags already exist, so only the consumer instruction is returned.
(rule (with_flags_side_effect
(ProducesFlags.AlreadyExistingFlags)
(ConsumesFlags.ConsumesFlagsSideEffect c))
(SideEffectNoResult.Inst c))
;; Otherwise the producer and the consumer are returned as a pair, producer
;; first.
(rule (with_flags_side_effect
(ProducesFlags.ProducesFlagsSideEffect p)
(ConsumesFlags.ConsumesFlagsSideEffect c))
(SideEffectNoResult.Inst2 p c))
;;;; Helpers for Working with TrapCode ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Helpers for Working with TrapCode ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl trap_code_division_by_zero () TrapCode) (decl trap_code_division_by_zero () TrapCode)

View File

@@ -208,37 +208,41 @@ block0(v0: i128, v1: i128):
; setnz %r8b ; setnz %r8b
; movq %r8, rsp(0 + virtual offset) ; movq %r8, rsp(0 + virtual offset)
; cmpq %rcx, %rsi ; cmpq %rcx, %rsi
; setl %r8b ; setl %r10b
; setz %r10b ; setz %r11b
; cmpq %rdx, %rdi ; cmpq %rdx, %rdi
; setb %r11b ; setb %r9b
; andq %r10, %r11, %r10 ; andq %r11, %r9, %r11
; orq %r8, %r10, %r8 ; orq %r10, %r11, %r10
; andq %r8, $1, %r8 ; testq $1, %r10
; setnz %r9b
; cmpq %rcx, %rsi ; cmpq %rcx, %rsi
; setl %r10b ; setl %r10b
; setz %r11b ; setz %r11b
; cmpq %rdx, %rdi ; cmpq %rdx, %rdi
; setbe %r13b ; setbe %r14b
; andq %r11, %r13, %r11 ; andq %r11, %r14, %r11
; orq %r10, %r11, %r10 ; orq %r10, %r11, %r10
; andq %r10, $1, %r10 ; testq $1, %r10
; setnz %r10b
; cmpq %rcx, %rsi ; cmpq %rcx, %rsi
; setnle %r11b ; setnle %r11b
; setz %r14b
; cmpq %rdx, %rdi
; setnbe %r15b
; andq %r14, %r15, %r14
; orq %r11, %r14, %r11
; andq %r11, $1, %r11
; cmpq %rcx, %rsi
; setnle %r12b
; setz %bl ; setz %bl
; cmpq %rdx, %rdi ; cmpq %rdx, %rdi
; setnb %r13b ; setnbe %r12b
; andq %rbx, %r13, %rbx ; andq %rbx, %r12, %rbx
; orq %r12, %rbx, %r12 ; orq %r11, %rbx, %r11
; andq %r12, $1, %r12 ; testq $1, %r11
; setnz %r11b
; cmpq %rcx, %rsi
; setnle %r14b
; setz %r15b
; cmpq %rdx, %rdi
; setnb %bl
; andq %r15, %rbx, %r15
; orq %r14, %r15, %r14
; testq $1, %r14
; setnz %r12b
; cmpq %rcx, %rsi ; cmpq %rcx, %rsi
; setb %r13b ; setb %r13b
; setz %r14b ; setz %r14b
@@ -246,41 +250,45 @@ block0(v0: i128, v1: i128):
; setb %r15b ; setb %r15b
; andq %r14, %r15, %r14 ; andq %r14, %r15, %r14
; orq %r13, %r14, %r13 ; orq %r13, %r14, %r13
; andq %r13, $1, %r13 ; testq $1, %r13
; setnz %r13b
; cmpq %rcx, %rsi ; cmpq %rcx, %rsi
; setb %bl ; setb %r15b
; setz %r15b ; setz %bl
; cmpq %rdx, %rdi ; cmpq %rdx, %rdi
; setbe %r14b ; setbe %r14b
; andq %r15, %r14, %r15 ; andq %rbx, %r14, %rbx
; orq %rbx, %r15, %rbx ; orq %r15, %rbx, %r15
; andq %rbx, $1, %rbx ; testq $1, %r15
; setnz %r14b
; cmpq %rcx, %rsi ; cmpq %rcx, %rsi
; setnbe %r14b ; setnbe %r15b
; setz %r15b ; setz %bl
; cmpq %rdx, %rdi ; cmpq %rdx, %rdi
; setnbe %r9b ; setnbe %r8b
; andq %r15, %r9, %r15 ; andq %rbx, %r8, %rbx
; orq %r14, %r15, %r14 ; orq %r15, %rbx, %r15
; andq %r14, $1, %r14 ; testq $1, %r15
; setnz %r15b
; cmpq %rcx, %rsi ; cmpq %rcx, %rsi
; setnbe %sil ; setnbe %cl
; setz %cl ; setz %sil
; cmpq %rdx, %rdi ; cmpq %rdx, %rdi
; setnb %dl ; setnb %dl
; andq %rcx, %rdx, %rcx ; andq %rsi, %rdx, %rsi
; orq %rsi, %rcx, %rsi ; orq %rcx, %rsi, %rcx
; andq %rsi, $1, %rsi ; testq $1, %rcx
; movq rsp(0 + virtual offset), %r9 ; setnz %sil
; andl %eax, %r9d, %eax ; movq rsp(0 + virtual offset), %rdx
; andl %r8d, %r10d, %r8d ; andl %eax, %edx, %eax
; andl %r9d, %r10d, %r9d
; andl %r11d, %r12d, %r11d ; andl %r11d, %r12d, %r11d
; andl %r13d, %ebx, %r13d ; andl %r13d, %r14d, %r13d
; andl %r14d, %esi, %r14d ; andl %r15d, %esi, %r15d
; andl %eax, %r8d, %eax ; andl %eax, %r9d, %eax
; andl %r11d, %r13d, %r11d ; andl %r11d, %r13d, %r11d
; andl %eax, %r11d, %eax ; andl %eax, %r11d, %eax
; andl %eax, %r14d, %eax ; andl %eax, %r15d, %eax
; movq 16(%rsp), %rbx ; movq 16(%rsp), %rbx
; movq 24(%rsp), %r12 ; movq 24(%rsp), %r12
; movq 32(%rsp), %r13 ; movq 32(%rsp), %r13

View File

@@ -0,0 +1,30 @@
test compile precise-output
target x86_64
function %trap() {
block0:
trap user0
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; ud2 user0
function %trap_iadd_ifcout(i64, i64) {
block0(v0: i64, v1: i64):
v2, v3 = iadd_ifcout v0, v1
trapif of v3, user0
return
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; addq %rdi, %rsi, %rdi
; jno ; ud2 user0 ;
; movq %rbp, %rsp
; popq %rbp
; ret