x64: Migrate brff and I128 branching instructions to ISLE (#4599)
https://github.com/bytecodealliance/wasmtime/pull/4599
This commit is contained in:
@@ -548,9 +548,7 @@
|
||||
And
|
||||
Or
|
||||
Xor
|
||||
Mul
|
||||
And8
|
||||
Or8))
|
||||
Mul))
|
||||
|
||||
(type UnaryRmROpcode extern
|
||||
(enum Bsr
|
||||
@@ -1074,6 +1072,13 @@
|
||||
(decl cc_invert (CC) CC)
|
||||
(extern constructor cc_invert cc_invert)
|
||||
|
||||
(decl floatcc_inverse (FloatCC) FloatCC)
|
||||
(extern constructor floatcc_inverse floatcc_inverse)
|
||||
|
||||
;; Matches only when the argument is either CC.NZ or CC.Z; any other CC fails to match.
|
||||
(decl cc_nz_or_z (CC) CC)
|
||||
(extern extractor cc_nz_or_z cc_nz_or_z)
|
||||
|
||||
(type AvxOpcode extern
|
||||
(enum Vfmadd213ps
|
||||
Vfmadd213pd))
|
||||
@@ -3060,6 +3065,10 @@
|
||||
(rule (jmp_known target)
|
||||
(SideEffectNoResult.Inst (MInst.JmpKnown target)))
|
||||
|
||||
;; Single-target conditional jump: builds an `MInst.JmpIf` on `cc` to `target`
;; and wraps it as a flags-consuming side effect.
(decl jmp_if (CC MachLabel) ConsumesFlags)
(rule (jmp_if cc target)
      (ConsumesFlags.ConsumesFlagsSideEffect (MInst.JmpIf cc target)))
|
||||
|
||||
;; Conditional jump based on the condition code.
|
||||
(decl jmp_cond (CC MachLabel MachLabel) ConsumesFlags)
|
||||
(rule (jmp_cond cc taken not_taken)
|
||||
@@ -3070,6 +3079,21 @@
|
||||
(rule (jmp_cond_icmp (IcmpCondResult.Condition producer cc) taken not_taken)
|
||||
(with_flags_side_effect producer (jmp_cond cc taken not_taken)))
|
||||
|
||||
;; Two-way branch on the result of an fcmp.
;;
;; * `Condition`: one condition code decides between `if_true` and `if_false`.
;; * `AndCondition`: jump to `if_false` when the inverse of the first code
;;   holds; otherwise the final two-way jump goes to `if_false` when the
;;   inverse of the second code holds and to `if_true` otherwise.
;; * `OrCondition`: jump to `if_true` when the first code holds; otherwise the
;;   final two-way jump tests the second code.
(decl jmp_cond_fcmp (FcmpCondResult MachLabel MachLabel) SideEffectNoResult)
(rule (jmp_cond_fcmp (FcmpCondResult.Condition flags cond) if_true if_false)
      (with_flags_side_effect flags (jmp_cond cond if_true if_false)))
(rule (jmp_cond_fcmp (FcmpCondResult.AndCondition flags first second) if_true if_false)
      (with_flags_side_effect flags
        (consumes_flags_concat
          (jmp_if (cc_invert first) if_false)
          (jmp_cond (cc_invert second) if_false if_true))))
(rule (jmp_cond_fcmp (FcmpCondResult.OrCondition flags first second) if_true if_false)
      (with_flags_side_effect flags
        (consumes_flags_concat
          (jmp_if first if_true)
          (jmp_cond second if_true if_false))))
|
||||
|
||||
;;;; Comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(type IcmpCondResult (enum (Condition (producer ProducesFlags) (cc CC))))
|
||||
|
||||
@@ -701,12 +701,6 @@ pub enum AluRmiROpcode {
|
||||
Xor,
|
||||
/// The signless, non-extending (N x N -> N, for N in {32,64}) variant.
|
||||
Mul,
|
||||
/// 8-bit form of And. Handled separately as we don't have full 8-bit op
|
||||
/// support (we just use wider instructions). Used only with some sequences
|
||||
/// with SETcc.
|
||||
And8,
|
||||
/// 8-bit form of Or.
|
||||
Or8,
|
||||
}
|
||||
|
||||
impl fmt::Debug for AluRmiROpcode {
|
||||
@@ -720,8 +714,6 @@ impl fmt::Debug for AluRmiROpcode {
|
||||
AluRmiROpcode::Or => "or",
|
||||
AluRmiROpcode::Xor => "xor",
|
||||
AluRmiROpcode::Mul => "imul",
|
||||
AluRmiROpcode::And8 => "and",
|
||||
AluRmiROpcode::Or8 => "or",
|
||||
};
|
||||
write!(fmt, "{}", name)
|
||||
}
|
||||
@@ -733,16 +725,6 @@ impl fmt::Display for AluRmiROpcode {
|
||||
}
|
||||
}
|
||||
|
||||
impl AluRmiROpcode {
|
||||
/// Is this a special-cased 8-bit ALU op?
|
||||
pub fn is_8bit(self) -> bool {
|
||||
match self {
|
||||
AluRmiROpcode::And8 | AluRmiROpcode::Or8 => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub enum UnaryRmROpcode {
|
||||
/// Bit-scan reverse.
|
||||
@@ -1704,32 +1686,6 @@ impl CC {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn from_floatcc(floatcc: FloatCC) -> Self {
|
||||
match floatcc {
|
||||
FloatCC::Ordered => CC::NP,
|
||||
FloatCC::Unordered => CC::P,
|
||||
// Alias for NE
|
||||
FloatCC::OrderedNotEqual => CC::NZ,
|
||||
// Alias for E
|
||||
FloatCC::UnorderedOrEqual => CC::Z,
|
||||
// Alias for A
|
||||
FloatCC::GreaterThan => CC::NBE,
|
||||
// Alias for AE
|
||||
FloatCC::GreaterThanOrEqual => CC::NB,
|
||||
FloatCC::UnorderedOrLessThan => CC::B,
|
||||
FloatCC::UnorderedOrLessThanOrEqual => CC::BE,
|
||||
FloatCC::Equal
|
||||
| FloatCC::NotEqual
|
||||
| FloatCC::LessThan
|
||||
| FloatCC::LessThanOrEqual
|
||||
| FloatCC::UnorderedOrGreaterThan
|
||||
| FloatCC::UnorderedOrGreaterThanOrEqual => panic!(
|
||||
"{:?} can't be lowered to a CC code; treat as special case.",
|
||||
floatcc
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn get_enc(self) -> u8 {
|
||||
self as u8
|
||||
}
|
||||
|
||||
@@ -158,7 +158,7 @@ pub(crate) fn emit(
|
||||
(reg_g, src2)
|
||||
};
|
||||
|
||||
let mut rex = RexFlags::from(*size);
|
||||
let rex = RexFlags::from(*size);
|
||||
if *op == AluRmiROpcode::Mul {
|
||||
// We kinda freeloaded Mul into RMI_R_Op, but it doesn't fit the usual pattern, so
|
||||
// we have to special-case it.
|
||||
@@ -191,26 +191,19 @@ pub(crate) fn emit(
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let (opcode_r, opcode_m, subopcode_i, is_8bit) = match op {
|
||||
AluRmiROpcode::Add => (0x01, 0x03, 0, false),
|
||||
AluRmiROpcode::Adc => (0x11, 0x03, 0, false),
|
||||
AluRmiROpcode::Sub => (0x29, 0x2B, 5, false),
|
||||
AluRmiROpcode::Sbb => (0x19, 0x2B, 5, false),
|
||||
AluRmiROpcode::And => (0x21, 0x23, 4, false),
|
||||
AluRmiROpcode::Or => (0x09, 0x0B, 1, false),
|
||||
AluRmiROpcode::Xor => (0x31, 0x33, 6, false),
|
||||
AluRmiROpcode::And8 => (0x20, 0x22, 4, true),
|
||||
AluRmiROpcode::Or8 => (0x08, 0x0A, 1, true),
|
||||
let (opcode_r, opcode_m, subopcode_i) = match op {
|
||||
AluRmiROpcode::Add => (0x01, 0x03, 0),
|
||||
AluRmiROpcode::Adc => (0x11, 0x03, 0),
|
||||
AluRmiROpcode::Sub => (0x29, 0x2B, 5),
|
||||
AluRmiROpcode::Sbb => (0x19, 0x2B, 5),
|
||||
AluRmiROpcode::And => (0x21, 0x23, 4),
|
||||
AluRmiROpcode::Or => (0x09, 0x0B, 1),
|
||||
AluRmiROpcode::Xor => (0x31, 0x33, 6),
|
||||
AluRmiROpcode::Mul => panic!("unreachable"),
|
||||
};
|
||||
assert!(!(is_8bit && *size == OperandSize::Size64));
|
||||
|
||||
match src2 {
|
||||
RegMemImm::Reg { reg: reg_e } => {
|
||||
if is_8bit {
|
||||
rex.always_emit_if_8bit_needed(reg_e);
|
||||
rex.always_emit_if_8bit_needed(reg_g);
|
||||
}
|
||||
// GCC/llvm use the swapped operand encoding (viz., the R/RM vs RM/R
|
||||
// duality). Do this too, so as to be able to compare generated machine
|
||||
// code easily.
|
||||
@@ -227,9 +220,6 @@ pub(crate) fn emit(
|
||||
|
||||
RegMemImm::Mem { addr } => {
|
||||
let amode = addr.finalize(state, sink);
|
||||
if is_8bit {
|
||||
rex.always_emit_if_8bit_needed(reg_g);
|
||||
}
|
||||
// Here we revert to the "normal" G-E ordering.
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
@@ -245,7 +235,6 @@ pub(crate) fn emit(
|
||||
}
|
||||
|
||||
RegMemImm::Imm { simm32 } => {
|
||||
assert!(!is_8bit);
|
||||
let use_imm8 = low8_will_sign_extend_to_32(simm32);
|
||||
let opcode = if use_imm8 { 0x83 } else { 0x81 };
|
||||
// And also here we use the "normal" G-E ordering.
|
||||
|
||||
@@ -1258,86 +1258,6 @@ fn test_x64_emit() {
|
||||
"4C09FA",
|
||||
"orq %rdx, %r15, %rdx",
|
||||
));
|
||||
insns.push((
|
||||
Inst::alu_rmi_r(
|
||||
OperandSize::Size32,
|
||||
AluRmiROpcode::And8,
|
||||
RegMemImm::reg(r15),
|
||||
w_rdx,
|
||||
),
|
||||
"4420FA",
|
||||
"andb %dl, %r15b, %dl",
|
||||
));
|
||||
insns.push((
|
||||
Inst::alu_rmi_r(
|
||||
OperandSize::Size32,
|
||||
AluRmiROpcode::And8,
|
||||
RegMemImm::reg(rax),
|
||||
w_rsi,
|
||||
),
|
||||
"4020C6",
|
||||
"andb %sil, %al, %sil",
|
||||
));
|
||||
insns.push((
|
||||
Inst::alu_rmi_r(
|
||||
OperandSize::Size32,
|
||||
AluRmiROpcode::And8,
|
||||
RegMemImm::reg(rax),
|
||||
w_rbx,
|
||||
),
|
||||
"20C3",
|
||||
"andb %bl, %al, %bl",
|
||||
));
|
||||
insns.push((
|
||||
Inst::alu_rmi_r(
|
||||
OperandSize::Size32,
|
||||
AluRmiROpcode::And8,
|
||||
RegMemImm::mem(Amode::imm_reg(0, rax)),
|
||||
w_rbx,
|
||||
),
|
||||
"2218",
|
||||
"andb %bl, 0(%rax), %bl",
|
||||
));
|
||||
insns.push((
|
||||
Inst::alu_rmi_r(
|
||||
OperandSize::Size32,
|
||||
AluRmiROpcode::Or8,
|
||||
RegMemImm::reg(r15),
|
||||
w_rdx,
|
||||
),
|
||||
"4408FA",
|
||||
"orb %dl, %r15b, %dl",
|
||||
));
|
||||
insns.push((
|
||||
Inst::alu_rmi_r(
|
||||
OperandSize::Size32,
|
||||
AluRmiROpcode::Or8,
|
||||
RegMemImm::reg(rax),
|
||||
w_rsi,
|
||||
),
|
||||
"4008C6",
|
||||
"orb %sil, %al, %sil",
|
||||
));
|
||||
insns.push((
|
||||
Inst::alu_rmi_r(
|
||||
OperandSize::Size32,
|
||||
AluRmiROpcode::Or8,
|
||||
RegMemImm::reg(rax),
|
||||
w_rbx,
|
||||
),
|
||||
"08C3",
|
||||
"orb %bl, %al, %bl",
|
||||
));
|
||||
insns.push((
|
||||
Inst::alu_rmi_r(
|
||||
OperandSize::Size32,
|
||||
AluRmiROpcode::Or8,
|
||||
RegMemImm::mem(Amode::imm_reg(0, rax)),
|
||||
w_rbx,
|
||||
),
|
||||
"0A18",
|
||||
"orb %bl, 0(%rax), %bl",
|
||||
));
|
||||
insns.push((
|
||||
Inst::alu_rmi_r(
|
||||
OperandSize::Size64,
|
||||
|
||||
@@ -729,10 +729,6 @@ impl Inst {
|
||||
Inst::JmpKnown { dst }
|
||||
}
|
||||
|
||||
pub(crate) fn jmp_if(cc: CC, taken: MachLabel) -> Inst {
|
||||
Inst::JmpIf { cc, taken }
|
||||
}
|
||||
|
||||
pub(crate) fn jmp_cond(cc: CC, taken: MachLabel, not_taken: MachLabel) -> Inst {
|
||||
Inst::JmpCond {
|
||||
cc,
|
||||
@@ -892,23 +888,15 @@ impl PrettyPrint for Inst {
|
||||
.to_string()
|
||||
}
|
||||
|
||||
fn suffix_lqb(size: OperandSize, is_8: bool) -> String {
|
||||
match (size, is_8) {
|
||||
(_, true) => "b",
|
||||
(OperandSize::Size32, false) => "l",
|
||||
(OperandSize::Size64, false) => "q",
|
||||
fn suffix_lqb(size: OperandSize) -> String {
|
||||
match size {
|
||||
OperandSize::Size32 => "l",
|
||||
OperandSize::Size64 => "q",
|
||||
_ => unreachable!(),
|
||||
}
|
||||
.to_string()
|
||||
}
|
||||
|
||||
fn size_lqb(size: OperandSize, is_8: bool) -> u8 {
|
||||
if is_8 {
|
||||
return 1;
|
||||
}
|
||||
size.to_bytes()
|
||||
}
|
||||
|
||||
fn suffix_bwlq(size: OperandSize) -> String {
|
||||
match size {
|
||||
OperandSize::Size8 => "b".to_string(),
|
||||
@@ -922,11 +910,10 @@ impl PrettyPrint for Inst {
|
||||
Inst::Nop { len } => format!("{} len={}", ljustify("nop".to_string()), len),
|
||||
|
||||
Inst::AluRmiR { size, op, dst, .. } if self.produces_const() => {
|
||||
let dst =
|
||||
pretty_print_reg(dst.to_reg().to_reg(), size_lqb(*size, op.is_8bit()), allocs);
|
||||
let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs);
|
||||
format!(
|
||||
"{} {}, {}, {}",
|
||||
ljustify2(op.to_string(), suffix_lqb(*size, op.is_8bit())),
|
||||
ljustify2(op.to_string(), suffix_lqb(*size)),
|
||||
dst,
|
||||
dst,
|
||||
dst
|
||||
@@ -939,13 +926,13 @@ impl PrettyPrint for Inst {
|
||||
src2,
|
||||
dst,
|
||||
} => {
|
||||
let size_bytes = size_lqb(*size, op.is_8bit());
|
||||
let size_bytes = size.to_bytes();
|
||||
let src1 = pretty_print_reg(src1.to_reg(), size_bytes, allocs);
|
||||
let dst = pretty_print_reg(dst.to_reg().to_reg(), size_bytes, allocs);
|
||||
let src2 = src2.pretty_print(size_bytes, allocs);
|
||||
format!(
|
||||
"{} {}, {}, {}",
|
||||
ljustify2(op.to_string(), suffix_lqb(*size, op.is_8bit())),
|
||||
ljustify2(op.to_string(), suffix_lqb(*size)),
|
||||
src1,
|
||||
src2,
|
||||
dst
|
||||
@@ -957,12 +944,12 @@ impl PrettyPrint for Inst {
|
||||
src1_dst,
|
||||
src2,
|
||||
} => {
|
||||
let size_bytes = size_lqb(*size, op.is_8bit());
|
||||
let size_bytes = size.to_bytes();
|
||||
let src2 = pretty_print_reg(src2.to_reg(), size_bytes, allocs);
|
||||
let src1_dst = src1_dst.pretty_print(size_bytes, allocs);
|
||||
format!(
|
||||
"{} {}, {}",
|
||||
ljustify2(op.to_string(), suffix_lqb(*size, op.is_8bit())),
|
||||
ljustify2(op.to_string(), suffix_lqb(*size)),
|
||||
src2,
|
||||
src1_dst,
|
||||
)
|
||||
|
||||
@@ -2872,17 +2872,49 @@
|
||||
(rule (lower_branch (brif cc (ifcmp a b) _ _) (two_targets taken not_taken))
|
||||
(side_effect (jmp_cond_icmp (emit_cmp cc a b) taken not_taken)))
|
||||
|
||||
;; Rules for `brz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Rules for `brff` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower_branch (brff cc (ffcmp a b) _ _) (two_targets taken not_taken))
|
||||
(side_effect (jmp_cond_fcmp (emit_fcmp cc a b) taken not_taken)))
|
||||
|
||||
;; Rules for `brz` and `brnz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower_branch (brz (icmp cc a b) _ _) (two_targets taken not_taken))
|
||||
(let ((cmp IcmpCondResult (invert_icmp_cond_result (emit_cmp cc a b))))
|
||||
(side_effect (jmp_cond_icmp cmp taken not_taken))))
|
||||
|
||||
;; Rules for `brnz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
(rule (lower_branch (brz (fcmp cc a b) _ _) (two_targets taken not_taken))
|
||||
(let ((cmp FcmpCondResult (emit_fcmp (floatcc_inverse cc) a b)))
|
||||
(side_effect (jmp_cond_fcmp cmp taken not_taken))))
|
||||
|
||||
(rule (lower_branch (brz val @ (value_type $I128) _ _) (two_targets taken not_taken))
|
||||
(side_effect (jmp_cond_icmp (cmp_zero_i128 (CC.NZ) val) taken not_taken)))
|
||||
|
||||
(rule (lower_branch (brnz (icmp cc a b) _ _) (two_targets taken not_taken))
|
||||
(side_effect (jmp_cond_icmp (emit_cmp cc a b) taken not_taken)))
|
||||
|
||||
(rule (lower_branch (brnz (fcmp cc a b) _ _) (two_targets taken not_taken))
|
||||
(let ((cmp FcmpCondResult (emit_fcmp cc a b)))
|
||||
(side_effect (jmp_cond_fcmp cmp taken not_taken))))
|
||||
|
||||
(rule (lower_branch (brnz val @ (value_type $I128) _ _) (two_targets taken not_taken))
|
||||
(side_effect (jmp_cond_icmp (cmp_zero_i128 (CC.Z) val) taken not_taken)))
|
||||
|
||||
;; Compare an I128 value to zero, returning a flags result suitable for making a
;; jump decision. Each 64-bit half is compared against 0 and `setcc` on CC.Z
;; records whether it was zero; the two results are then combined with an
;; 8-bit `test`, i.e. the comparison is `(hi == 0) && (low == 0)`.
;;
;; The result can be interpreted as follows:
;; * CC.Z indicates that the value was non-zero, as one or both of the halves
;;   of the value were non-zero.
;; * CC.NZ indicates that both halves of the value were 0.
;;
;; The rule only matches when the requested condition is CC.Z or CC.NZ.
(decl cmp_zero_i128 (CC ValueRegs) IcmpCondResult)
(rule (cmp_zero_i128 (cc_nz_or_z cond) value)
      (let ((low Gpr (value_regs_get_gpr value 0))
            (high Gpr (value_regs_get_gpr value 1))
            (low_is_zero Gpr
              (with_flags_reg
                (x64_cmp (OperandSize.Size64) (RegMemImm.Imm 0) low)
                (x64_setcc (CC.Z))))
            (high_is_zero Gpr
              (with_flags_reg
                (x64_cmp (OperandSize.Size64) (RegMemImm.Imm 0) high)
                (x64_setcc (CC.Z)))))
        (icmp_cond_result
          (x64_test (OperandSize.Size8) low_is_zero high_is_zero)
          cond)))
|
||||
|
||||
;; Rules for `bricmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ pub(super) mod isle;
|
||||
|
||||
use crate::data_value::DataValue;
|
||||
use crate::ir::{
|
||||
condcodes::{CondCode, FloatCC, IntCC},
|
||||
condcodes::{FloatCC, IntCC},
|
||||
types, ExternalName, Inst as IRInst, InstructionData, LibCall, Opcode, Type,
|
||||
};
|
||||
use crate::isa::x64::abi::*;
|
||||
@@ -478,100 +478,6 @@ fn emit_cmp<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst, cc: IntCC) -> IntC
|
||||
}
|
||||
}
|
||||
|
||||
/// A specification for a fcmp emission.
|
||||
enum FcmpSpec {
|
||||
/// Normal flow.
|
||||
Normal,
|
||||
|
||||
/// Avoid emitting Equal at all costs by inverting it to NotEqual, and indicate when that
|
||||
/// happens with `InvertedEqualOrConditions`.
|
||||
///
|
||||
/// This is useful in contexts where it is hard/inefficient to produce a single instruction (or
|
||||
/// sequence of instructions) that check for an "AND" combination of condition codes; see for
|
||||
/// instance lowering of Select.
|
||||
#[allow(dead_code)]
|
||||
InvertEqual,
|
||||
}
|
||||
|
||||
/// This explains how to interpret the results of an fcmp instruction.
|
||||
enum FcmpCondResult {
|
||||
/// The given condition code must be set.
|
||||
Condition(CC),
|
||||
|
||||
/// Both condition codes must be set.
|
||||
AndConditions(CC, CC),
|
||||
|
||||
/// Either of the condition codes must be set.
|
||||
OrConditions(CC, CC),
|
||||
|
||||
/// The associated spec was set to `FcmpSpec::InvertEqual` and Equal has been inverted. Either
/// of the condition codes must be set, and the caller must invert the meaning of the
/// condition code results. When the spec is set to `FcmpSpec::Normal`, this case can't be
/// reached.
|
||||
InvertedEqualOrConditions(CC, CC),
|
||||
}
|
||||
|
||||
/// Emits a float comparison instruction.
|
||||
///
|
||||
/// Note: make sure that there are no instructions modifying the flags between a call to this
|
||||
/// function and the use of the flags!
|
||||
fn emit_fcmp<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
insn: IRInst,
|
||||
mut cond_code: FloatCC,
|
||||
spec: FcmpSpec,
|
||||
) -> FcmpCondResult {
|
||||
let (flip_operands, inverted_equal) = match cond_code {
|
||||
FloatCC::LessThan
|
||||
| FloatCC::LessThanOrEqual
|
||||
| FloatCC::UnorderedOrGreaterThan
|
||||
| FloatCC::UnorderedOrGreaterThanOrEqual => {
|
||||
cond_code = cond_code.reverse();
|
||||
(true, false)
|
||||
}
|
||||
FloatCC::Equal => {
|
||||
let inverted_equal = match spec {
|
||||
FcmpSpec::Normal => false,
|
||||
FcmpSpec::InvertEqual => {
|
||||
cond_code = FloatCC::NotEqual; // same as .inverse()
|
||||
true
|
||||
}
|
||||
};
|
||||
(false, inverted_equal)
|
||||
}
|
||||
_ => (false, false),
|
||||
};
|
||||
|
||||
// The only valid CC constructed with `from_floatcc` can be put in the flag
|
||||
// register with a direct float comparison; do this here.
|
||||
let op = match ctx.input_ty(insn, 0) {
|
||||
types::F32 => SseOpcode::Ucomiss,
|
||||
types::F64 => SseOpcode::Ucomisd,
|
||||
_ => panic!("Bad input type to Fcmp"),
|
||||
};
|
||||
|
||||
let inputs = &[InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
|
||||
let (lhs_input, rhs_input) = if flip_operands {
|
||||
(inputs[1], inputs[0])
|
||||
} else {
|
||||
(inputs[0], inputs[1])
|
||||
};
|
||||
let lhs = put_input_in_reg(ctx, lhs_input);
|
||||
let rhs = input_to_reg_mem(ctx, rhs_input);
|
||||
ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs));
|
||||
|
||||
let cond_result = match cond_code {
|
||||
FloatCC::Equal => FcmpCondResult::AndConditions(CC::NP, CC::Z),
|
||||
FloatCC::NotEqual if inverted_equal => {
|
||||
FcmpCondResult::InvertedEqualOrConditions(CC::P, CC::NZ)
|
||||
}
|
||||
FloatCC::NotEqual if !inverted_equal => FcmpCondResult::OrConditions(CC::P, CC::NZ),
|
||||
_ => FcmpCondResult::Condition(CC::from_floatcc(cond_code)),
|
||||
};
|
||||
|
||||
cond_result
|
||||
}
|
||||
|
||||
fn emit_vm_call<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
flags: &Flags,
|
||||
@@ -2878,61 +2784,10 @@ impl LowerBackend for X64Backend {
|
||||
|
||||
if let Some(_icmp) = matches_input(ctx, flag_input, Opcode::Icmp) {
|
||||
implemented_in_isle(ctx)
|
||||
} else if let Some(fcmp) = matches_input(ctx, flag_input, Opcode::Fcmp) {
|
||||
let cond_code = ctx.data(fcmp).fp_cond_code().unwrap();
|
||||
let cond_code = if op0 == Opcode::Brz {
|
||||
cond_code.inverse()
|
||||
} else {
|
||||
cond_code
|
||||
};
|
||||
match emit_fcmp(ctx, fcmp, cond_code, FcmpSpec::Normal) {
|
||||
FcmpCondResult::Condition(cc) => {
|
||||
ctx.emit(Inst::jmp_cond(cc, taken, not_taken));
|
||||
}
|
||||
FcmpCondResult::AndConditions(cc1, cc2) => {
|
||||
ctx.emit(Inst::jmp_if(cc1.invert(), not_taken));
|
||||
ctx.emit(Inst::jmp_cond(cc2.invert(), not_taken, taken));
|
||||
}
|
||||
FcmpCondResult::OrConditions(cc1, cc2) => {
|
||||
ctx.emit(Inst::jmp_if(cc1, taken));
|
||||
ctx.emit(Inst::jmp_cond(cc2, taken, not_taken));
|
||||
}
|
||||
FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(),
|
||||
}
|
||||
} else if let Some(_fcmp) = matches_input(ctx, flag_input, Opcode::Fcmp) {
|
||||
implemented_in_isle(ctx)
|
||||
} else if src_ty == types::I128 {
|
||||
let src = put_input_in_regs(
|
||||
ctx,
|
||||
InsnInput {
|
||||
insn: branches[0],
|
||||
input: 0,
|
||||
},
|
||||
);
|
||||
let (half_cc, comb_op) = match op0 {
|
||||
Opcode::Brz => (CC::Z, AluRmiROpcode::And8),
|
||||
Opcode::Brnz => (CC::NZ, AluRmiROpcode::Or8),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let tmp1 = ctx.alloc_tmp(types::I64).only_reg().unwrap();
|
||||
let tmp2 = ctx.alloc_tmp(types::I64).only_reg().unwrap();
|
||||
ctx.emit(Inst::cmp_rmi_r(
|
||||
OperandSize::Size64,
|
||||
RegMemImm::imm(0),
|
||||
src.regs()[0],
|
||||
));
|
||||
ctx.emit(Inst::setcc(half_cc, tmp1));
|
||||
ctx.emit(Inst::cmp_rmi_r(
|
||||
OperandSize::Size64,
|
||||
RegMemImm::imm(0),
|
||||
src.regs()[1],
|
||||
));
|
||||
ctx.emit(Inst::setcc(half_cc, tmp2));
|
||||
ctx.emit(Inst::alu_rmi_r(
|
||||
OperandSize::Size32,
|
||||
comb_op,
|
||||
RegMemImm::reg(tmp1.to_reg()),
|
||||
tmp2,
|
||||
));
|
||||
ctx.emit(Inst::jmp_cond(CC::NZ, taken, not_taken));
|
||||
implemented_in_isle(ctx);
|
||||
} else if is_int_or_ref_ty(src_ty) || is_bool_ty(src_ty) {
|
||||
let src = put_input_in_reg(
|
||||
ctx,
|
||||
@@ -2968,34 +2823,7 @@ impl LowerBackend for X64Backend {
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::BrIcmp | Opcode::Brif => implemented_in_isle(ctx),
|
||||
Opcode::Brff => {
|
||||
let flag_input = InsnInput {
|
||||
insn: branches[0],
|
||||
input: 0,
|
||||
};
|
||||
|
||||
if let Some(ffcmp) = matches_input(ctx, flag_input, Opcode::Ffcmp) {
|
||||
let cond_code = ctx.data(branches[0]).fp_cond_code().unwrap();
|
||||
match emit_fcmp(ctx, ffcmp, cond_code, FcmpSpec::Normal) {
|
||||
FcmpCondResult::Condition(cc) => {
|
||||
ctx.emit(Inst::jmp_cond(cc, taken, not_taken));
|
||||
}
|
||||
FcmpCondResult::AndConditions(cc1, cc2) => {
|
||||
ctx.emit(Inst::jmp_if(cc1.invert(), not_taken));
|
||||
ctx.emit(Inst::jmp_cond(cc2.invert(), not_taken, taken));
|
||||
}
|
||||
FcmpCondResult::OrConditions(cc1, cc2) => {
|
||||
ctx.emit(Inst::jmp_if(cc1, taken));
|
||||
ctx.emit(Inst::jmp_cond(cc2, taken, not_taken));
|
||||
}
|
||||
FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(),
|
||||
}
|
||||
} else {
|
||||
// Should be disallowed by flags checks in verifier.
|
||||
unimplemented!("Brff with input not from ffcmp");
|
||||
}
|
||||
}
|
||||
Opcode::BrIcmp | Opcode::Brif | Opcode::Brff => implemented_in_isle(ctx),
|
||||
|
||||
_ => panic!("unexpected branch opcode: {:?}", op0),
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@ use crate::ir::LibCall;
|
||||
use crate::isa::x64::lower::emit_vm_call;
|
||||
use crate::{
|
||||
ir::{
|
||||
condcodes::{FloatCC, IntCC},
|
||||
condcodes::{CondCode, FloatCC, IntCC},
|
||||
immediates::*,
|
||||
types::*,
|
||||
Inst, InstructionData, MemFlags, Opcode, TrapCode, Value, ValueList,
|
||||
@@ -590,6 +590,20 @@ where
|
||||
cc.invert()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn cc_nz_or_z(&mut self, cc: &CC) -> Option<CC> {
|
||||
match cc {
|
||||
CC::Z => Some(*cc),
|
||||
CC::NZ => Some(*cc),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn floatcc_inverse(&mut self, cc: &FloatCC) -> FloatCC {
|
||||
cc.inverse()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn sum_extend_fits_in_32_bits(
|
||||
&mut self,
|
||||
|
||||
@@ -553,7 +553,10 @@
|
||||
(type SideEffectNoResult (enum
|
||||
(Inst (inst MInst))
|
||||
(Inst2 (inst1 MInst)
|
||||
(inst2 MInst))))
|
||||
(inst2 MInst))
|
||||
(Inst3 (inst1 MInst)
|
||||
(inst2 MInst)
|
||||
(inst3 MInst))))
|
||||
|
||||
;; Create an empty `InstOutput`, but do emit the given side-effectful
|
||||
;; instruction.
|
||||
@@ -565,10 +568,19 @@
|
||||
(let ((_ Unit (emit inst1))
|
||||
(_ Unit (emit inst2)))
|
||||
(output_none)))
|
||||
(rule (side_effect (SideEffectNoResult.Inst3 inst1 inst2 inst3))
|
||||
(let ((_ Unit (emit inst1))
|
||||
(_ Unit (emit inst2))
|
||||
(_ Unit (emit inst3)))
|
||||
(output_none)))
|
||||
|
||||
;; Concatenate two groups of side-effecting instructions into one group,
;; keeping the instructions in their original order. The rules below cover
;; 1+1, 1+2 and 2+1 instructions.
(decl side_effect_concat (SideEffectNoResult SideEffectNoResult) SideEffectNoResult)
(rule (side_effect_concat (SideEffectNoResult.Inst a) (SideEffectNoResult.Inst b))
      (SideEffectNoResult.Inst2 a b))
(rule (side_effect_concat (SideEffectNoResult.Inst a) (SideEffectNoResult.Inst2 b c))
      (SideEffectNoResult.Inst3 a b c))
(rule (side_effect_concat (SideEffectNoResult.Inst2 a b) (SideEffectNoResult.Inst c))
      (SideEffectNoResult.Inst3 a b c))
|
||||
|
||||
;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
@@ -595,6 +607,7 @@
|
||||
;; ProducesFlags. See `with_flags` below for more.
|
||||
(type ConsumesFlags (enum
|
||||
(ConsumesFlagsSideEffect (inst MInst))
|
||||
(ConsumesFlagsSideEffect2 (inst1 MInst) (inst2 MInst))
|
||||
(ConsumesFlagsReturnsResultWithProducer (inst MInst) (result Reg))
|
||||
(ConsumesFlagsReturnsReg (inst MInst) (result Reg))
|
||||
(ConsumesFlagsTwiceReturnsValueRegs (inst1 MInst)
|
||||
@@ -630,6 +643,10 @@
|
||||
inst1
|
||||
inst2
|
||||
(value_regs reg1 reg2)))
|
||||
(rule (consumes_flags_concat
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect inst1)
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect inst2))
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect2 inst1 inst2))
|
||||
|
||||
;; Combine flags-producing and -consuming instructions together, ensuring that
|
||||
;; they are emitted back-to-back and no other instructions can be emitted
|
||||
@@ -707,11 +724,21 @@
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect c))
|
||||
(SideEffectNoResult.Inst c))
|
||||
|
||||
(rule (with_flags_side_effect
|
||||
(ProducesFlags.AlreadyExistingFlags)
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2))
|
||||
(SideEffectNoResult.Inst2 c1 c2))
|
||||
|
||||
(rule (with_flags_side_effect
|
||||
(ProducesFlags.ProducesFlagsSideEffect p)
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect c))
|
||||
(SideEffectNoResult.Inst2 p c))
|
||||
|
||||
(rule (with_flags_side_effect
|
||||
(ProducesFlags.ProducesFlagsSideEffect p)
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2))
|
||||
(SideEffectNoResult.Inst3 p c1 c2))
|
||||
|
||||
;;;; Helpers for Working with TrapCode ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(decl trap_code_division_by_zero () TrapCode)
|
||||
|
||||
@@ -126,3 +126,62 @@ block2:
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
function %f4(f32, f32) -> b1 {
|
||||
block0(v0: f32, v1: f32):
|
||||
v2 = fcmp eq v0, v1
|
||||
brz v2, block1
|
||||
jump block2
|
||||
block1:
|
||||
v3 = bconst.b1 true
|
||||
return v3
|
||||
block2:
|
||||
v4 = bconst.b1 false
|
||||
return v4
|
||||
}
|
||||
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; ucomiss %xmm1, %xmm0
|
||||
; jp label1
|
||||
; jnz label1; j label2
|
||||
; block1:
|
||||
; movl $1, %eax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
; block2:
|
||||
; xorl %eax, %eax, %eax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
function %f4(f32, f32) -> b1 {
|
||||
block0(v0: f32, v1: f32):
|
||||
v2 = fcmp ne v0, v1
|
||||
brz v2, block1
|
||||
jump block2
|
||||
block1:
|
||||
v3 = bconst.b1 true
|
||||
return v3
|
||||
block2:
|
||||
v4 = bconst.b1 false
|
||||
return v4
|
||||
}
|
||||
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; ucomiss %xmm1, %xmm0
|
||||
; jp label2
|
||||
; jnz label2; j label1
|
||||
; block1:
|
||||
; movl $1, %eax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
; block2:
|
||||
; xorl %eax, %eax, %eax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
@@ -320,7 +320,7 @@ block2:
|
||||
; setz %r11b
|
||||
; cmpq $0, %rsi
|
||||
; setz %al
|
||||
; andb %al, %r11b, %al
|
||||
; testb %r11b, %al
|
||||
; jnz label1; j label2
|
||||
; block1:
|
||||
; movl $1, %eax
|
||||
@@ -351,11 +351,11 @@ block2:
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; cmpq $0, %rdi
|
||||
; setnz %r11b
|
||||
; setz %r11b
|
||||
; cmpq $0, %rsi
|
||||
; setnz %al
|
||||
; orb %al, %r11b, %al
|
||||
; jnz label1; j label2
|
||||
; setz %al
|
||||
; testb %r11b, %al
|
||||
; jz label1; j label2
|
||||
; block1:
|
||||
; movl $1, %eax
|
||||
; movq %rbp, %rsp
|
||||
|
||||
62
cranelift/filetests/filetests/runtests/fcmp.clif
Normal file
62
cranelift/filetests/filetests/runtests/fcmp.clif
Normal file
@@ -0,0 +1,62 @@
|
||||
test run
|
||||
target aarch64
|
||||
target s390x
|
||||
target x86_64
|
||||
|
||||
function %fcmp_eq(f64, f64) -> b1 {
|
||||
block0(v0: f64, v1: f64):
|
||||
v2 = fcmp eq v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; run: %fcmp_eq(0x1.0, 0x1.0) == true
|
||||
; run: %fcmp_eq(0x1.0, 0x0.0) == false
|
||||
|
||||
function %fcmp_ne(f64, f64) -> b1 {
|
||||
block0(v0: f64, v1: f64):
|
||||
v2 = fcmp ne v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; run: %fcmp_ne(0x1.0, 0x1.0) == false
|
||||
; run: %fcmp_ne(0x1.0, 0x0.0) == true
|
||||
|
||||
function %fcmp_lt(f64, f64) -> b1 {
|
||||
block0(v0: f64, v1: f64):
|
||||
v2 = fcmp lt v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; run: %fcmp_lt(0x1.0, 0x1.0) == false
|
||||
; run: %fcmp_lt(0x1.0, 0x0.0) == false
|
||||
; run: %fcmp_lt(0x1.0, 0x2.3) == true
|
||||
|
||||
function %fcmp_le(f64, f64) -> b1 {
|
||||
block0(v0: f64, v1: f64):
|
||||
v2 = fcmp le v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; run: %fcmp_le(0x1.0, 0x1.0) == true
|
||||
; run: %fcmp_le(0x1.0, 0x0.0) == false
|
||||
; run: %fcmp_le(0x1.0, 0x2.3) == true
|
||||
|
||||
function %fcmp_gt(f64, f64) -> b1 {
|
||||
block0(v0: f64, v1: f64):
|
||||
v2 = fcmp gt v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; run: %fcmp_gt(0x1.0, 0x1.0) == false
|
||||
; run: %fcmp_gt(0x1.0, 0x0.0) == true
|
||||
; run: %fcmp_gt(0x1.0, 0x2.3) == false
|
||||
|
||||
function %fcmp_ge(f64, f64) -> b1 {
|
||||
block0(v0: f64, v1: f64):
|
||||
v2 = fcmp ge v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; run: %fcmp_ge(0x1.0, 0x1.0) == true
|
||||
; run: %fcmp_ge(0x1.0, 0x0.0) == true
|
||||
; run: %fcmp_ge(0x1.0, 0x2.3) == false
|
||||
Reference in New Issue
Block a user