Add support for 32 bit and 64 bit fcmp for the new backend
Implements ucomiss and ucomisd.
This commit is contained in:
@@ -5,7 +5,7 @@ use std::string::{String, ToString};
|
|||||||
|
|
||||||
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector, RegUsageMapper};
|
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector, RegUsageMapper};
|
||||||
|
|
||||||
use crate::ir::condcodes::IntCC;
|
use crate::ir::condcodes::{FloatCC, IntCC};
|
||||||
use crate::machinst::*;
|
use crate::machinst::*;
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
@@ -636,6 +636,12 @@ pub enum CC {
|
|||||||
LE = 14,
|
LE = 14,
|
||||||
/// > signed
|
/// > signed
|
||||||
NLE = 15,
|
NLE = 15,
|
||||||
|
|
||||||
|
/// parity
|
||||||
|
P = 10,
|
||||||
|
|
||||||
|
/// not parity
|
||||||
|
NP = 11,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl CC {
|
impl CC {
|
||||||
@@ -678,6 +684,33 @@ impl CC {
|
|||||||
|
|
||||||
CC::LE => CC::NLE,
|
CC::LE => CC::NLE,
|
||||||
CC::NLE => CC::LE,
|
CC::NLE => CC::LE,
|
||||||
|
|
||||||
|
CC::P => CC::NP,
|
||||||
|
CC::NP => CC::P,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn from_floatcc(floatcc: FloatCC) -> Self {
|
||||||
|
match floatcc {
|
||||||
|
FloatCC::Ordered => CC::NP,
|
||||||
|
FloatCC::Unordered => CC::P,
|
||||||
|
// Alias for NE
|
||||||
|
FloatCC::NotEqual | FloatCC::OrderedNotEqual => CC::NZ,
|
||||||
|
// Alias for E
|
||||||
|
FloatCC::UnorderedOrEqual => CC::Z,
|
||||||
|
// Alias for A
|
||||||
|
FloatCC::GreaterThan => CC::NBE,
|
||||||
|
// Alias for AE
|
||||||
|
FloatCC::GreaterThanOrEqual => CC::NB,
|
||||||
|
FloatCC::UnorderedOrLessThan => CC::B,
|
||||||
|
FloatCC::UnorderedOrLessThanOrEqual => CC::BE,
|
||||||
|
FloatCC::Equal
|
||||||
|
| FloatCC::LessThan
|
||||||
|
| FloatCC::LessThanOrEqual
|
||||||
|
| FloatCC::UnorderedOrGreaterThan
|
||||||
|
| FloatCC::UnorderedOrGreaterThanOrEqual => unimplemented!(
|
||||||
|
"No single condition code to guarantee ordered. Treat as special case."
|
||||||
|
),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -703,6 +736,8 @@ impl fmt::Debug for CC {
|
|||||||
CC::NL => "nl",
|
CC::NL => "nl",
|
||||||
CC::LE => "le",
|
CC::LE => "le",
|
||||||
CC::NLE => "nle",
|
CC::NLE => "nle",
|
||||||
|
CC::P => "p",
|
||||||
|
CC::NP => "np",
|
||||||
};
|
};
|
||||||
write!(fmt, "{}", name)
|
write!(fmt, "{}", name)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1603,6 +1603,25 @@ pub(crate) fn emit(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Inst::XMM_Cmp_RM_R { op, src, dst } => {
|
||||||
|
let rex = RexFlags::clear_w();
|
||||||
|
let (prefix, opcode) = match op {
|
||||||
|
SseOpcode::Ucomisd => (LegacyPrefix::_66, 0x0F2E),
|
||||||
|
SseOpcode::Ucomiss => (LegacyPrefix::None, 0x0F2E),
|
||||||
|
_ => unimplemented!("Emit xmm cmp rm r"),
|
||||||
|
};
|
||||||
|
|
||||||
|
match src {
|
||||||
|
RegMem::Reg { reg } => {
|
||||||
|
emit_std_reg_reg(sink, prefix, opcode, 2, *dst, *reg, rex);
|
||||||
|
}
|
||||||
|
RegMem::Mem { addr } => {
|
||||||
|
let addr = &addr.finalize(state);
|
||||||
|
emit_std_reg_mem(sink, prefix, opcode, 2, *dst, addr, rex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Inst::LoadExtName {
|
Inst::LoadExtName {
|
||||||
dst,
|
dst,
|
||||||
name,
|
name,
|
||||||
|
|||||||
@@ -2711,7 +2711,8 @@ fn test_x64_emit() {
|
|||||||
insns.push((Inst::setcc(CC::NLE, w_rsi), "400F9FC6", "setnle %sil"));
|
insns.push((Inst::setcc(CC::NLE, w_rsi), "400F9FC6", "setnle %sil"));
|
||||||
insns.push((Inst::setcc(CC::Z, w_r14), "410F94C6", "setz %r14b"));
|
insns.push((Inst::setcc(CC::Z, w_r14), "410F94C6", "setz %r14b"));
|
||||||
insns.push((Inst::setcc(CC::LE, w_r14), "410F9EC6", "setle %r14b"));
|
insns.push((Inst::setcc(CC::LE, w_r14), "410F9EC6", "setle %r14b"));
|
||||||
|
insns.push((Inst::setcc(CC::P, w_r9), "410F9AC1", "setp %r9b"));
|
||||||
|
insns.push((Inst::setcc(CC::NP, w_r8), "410F9BC0", "setnp %r8b"));
|
||||||
// ========================================================
|
// ========================================================
|
||||||
// Cmove
|
// Cmove
|
||||||
insns.push((
|
insns.push((
|
||||||
@@ -2876,6 +2877,33 @@ fn test_x64_emit() {
|
|||||||
"jmp *321(%r10,%rdx,4)",
|
"jmp *321(%r10,%rdx,4)",
|
||||||
));
|
));
|
||||||
|
|
||||||
|
// ========================================================
|
||||||
|
// XMM_CMP_RM_R
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_cmp_rm_r(SseOpcode::Ucomiss, RegMem::reg(xmm1), xmm2),
|
||||||
|
"0F2ED1",
|
||||||
|
"ucomiss %xmm1, %xmm2",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_cmp_rm_r(SseOpcode::Ucomiss, RegMem::reg(xmm0), xmm9),
|
||||||
|
"440F2EC8",
|
||||||
|
"ucomiss %xmm0, %xmm9",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_cmp_rm_r(SseOpcode::Ucomisd, RegMem::reg(xmm13), xmm4),
|
||||||
|
"66410F2EE5",
|
||||||
|
"ucomisd %xmm13, %xmm4",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_cmp_rm_r(SseOpcode::Ucomisd, RegMem::reg(xmm11), xmm12),
|
||||||
|
"66450F2EE3",
|
||||||
|
"ucomisd %xmm11, %xmm12",
|
||||||
|
));
|
||||||
|
|
||||||
// ========================================================
|
// ========================================================
|
||||||
// XMM_RM_R: float binary ops
|
// XMM_RM_R: float binary ops
|
||||||
|
|
||||||
|
|||||||
@@ -244,6 +244,13 @@ pub enum Inst {
|
|||||||
dst: Writable<Reg>,
|
dst: Writable<Reg>,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/// Float comparisons/tests: ucomiss/ucomisd (reg addr) reg.
|
||||||
|
XMM_Cmp_RM_R {
|
||||||
|
op: SseOpcode,
|
||||||
|
src: RegMem,
|
||||||
|
dst: Reg,
|
||||||
|
},
|
||||||
|
|
||||||
// =====================================
|
// =====================================
|
||||||
// Control flow instructions.
|
// Control flow instructions.
|
||||||
/// Direct call: call simm32.
|
/// Direct call: call simm32.
|
||||||
@@ -480,6 +487,12 @@ impl Inst {
|
|||||||
Inst::GprToXmm { op, src, dst }
|
Inst::GprToXmm { op, src, dst }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn xmm_cmp_rm_r(op: SseOpcode, src: RegMem, dst: Reg) -> Inst {
|
||||||
|
//TODO:: Add assert_reg_type helper
|
||||||
|
debug_assert!(dst.get_class() == RegClass::V128);
|
||||||
|
Inst::XMM_Cmp_RM_R { op, src, dst }
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn movzx_rm_r(
|
pub(crate) fn movzx_rm_r(
|
||||||
ext_mode: ExtMode,
|
ext_mode: ExtMode,
|
||||||
src: RegMem,
|
src: RegMem,
|
||||||
@@ -859,6 +872,12 @@ impl ShowWithRRU for Inst {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Inst::XMM_Cmp_RM_R { op, src, dst } => format!(
|
||||||
|
"{} {}, {}",
|
||||||
|
ljustify(op.to_string()),
|
||||||
|
src.show_rru_sized(mb_rru, 8),
|
||||||
|
show_ireg_sized(*dst, mb_rru, 8),
|
||||||
|
),
|
||||||
Inst::Imm_R {
|
Inst::Imm_R {
|
||||||
dst_is_64,
|
dst_is_64,
|
||||||
simm64,
|
simm64,
|
||||||
@@ -1117,6 +1136,10 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
|||||||
collector.add_use(*src);
|
collector.add_use(*src);
|
||||||
dst.get_regs_as_uses(collector);
|
dst.get_regs_as_uses(collector);
|
||||||
}
|
}
|
||||||
|
Inst::XMM_Cmp_RM_R { src, dst, .. } => {
|
||||||
|
src.get_regs_as_uses(collector);
|
||||||
|
collector.add_use(*dst);
|
||||||
|
}
|
||||||
Inst::Imm_R { dst, .. } => {
|
Inst::Imm_R { dst, .. } => {
|
||||||
collector.add_def(*dst);
|
collector.add_def(*dst);
|
||||||
}
|
}
|
||||||
@@ -1332,6 +1355,14 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
|||||||
map_use(mapper, src);
|
map_use(mapper, src);
|
||||||
dst.map_uses(mapper);
|
dst.map_uses(mapper);
|
||||||
}
|
}
|
||||||
|
Inst::XMM_Cmp_RM_R {
|
||||||
|
ref mut src,
|
||||||
|
ref mut dst,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
|
src.map_uses(mapper);
|
||||||
|
map_use(mapper, dst);
|
||||||
|
}
|
||||||
Inst::Imm_R { ref mut dst, .. } => map_def(mapper, dst),
|
Inst::Imm_R { ref mut dst, .. } => map_def(mapper, dst),
|
||||||
Inst::Mov_R_R {
|
Inst::Mov_R_R {
|
||||||
ref mut src,
|
ref mut src,
|
||||||
|
|||||||
@@ -6,14 +6,14 @@ use log::trace;
|
|||||||
use regalloc::{Reg, RegClass, Writable};
|
use regalloc::{Reg, RegClass, Writable};
|
||||||
use smallvec::SmallVec;
|
use smallvec::SmallVec;
|
||||||
|
|
||||||
use alloc::boxed::Box;
|
|
||||||
use alloc::vec::Vec;
|
|
||||||
use std::convert::TryFrom;
|
|
||||||
|
|
||||||
use crate::ir::types;
|
use crate::ir::types;
|
||||||
use crate::ir::types::*;
|
use crate::ir::types::*;
|
||||||
use crate::ir::Inst as IRInst;
|
use crate::ir::Inst as IRInst;
|
||||||
use crate::ir::{condcodes::IntCC, InstructionData, Opcode, TrapCode, Type};
|
use crate::ir::{condcodes::FloatCC, condcodes::IntCC, InstructionData, Opcode, TrapCode, Type};
|
||||||
|
use alloc::boxed::Box;
|
||||||
|
use alloc::vec::Vec;
|
||||||
|
use cranelift_codegen_shared::condcodes::CondCode;
|
||||||
|
use std::convert::TryFrom;
|
||||||
|
|
||||||
use crate::machinst::lower::*;
|
use crate::machinst::lower::*;
|
||||||
use crate::machinst::*;
|
use crate::machinst::*;
|
||||||
@@ -95,6 +95,16 @@ fn inst_condcode(data: &InstructionData) -> IntCC {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn inst_fp_condcode(data: &InstructionData) -> Option<FloatCC> {
|
||||||
|
match data {
|
||||||
|
&InstructionData::BranchFloat { cond, .. }
|
||||||
|
| &InstructionData::FloatCompare { cond, .. }
|
||||||
|
| &InstructionData::FloatCond { cond, .. }
|
||||||
|
| &InstructionData::FloatCondTrap { cond, .. } => Some(cond),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn ldst_offset(data: &InstructionData) -> Option<i32> {
|
fn ldst_offset(data: &InstructionData) -> Option<i32> {
|
||||||
match data {
|
match data {
|
||||||
&InstructionData::Load { offset, .. }
|
&InstructionData::Load { offset, .. }
|
||||||
@@ -734,6 +744,77 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
ctx.emit(Inst::setcc(cc, dst));
|
ctx.emit(Inst::setcc(cc, dst));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Opcode::Fcmp => {
|
||||||
|
let condcode = inst_fp_condcode(ctx.data(insn)).unwrap();
|
||||||
|
let input_ty = ctx.input_ty(insn, 0);
|
||||||
|
let op = match input_ty {
|
||||||
|
F32 => SseOpcode::Ucomiss,
|
||||||
|
F64 => SseOpcode::Ucomisd,
|
||||||
|
_ => panic!("Bad input type to Fcmp"),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Unordered is returned by setting ZF, PF, CF <- 111
|
||||||
|
// Greater than by ZF, PF, CF <- 000
|
||||||
|
// Less than by ZF, PF, CF <- 001
|
||||||
|
// Equal by ZF, PF, CF <- 100
|
||||||
|
//
|
||||||
|
// Checking the result of ucomiss/ucomisd is somewhat annoying because you don't
|
||||||
|
// have setcc instructions that explicitly check simultaneously for the condition
|
||||||
|
// (i.e. eq, le, gt, etc) and orderedness. So that might mean we need more
|
||||||
|
// than one setcc check and then a logical "and" or "or" to determine both.
|
||||||
|
// However knowing that if the parity bit is set, then the result was
|
||||||
|
// considered unordered and knowing that if the parity bit is set, then both
|
||||||
|
// the ZF and CF flag bits must also be set, we can get away with using one setcc
|
||||||
|
// for most condition codes.
|
||||||
|
match condcode {
|
||||||
|
// setb and setbe for ordered LessThan and LessThanOrEqual check if CF = 1 which
|
||||||
|
// doesn't exclude unorderedness. To get around this we can reverse the operands
|
||||||
|
// and the cc test to instead check if CF and ZF are 0 which also excludes
|
||||||
|
// unorderedness. Using similar logic we also reverse UnorderedOrGreaterThan and
|
||||||
|
// UnorderedOrGreaterThanOrEqual and check that ZF or CF is 1, which also includes unorderedness.
|
||||||
|
FloatCC::LessThan
|
||||||
|
| FloatCC::LessThanOrEqual
|
||||||
|
| FloatCC::UnorderedOrGreaterThan
|
||||||
|
| FloatCC::UnorderedOrGreaterThanOrEqual => {
|
||||||
|
let lhs = input_to_reg_mem(ctx, inputs[0]);
|
||||||
|
let rhs = input_to_reg(ctx, inputs[1]);
|
||||||
|
let dst = output_to_reg(ctx, outputs[0]);
|
||||||
|
ctx.emit(Inst::xmm_cmp_rm_r(op, lhs, rhs));
|
||||||
|
let condcode = condcode.reverse();
|
||||||
|
let cc = CC::from_floatcc(condcode);
|
||||||
|
ctx.emit(Inst::setcc(cc, dst));
|
||||||
|
}
|
||||||
|
// Outlier case where we cannot get around checking the parity bit to determine
|
||||||
|
// if the result was ordered.
|
||||||
|
FloatCC::Equal => {
|
||||||
|
let lhs = input_to_reg(ctx, inputs[0]);
|
||||||
|
let rhs = input_to_reg_mem(ctx, inputs[1]);
|
||||||
|
let dst = output_to_reg(ctx, outputs[0]);
|
||||||
|
let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, I32);
|
||||||
|
ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs));
|
||||||
|
ctx.emit(Inst::setcc(CC::NP, tmp_gpr1));
|
||||||
|
ctx.emit(Inst::setcc(CC::Z, dst));
|
||||||
|
ctx.emit(Inst::alu_rmi_r(
|
||||||
|
false,
|
||||||
|
AluRmiROpcode::And,
|
||||||
|
RegMemImm::reg(tmp_gpr1.to_reg()),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
// For all remaining condition codes we can handle things with one check. Condition
|
||||||
|
// ordered NotEqual for example does not need a separate check for the parity bit because
|
||||||
|
// the setnz checks that the zero flag is 0 which is impossible with an unordered result.
|
||||||
|
_ => {
|
||||||
|
let lhs = input_to_reg(ctx, inputs[0]);
|
||||||
|
let rhs = input_to_reg_mem(ctx, inputs[1]);
|
||||||
|
let dst = output_to_reg(ctx, outputs[0]);
|
||||||
|
let cc = CC::from_floatcc(condcode);
|
||||||
|
ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs));
|
||||||
|
ctx.emit(Inst::setcc(cc, dst));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Opcode::FallthroughReturn | Opcode::Return => {
|
Opcode::FallthroughReturn | Opcode::Return => {
|
||||||
for i in 0..ctx.num_inputs(insn) {
|
for i in 0..ctx.num_inputs(insn) {
|
||||||
let src_reg = input_to_reg(ctx, inputs[i]);
|
let src_reg = input_to_reg(ctx, inputs[i]);
|
||||||
|
|||||||
Reference in New Issue
Block a user