x64: port select to ISLE (#3682)

* x64: port `select` using an FP comparison to ISLE

This change includes quite a few interlocking parts, required mainly by
the current x64 conventions in ISLE:
 - it adds a way to emit a `cmove` with multiple OR-ing conditions;
   because x64 ISLE cannot currently safely emit a comparison followed
   by several jumps, this adds `MachInst::CmoveOr` and
   `MachInst::XmmCmoveOr` macro instructions. Unfortunately, these macro
   instructions hide the multi-instruction sequence in `lower.isle`
 - to properly keep track of what instructions consume and produce
   flags, @cfallin added a way to pass around variants of
   `ConsumesFlags` and `ProducesFlags`--these changes affect all
   backends
 - then, to lower the `fcmp + select` CLIF, this change adds several
   `cmove*_from_values` helpers that perform all of the awkward
   conversions between `Value`, `ValueReg`, `Reg`, and `Gpr/Xmm`; one
   upside is that now these lowerings have much-improved documentation
   explaining why the various `FloatCC` and `CC` choices are made the
   way they are.

Co-authored-by: Chris Fallin <chris@cfallin.org>
This commit is contained in:
Andrew Brown
2022-02-23 10:03:16 -08:00
committed by GitHub
parent 5a5e401a9c
commit f87c61176a
20 changed files with 3163 additions and 2272 deletions

View File

@@ -1064,9 +1064,9 @@ pub(crate) fn emit(
cc,
consequent,
alternative,
dst: reg_g,
dst,
} => {
debug_assert_eq!(*alternative, reg_g.to_reg());
debug_assert_eq!(*alternative, dst.to_reg());
let rex_flags = RexFlags::from(*size);
let prefix = match size {
OperandSize::Size16 => LegacyPrefixes::_66,
@@ -1076,14 +1076,14 @@ pub(crate) fn emit(
};
let opcode = 0x0F40 + cc.get_enc() as u32;
match consequent.clone().to_reg_mem() {
RegMem::Reg { reg: reg_e } => {
RegMem::Reg { reg } => {
emit_std_reg_reg(
sink,
prefix,
opcode,
2,
reg_g.to_reg().to_reg(),
reg_e,
dst.to_reg().to_reg(),
reg,
rex_flags,
);
}
@@ -1096,7 +1096,7 @@ pub(crate) fn emit(
prefix,
opcode,
2,
reg_g.to_reg().to_reg(),
dst.to_reg().to_reg(),
addr,
rex_flags,
);
@@ -1104,7 +1104,42 @@ pub(crate) fn emit(
}
}
Inst::XmmCmove { size, cc, src, dst } => {
Inst::CmoveOr {
    size,
    cc1,
    cc2,
    consequent,
    alternative,
    dst,
} => {
    // A conditional move that fires when either `cc1` or `cc2` holds. It is
    // emitted as two back-to-back `Cmove` instructions that share the same
    // operands and differ only in the condition code, `cc1` first and then
    // `cc2`.
    for cc in [*cc1, *cc2] {
        let cmove = Inst::Cmove {
            cc,
            size: *size,
            consequent: consequent.clone(),
            alternative: alternative.clone(),
            dst: dst.clone(),
        };
        cmove.emit(sink, info, state);
    }
}
Inst::XmmCmove {
size,
cc,
consequent,
alternative,
dst,
} => {
debug_assert_eq!(*alternative, dst.to_reg());
// Lowering of the Select IR opcode when the input is an fcmp relies on the fact that
// this doesn't clobber flags. Make sure to not do so here.
let next = sink.get_label();
@@ -1117,12 +1152,46 @@ pub(crate) fn emit(
} else {
SseOpcode::Movss
};
let inst = Inst::xmm_unary_rm_r(op, src.clone().to_reg_mem(), dst.to_writable_reg());
let inst =
Inst::xmm_unary_rm_r(op, consequent.clone().to_reg_mem(), dst.to_writable_reg());
inst.emit(sink, info, state);
sink.bind_label(next);
}
Inst::XmmCmoveOr {
    size,
    cc1,
    cc2,
    consequent,
    alternative,
    dst,
} => {
    // Conditional move of an XMM value that fires when either `cc1` or `cc2`
    // holds. It is emitted as two guarded moves:
    //
    //       j<!cc1> second_test
    //       mov     consequent, dst
    //   second_test:
    //       j<!cc2> next_instruction
    //       mov     consequent, dst
    //   next_instruction:
    //
    // The second conditional jump reads the same flags as the first, so
    // nothing emitted in between may modify them.
    debug_assert_eq!(*alternative, dst.to_reg());

    // Scalar double-precision move for 64-bit operands, single-precision
    // otherwise.
    let op = if *size == OperandSize::Size64 {
        SseOpcode::Movsd
    } else {
        SseOpcode::Movss
    };
    let second_test = sink.get_label();
    let next_instruction = sink.get_label();

    // Jump to second test if `cc1` is *not* set. This must target
    // `second_test`, not `next_instruction`: skipping straight to the end
    // would ignore `cc2` and turn the OR into a plain `cc1` test.
    one_way_jmp(sink, cc1.invert(), second_test);
    let inst =
        Inst::xmm_unary_rm_r(op, consequent.clone().to_reg_mem(), dst.to_writable_reg());
    inst.emit(sink, info, state);

    sink.bind_label(second_test);
    // Jump to next instruction if `cc2` is *not* set.
    one_way_jmp(sink, cc2.invert(), next_instruction);
    let inst =
        Inst::xmm_unary_rm_r(op, consequent.clone().to_reg_mem(), dst.to_writable_reg());
    inst.emit(sink, info, state);

    sink.bind_label(next_instruction);
}
Inst::Push64 { src } => {
if info.flags.enable_probestack() {
sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow);

View File

@@ -52,6 +52,7 @@ impl Inst {
| Inst::CallUnknown { .. }
| Inst::CheckedDivOrRemSeq { .. }
| Inst::Cmove { .. }
| Inst::CmoveOr { .. }
| Inst::CmpRmiR { .. }
| Inst::CvtFloatToSintSeq { .. }
| Inst::CvtFloatToUintSeq { .. }
@@ -88,6 +89,7 @@ impl Inst {
| Inst::Ud2 { .. }
| Inst::VirtualSPOffsetAdj { .. }
| Inst::XmmCmove { .. }
| Inst::XmmCmoveOr { .. }
| Inst::XmmCmpRmR { .. }
| Inst::XmmLoadConst { .. }
| Inst::XmmMinMaxSeq { .. }
@@ -629,7 +631,13 @@ impl Inst {
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
let src = XmmMem::new(src).unwrap();
let dst = WritableXmm::from_writable_reg(dst).unwrap();
Inst::XmmCmove { size, cc, src, dst }
Inst::XmmCmove {
size,
cc,
consequent: src,
alternative: dst.to_reg(),
dst,
}
}
pub(crate) fn push64(src: RegMemImm) -> Inst {
@@ -898,6 +906,12 @@ impl Inst {
alternative,
dst,
..
}
| Inst::CmoveOr {
size,
alternative,
dst,
..
} => {
if *alternative != dst.to_reg() {
debug_assert!(alternative.is_virtual());
@@ -910,6 +924,23 @@ impl Inst {
}
insts.push(self);
}
// XMM conditional moves modify `dst` in place, so `alternative` (the value
// kept when the condition does not hold) has to be the same register as `dst`.
Inst::XmmCmove {
alternative, dst, ..
}
| Inst::XmmCmoveOr {
alternative, dst, ..
} => {
// If they differ, first copy `alternative` into `dst`, then rewrite the
// instruction so that `alternative` names `dst`.
if *alternative != dst.to_reg() {
debug_assert!(alternative.is_virtual());
insts.push(Self::gen_move(
dst.to_writable_reg(),
alternative.to_reg(),
// NOTE(review): presumably F32X4 is used so the whole vector register
// is copied regardless of the scalar type being selected; confirm.
types::F32X4,
));
*alternative = dst.to_reg();
}
// The (possibly rewritten) conditional move follows the inserted copy.
insts.push(self);
}
Inst::Not { src, dst, .. } | Inst::Neg { src, dst, .. } => {
if *src != dst.to_reg() {
debug_assert!(src.is_virtual());
@@ -1588,7 +1619,34 @@ impl PrettyPrint for Inst {
show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes())
),
Inst::XmmCmove { size, cc, src, dst } => {
Inst::CmoveOr {
    size,
    cc1,
    cc2,
    consequent,
    dst,
    ..
} => {
    // Printed as the two `cmov` instructions it expands to, separated by
    // `; `. Both share the same source and destination operands and differ
    // only in the condition code.
    let width = size.to_bytes();
    let consequent = consequent.show_rru_sized(mb_rru, width);
    let dst = show_ireg_sized(dst.to_reg().to_reg(), mb_rru, width);
    let first_op = ljustify(format!("cmov{}{}", cc1.to_string(), suffix_bwlq(*size)));
    let second_op = ljustify(format!("cmov{}{}", cc2.to_string(), suffix_bwlq(*size)));
    format!(
        "{} {}, {}; {} {}, {}",
        first_op, consequent, dst, second_op, consequent, dst,
    )
}
Inst::XmmCmove {
size,
cc,
consequent: src,
dst,
..
} => {
format!(
"j{} $next; mov{} {}, {}; $next: ",
cc.invert().to_string(),
@@ -1602,6 +1660,34 @@ impl PrettyPrint for Inst {
)
}
Inst::XmmCmoveOr {
    size,
    cc1,
    cc2,
    consequent,
    dst,
    ..
} => {
    // Printed as the jump-guarded sequence: a conditional jump over each of
    // the two scalar moves, using the inverted condition codes, with
    // `$check` marking the second test and `$next` the end of the sequence.
    let mov_suffix = match size {
        OperandSize::Size64 => "sd",
        _ => "ss",
    };
    let width = size.to_bytes();
    let consequent = consequent.show_rru_sized(mb_rru, width);
    let dst = show_ireg_sized(dst.to_reg().to_reg(), mb_rru, width);
    let skip_first = cc1.invert().to_string();
    let skip_second = cc2.invert().to_string();
    format!(
        "j{} $check; mov{} {}, {}; $check: j{} $next; mov{} {}, {}; $next",
        skip_first, mov_suffix, consequent, dst, skip_second, mov_suffix, consequent, dst,
    )
}
Inst::Push64 { src } => {
format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru))
}
@@ -2000,11 +2086,25 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
consequent: src,
dst,
..
}
| Inst::CmoveOr {
consequent: src,
dst,
..
} => {
src.get_regs_as_uses(collector);
collector.add_mod(dst.to_writable_reg());
}
Inst::XmmCmove { src, dst, .. } => {
Inst::XmmCmove {
consequent: src,
dst,
..
}
| Inst::XmmCmoveOr {
consequent: src,
dst,
..
} => {
src.get_regs_as_uses(collector);
collector.add_mod(dst.to_writable_reg());
}
@@ -2454,18 +2554,32 @@ pub(crate) fn x64_map_regs<RM: RegMapper>(inst: &mut Inst, mapper: &RM) {
ref mut dst,
ref mut alternative,
..
}
| Inst::CmoveOr {
consequent: ref mut src,
ref mut dst,
ref mut alternative,
..
} => {
src.map_uses(mapper);
dst.map_mod(mapper);
*alternative = dst.to_reg();
}
Inst::XmmCmove {
ref mut src,
consequent: ref mut src,
ref mut dst,
ref mut alternative,
..
}
| Inst::XmmCmoveOr {
consequent: ref mut src,
ref mut dst,
ref mut alternative,
..
} => {
src.map_uses(mapper);
dst.map_mod(mapper);
*alternative = dst.to_reg();
}
Inst::Push64 { ref mut src } => src.map_uses(mapper),
Inst::Pop64 { ref mut dst } => {