machinst x64: implement float min/max with the right semantics;

This commit is contained in:
Benjamin Bouvier
2020-07-21 14:27:44 +02:00
parent e43310a088
commit 03b9e1e86a
4 changed files with 168 additions and 0 deletions

View File

@@ -337,6 +337,7 @@ pub enum SseOpcode {
Addss, Addss,
Addsd, Addsd,
Andps, Andps,
Andpd,
Andnps, Andnps,
Comiss, Comiss,
Comisd, Comisd,
@@ -365,6 +366,7 @@ pub enum SseOpcode {
Mulss, Mulss,
Mulsd, Mulsd,
Orps, Orps,
Orpd,
Rcpss, Rcpss,
Roundss, Roundss,
Roundsd, Roundsd,
@@ -404,6 +406,7 @@ impl SseOpcode {
| SseOpcode::Cmpss => SSE, | SseOpcode::Cmpss => SSE,
SseOpcode::Addsd SseOpcode::Addsd
| SseOpcode::Andpd
| SseOpcode::Cvtsd2ss | SseOpcode::Cvtsd2ss
| SseOpcode::Cvtsd2si | SseOpcode::Cvtsd2si
| SseOpcode::Cvtsi2sd | SseOpcode::Cvtsi2sd
@@ -416,6 +419,7 @@ impl SseOpcode {
| SseOpcode::Movq | SseOpcode::Movq
| SseOpcode::Movsd | SseOpcode::Movsd
| SseOpcode::Mulsd | SseOpcode::Mulsd
| SseOpcode::Orpd
| SseOpcode::Sqrtsd | SseOpcode::Sqrtsd
| SseOpcode::Subsd | SseOpcode::Subsd
| SseOpcode::Ucomisd | SseOpcode::Ucomisd
@@ -440,6 +444,7 @@ impl fmt::Debug for SseOpcode {
let name = match self { let name = match self {
SseOpcode::Addss => "addss", SseOpcode::Addss => "addss",
SseOpcode::Addsd => "addsd", SseOpcode::Addsd => "addsd",
SseOpcode::Andpd => "andpd",
SseOpcode::Andps => "andps", SseOpcode::Andps => "andps",
SseOpcode::Andnps => "andnps", SseOpcode::Andnps => "andnps",
SseOpcode::Comiss => "comiss", SseOpcode::Comiss => "comiss",
@@ -465,6 +470,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Movsd => "movsd", SseOpcode::Movsd => "movsd",
SseOpcode::Mulss => "mulss", SseOpcode::Mulss => "mulss",
SseOpcode::Mulsd => "mulsd", SseOpcode::Mulsd => "mulsd",
SseOpcode::Orpd => "orpd",
SseOpcode::Orps => "orps", SseOpcode::Orps => "orps",
SseOpcode::Rcpss => "rcpss", SseOpcode::Rcpss => "rcpss",
SseOpcode::Roundss => "roundss", SseOpcode::Roundss => "roundss",

View File

@@ -1530,10 +1530,12 @@ pub(crate) fn emit(
let (prefix, opcode) = match op { let (prefix, opcode) = match op {
SseOpcode::Addss => (LegacyPrefix::_F3, 0x0F58), SseOpcode::Addss => (LegacyPrefix::_F3, 0x0F58),
SseOpcode::Addsd => (LegacyPrefix::_F2, 0x0F58), SseOpcode::Addsd => (LegacyPrefix::_F2, 0x0F58),
SseOpcode::Andpd => (LegacyPrefix::_66, 0x0F54),
SseOpcode::Andps => (LegacyPrefix::None, 0x0F54), SseOpcode::Andps => (LegacyPrefix::None, 0x0F54),
SseOpcode::Andnps => (LegacyPrefix::None, 0x0F55), SseOpcode::Andnps => (LegacyPrefix::None, 0x0F55),
SseOpcode::Mulss => (LegacyPrefix::_F3, 0x0F59), SseOpcode::Mulss => (LegacyPrefix::_F3, 0x0F59),
SseOpcode::Mulsd => (LegacyPrefix::_F2, 0x0F59), SseOpcode::Mulsd => (LegacyPrefix::_F2, 0x0F59),
SseOpcode::Orpd => (LegacyPrefix::_66, 0x0F56),
SseOpcode::Orps => (LegacyPrefix::None, 0x0F56), SseOpcode::Orps => (LegacyPrefix::None, 0x0F56),
SseOpcode::Subss => (LegacyPrefix::_F3, 0x0F5C), SseOpcode::Subss => (LegacyPrefix::_F3, 0x0F5C),
SseOpcode::Subsd => (LegacyPrefix::_F2, 0x0F5C), SseOpcode::Subsd => (LegacyPrefix::_F2, 0x0F5C),
@@ -1557,6 +1559,92 @@ pub(crate) fn emit(
} }
} }
Inst::XmmMinMaxSeq {
size,
is_min,
lhs,
rhs_dst,
} => {
// Generates the following sequence:
// cmpss/cmpsd %lhs, %rhs_dst
// jnz do_min_max
// jp propagate_nan
//
// ;; ordered and equal: propagate the sign bit (for -0 vs 0):
// {and,or}{ss,sd} %lhs, %rhs_dst
// j done
//
// ;; to get the desired NaN behavior (signalling NaN transformed into a quiet NaN, the
// NaN value is returned), we add both inputs.
// propagate_nan:
// add{ss,sd} %lhs, %rhs_dst
// j done
//
// do_min_max:
// min{ss,sd} %lhs, %rhs_dst
//
// done:
let done = sink.get_label();
let propagate_nan = sink.get_label();
let do_min_max = sink.get_label();
let (add_op, cmp_op, and_op, or_op, min_max_op) = match size {
OperandSize::Size32 => (
SseOpcode::Addss,
SseOpcode::Ucomiss,
SseOpcode::Andps,
SseOpcode::Orps,
if *is_min {
SseOpcode::Minss
} else {
SseOpcode::Maxss
},
),
OperandSize::Size64 => (
SseOpcode::Addsd,
SseOpcode::Ucomisd,
SseOpcode::Andpd,
SseOpcode::Orpd,
if *is_min {
SseOpcode::Minsd
} else {
SseOpcode::Maxsd
},
),
};
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(*lhs), rhs_dst.to_reg());
inst.emit(sink, flags, state);
one_way_jmp(sink, CC::NZ, do_min_max);
one_way_jmp(sink, CC::P, propagate_nan);
// Ordered and equal. The operands are bit-identical unless they are zero
// and negative zero. These instructions merge the sign bits in that
// case, and are no-ops otherwise.
let op = if *is_min { or_op } else { and_op };
let inst = Inst::xmm_rm_r(op, RegMem::reg(*lhs), *rhs_dst);
inst.emit(sink, flags, state);
let inst = Inst::jmp_known(BranchTarget::Label(done));
inst.emit(sink, flags, state);
// x86's min/max are not symmetric; if either operand is a NaN, they return the
// read-only operand: perform an addition between the two operands, which has the
// desired NaN propagation effects.
sink.bind_label(propagate_nan);
let inst = Inst::xmm_rm_r(add_op, RegMem::reg(*lhs), *rhs_dst);
inst.emit(sink, flags, state);
one_way_jmp(sink, CC::P, done);
sink.bind_label(do_min_max);
let inst = Inst::xmm_rm_r(min_max_op, RegMem::reg(*lhs), *rhs_dst);
inst.emit(sink, flags, state);
sink.bind_label(done);
}
Inst::Xmm_Mov_R_M { Inst::Xmm_Mov_R_M {
op, op,
src, src,

View File

@@ -278,6 +278,14 @@ pub enum Inst {
srcloc: SourceLoc, srcloc: SourceLoc,
}, },
/// A sequence to compute min/max with the proper NaN semantics for xmm registers.
XmmMinMaxSeq {
size: OperandSize,
is_min: bool,
lhs: Reg,
rhs_dst: Writable<Reg>,
},
/// XMM (scalar) conditional move. /// XMM (scalar) conditional move.
/// Overwrites the destination register if cc is set. /// Overwrites the destination register if cc is set.
XmmCmove { XmmCmove {
@@ -629,6 +637,22 @@ impl Inst {
} }
} }
pub(crate) fn xmm_min_max_seq(
size: OperandSize,
is_min: bool,
lhs: Reg,
rhs_dst: Writable<Reg>,
) -> Inst {
debug_assert_eq!(lhs.get_class(), RegClass::V128);
debug_assert_eq!(rhs_dst.to_reg().get_class(), RegClass::V128);
Inst::XmmMinMaxSeq {
size,
is_min,
lhs,
rhs_dst,
}
}
pub(crate) fn movzx_rm_r( pub(crate) fn movzx_rm_r(
ext_mode: ExtMode, ext_mode: ExtMode,
src: RegMem, src: RegMem,
@@ -980,6 +1004,29 @@ impl ShowWithRRU for Inst {
show_ireg_sized(dst.to_reg(), mb_rru, 8), show_ireg_sized(dst.to_reg(), mb_rru, 8),
), ),
Inst::XmmMinMaxSeq {
lhs,
rhs_dst,
is_min,
size,
} => format!(
"{} {}, {}",
ljustify2(
if *is_min {
"xmm min seq ".to_string()
} else {
"xmm max seq ".to_string()
},
match size {
OperandSize::Size32 => "f32",
OperandSize::Size64 => "f64",
}
.into()
),
show_ireg_sized(*lhs, mb_rru, 8),
show_ireg_sized(rhs_dst.to_reg(), mb_rru, 8),
),
Inst::XmmToGpr { Inst::XmmToGpr {
op, op,
src, src,
@@ -1333,6 +1380,10 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
src.get_regs_as_uses(collector); src.get_regs_as_uses(collector);
collector.add_mod(*dst); collector.add_mod(*dst);
} }
Inst::XmmMinMaxSeq { lhs, rhs_dst, .. } => {
collector.add_use(*lhs);
collector.add_mod(*rhs_dst);
}
Inst::Xmm_Mov_R_M { src, dst, .. } => { Inst::Xmm_Mov_R_M { src, dst, .. } => {
collector.add_use(*src); collector.add_use(*src);
dst.get_regs_as_uses(collector); dst.get_regs_as_uses(collector);
@@ -1579,6 +1630,14 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
src.map_uses(mapper); src.map_uses(mapper);
map_mod(mapper, dst); map_mod(mapper, dst);
} }
Inst::XmmMinMaxSeq {
ref mut lhs,
ref mut rhs_dst,
..
} => {
map_use(mapper, lhs);
map_mod(mapper, rhs_dst);
}
Inst::Xmm_Mov_R_M { Inst::Xmm_Mov_R_M {
ref mut src, ref mut src,
ref mut dst, ref mut dst,

View File

@@ -924,6 +924,21 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::xmm_rm_r(sse_op, RegMem::reg(rhs), dst)); ctx.emit(Inst::xmm_rm_r(sse_op, RegMem::reg(rhs), dst));
} }
Opcode::Fmin | Opcode::Fmax => {
let lhs = input_to_reg(ctx, inputs[0]);
let rhs = input_to_reg(ctx, inputs[1]);
let dst = output_to_reg(ctx, outputs[0]);
let is_min = op == Opcode::Fmin;
let output_ty = ty.unwrap();
ctx.emit(Inst::gen_move(dst, rhs, output_ty));
let op_size = match output_ty {
F32 => OperandSize::Size32,
F64 => OperandSize::Size64,
_ => panic!("unexpected type {:?} for fmin/fmax", output_ty),
};
ctx.emit(Inst::xmm_min_max_seq(op_size, is_min, lhs, dst));
}
Opcode::Sqrt => { Opcode::Sqrt => {
let src = input_to_reg_mem(ctx, inputs[0]); let src = input_to_reg_mem(ctx, inputs[0]);
let dst = output_to_reg(ctx, outputs[0]); let dst = output_to_reg(ctx, outputs[0]);