machinst x64: implement float min/max with the right semantics
This commit is contained in:
@@ -337,6 +337,7 @@ pub enum SseOpcode {
|
|||||||
Addss,
|
Addss,
|
||||||
Addsd,
|
Addsd,
|
||||||
Andps,
|
Andps,
|
||||||
|
Andpd,
|
||||||
Andnps,
|
Andnps,
|
||||||
Comiss,
|
Comiss,
|
||||||
Comisd,
|
Comisd,
|
||||||
@@ -365,6 +366,7 @@ pub enum SseOpcode {
|
|||||||
Mulss,
|
Mulss,
|
||||||
Mulsd,
|
Mulsd,
|
||||||
Orps,
|
Orps,
|
||||||
|
Orpd,
|
||||||
Rcpss,
|
Rcpss,
|
||||||
Roundss,
|
Roundss,
|
||||||
Roundsd,
|
Roundsd,
|
||||||
@@ -404,6 +406,7 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Cmpss => SSE,
|
| SseOpcode::Cmpss => SSE,
|
||||||
|
|
||||||
SseOpcode::Addsd
|
SseOpcode::Addsd
|
||||||
|
| SseOpcode::Andpd
|
||||||
| SseOpcode::Cvtsd2ss
|
| SseOpcode::Cvtsd2ss
|
||||||
| SseOpcode::Cvtsd2si
|
| SseOpcode::Cvtsd2si
|
||||||
| SseOpcode::Cvtsi2sd
|
| SseOpcode::Cvtsi2sd
|
||||||
@@ -416,6 +419,7 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Movq
|
| SseOpcode::Movq
|
||||||
| SseOpcode::Movsd
|
| SseOpcode::Movsd
|
||||||
| SseOpcode::Mulsd
|
| SseOpcode::Mulsd
|
||||||
|
| SseOpcode::Orpd
|
||||||
| SseOpcode::Sqrtsd
|
| SseOpcode::Sqrtsd
|
||||||
| SseOpcode::Subsd
|
| SseOpcode::Subsd
|
||||||
| SseOpcode::Ucomisd
|
| SseOpcode::Ucomisd
|
||||||
@@ -440,6 +444,7 @@ impl fmt::Debug for SseOpcode {
|
|||||||
let name = match self {
|
let name = match self {
|
||||||
SseOpcode::Addss => "addss",
|
SseOpcode::Addss => "addss",
|
||||||
SseOpcode::Addsd => "addsd",
|
SseOpcode::Addsd => "addsd",
|
||||||
|
SseOpcode::Andpd => "andpd",
|
||||||
SseOpcode::Andps => "andps",
|
SseOpcode::Andps => "andps",
|
||||||
SseOpcode::Andnps => "andnps",
|
SseOpcode::Andnps => "andnps",
|
||||||
SseOpcode::Comiss => "comiss",
|
SseOpcode::Comiss => "comiss",
|
||||||
@@ -465,6 +470,7 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Movsd => "movsd",
|
SseOpcode::Movsd => "movsd",
|
||||||
SseOpcode::Mulss => "mulss",
|
SseOpcode::Mulss => "mulss",
|
||||||
SseOpcode::Mulsd => "mulsd",
|
SseOpcode::Mulsd => "mulsd",
|
||||||
|
SseOpcode::Orpd => "orpd",
|
||||||
SseOpcode::Orps => "orps",
|
SseOpcode::Orps => "orps",
|
||||||
SseOpcode::Rcpss => "rcpss",
|
SseOpcode::Rcpss => "rcpss",
|
||||||
SseOpcode::Roundss => "roundss",
|
SseOpcode::Roundss => "roundss",
|
||||||
|
|||||||
@@ -1530,10 +1530,12 @@ pub(crate) fn emit(
|
|||||||
let (prefix, opcode) = match op {
|
let (prefix, opcode) = match op {
|
||||||
SseOpcode::Addss => (LegacyPrefix::_F3, 0x0F58),
|
SseOpcode::Addss => (LegacyPrefix::_F3, 0x0F58),
|
||||||
SseOpcode::Addsd => (LegacyPrefix::_F2, 0x0F58),
|
SseOpcode::Addsd => (LegacyPrefix::_F2, 0x0F58),
|
||||||
|
SseOpcode::Andpd => (LegacyPrefix::_66, 0x0F54),
|
||||||
SseOpcode::Andps => (LegacyPrefix::None, 0x0F54),
|
SseOpcode::Andps => (LegacyPrefix::None, 0x0F54),
|
||||||
SseOpcode::Andnps => (LegacyPrefix::None, 0x0F55),
|
SseOpcode::Andnps => (LegacyPrefix::None, 0x0F55),
|
||||||
SseOpcode::Mulss => (LegacyPrefix::_F3, 0x0F59),
|
SseOpcode::Mulss => (LegacyPrefix::_F3, 0x0F59),
|
||||||
SseOpcode::Mulsd => (LegacyPrefix::_F2, 0x0F59),
|
SseOpcode::Mulsd => (LegacyPrefix::_F2, 0x0F59),
|
||||||
|
SseOpcode::Orpd => (LegacyPrefix::_66, 0x0F56),
|
||||||
SseOpcode::Orps => (LegacyPrefix::None, 0x0F56),
|
SseOpcode::Orps => (LegacyPrefix::None, 0x0F56),
|
||||||
SseOpcode::Subss => (LegacyPrefix::_F3, 0x0F5C),
|
SseOpcode::Subss => (LegacyPrefix::_F3, 0x0F5C),
|
||||||
SseOpcode::Subsd => (LegacyPrefix::_F2, 0x0F5C),
|
SseOpcode::Subsd => (LegacyPrefix::_F2, 0x0F5C),
|
||||||
@@ -1557,6 +1559,92 @@ pub(crate) fn emit(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Inst::XmmMinMaxSeq {
|
||||||
|
size,
|
||||||
|
is_min,
|
||||||
|
lhs,
|
||||||
|
rhs_dst,
|
||||||
|
} => {
|
||||||
|
// Generates the following sequence:
|
||||||
|
// ucomiss/ucomisd %lhs, %rhs_dst
|
||||||
|
// jnz do_min_max
|
||||||
|
// jp propagate_nan
|
||||||
|
//
|
||||||
|
// ;; ordered and equal: propagate the sign bit (for -0 vs 0):
|
||||||
|
// {and,or}{ps,pd} %lhs, %rhs_dst
|
||||||
|
// j done
|
||||||
|
//
|
||||||
|
// ;; to get the desired NaN behavior (signalling NaN transformed into a quiet NaN, the
|
||||||
|
// NaN value is returned), we add both inputs.
|
||||||
|
// propagate_nan:
|
||||||
|
// add{ss,sd} %lhs, %rhs_dst
|
||||||
|
// jp done ;; always taken: reached only via `jp`, and add{ss,sd} leaves the flags untouched
|
||||||
|
//
|
||||||
|
// do_min_max:
|
||||||
|
// {min,max}{ss,sd} %lhs, %rhs_dst
|
||||||
|
//
|
||||||
|
// done:
|
||||||
|
let done = sink.get_label();
|
||||||
|
let propagate_nan = sink.get_label();
|
||||||
|
let do_min_max = sink.get_label();
|
||||||
|
|
||||||
|
let (add_op, cmp_op, and_op, or_op, min_max_op) = match size {
|
||||||
|
OperandSize::Size32 => (
|
||||||
|
SseOpcode::Addss,
|
||||||
|
SseOpcode::Ucomiss,
|
||||||
|
SseOpcode::Andps,
|
||||||
|
SseOpcode::Orps,
|
||||||
|
if *is_min {
|
||||||
|
SseOpcode::Minss
|
||||||
|
} else {
|
||||||
|
SseOpcode::Maxss
|
||||||
|
},
|
||||||
|
),
|
||||||
|
OperandSize::Size64 => (
|
||||||
|
SseOpcode::Addsd,
|
||||||
|
SseOpcode::Ucomisd,
|
||||||
|
SseOpcode::Andpd,
|
||||||
|
SseOpcode::Orpd,
|
||||||
|
if *is_min {
|
||||||
|
SseOpcode::Minsd
|
||||||
|
} else {
|
||||||
|
SseOpcode::Maxsd
|
||||||
|
},
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(*lhs), rhs_dst.to_reg());
|
||||||
|
inst.emit(sink, flags, state);
|
||||||
|
|
||||||
|
one_way_jmp(sink, CC::NZ, do_min_max);
|
||||||
|
one_way_jmp(sink, CC::P, propagate_nan);
|
||||||
|
|
||||||
|
// Ordered and equal. The operands are bit-identical unless they are zero
|
||||||
|
// and negative zero. These instructions merge the sign bits in that
|
||||||
|
// case, and are no-ops otherwise.
|
||||||
|
let op = if *is_min { or_op } else { and_op };
|
||||||
|
let inst = Inst::xmm_rm_r(op, RegMem::reg(*lhs), *rhs_dst);
|
||||||
|
inst.emit(sink, flags, state);
|
||||||
|
|
||||||
|
let inst = Inst::jmp_known(BranchTarget::Label(done));
|
||||||
|
inst.emit(sink, flags, state);
|
||||||
|
|
||||||
|
// x86's min/max are not symmetric; if either operand is a NaN, they return the
|
||||||
|
// read-only operand: perform an addition between the two operands, which has the
|
||||||
|
// desired NaN propagation effects.
|
||||||
|
sink.bind_label(propagate_nan);
|
||||||
|
let inst = Inst::xmm_rm_r(add_op, RegMem::reg(*lhs), *rhs_dst);
|
||||||
|
inst.emit(sink, flags, state);
|
||||||
|
|
||||||
|
one_way_jmp(sink, CC::P, done);
|
||||||
|
|
||||||
|
sink.bind_label(do_min_max);
|
||||||
|
let inst = Inst::xmm_rm_r(min_max_op, RegMem::reg(*lhs), *rhs_dst);
|
||||||
|
inst.emit(sink, flags, state);
|
||||||
|
|
||||||
|
sink.bind_label(done);
|
||||||
|
}
|
||||||
|
|
||||||
Inst::Xmm_Mov_R_M {
|
Inst::Xmm_Mov_R_M {
|
||||||
op,
|
op,
|
||||||
src,
|
src,
|
||||||
|
|||||||
@@ -278,6 +278,14 @@ pub enum Inst {
|
|||||||
srcloc: SourceLoc,
|
srcloc: SourceLoc,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/// A sequence to compute min/max with the proper NaN semantics for xmm registers.
|
||||||
|
XmmMinMaxSeq {
|
||||||
|
size: OperandSize,
|
||||||
|
is_min: bool,
|
||||||
|
lhs: Reg,
|
||||||
|
rhs_dst: Writable<Reg>,
|
||||||
|
},
|
||||||
|
|
||||||
/// XMM (scalar) conditional move.
|
/// XMM (scalar) conditional move.
|
||||||
/// Overwrites the destination register if cc is set.
|
/// Overwrites the destination register if cc is set.
|
||||||
XmmCmove {
|
XmmCmove {
|
||||||
@@ -629,6 +637,22 @@ impl Inst {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Builds an `Inst::XmmMinMaxSeq` pseudo-instruction from its four fields.
///
/// * `size` - operand size stored in the instruction (`OperandSize`).
/// * `is_min` - stored as-is; `true` requests a minimum, `false` a maximum.
/// * `lhs` - first input register.
/// * `rhs_dst` - second input register, which also receives the result.
///
/// In debug builds, asserts that both registers belong to `RegClass::V128`.
pub(crate) fn xmm_min_max_seq(
|
||||||
|
size: OperandSize,
|
||||||
|
is_min: bool,
|
||||||
|
lhs: Reg,
|
||||||
|
rhs_dst: Writable<Reg>,
|
||||||
|
) -> Inst {
|
||||||
|
// Both operands must be vector-class (V128) registers.
debug_assert_eq!(lhs.get_class(), RegClass::V128);
|
||||||
|
debug_assert_eq!(rhs_dst.to_reg().get_class(), RegClass::V128);
|
||||||
|
Inst::XmmMinMaxSeq {
|
||||||
|
size,
|
||||||
|
is_min,
|
||||||
|
lhs,
|
||||||
|
rhs_dst,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn movzx_rm_r(
|
pub(crate) fn movzx_rm_r(
|
||||||
ext_mode: ExtMode,
|
ext_mode: ExtMode,
|
||||||
src: RegMem,
|
src: RegMem,
|
||||||
@@ -980,6 +1004,29 @@ impl ShowWithRRU for Inst {
|
|||||||
show_ireg_sized(dst.to_reg(), mb_rru, 8),
|
show_ireg_sized(dst.to_reg(), mb_rru, 8),
|
||||||
),
|
),
|
||||||
|
|
||||||
|
Inst::XmmMinMaxSeq {
|
||||||
|
lhs,
|
||||||
|
rhs_dst,
|
||||||
|
is_min,
|
||||||
|
size,
|
||||||
|
} => format!(
|
||||||
|
"{} {}, {}",
|
||||||
|
ljustify2(
|
||||||
|
if *is_min {
|
||||||
|
"xmm min seq ".to_string()
|
||||||
|
} else {
|
||||||
|
"xmm max seq ".to_string()
|
||||||
|
},
|
||||||
|
match size {
|
||||||
|
OperandSize::Size32 => "f32",
|
||||||
|
OperandSize::Size64 => "f64",
|
||||||
|
}
|
||||||
|
.into()
|
||||||
|
),
|
||||||
|
show_ireg_sized(*lhs, mb_rru, 8),
|
||||||
|
show_ireg_sized(rhs_dst.to_reg(), mb_rru, 8),
|
||||||
|
),
|
||||||
|
|
||||||
Inst::XmmToGpr {
|
Inst::XmmToGpr {
|
||||||
op,
|
op,
|
||||||
src,
|
src,
|
||||||
@@ -1333,6 +1380,10 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
|||||||
src.get_regs_as_uses(collector);
|
src.get_regs_as_uses(collector);
|
||||||
collector.add_mod(*dst);
|
collector.add_mod(*dst);
|
||||||
}
|
}
|
||||||
|
Inst::XmmMinMaxSeq { lhs, rhs_dst, .. } => {
|
||||||
|
collector.add_use(*lhs);
|
||||||
|
collector.add_mod(*rhs_dst);
|
||||||
|
}
|
||||||
Inst::Xmm_Mov_R_M { src, dst, .. } => {
|
Inst::Xmm_Mov_R_M { src, dst, .. } => {
|
||||||
collector.add_use(*src);
|
collector.add_use(*src);
|
||||||
dst.get_regs_as_uses(collector);
|
dst.get_regs_as_uses(collector);
|
||||||
@@ -1579,6 +1630,14 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
|||||||
src.map_uses(mapper);
|
src.map_uses(mapper);
|
||||||
map_mod(mapper, dst);
|
map_mod(mapper, dst);
|
||||||
}
|
}
|
||||||
|
Inst::XmmMinMaxSeq {
|
||||||
|
ref mut lhs,
|
||||||
|
ref mut rhs_dst,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
|
map_use(mapper, lhs);
|
||||||
|
map_mod(mapper, rhs_dst);
|
||||||
|
}
|
||||||
Inst::Xmm_Mov_R_M {
|
Inst::Xmm_Mov_R_M {
|
||||||
ref mut src,
|
ref mut src,
|
||||||
ref mut dst,
|
ref mut dst,
|
||||||
|
|||||||
@@ -924,6 +924,21 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
ctx.emit(Inst::xmm_rm_r(sse_op, RegMem::reg(rhs), dst));
|
ctx.emit(Inst::xmm_rm_r(sse_op, RegMem::reg(rhs), dst));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Opcode::Fmin | Opcode::Fmax => {
|
||||||
|
let lhs = input_to_reg(ctx, inputs[0]);
|
||||||
|
let rhs = input_to_reg(ctx, inputs[1]);
|
||||||
|
let dst = output_to_reg(ctx, outputs[0]);
|
||||||
|
let is_min = op == Opcode::Fmin;
|
||||||
|
let output_ty = ty.unwrap();
|
||||||
|
ctx.emit(Inst::gen_move(dst, rhs, output_ty));
|
||||||
|
let op_size = match output_ty {
|
||||||
|
F32 => OperandSize::Size32,
|
||||||
|
F64 => OperandSize::Size64,
|
||||||
|
_ => panic!("unexpected type {:?} for fmin/fmax", output_ty),
|
||||||
|
};
|
||||||
|
ctx.emit(Inst::xmm_min_max_seq(op_size, is_min, lhs, dst));
|
||||||
|
}
|
||||||
|
|
||||||
Opcode::Sqrt => {
|
Opcode::Sqrt => {
|
||||||
let src = input_to_reg_mem(ctx, inputs[0]);
|
let src = input_to_reg_mem(ctx, inputs[0]);
|
||||||
let dst = output_to_reg(ctx, outputs[0]);
|
let dst = output_to_reg(ctx, outputs[0]);
|
||||||
|
|||||||
Reference in New Issue
Block a user