machinst x64: extend Copysign to work for f64 inputs too

This commit is contained in:
Benjamin Bouvier
2020-07-21 18:54:50 +02:00
parent 694af3aec2
commit aa103698d4
3 changed files with 59 additions and 40 deletions

View File

@@ -339,6 +339,7 @@ pub enum SseOpcode {
Andps, Andps,
Andpd, Andpd,
Andnps, Andnps,
Andnpd,
Comiss, Comiss,
Comisd, Comisd,
Cmpss, Cmpss,
@@ -359,6 +360,7 @@ pub enum SseOpcode {
Minss, Minss,
Minsd, Minsd,
Movaps, Movaps,
Movapd,
Movd, Movd,
Movq, Movq,
Movss, Movss,
@@ -410,6 +412,7 @@ impl SseOpcode {
SseOpcode::Addsd SseOpcode::Addsd
| SseOpcode::Andpd | SseOpcode::Andpd
| SseOpcode::Andnpd
| SseOpcode::Cvtsd2ss | SseOpcode::Cvtsd2ss
| SseOpcode::Cvtsd2si | SseOpcode::Cvtsd2si
| SseOpcode::Cvtsi2sd | SseOpcode::Cvtsi2sd
@@ -418,6 +421,7 @@ impl SseOpcode {
| SseOpcode::Divsd | SseOpcode::Divsd
| SseOpcode::Maxsd | SseOpcode::Maxsd
| SseOpcode::Minsd | SseOpcode::Minsd
| SseOpcode::Movapd
| SseOpcode::Movd | SseOpcode::Movd
| SseOpcode::Movq | SseOpcode::Movq
| SseOpcode::Movsd | SseOpcode::Movsd
@@ -451,6 +455,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Andpd => "andpd", SseOpcode::Andpd => "andpd",
SseOpcode::Andps => "andps", SseOpcode::Andps => "andps",
SseOpcode::Andnps => "andnps", SseOpcode::Andnps => "andnps",
SseOpcode::Andnpd => "andnpd",
SseOpcode::Comiss => "comiss", SseOpcode::Comiss => "comiss",
SseOpcode::Comisd => "comisd", SseOpcode::Comisd => "comisd",
SseOpcode::Cvtsd2ss => "cvtsd2ss", SseOpcode::Cvtsd2ss => "cvtsd2ss",
@@ -468,6 +473,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Minss => "minss", SseOpcode::Minss => "minss",
SseOpcode::Minsd => "minsd", SseOpcode::Minsd => "minsd",
SseOpcode::Movaps => "movaps", SseOpcode::Movaps => "movaps",
SseOpcode::Movapd => "movapd",
SseOpcode::Movd => "movd", SseOpcode::Movd => "movd",
SseOpcode::Movq => "movq", SseOpcode::Movq => "movq",
SseOpcode::Movss => "movss", SseOpcode::Movss => "movss",

View File

@@ -1497,6 +1497,7 @@ pub(crate) fn emit(
let (prefix, opcode) = match op { let (prefix, opcode) = match op {
SseOpcode::Movaps => (LegacyPrefix::None, 0x0F28), SseOpcode::Movaps => (LegacyPrefix::None, 0x0F28),
SseOpcode::Movapd => (LegacyPrefix::_66, 0x0F28),
SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F10), SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F10),
SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F10), SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F10),
SseOpcode::Sqrtss => (LegacyPrefix::_F3, 0x0F51), SseOpcode::Sqrtss => (LegacyPrefix::_F3, 0x0F51),
@@ -1533,6 +1534,7 @@ pub(crate) fn emit(
SseOpcode::Andpd => (LegacyPrefix::_66, 0x0F54), SseOpcode::Andpd => (LegacyPrefix::_66, 0x0F54),
SseOpcode::Andps => (LegacyPrefix::None, 0x0F54), SseOpcode::Andps => (LegacyPrefix::None, 0x0F54),
SseOpcode::Andnps => (LegacyPrefix::None, 0x0F55), SseOpcode::Andnps => (LegacyPrefix::None, 0x0F55),
SseOpcode::Andnpd => (LegacyPrefix::_66, 0x0F55),
SseOpcode::Mulss => (LegacyPrefix::_F3, 0x0F59), SseOpcode::Mulss => (LegacyPrefix::_F3, 0x0F59),
SseOpcode::Mulsd => (LegacyPrefix::_F2, 0x0F59), SseOpcode::Mulsd => (LegacyPrefix::_F2, 0x0F59),
SseOpcode::Orpd => (LegacyPrefix::_66, 0x0F56), SseOpcode::Orpd => (LegacyPrefix::_66, 0x0F56),

View File

@@ -1228,50 +1228,61 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let dst = output_to_reg(ctx, outputs[0]); let dst = output_to_reg(ctx, outputs[0]);
let lhs = input_to_reg(ctx, inputs[0]); let lhs = input_to_reg(ctx, inputs[0]);
let rhs = input_to_reg(ctx, inputs[1]); let rhs = input_to_reg(ctx, inputs[1]);
if !flt_ty_is_64(ty.unwrap()) {
// movabs 0x8000_0000, tmp_gpr1 let ty = ty.unwrap();
// movd tmp_gpr1, tmp_xmm1
// movaps tmp_xmm1, dst // We're going to generate the following sequence:
// andnps src_1, dst //
// movss src_2, tmp_xmm2 // movabs $INT_MIN, tmp_gpr1
// andps tmp_xmm1, tmp_xmm2 // mov{d,q} tmp_gpr1, tmp_xmm1
// orps tmp_xmm2, dst // movap{s,d} tmp_xmm1, dst
let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, I32); // andnp{s,d} src_1, dst
// movap{s,d} src_2, tmp_xmm2
// andp{s,d} tmp_xmm1, tmp_xmm2
// orp{s,d} tmp_xmm2, dst
let tmp_xmm1 = ctx.alloc_tmp(RegClass::V128, F32); let tmp_xmm1 = ctx.alloc_tmp(RegClass::V128, F32);
let tmp_xmm2 = ctx.alloc_tmp(RegClass::V128, F32); let tmp_xmm2 = ctx.alloc_tmp(RegClass::V128, F32);
ctx.emit(Inst::imm_r(true, 0x8000_0000, tmp_gpr1));
ctx.emit(Inst::gpr_to_xmm( let (sign_bit_cst, mov_op, and_not_op, and_op, or_op) = match ty {
SseOpcode::Movd, F32 => (
RegMem::reg(tmp_gpr1.to_reg()), 0x8000_0000,
OperandSize::Size32,
tmp_xmm1,
));
ctx.emit(Inst::xmm_mov(
SseOpcode::Movaps, SseOpcode::Movaps,
RegMem::reg(tmp_xmm1.to_reg()), SseOpcode::Andnps,
dst,
None,
));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Andnps, RegMem::reg(lhs), dst));
ctx.emit(Inst::xmm_mov(
SseOpcode::Movss,
RegMem::reg(rhs),
tmp_xmm2,
None,
));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Andps, SseOpcode::Andps,
SseOpcode::Orps,
),
F64 => (
0x8000_0000_0000_0000,
SseOpcode::Movapd,
SseOpcode::Andnpd,
SseOpcode::Andpd,
SseOpcode::Orpd,
),
_ => {
panic!("unexpected type {:?} for copysign", ty);
}
};
for inst in Inst::gen_constant(tmp_xmm1, sign_bit_cst, ty, |reg_class, ty| {
ctx.alloc_tmp(reg_class, ty)
}) {
ctx.emit(inst);
}
ctx.emit(Inst::xmm_mov(
mov_op,
RegMem::reg(tmp_xmm1.to_reg()),
dst,
None,
));
ctx.emit(Inst::xmm_rm_r(and_not_op, RegMem::reg(lhs), dst));
ctx.emit(Inst::xmm_mov(mov_op, RegMem::reg(rhs), tmp_xmm2, None));
ctx.emit(Inst::xmm_rm_r(
and_op,
RegMem::reg(tmp_xmm1.to_reg()), RegMem::reg(tmp_xmm1.to_reg()),
tmp_xmm2, tmp_xmm2,
)); ));
ctx.emit(Inst::xmm_rm_r( ctx.emit(Inst::xmm_rm_r(or_op, RegMem::reg(tmp_xmm2.to_reg()), dst));
SseOpcode::Orps,
RegMem::reg(tmp_xmm2.to_reg()),
dst,
));
} else {
unimplemented!("{:?} for non 32-bit destination is not supported", op);
}
} }
Opcode::Ceil | Opcode::Floor | Opcode::Nearest | Opcode::Trunc => { Opcode::Ceil | Opcode::Floor | Opcode::Nearest | Opcode::Trunc => {