machinst x64: extend Copysign to work for f64 inputs too
This commit is contained in:
@@ -339,6 +339,7 @@ pub enum SseOpcode {
|
|||||||
Andps,
|
Andps,
|
||||||
Andpd,
|
Andpd,
|
||||||
Andnps,
|
Andnps,
|
||||||
|
Andnpd,
|
||||||
Comiss,
|
Comiss,
|
||||||
Comisd,
|
Comisd,
|
||||||
Cmpss,
|
Cmpss,
|
||||||
@@ -359,6 +360,7 @@ pub enum SseOpcode {
|
|||||||
Minss,
|
Minss,
|
||||||
Minsd,
|
Minsd,
|
||||||
Movaps,
|
Movaps,
|
||||||
|
Movapd,
|
||||||
Movd,
|
Movd,
|
||||||
Movq,
|
Movq,
|
||||||
Movss,
|
Movss,
|
||||||
@@ -410,6 +412,7 @@ impl SseOpcode {
|
|||||||
|
|
||||||
SseOpcode::Addsd
|
SseOpcode::Addsd
|
||||||
| SseOpcode::Andpd
|
| SseOpcode::Andpd
|
||||||
|
| SseOpcode::Andnpd
|
||||||
| SseOpcode::Cvtsd2ss
|
| SseOpcode::Cvtsd2ss
|
||||||
| SseOpcode::Cvtsd2si
|
| SseOpcode::Cvtsd2si
|
||||||
| SseOpcode::Cvtsi2sd
|
| SseOpcode::Cvtsi2sd
|
||||||
@@ -418,6 +421,7 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Divsd
|
| SseOpcode::Divsd
|
||||||
| SseOpcode::Maxsd
|
| SseOpcode::Maxsd
|
||||||
| SseOpcode::Minsd
|
| SseOpcode::Minsd
|
||||||
|
| SseOpcode::Movapd
|
||||||
| SseOpcode::Movd
|
| SseOpcode::Movd
|
||||||
| SseOpcode::Movq
|
| SseOpcode::Movq
|
||||||
| SseOpcode::Movsd
|
| SseOpcode::Movsd
|
||||||
@@ -451,6 +455,7 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Andpd => "andpd",
|
SseOpcode::Andpd => "andpd",
|
||||||
SseOpcode::Andps => "andps",
|
SseOpcode::Andps => "andps",
|
||||||
SseOpcode::Andnps => "andnps",
|
SseOpcode::Andnps => "andnps",
|
||||||
|
SseOpcode::Andnpd => "andnpd",
|
||||||
SseOpcode::Comiss => "comiss",
|
SseOpcode::Comiss => "comiss",
|
||||||
SseOpcode::Comisd => "comisd",
|
SseOpcode::Comisd => "comisd",
|
||||||
SseOpcode::Cvtsd2ss => "cvtsd2ss",
|
SseOpcode::Cvtsd2ss => "cvtsd2ss",
|
||||||
@@ -468,6 +473,7 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Minss => "minss",
|
SseOpcode::Minss => "minss",
|
||||||
SseOpcode::Minsd => "minsd",
|
SseOpcode::Minsd => "minsd",
|
||||||
SseOpcode::Movaps => "movaps",
|
SseOpcode::Movaps => "movaps",
|
||||||
|
SseOpcode::Movapd => "movapd",
|
||||||
SseOpcode::Movd => "movd",
|
SseOpcode::Movd => "movd",
|
||||||
SseOpcode::Movq => "movq",
|
SseOpcode::Movq => "movq",
|
||||||
SseOpcode::Movss => "movss",
|
SseOpcode::Movss => "movss",
|
||||||
|
|||||||
@@ -1497,6 +1497,7 @@ pub(crate) fn emit(
|
|||||||
|
|
||||||
let (prefix, opcode) = match op {
|
let (prefix, opcode) = match op {
|
||||||
SseOpcode::Movaps => (LegacyPrefix::None, 0x0F28),
|
SseOpcode::Movaps => (LegacyPrefix::None, 0x0F28),
|
||||||
|
SseOpcode::Movapd => (LegacyPrefix::_66, 0x0F28),
|
||||||
SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F10),
|
SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F10),
|
||||||
SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F10),
|
SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F10),
|
||||||
SseOpcode::Sqrtss => (LegacyPrefix::_F3, 0x0F51),
|
SseOpcode::Sqrtss => (LegacyPrefix::_F3, 0x0F51),
|
||||||
@@ -1533,6 +1534,7 @@ pub(crate) fn emit(
|
|||||||
SseOpcode::Andpd => (LegacyPrefix::_66, 0x0F54),
|
SseOpcode::Andpd => (LegacyPrefix::_66, 0x0F54),
|
||||||
SseOpcode::Andps => (LegacyPrefix::None, 0x0F54),
|
SseOpcode::Andps => (LegacyPrefix::None, 0x0F54),
|
||||||
SseOpcode::Andnps => (LegacyPrefix::None, 0x0F55),
|
SseOpcode::Andnps => (LegacyPrefix::None, 0x0F55),
|
||||||
|
SseOpcode::Andnpd => (LegacyPrefix::_66, 0x0F55),
|
||||||
SseOpcode::Mulss => (LegacyPrefix::_F3, 0x0F59),
|
SseOpcode::Mulss => (LegacyPrefix::_F3, 0x0F59),
|
||||||
SseOpcode::Mulsd => (LegacyPrefix::_F2, 0x0F59),
|
SseOpcode::Mulsd => (LegacyPrefix::_F2, 0x0F59),
|
||||||
SseOpcode::Orpd => (LegacyPrefix::_66, 0x0F56),
|
SseOpcode::Orpd => (LegacyPrefix::_66, 0x0F56),
|
||||||
|
|||||||
@@ -1228,50 +1228,61 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
let dst = output_to_reg(ctx, outputs[0]);
|
let dst = output_to_reg(ctx, outputs[0]);
|
||||||
let lhs = input_to_reg(ctx, inputs[0]);
|
let lhs = input_to_reg(ctx, inputs[0]);
|
||||||
let rhs = input_to_reg(ctx, inputs[1]);
|
let rhs = input_to_reg(ctx, inputs[1]);
|
||||||
if !flt_ty_is_64(ty.unwrap()) {
|
|
||||||
// movabs 0x8000_0000, tmp_gpr1
|
let ty = ty.unwrap();
|
||||||
// movd tmp_gpr1, tmp_xmm1
|
|
||||||
// movaps tmp_xmm1, dst
|
// We're going to generate the following sequence:
|
||||||
// andnps src_1, dst
|
//
|
||||||
// movss src_2, tmp_xmm2
|
// movabs $INT_MIN, tmp_gpr1
|
||||||
// andps tmp_xmm1, tmp_xmm2
|
// mov{d,q} tmp_gpr1, tmp_xmm1
|
||||||
// orps tmp_xmm2, dst
|
// movap{s,d} tmp_xmm1, dst
|
||||||
let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, I32);
|
// andnp{s,d} src_1, dst
|
||||||
|
// movap{s,d} src_2, tmp_xmm2
|
||||||
|
// andp{s,d} tmp_xmm1, tmp_xmm2
|
||||||
|
// orp{s,d} tmp_xmm2, dst
|
||||||
|
|
||||||
let tmp_xmm1 = ctx.alloc_tmp(RegClass::V128, F32);
|
let tmp_xmm1 = ctx.alloc_tmp(RegClass::V128, F32);
|
||||||
let tmp_xmm2 = ctx.alloc_tmp(RegClass::V128, F32);
|
let tmp_xmm2 = ctx.alloc_tmp(RegClass::V128, F32);
|
||||||
ctx.emit(Inst::imm_r(true, 0x8000_0000, tmp_gpr1));
|
|
||||||
ctx.emit(Inst::gpr_to_xmm(
|
let (sign_bit_cst, mov_op, and_not_op, and_op, or_op) = match ty {
|
||||||
SseOpcode::Movd,
|
F32 => (
|
||||||
RegMem::reg(tmp_gpr1.to_reg()),
|
0x8000_0000,
|
||||||
OperandSize::Size32,
|
|
||||||
tmp_xmm1,
|
|
||||||
));
|
|
||||||
ctx.emit(Inst::xmm_mov(
|
|
||||||
SseOpcode::Movaps,
|
SseOpcode::Movaps,
|
||||||
RegMem::reg(tmp_xmm1.to_reg()),
|
SseOpcode::Andnps,
|
||||||
dst,
|
|
||||||
None,
|
|
||||||
));
|
|
||||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Andnps, RegMem::reg(lhs), dst));
|
|
||||||
ctx.emit(Inst::xmm_mov(
|
|
||||||
SseOpcode::Movss,
|
|
||||||
RegMem::reg(rhs),
|
|
||||||
tmp_xmm2,
|
|
||||||
None,
|
|
||||||
));
|
|
||||||
ctx.emit(Inst::xmm_rm_r(
|
|
||||||
SseOpcode::Andps,
|
SseOpcode::Andps,
|
||||||
|
SseOpcode::Orps,
|
||||||
|
),
|
||||||
|
F64 => (
|
||||||
|
0x8000_0000_0000_0000,
|
||||||
|
SseOpcode::Movapd,
|
||||||
|
SseOpcode::Andnpd,
|
||||||
|
SseOpcode::Andpd,
|
||||||
|
SseOpcode::Orpd,
|
||||||
|
),
|
||||||
|
_ => {
|
||||||
|
panic!("unexpected type {:?} for copysign", ty);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
for inst in Inst::gen_constant(tmp_xmm1, sign_bit_cst, ty, |reg_class, ty| {
|
||||||
|
ctx.alloc_tmp(reg_class, ty)
|
||||||
|
}) {
|
||||||
|
ctx.emit(inst);
|
||||||
|
}
|
||||||
|
ctx.emit(Inst::xmm_mov(
|
||||||
|
mov_op,
|
||||||
|
RegMem::reg(tmp_xmm1.to_reg()),
|
||||||
|
dst,
|
||||||
|
None,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(and_not_op, RegMem::reg(lhs), dst));
|
||||||
|
ctx.emit(Inst::xmm_mov(mov_op, RegMem::reg(rhs), tmp_xmm2, None));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
and_op,
|
||||||
RegMem::reg(tmp_xmm1.to_reg()),
|
RegMem::reg(tmp_xmm1.to_reg()),
|
||||||
tmp_xmm2,
|
tmp_xmm2,
|
||||||
));
|
));
|
||||||
ctx.emit(Inst::xmm_rm_r(
|
ctx.emit(Inst::xmm_rm_r(or_op, RegMem::reg(tmp_xmm2.to_reg()), dst));
|
||||||
SseOpcode::Orps,
|
|
||||||
RegMem::reg(tmp_xmm2.to_reg()),
|
|
||||||
dst,
|
|
||||||
));
|
|
||||||
} else {
|
|
||||||
unimplemented!("{:?} for non 32-bit destination is not supported", op);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Ceil | Opcode::Floor | Opcode::Nearest | Opcode::Trunc => {
|
Opcode::Ceil | Opcode::Floor | Opcode::Nearest | Opcode::Trunc => {
|
||||||
|
|||||||
Reference in New Issue
Block a user