Adds support for converting packed unsigned integers to packed floats
This commit is contained in:
@@ -2241,6 +2241,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
let ty = ty.unwrap();
|
let ty = ty.unwrap();
|
||||||
|
|
||||||
let input_ty = ctx.input_ty(insn, 0);
|
let input_ty = ctx.input_ty(insn, 0);
|
||||||
|
if !ty.is_vector() {
|
||||||
match input_ty {
|
match input_ty {
|
||||||
types::I8 | types::I16 | types::I32 => {
|
types::I8 | types::I16 | types::I32 => {
|
||||||
// Conversion from an unsigned int smaller than 64-bit is easy: zero-extend +
|
// Conversion from an unsigned int smaller than 64-bit is easy: zero-extend +
|
||||||
@@ -2252,8 +2253,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
SseOpcode::Cvtsi2sd
|
SseOpcode::Cvtsi2sd
|
||||||
};
|
};
|
||||||
|
|
||||||
let src =
|
let src = RegMem::reg(extend_input_to_reg(
|
||||||
RegMem::reg(extend_input_to_reg(ctx, inputs[0], ExtSpec::ZeroExtendTo64));
|
ctx,
|
||||||
|
inputs[0],
|
||||||
|
ExtSpec::ZeroExtendTo64,
|
||||||
|
));
|
||||||
ctx.emit(Inst::gpr_to_xmm(opcode, src, OperandSize::Size64, dst));
|
ctx.emit(Inst::gpr_to_xmm(opcode, src, OperandSize::Size64, dst));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2273,9 +2277,57 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
dst,
|
dst,
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
_ => panic!("unexpected input type for FcvtFromUint: {:?}", input_ty),
|
_ => panic!("unexpected input type for FcvtFromUint: {:?}", input_ty),
|
||||||
};
|
};
|
||||||
|
} else {
|
||||||
|
// Converting packed unsigned integers to packed floats requires a few steps.
|
||||||
|
// There is no single instruction lowering for converting unsigned integers but there
|
||||||
|
// is for converting packed signed integers to floats (cvtdq2ps). In the steps below
|
||||||
|
// we isolate the upper half (16 bits) and lower half (16 bits) of each lane and
|
||||||
|
// then we convert each half separately using cvtdq2ps meant for signed integers.
|
||||||
|
// For this to work on the upper half, we must first shift those bits right by 1
|
||||||
|
// (divide by 2) so that the most significant bit is 0 and the value is not treated
|
||||||
|
// as negative; after the conversion we double the value. Finally we add the two
|
||||||
|
// converted values, where the addition will round correctly.
|
||||||
|
assert_eq!(ctx.input_ty(insn, 0), types::I32X4);
|
||||||
|
let src = put_input_in_reg(ctx, inputs[0]);
|
||||||
|
let dst = get_output_reg(ctx, outputs[0]);
|
||||||
|
|
||||||
|
// Create a temporary register
|
||||||
|
let tmp = ctx.alloc_tmp(RegClass::V128, types::I32X4);
|
||||||
|
ctx.emit(Inst::xmm_unary_rm_r(
|
||||||
|
SseOpcode::Movapd,
|
||||||
|
RegMem::reg(src),
|
||||||
|
tmp,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::gen_move(dst, src, ty));
|
||||||
|
|
||||||
|
// Get the low 16 bits
|
||||||
|
ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Pslld, RegMemImm::imm(16), tmp));
|
||||||
|
ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Psrld, RegMemImm::imm(16), tmp));
|
||||||
|
|
||||||
|
// Get the high 16 bits
|
||||||
|
ctx.emit(Inst::xmm_rm_r(SseOpcode::Psubd, RegMem::from(tmp), dst));
|
||||||
|
|
||||||
|
// Convert the low 16 bits
|
||||||
|
ctx.emit(Inst::xmm_rm_r(SseOpcode::Cvtdq2ps, RegMem::from(tmp), tmp));
|
||||||
|
|
||||||
|
// Shift the high bits by 1, convert, and double to get the correct value.
|
||||||
|
ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Psrld, RegMemImm::imm(1), dst));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(SseOpcode::Cvtdq2ps, RegMem::from(dst), dst));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Addps,
|
||||||
|
RegMem::reg(dst.to_reg()),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
|
||||||
|
// Add together the two converted values.
|
||||||
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Addps,
|
||||||
|
RegMem::reg(tmp.to_reg()),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::FcvtToUint | Opcode::FcvtToUintSat | Opcode::FcvtToSint | Opcode::FcvtToSintSat => {
|
Opcode::FcvtToUint | Opcode::FcvtToUintSat | Opcode::FcvtToSint | Opcode::FcvtToSintSat => {
|
||||||
|
|||||||
Reference in New Issue
Block a user