Adds support for converting packed unsigned integers to packed floats

2020-10-16 12:12:07 -07:00
parent c35904a8bf
commit 97392eae3d
1 changed files with 82 additions and 30 deletions
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -2241,6 +2241,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            let ty = ty.unwrap();
            let input_ty = ctx.input_ty(insn, 0);
            if !ty.is_vector() {
                match input_ty {
                    types::I8 | types::I16 | types::I32 => {
                        // Conversion from an unsigned int smaller than 64-bit is easy: zero-extend +
@@ -2252,8 +2253,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                            SseOpcode::Cvtsi2sd
                        };
-                    let src =
+                        let src = RegMem::reg(extend_input_to_reg(
-                        RegMem::reg(extend_input_to_reg(ctx, inputs[0], ExtSpec::ZeroExtendTo64));
+                            ctx,
                            inputs[0],
                            ExtSpec::ZeroExtendTo64,
                        ));
                        ctx.emit(Inst::gpr_to_xmm(opcode, src, OperandSize::Size64, dst));
                    }
@@ -2273,9 +2277,57 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                            dst,
                        ));
                    }
                    _ => panic!("unexpected input type for FcvtFromUint: {:?}", input_ty),
                };
            } else {
                // Converting packed unsigned integers to packed floats requires a few steps.
                // There is no single instruction that converts packed unsigned integers to
                // floats, but there is one for converting packed signed integers to floats
                // (cvtdq2ps). In the steps below we isolate the upper half (16 bits) and lower
                // half (16 bits) of each lane and then convert each half separately using
                // cvtdq2ps, which is meant for signed integers. For this to work for the upper
                // half we must first shift those bits right by 1 (divide by 2) so that the most
                // significant bit is 0 and the value is not interpreted as negative, and then
                // double the value after the conversion. Finally we add the two converted
                // values, where the addition will round correctly.
                assert_eq!(ctx.input_ty(insn, 0), types::I32X4);
                let src = put_input_in_reg(ctx, inputs[0]);
                let dst = get_output_reg(ctx, outputs[0]);
                // Create a temporary register
                let tmp = ctx.alloc_tmp(RegClass::V128, types::I32X4);
                ctx.emit(Inst::xmm_unary_rm_r(
                    SseOpcode::Movapd,
                    RegMem::reg(src),
                    tmp,
                ));
                ctx.emit(Inst::gen_move(dst, src, ty));
                // Get the low 16 bits
                ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Pslld, RegMemImm::imm(16), tmp));
                ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Psrld, RegMemImm::imm(16), tmp));
                // Get the high 16 bits by subtracting the low 16 bits from the original value
                ctx.emit(Inst::xmm_rm_r(SseOpcode::Psubd, RegMem::from(tmp), dst));
                // Convert the low 16 bits
                ctx.emit(Inst::xmm_rm_r(SseOpcode::Cvtdq2ps, RegMem::from(tmp), tmp));
                // Shift the high bits by 1, convert, and double to get the correct value.
                ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Psrld, RegMemImm::imm(1), dst));
                ctx.emit(Inst::xmm_rm_r(SseOpcode::Cvtdq2ps, RegMem::from(dst), dst));
                ctx.emit(Inst::xmm_rm_r(
                    SseOpcode::Addps,
                    RegMem::reg(dst.to_reg()),
                    dst,
                ));
                // Add together the two converted values.
                ctx.emit(Inst::xmm_rm_r(
                    SseOpcode::Addps,
                    RegMem::reg(tmp.to_reg()),
                    dst,
                ));
            }
        }
        Opcode::FcvtToUint | Opcode::FcvtToUintSat | Opcode::FcvtToSint | Opcode::FcvtToSintSat => {