From c32740ffcd44f72d4327be43b44ee565ddd353e5 Mon Sep 17 00:00:00 2001 From: Johnnie Birch <45402135+jlb6740@users.noreply.github.com> Date: Fri, 30 Oct 2020 16:49:30 -0700 Subject: [PATCH] Updates comments on Int to Float conversion The int-to-float conversion for unsigned ints has already merged, but some review comments left on a different PR covering the same change were still outstanding; this PR addresses them. --- cranelift/codegen/src/isa/x64/lower.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index d7c21d292e..977011ec53 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -2298,13 +2298,24 @@ fn lower_insn_to_regs>( } else { // Converting packed unsigned integers to packed floats requires a few steps. // There is no single instruction lowering for converting unsigned floats but there - // is for converted packed signed integers to float (cvtdq2ps). In the steps below + // is for converting packed signed integers to float (cvtdq2ps). In the steps below // we isolate the upper half (16 bits) and lower half (16 bits) of each lane and // then we convert each half separately using cvtdq2ps meant for signed integers. // In order for this to work for the upper half bits we must shift right by 1 // (divide by 2) these bits in order to ensure the most significant bit is 0 not // signed, and then after the conversion we double the value. Finally we add the // converted values where addition will correctly round. + // + // Sequence: + // -> A = 0xffffffff + // -> Ah = 0xffff0000 + // -> Al = 0x0000ffff + // -> Convert(Al) // Convert int to float + // -> Ah = Ah >> 1 // Shift right by 1 to ensure Ah conversion isn't treated as signed + // -> Convert(Ah) // Convert int to float with no loss of significant digits from the previous shift + // -> Ah = Ah + Ah // Double Ah to account for shift right before the conversion. 
+ // -> dst = Ah + Al // Add the two floats together + assert_eq!(ctx.input_ty(insn, 0), types::I32X4); let src = put_input_in_reg(ctx, inputs[0]); let dst = get_output_reg(ctx, outputs[0]);