Implement conversion of low-lane signed integers to floats for x64 SIMD

2021-03-25 12:33:13 -07:00
parent c43d00fa34
commit 31d3db1ec2
8 changed files with 45 additions and 4 deletions
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -4325,6 +4325,26 @@ pub(crate) fn define(
        .operands_out(vec![a]),
    );
    ig.push(
        Inst::new(
            "fcvt_low_from_sint",
            r#"
        Converts packed signed doubleword integers to packed double precision floating point.
        Considering only the low half of the register, each lane in `x` is interpreted as a
        signed doubleword integer that is then converted to a double precision float. This
        instruction differs from fcvt_from_sint in that it converts half the number of lanes
        which are converted to occupy twice the number of bits. No rounding should be needed
        for the resulting float.
        The result type will have half the number of vector lanes as the input.
        "#,
            &formats.unary,
        )
        .operands_in(vec![x])
        .operands_out(vec![a]),
    );
    let WideInt = &TypeVar::new(
        "WideInt",
        "An integer type with lanes from `i16` upwards",
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -3013,6 +3013,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
        }
        Opcode::TlsValue => unimplemented!("tls_value"),
        Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
    }
    Ok(())
--- a/cranelift/codegen/src/isa/x64/inst/args.rs
+++ b/cranelift/codegen/src/isa/x64/inst/args.rs
@@ -480,6 +480,7 @@ pub enum SseOpcode {
    Cmpss,
    Cmpsd,
    Cvtdq2ps,
    Cvtdq2pd,
    Cvtsd2ss,
    Cvtsd2si,
    Cvtsi2ss,
@@ -673,6 +674,7 @@ impl SseOpcode {
            | SseOpcode::Cmpsd
            | SseOpcode::Comisd
            | SseOpcode::Cvtdq2ps
            | SseOpcode::Cvtdq2pd
            | SseOpcode::Cvtsd2ss
            | SseOpcode::Cvtsd2si
            | SseOpcode::Cvtsi2sd
@@ -828,6 +830,7 @@ impl fmt::Debug for SseOpcode {
            SseOpcode::Comiss => "comiss",
            SseOpcode::Comisd => "comisd",
            SseOpcode::Cvtdq2ps => "cvtdq2ps",
            SseOpcode::Cvtdq2pd => "cvtdq2pd",
            SseOpcode::Cvtsd2ss => "cvtsd2ss",
            SseOpcode::Cvtsd2si => "cvtsd2si",
            SseOpcode::Cvtsi2ss => "cvtsi2ss",
--- a/cranelift/codegen/src/isa/x64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -1768,6 +1768,7 @@ pub(crate) fn emit(
            let rex = RexFlags::clear_w();
            let (prefix, opcode, num_opcodes) = match op {
                SseOpcode::Cvtdq2pd => (LegacyPrefixes::_F3, 0x0FE6, 2),
                SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A, 2),
                SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A, 2),
                SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28, 2),
--- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
@@ -3859,6 +3859,12 @@ fn test_x64_emit() {
        "pabsd   %xmm10, %xmm11",
    ));
    // Emission test for the unary `Cvtdq2pd` form with a register source (xmm2)
    // and writable destination (w_xmm8). The expected byte string starts with
    // the F3 prefix and contains the 0F E6 opcode bytes; the last tuple element
    // is the expected pretty-printed text.
    insns.push((
        Inst::xmm_unary_rm_r(SseOpcode::Cvtdq2pd, RegMem::reg(xmm2), w_xmm8),
        "F3440FE6C2",
        "cvtdq2pd %xmm2, %xmm8",
    ));
    // Xmm to int conversions, and conversely.
    insns.push((
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -3915,7 +3915,15 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                ctx.emit(Inst::xmm_rm_r(opcode, RegMem::from(dst), dst));
            }
        }
-
+        Opcode::FcvtLowFromSint => {
            // Lower to a single unary `Cvtdq2pd`: the input value is fetched
            // with `put_input_in_reg` and wrapped as a register operand, and
            // the result goes to the instruction's only output register.
            let src = RegMem::reg(put_input_in_reg(ctx, inputs[0]));
            let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            // `src` is already a `RegMem`, so it is passed through unchanged
            // rather than going through an identity `RegMem::from` conversion.
            ctx.emit(Inst::xmm_unary_rm_r(SseOpcode::Cvtdq2pd, src, dst));
        }
        Opcode::FcvtFromUint => {
            let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            let ty = ty.unwrap();
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -563,6 +563,7 @@ where
        Opcode::FcvtToSintSat => unimplemented!("FcvtToSintSat"),
        Opcode::FcvtFromUint => unimplemented!("FcvtFromUint"),
        Opcode::FcvtFromSint => unimplemented!("FcvtFromSint"),
        Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
        Opcode::Isplit => unimplemented!("Isplit"),
        Opcode::Iconcat => unimplemented!("Iconcat"),
        Opcode::AtomicRmw => unimplemented!("AtomicRmw"),
--- a/cranelift/wasm/src/code_translator.rs
+++ b/cranelift/wasm/src/code_translator.rs
@@ -1775,6 +1775,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
            let a = pop1_with_bitcast(state, I32X4, builder);
            state.push1(builder.ins().fcvt_from_uint(F32X4, a))
        }
        // Pop the operand as an I32X4 value, convert it with
        // `fcvt_low_from_sint` to an F64X2 result, and push that result back.
        Operator::F64x2ConvertLowI32x4S => {
            let lanes = pop1_with_bitcast(state, I32X4, builder);
            let converted = builder.ins().fcvt_low_from_sint(F64X2, lanes);
            state.push1(converted);
        }
        Operator::I32x4TruncSatF32x4S => {
            let a = pop1_with_bitcast(state, F32X4, builder);
            state.push1(builder.ins().fcvt_to_sint_sat(I32X4, a))
@@ -1851,12 +1855,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
            let arg = pop1_with_bitcast(state, type_of(op), builder);
            state.push1(builder.ins().nearest(arg));
        }
        Operator::I32x4DotI16x8S => {
            let (a, b) = pop2_with_bitcast(state, I16X8, builder);
            state.push1(builder.ins().widening_pairwise_dot_product_s(a, b));
        }
        Operator::I64x2ExtendLowI32x4S
        | Operator::I64x2ExtendHighI32x4S
        | Operator::I64x2ExtendLowI32x4U
@@ -1880,7 +1882,6 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
        | Operator::I32x4ExtAddPairwiseI16x8U
        | Operator::F32x4DemoteF64x2Zero
        | Operator::F64x2PromoteLowF32x4
        | Operator::F64x2ConvertLowI32x4S
        | Operator::F64x2ConvertLowI32x4U
        | Operator::I32x4TruncSatF64x2SZero
        | Operator::I32x4TruncSatF64x2UZero