diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index 36adfd2f5e..6b79a5e92f 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -4457,28 +4457,6 @@ pub(crate) fn define( .operands_out(vec![a]), ); - ig.push( - Inst::new( - "fcvt_low_from_uint", - r#" - - Converts packed unsigned 32-bit integers to packed double precision floating point. - - Considering only the low half of the register, each lane in `x` is interpreted as a - unsigned 32-bit integer that is then converted to a double precision float. This - instruction differs from fcvt_from_uint in that it converts half the number of lanes - which are converted to occupy twice the number of bits. No rounding should be needed - for the resulting float. - - The result type will have half the number of vector lanes as the input. - - "#, - &formats.unary, - ) - .operands_in(vec![x]) - .operands_out(vec![a]), - ); - let WideInt = &TypeVar::new( "WideInt", "An integer type with lanes from `i16` upwards", diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 67e99917e6..8c46602cbd 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -3557,7 +3557,6 @@ pub(crate) fn lower_insn_to_regs>( Opcode::ConstAddr | Opcode::FcvtLowFromSint - | Opcode::FcvtLowFromUint | Opcode::Fvdemote | Opcode::FvpromoteLow | Opcode::Vconcat diff --git a/cranelift/codegen/src/isa/s390x/lower.rs b/cranelift/codegen/src/isa/s390x/lower.rs index 9cc33eff50..8ab66add04 100644 --- a/cranelift/codegen/src/isa/s390x/lower.rs +++ b/cranelift/codegen/src/isa/s390x/lower.rs @@ -2867,7 +2867,6 @@ fn lower_insn_to_regs>( | Opcode::UwidenHigh | Opcode::WideningPairwiseDotProductS | Opcode::SqmulRoundSat - | Opcode::FcvtLowFromUint | Opcode::FvpromoteLow | Opcode::Fvdemote => { // TODO diff --git 
a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 4e0d67e2d1..295b5daa32 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -4154,58 +4154,6 @@ fn lower_insn_to_regs>( dst, )); } - Opcode::FcvtLowFromUint => { - // Algorithm uses unpcklps to help create a float that is equivalent - // 0x1.0p52 + double(src). 0x1.0p52 is unique because at this exponent - // every value of the mantissa represents a corresponding uint32 number. - // When we subtract 0x1.0p52 we are left with double(src). - let src = put_input_in_reg(ctx, inputs[0]); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let uint_mask = ctx.alloc_tmp(types::I32X4).only_reg().unwrap(); - - ctx.emit(Inst::gen_move(dst, src, types::I32X4)); - - static UINT_MASK: [u8; 16] = [ - 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, - ]; - - let uint_mask_const = ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK)); - - ctx.emit(Inst::xmm_load_const( - uint_mask_const, - uint_mask, - types::I32X4, - )); - - // Creates 0x1.0p52 + double(src) - ctx.emit(Inst::xmm_rm_r( - SseOpcode::Unpcklps, - RegMem::from(uint_mask), - dst, - )); - - static UINT_MASK_HIGH: [u8; 16] = [ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x30, 0x43, - ]; - - let uint_mask_high_const = - ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK_HIGH)); - let uint_mask_high = ctx.alloc_tmp(types::I32X4).only_reg().unwrap(); - ctx.emit(Inst::xmm_load_const( - uint_mask_high_const, - uint_mask_high, - types::I32X4, - )); - - // 0x1.0p52 + double(src) - 0x1.0p52 - ctx.emit(Inst::xmm_rm_r( - SseOpcode::Subpd, - RegMem::from(uint_mask_high), - dst, - )); - } Opcode::FcvtFromUint => { let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let ty = ty.unwrap(); @@ -4253,6 +4201,67 @@ fn lower_insn_to_regs>( } _ => panic!("unexpected input type for 
FcvtFromUint: {:?}", input_ty), }; + } else if let Some(uwiden) = matches_input(ctx, inputs[0], Opcode::UwidenLow) { + let uwiden_input = InsnInput { + insn: uwiden, + input: 0, + }; + let src = put_input_in_reg(ctx, uwiden_input); + let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let input_ty = ctx.input_ty(uwiden, 0); + let output_ty = ctx.output_ty(insn, 0); + + // matches_input hides which Wasm instruction is being lowered. Check here that + // both types are as expected for F64x2ConvertLowI32x4U. + debug_assert!(input_ty == types::I32X4 && output_ty == types::F64X2); + + // Algorithm uses unpcklps to help create a float that is equivalent to + // 0x1.0p52 + double(src). 0x1.0p52 is unique because at this exponent + // every value of the mantissa represents a corresponding uint32 number. + // When we subtract 0x1.0p52 we are left with double(src). + let uint_mask = ctx.alloc_tmp(types::I32X4).only_reg().unwrap(); + ctx.emit(Inst::gen_move(dst, src, types::I32X4)); + + static UINT_MASK: [u8; 16] = [ + 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, + ]; + + let uint_mask_const = ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK)); + + ctx.emit(Inst::xmm_load_const( + uint_mask_const, + uint_mask, + types::I32X4, + )); + + // Creates 0x1.0p52 + double(src) + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Unpcklps, + RegMem::from(uint_mask), + dst, + )); + + static UINT_MASK_HIGH: [u8; 16] = [ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x30, 0x43, + ]; + + let uint_mask_high_const = + ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK_HIGH)); + let uint_mask_high = ctx.alloc_tmp(types::I32X4).only_reg().unwrap(); + ctx.emit(Inst::xmm_load_const( + uint_mask_high_const, + uint_mask_high, + types::I32X4, + )); + + // 0x1.0p52 + double(src) - 0x1.0p52 + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Subpd, + RegMem::from(uint_mask_high), + 
dst, + )); } else { assert_eq!(ctx.input_ty(insn, 0), types::I32X4); let src = put_input_in_reg(ctx, inputs[0]); @@ -4595,7 +4604,10 @@ fn lower_insn_to_regs>( (types::I16X8, types::I32X4) => { ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxwd, RegMem::reg(src), dst)); } - _ => unreachable!(), + _ => unreachable!( + "In UwidenLow: input_ty {:?}, output_ty {:?}", + input_ty, output_ty + ), }, Opcode::UwidenHigh => match (input_ty, output_ty) { (types::I8X16, types::I16X8) => { diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index 9440632f44..609603f624 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -565,7 +565,6 @@ where Opcode::FcvtFromUint => unimplemented!("FcvtFromUint"), Opcode::FcvtFromSint => unimplemented!("FcvtFromSint"), Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"), - Opcode::FcvtLowFromUint => unimplemented!("FcvtLowFromUint"), Opcode::FvpromoteLow => unimplemented!("FvpromoteLow"), Opcode::Fvdemote => unimplemented!("Fvdemote"), Opcode::Isplit => unimplemented!("Isplit"), diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs index 00fd529248..3b03a193d7 100644 --- a/cranelift/wasm/src/code_translator.rs +++ b/cranelift/wasm/src/code_translator.rs @@ -1780,7 +1780,8 @@ pub fn translate_operator( } Operator::F64x2ConvertLowI32x4U => { let a = pop1_with_bitcast(state, I32X4, builder); - state.push1(builder.ins().fcvt_low_from_uint(F64X2, a)); + let widened_a = builder.ins().uwiden_low(a); + state.push1(builder.ins().fcvt_from_uint(F64X2, widened_a)); } Operator::F64x2PromoteLowF32x4 => { let a = pop1_with_bitcast(state, F32X4, builder);