diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs
index 9a9637f1bf..5c3d8e52e2 100644
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -4223,6 +4223,70 @@ pub(crate) fn define(
         .constraints(vec![WiderOrEq(Float.clone(), FloatTo.clone())]),
     );
 
+    // `fvdemote` and `fvpromote_low` are only defined for these two exact vector shapes.
+    let F64x2 = &TypeVar::new(
+        "F64x2",
+        "A SIMD vector type consisting of 2 lanes of 64-bit floats",
+        TypeSetBuilder::new()
+            .floats(64..64)
+            .simd_lanes(2..2)
+            .includes_scalars(false)
+            .build(),
+    );
+    let F32x4 = &TypeVar::new(
+        "F32x4",
+        "A SIMD vector type consisting of 4 lanes of 32-bit floats",
+        TypeSetBuilder::new()
+            .floats(32..32)
+            .simd_lanes(4..4)
+            .includes_scalars(false)
+            .build(),
+    );
+
+    let x = &Operand::new("x", F64x2);
+    let a = &Operand::new("a", F32x4);
+
+    ig.push(
+        Inst::new(
+            "fvdemote",
+            r#"
+        Convert `x` to a smaller floating point format.
+
+        Each lane in `x` is converted to the destination floating point format
+        by rounding to nearest, ties to even.
+
+        Cranelift currently only supports two floating point formats
+        - `f32` and `f64`. This may change in the future.
+
+        Fvdemote differs from fdemote in that it operates on vectors. Fvdemote is
+        constrained to an input type of F64x2 and a result type of F32x4. The two
+        upper lanes of the result, which have no corresponding input lane, are
+        initialized to zero.
+        "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "fvpromote_low",
+            r#"
+        Converts packed single precision floating point to packed double precision floating point.
+
+        Considering only the lower half of the register, the low lanes in `a` are interpreted as
+        single precision floats that are then converted to double precision floats.
+
+        The result type will have half as many vector lanes as the input. Fvpromote_low is
+        constrained to input F32x4 with a result type of F64x2.
+        "#,
+            &formats.unary,
+        )
+        .operands_in(vec![a])
+        .operands_out(vec![x]),
+    );
+
     let x = &Operand::new("x", Float);
     let a = &Operand::new("a", IntTo);
 
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index d99bf620c2..8a65c3dfd0 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -3193,6 +3193,8 @@ pub(crate) fn lower_insn_to_regs>(
         Opcode::TlsValue => unimplemented!("tls_value"),
 
         Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
+        Opcode::FvpromoteLow => unimplemented!("FvpromoteLow"),
+        Opcode::Fvdemote => unimplemented!("Fvdemote"),
     }
 
     Ok(())
diff --git a/cranelift/codegen/src/isa/s390x/lower.rs b/cranelift/codegen/src/isa/s390x/lower.rs
index 26276f0434..0afe2be76e 100644
--- a/cranelift/codegen/src/isa/s390x/lower.rs
+++ b/cranelift/codegen/src/isa/s390x/lower.rs
@@ -2548,7 +2548,9 @@ fn lower_insn_to_regs>(
         | Opcode::SwidenHigh
         | Opcode::UwidenLow
         | Opcode::UwidenHigh
-        | Opcode::WideningPairwiseDotProductS => {
+        | Opcode::WideningPairwiseDotProductS
+        | Opcode::FvpromoteLow
+        | Opcode::Fvdemote => {
             // TODO
             panic!("Vector ops not implemented.");
         }
diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs
index 010dd87633..c362075061 100644
--- a/cranelift/codegen/src/isa/x64/inst/args.rs
+++ b/cranelift/codegen/src/isa/x64/inst/args.rs
@@ -489,6 +489,8 @@ pub enum SseOpcode {
     Cmpsd,
     Cvtdq2ps,
     Cvtdq2pd,
+    Cvtpd2ps,
+    Cvtps2pd,
     Cvtsd2ss,
     Cvtsd2si,
     Cvtsi2ss,
@@ -684,6 +686,8 @@ impl SseOpcode {
             | SseOpcode::Comisd
             | SseOpcode::Cvtdq2ps
             | SseOpcode::Cvtdq2pd
+            | SseOpcode::Cvtpd2ps
+            | SseOpcode::Cvtps2pd
             | SseOpcode::Cvtsd2ss
             | SseOpcode::Cvtsd2si
             | SseOpcode::Cvtsi2sd
@@ -843,6 +847,8 @@ impl fmt::Debug for SseOpcode {
             SseOpcode::Comisd => "comisd",
             SseOpcode::Cvtdq2ps => "cvtdq2ps",
             SseOpcode::Cvtdq2pd => "cvtdq2pd",
+            SseOpcode::Cvtpd2ps => "cvtpd2ps",
+            SseOpcode::Cvtps2pd => "cvtps2pd",
             SseOpcode::Cvtsd2ss => "cvtsd2ss",
             SseOpcode::Cvtsd2si => "cvtsd2si",
             SseOpcode::Cvtsi2ss => "cvtsi2ss",
diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs
index c6703b23aa..441d89fa91 100644
--- a/cranelift/codegen/src/isa/x64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -1348,6 +1348,8 @@ pub(crate) fn emit(
 
             let (prefix, opcode, num_opcodes) = match op {
                 SseOpcode::Cvtdq2pd => (LegacyPrefixes::_F3, 0x0FE6, 2),
+                SseOpcode::Cvtpd2ps => (LegacyPrefixes::_66, 0x0F5A, 2),
+                SseOpcode::Cvtps2pd => (LegacyPrefixes::None, 0x0F5A, 2),
                 SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A, 2),
                 SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A, 2),
                 SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28, 2),
diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
index 9951842d2b..a77882c3f6 100644
--- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
@@ -3913,6 +3913,18 @@ fn test_x64_emit() {
         "vpopcntb %xmm2, %xmm8",
     ));
 
+    insns.push((
+        Inst::xmm_unary_rm_r(SseOpcode::Cvtpd2ps, RegMem::reg(xmm7), w_xmm7),
+        "660F5AFF",
+        "cvtpd2ps %xmm7, %xmm7",
+    ));
+
+    insns.push((
+        Inst::xmm_unary_rm_r(SseOpcode::Cvtps2pd, RegMem::reg(xmm11), w_xmm9),
+        "450F5ACB",
+        "cvtps2pd %xmm11, %xmm9",
+    ));
+
     // Xmm to int conversions, and conversely.
 
     insns.push((
diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs
index d60b83345f..b16147a925 100644
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -4057,6 +4057,14 @@ fn lower_insn_to_regs>(
             ctx.emit(Inst::xmm_unary_rm_r(SseOpcode::Cvtss2sd, src, dst));
         }
 
+        Opcode::FvpromoteLow => {
+            // The input is always placed in a register; no load is merged into the
+            // instruction (see the alignment note on the `Fdemote` arm).
+            let src = RegMem::reg(put_input_in_reg(ctx, inputs[0]));
+            let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            ctx.emit(Inst::xmm_unary_rm_r(SseOpcode::Cvtps2pd, src, dst));
+        }
+
         Opcode::Fdemote => {
             // We can't guarantee the RHS (if a load) is 128-bit aligned, so we
             // must avoid merging a load here.
@@ -4065,6 +4073,14 @@ fn lower_insn_to_regs>(
             ctx.emit(Inst::xmm_unary_rm_r(SseOpcode::Cvtsd2ss, src, dst));
         }
 
+        Opcode::Fvdemote => {
+            // The input is always placed in a register; no load is merged into the
+            // instruction (see the alignment note on the `Fdemote` arm).
+            let src = RegMem::reg(put_input_in_reg(ctx, inputs[0]));
+            let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            ctx.emit(Inst::xmm_unary_rm_r(SseOpcode::Cvtpd2ps, src, dst));
+        }
+
         Opcode::FcvtFromSint => {
             let output_ty = ty.unwrap();
             if !output_ty.is_vector() {
diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs
index f8a79a9847..fb40583f0c 100644
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -564,6 +564,8 @@ where
         Opcode::FcvtFromUint => unimplemented!("FcvtFromUint"),
         Opcode::FcvtFromSint => unimplemented!("FcvtFromSint"),
         Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
+        Opcode::FvpromoteLow => unimplemented!("FvpromoteLow"),
+        Opcode::Fvdemote => unimplemented!("Fvdemote"),
         Opcode::Isplit => unimplemented!("Isplit"),
         Opcode::Iconcat => unimplemented!("Iconcat"),
         Opcode::AtomicRmw => unimplemented!("AtomicRmw"),
diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs
index d810306892..c3301e0102 100644
--- a/cranelift/wasm/src/code_translator.rs
+++ b/cranelift/wasm/src/code_translator.rs
@@ -1779,6 +1779,14 @@ pub fn translate_operator(
             let a = pop1_with_bitcast(state, I32X4, builder);
             state.push1(builder.ins().fcvt_low_from_sint(F64X2, a));
         }
+        Operator::F64x2PromoteLowF32x4 => {
+            let a = pop1_with_bitcast(state, F32X4, builder);
+            state.push1(builder.ins().fvpromote_low(a));
+        }
+        Operator::F32x4DemoteF64x2Zero => {
+            let a = pop1_with_bitcast(state, F64X2, builder);
+            state.push1(builder.ins().fvdemote(a));
+        }
         Operator::I32x4TruncSatF32x4S => {
             let a = pop1_with_bitcast(state, F32X4, builder);
             state.push1(builder.ins().fcvt_to_sint_sat(I32X4, a))
@@ -1884,8 +1892,6 @@ pub fn translate_operator(
         | Operator::I16x8ExtAddPairwiseI8x16U
         | Operator::I32x4ExtAddPairwiseI16x8S
         | Operator::I32x4ExtAddPairwiseI16x8U
-        | Operator::F32x4DemoteF64x2Zero
-        | Operator::F64x2PromoteLowF32x4
         | Operator::F64x2ConvertLowI32x4U
         | Operator::I32x4TruncSatF64x2SZero
         | Operator::I32x4TruncSatF64x2UZero => {