diff --git a/build.rs b/build.rs index 1e1cb823de..8ac0808cf1 100644 --- a/build.rs +++ b/build.rs @@ -231,7 +231,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { ("simd", "simd_conversions") | ("simd", "simd_i16x8_extadd_pairwise_i8x16") | ("simd", "simd_i16x8_extmul_i8x16") - | ("simd", "simd_i16x8_q15mulr_sat_s") | ("simd", "simd_i32x4_extadd_pairwise_i16x8") | ("simd", "simd_i32x4_extmul_i16x8") | ("simd", "simd_i32x4_trunc_sat_f64x2") diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index 22ebd4fc15..8759fd347d 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -2479,6 +2479,33 @@ pub(crate) fn define( .operands_out(vec![a]), ); + let I16or32 = &TypeVar::new( + "I16or32", + "A scalar or vector integer type with 16- or 32-bit numbers", + TypeSetBuilder::new().ints(16..32).simd_lanes(4..8).build(), + ); + + let qx = &Operand::new("x", I16or32); + let qy = &Operand::new("y", I16or32); + let qa = &Operand::new("a", I16or32); + + ig.push( + Inst::new( + "sqmul_round_sat", + r#" + Fixed-point multiplication of numbers in the QN format, where N + 1 + is the number bitwidth: + `a := signed_saturate((x * y + (1 << (N - 1))) >> N)` + + Polymorphic over all integer types (scalar and vector) with 16- or + 32-bit numbers. 
+ "#, + &formats.binary, + ) + .operands_in(vec![qx, qy]) + .operands_out(vec![qa]), + ); + ig.push( Inst::new( "udiv", diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 2e09e60a5c..18b1932f80 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -2228,6 +2228,14 @@ impl MachInstEmit for Inst { VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110), VecALUOp::Smull => (0b000_01110_00_1 | enc_size << 1, 0b110000), VecALUOp::Smull2 => (0b010_01110_00_1 | enc_size << 1, 0b110000), + VecALUOp::Sqrdmulh => { + debug_assert!( + size.lane_size() == ScalarSize::Size16 + || size.lane_size() == ScalarSize::Size32 + ); + + (0b001_01110_00_1 | enc_size << 1, 0b101101) + } }; let top11 = match alu_op { VecALUOp::Smull | VecALUOp::Smull2 => top11, diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index 2e586f7d2c..a618b7e81c 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -3610,6 +3610,30 @@ fn test_aarch64_binemit() { "smull2 v8.2d, v12.4s, v14.4s", )); + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Sqrdmulh, + rd: writable_vreg(31), + rn: vreg(0), + rm: vreg(31), + size: VectorSize::Size16x8, + }, + "1FB47F6E", + "sqrdmulh v31.8h, v0.8h, v31.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Sqrdmulh, + rd: writable_vreg(7), + rn: vreg(7), + rm: vreg(23), + size: VectorSize::Size32x2, + }, + "E7B4B72E", + "sqrdmulh v7.2s, v7.2s, v23.2s", + )); + insns.push(( Inst::VecMisc { op: VecMisc2::Not, diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index f3fe3ab7be..d3df28982e 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -311,6 +311,8 @@ pub enum VecALUOp { 
Smull, /// Signed multiply long (high halves) Smull2, + /// Signed saturating rounding doubling multiply returning high half + Sqrdmulh, } /// A Vector miscellaneous operation with two registers. @@ -3980,6 +3982,7 @@ impl Inst { VecALUOp::Zip1 => ("zip1", size), VecALUOp::Smull => ("smull", size), VecALUOp::Smull2 => ("smull2", size), + VecALUOp::Sqrdmulh => ("sqrdmulh", size), }; let rd_size = match alu_op { VecALUOp::Umlal | VecALUOp::Smull | VecALUOp::Smull2 => size.widen(), diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index a9611e239b..6a5b70351c 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1650,8 +1650,6 @@ pub(crate) fn lower_insn_to_regs>( panic!("table_addr should have been removed by legalization!"); } - Opcode::ConstAddr => unimplemented!(), - Opcode::Nop => { // Nothing. } @@ -2684,11 +2682,6 @@ pub(crate) fn lower_insn_to_regs>( }); } - Opcode::Vsplit | Opcode::Vconcat => { - // TODO - panic!("Vector ops not implemented."); - } - Opcode::Isplit => { assert_eq!( ctx.input_ty(insn, 0), @@ -3524,9 +3517,35 @@ pub(crate) fn lower_insn_to_regs>( } }, - Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"), - Opcode::FvpromoteLow => unimplemented!("FvpromoteLow"), - Opcode::Fvdemote => unimplemented!("Fvdemote"), + Opcode::SqmulRoundSat => { + let ty = ty.unwrap(); + + if !ty.is_vector() || (ty.lane_type() != I16 && ty.lane_type() != I32) { + return Err(CodegenError::Unsupported(format!( + "Unsupported type: {:?}", + ty + ))); + } + + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + + ctx.emit(Inst::VecRRR { + alu_op: VecALUOp::Sqrdmulh, + rd, + rn, + rm, + size: VectorSize::from_ty(ty), + }); + } + + Opcode::ConstAddr + | Opcode::FcvtLowFromSint + | 
Opcode::Fvdemote + | Opcode::FvpromoteLow + | Opcode::Vconcat + | Opcode::Vsplit => unimplemented!("lowering {}", op), } Ok(()) diff --git a/cranelift/codegen/src/isa/s390x/lower.rs b/cranelift/codegen/src/isa/s390x/lower.rs index 6edec062b0..188d7884a3 100644 --- a/cranelift/codegen/src/isa/s390x/lower.rs +++ b/cranelift/codegen/src/isa/s390x/lower.rs @@ -2458,11 +2458,11 @@ fn lower_insn_to_regs>( } Opcode::TlsValue => { - panic!("Thread-local storage support not implemented!"); + unimplemented!("Thread-local storage support not implemented!"); } Opcode::GetPinnedReg | Opcode::SetPinnedReg => { - panic!("Pinned register support not implemented!"); + unimplemented!("Pinned register support not implemented!"); } Opcode::Icmp => { @@ -2679,10 +2679,10 @@ fn lower_insn_to_regs>( let ty = ty.unwrap(); assert!(is_valid_atomic_transaction_ty(ty)); if endianness == Endianness::Little { - panic!("Little-endian atomic operations not implemented"); + unimplemented!("Little-endian atomic operations not implemented"); } if ty_bits(ty) < 32 { - panic!("Sub-word atomic operations not implemented"); + unimplemented!("Sub-word atomic operations not implemented"); } let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap()); let (alu_op, rn) = match op { @@ -2701,7 +2701,7 @@ fn lower_insn_to_regs>( }); (choose_32_64(ty, ALUOp::Add32, ALUOp::Add64), tmp.to_reg()) } - _ => panic!("AtomicRmw operation type {:?} not implemented", op), + _ => unimplemented!("AtomicRmw operation type {:?} not implemented", op), }; let mem = MemArg::reg(addr, flags); ctx.emit(Inst::AtomicRmw { @@ -2721,10 +2721,10 @@ fn lower_insn_to_regs>( let ty = ty.unwrap(); assert!(is_valid_atomic_transaction_ty(ty)); if endianness == Endianness::Little { - panic!("Little-endian atomic operations not implemented"); + unimplemented!("Little-endian atomic operations not implemented"); } if ty_bits(ty) < 32 { - panic!("Sub-word atomic operations not implemented"); + unimplemented!("Sub-word 
atomic operations not implemented"); } let mem = MemArg::reg(addr, flags); ctx.emit(Inst::gen_move(rd, rm, ty)); @@ -2865,13 +2865,14 @@ fn lower_insn_to_regs>( | Opcode::UwidenLow | Opcode::UwidenHigh | Opcode::WideningPairwiseDotProductS + | Opcode::SqmulRoundSat | Opcode::FvpromoteLow | Opcode::Fvdemote => { // TODO - panic!("Vector ops not implemented."); + unimplemented!("Vector ops not implemented."); } - Opcode::Isplit | Opcode::Iconcat => panic!("Wide integer ops not implemented."), + Opcode::Isplit | Opcode::Iconcat => unimplemented!("Wide integer ops not implemented."), Opcode::Spill | Opcode::Fill diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 7ef1a8d424..1cf1da4e9b 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -6001,6 +6001,8 @@ fn lower_insn_to_regs>( unimplemented!("Vector split/concat ops not implemented."); } + Opcode::SqmulRoundSat => unimplemented!("unimplemented lowering for opcode {:?}", op), + // Opcodes that should be removed by legalization. These should // eventually be removed if/when we replace in-situ legalization with // something better. 
diff --git a/cranelift/codegen/src/preopt.serialized b/cranelift/codegen/src/preopt.serialized index 48510b95d8..a0d55ca753 100644 Binary files a/cranelift/codegen/src/preopt.serialized and b/cranelift/codegen/src/preopt.serialized differ diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index fb40583f0c..76ec43a814 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -574,6 +574,7 @@ where Opcode::AtomicStore => unimplemented!("AtomicStore"), Opcode::Fence => unimplemented!("Fence"), Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"), + Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"), // TODO: these instructions should be removed once the new backend makes these obsolete // (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs index ab92db177a..aff11dc1ec 100644 --- a/cranelift/wasm/src/code_translator.rs +++ b/cranelift/wasm/src/code_translator.rs @@ -1885,8 +1885,12 @@ pub fn translate_operator( let arg = pop1_with_bitcast(state, type_of(op), builder); state.push1(builder.ins().popcnt(arg)); } - Operator::I16x8Q15MulrSatS - | Operator::I16x8ExtMulLowI8x16S + Operator::I16x8Q15MulrSatS => { + let (a, b) = pop2_with_bitcast(state, I16X8, builder); + + state.push1(builder.ins().sqmul_round_sat(a, b)) + } + Operator::I16x8ExtMulLowI8x16S | Operator::I16x8ExtMulHighI8x16S | Operator::I16x8ExtMulLowI8x16U | Operator::I16x8ExtMulHighI8x16U