Merge pull request #3035 from akirilov-arm/simd_i16x8_q15mulr_sat_s

Enable the simd_i16x8_q15mulr_sat_s test on AArch64
2021-06-28 09:49:52 -07:00
parent d42c8692bc 98f1ac789e
commit 522cc8aa84
11 changed files with 110 additions and 22 deletions
--- a/build.rs
+++ b/build.rs
@@ -231,7 +231,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
            ("simd", "simd_conversions")
            | ("simd", "simd_i16x8_extadd_pairwise_i8x16")
            | ("simd", "simd_i16x8_extmul_i8x16")
            | ("simd", "simd_i16x8_q15mulr_sat_s")
            | ("simd", "simd_i32x4_extadd_pairwise_i16x8")
            | ("simd", "simd_i32x4_extmul_i16x8")
            | ("simd", "simd_i32x4_trunc_sat_f64x2")
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -2479,6 +2479,33 @@ pub(crate) fn define(
        .operands_out(vec![a]),
    );
    let I16or32 = &TypeVar::new(
        "I16or32",
        "A scalar or vector integer type with 16- or 32-bit numbers",
        TypeSetBuilder::new().ints(16..32).simd_lanes(4..8).build(),
    );
    let qx = &Operand::new("x", I16or32);
    let qy = &Operand::new("y", I16or32);
    let qa = &Operand::new("a", I16or32);
    ig.push(
        Inst::new(
            "sqmul_round_sat",
            r#"
        Fixed-point multiplication of numbers in the QN format, where N + 1
        is the number bitwidth:
        `a := signed_saturate((x * y + (1 << (N - 1))) >> N)`
        Polymorphic over all integer types (scalar and vector) with 16- or
        32-bit numbers.
        "#,
            &formats.binary,
        )
        .operands_in(vec![qx, qy])
        .operands_out(vec![qa]),
    );
    ig.push(
        Inst::new(
            "udiv",
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -2228,6 +2228,14 @@ impl MachInstEmit for Inst {
                    VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
                    VecALUOp::Smull => (0b000_01110_00_1 | enc_size << 1, 0b110000),
                    VecALUOp::Smull2 => (0b010_01110_00_1 | enc_size << 1, 0b110000),
                    VecALUOp::Sqrdmulh => {
                        debug_assert!(
                            size.lane_size() == ScalarSize::Size16
                                || size.lane_size() == ScalarSize::Size32
                        );
                        (0b001_01110_00_1 | enc_size << 1, 0b101101)
                    }
                };
                let top11 = match alu_op {
                    VecALUOp::Smull | VecALUOp::Smull2 => top11,
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -3610,6 +3610,30 @@ fn test_aarch64_binemit() {
        "smull2 v8.2d, v12.4s, v14.4s",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Sqrdmulh,
            rd: writable_vreg(31),
            rn: vreg(0),
            rm: vreg(31),
            size: VectorSize::Size16x8,
        },
        "1FB47F6E",
        "sqrdmulh v31.8h, v0.8h, v31.8h",
    ));
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Sqrdmulh,
            rd: writable_vreg(7),
            rn: vreg(7),
            rm: vreg(23),
            size: VectorSize::Size32x2,
        },
        "E7B4B72E",
        "sqrdmulh v7.2s, v7.2s, v23.2s",
    ));
    insns.push((
        Inst::VecMisc {
            op: VecMisc2::Not,
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -311,6 +311,8 @@ pub enum VecALUOp {
    Smull,
    /// Signed multiply long (high halves)
    Smull2,
    /// Signed saturating rounding doubling multiply returning high half
    Sqrdmulh,
 }
 /// A Vector miscellaneous operation with two registers.
@@ -3980,6 +3982,7 @@ impl Inst {
                    VecALUOp::Zip1 => ("zip1", size),
                    VecALUOp::Smull => ("smull", size),
                    VecALUOp::Smull2 => ("smull2", size),
                    VecALUOp::Sqrdmulh => ("sqrdmulh", size),
                };
                let rd_size = match alu_op {
                    VecALUOp::Umlal | VecALUOp::Smull | VecALUOp::Smull2 => size.widen(),
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -1650,8 +1650,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            panic!("table_addr should have been removed by legalization!");
        }
        Opcode::ConstAddr => unimplemented!(),
        Opcode::Nop => {
            // Nothing.
        }
@@ -2684,11 +2682,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            });
        }
        Opcode::Vsplit | Opcode::Vconcat => {
            // TODO
            panic!("Vector ops not implemented.");
        }
        Opcode::Isplit => {
            assert_eq!(
                ctx.input_ty(insn, 0),
@@ -3524,9 +3517,35 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            }
        },
-        Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
+        Opcode::SqmulRoundSat => {
-        Opcode::FvpromoteLow => unimplemented!("FvpromoteLow"),
+            let ty = ty.unwrap();
-        Opcode::Fvdemote => unimplemented!("Fvdemote"),
+
            if !ty.is_vector() || (ty.lane_type() != I16 && ty.lane_type() != I32) {
                return Err(CodegenError::Unsupported(format!(
                    "Unsupported type: {:?}",
                    ty
                )));
            }
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
            let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
            ctx.emit(Inst::VecRRR {
                alu_op: VecALUOp::Sqrdmulh,
                rd,
                rn,
                rm,
                size: VectorSize::from_ty(ty),
            });
        }
        Opcode::ConstAddr
        | Opcode::FcvtLowFromSint
        | Opcode::Fvdemote
        | Opcode::FvpromoteLow
        | Opcode::Vconcat
        | Opcode::Vsplit => unimplemented!("lowering {}", op),
    }
    Ok(())
--- a/cranelift/codegen/src/isa/s390x/lower.rs
+++ b/cranelift/codegen/src/isa/s390x/lower.rs
@@ -2458,11 +2458,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
        }
        Opcode::TlsValue => {
-            panic!("Thread-local storage support not implemented!");
+            unimplemented!("Thread-local storage support not implemented!");
        }
        Opcode::GetPinnedReg | Opcode::SetPinnedReg => {
-            panic!("Pinned register support not implemented!");
+            unimplemented!("Pinned register support not implemented!");
        }
        Opcode::Icmp => {
@@ -2679,10 +2679,10 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            let ty = ty.unwrap();
            assert!(is_valid_atomic_transaction_ty(ty));
            if endianness == Endianness::Little {
-                panic!("Little-endian atomic operations not implemented");
+                unimplemented!("Little-endian atomic operations not implemented");
            }
            if ty_bits(ty) < 32 {
-                panic!("Sub-word atomic operations not implemented");
+                unimplemented!("Sub-word atomic operations not implemented");
            }
            let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap());
            let (alu_op, rn) = match op {
@@ -2701,7 +2701,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                    });
                    (choose_32_64(ty, ALUOp::Add32, ALUOp::Add64), tmp.to_reg())
                }
-                _ => panic!("AtomicRmw operation type {:?} not implemented", op),
+                _ => unimplemented!("AtomicRmw operation type {:?} not implemented", op),
            };
            let mem = MemArg::reg(addr, flags);
            ctx.emit(Inst::AtomicRmw {
@@ -2721,10 +2721,10 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            let ty = ty.unwrap();
            assert!(is_valid_atomic_transaction_ty(ty));
            if endianness == Endianness::Little {
-                panic!("Little-endian atomic operations not implemented");
+                unimplemented!("Little-endian atomic operations not implemented");
            }
            if ty_bits(ty) < 32 {
-                panic!("Sub-word atomic operations not implemented");
+                unimplemented!("Sub-word atomic operations not implemented");
            }
            let mem = MemArg::reg(addr, flags);
            ctx.emit(Inst::gen_move(rd, rm, ty));
@@ -2865,13 +2865,14 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
        | Opcode::UwidenLow
        | Opcode::UwidenHigh
        | Opcode::WideningPairwiseDotProductS
        | Opcode::SqmulRoundSat
        | Opcode::FvpromoteLow
        | Opcode::Fvdemote => {
            // TODO
-            panic!("Vector ops not implemented.");
+            unimplemented!("Vector ops not implemented.");
        }
-        Opcode::Isplit | Opcode::Iconcat => panic!("Wide integer ops not implemented."),
+        Opcode::Isplit | Opcode::Iconcat => unimplemented!("Wide integer ops not implemented."),
        Opcode::Spill
        | Opcode::Fill
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -6001,6 +6001,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            unimplemented!("Vector split/concat ops not implemented.");
        }
        Opcode::SqmulRoundSat => unimplemented!("unimplemented lowering for opcode {:?}", op),
        // Opcodes that should be removed by legalization. These should
        // eventually be removed if/when we replace in-situ legalization with
        // something better.
--- a/cranelift/codegen/src/preopt.serialized
+++ b/cranelift/codegen/src/preopt.serialized
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -574,6 +574,7 @@ where
        Opcode::AtomicStore => unimplemented!("AtomicStore"),
        Opcode::Fence => unimplemented!("Fence"),
        Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"),
        Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"),
        // TODO: these instructions should be removed once the new backend makes these obsolete
        // (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the
--- a/cranelift/wasm/src/code_translator.rs
+++ b/cranelift/wasm/src/code_translator.rs
@@ -1885,8 +1885,12 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
            let arg = pop1_with_bitcast(state, type_of(op), builder);
            state.push1(builder.ins().popcnt(arg));
        }
-        Operator::I16x8Q15MulrSatS
+        Operator::I16x8Q15MulrSatS => {
-        | Operator::I16x8ExtMulLowI8x16S
+            let (a, b) = pop2_with_bitcast(state, I16X8, builder);
            state.push1(builder.ins().sqmul_round_sat(a, b))
        }
        Operator::I16x8ExtMulLowI8x16S
        | Operator::I16x8ExtMulHighI8x16S
        | Operator::I16x8ExtMulLowI8x16U
        | Operator::I16x8ExtMulHighI8x16U