Refactor and turn on lowering for extend-add-pairwise

2021-07-29 20:19:38 -07:00
parent e373ddfe1b
commit e519fca61c
8 changed files with 107 additions and 80 deletions
--- a/build.rs
+++ b/build.rs
@@ -156,10 +156,8 @@ fn write_testsuite_tests(
    let testname = extract_name(path);

    writeln!(out, "#[test]")?;
-    if x64_should_panic(testsuite, &testname, strategy) {
-        writeln!(out, r#"#[should_panic]"#)?;
    // Ignore when using QEMU for running tests (limited memory).
-    } else if ignore(testsuite, &testname, strategy) || (pooling && platform_is_emulated()) {
+    if ignore(testsuite, &testname, strategy) || (pooling && platform_is_emulated()) {
        writeln!(out, "#[ignore]")?;
    }

@@ -182,19 +180,6 @@ fn write_testsuite_tests(
    Ok(())
 }

-/// For x64 backend features that are not supported yet, mark tests as panicking, so
-/// they stop "passing" once the features are properly implemented.
-fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool {
-    if !platform_is_x64() || strategy != "Cranelift" {
-        return false;
-    }
-
-    match (testsuite, testname) {
-        _ => {}
-    }
-    false
-}
-
 /// Ignore tests that aren't supported yet.
 fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
    match strategy {
@@ -217,6 +202,13 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
            ("simd", _) if cfg!(feature = "old-x86-backend") => return true,
            // No simd support yet for s390x.
            ("simd", _) if platform_is_s390x() => return true,
+            // These are new instructions that are only known to be supported for x64.
+            ("simd", "simd_i16x8_extadd_pairwise_i8x16")
+            | ("simd", "simd_i32x4_extadd_pairwise_i16x8")
+                if !platform_is_x64() =>
+            {
+                return true
+            }
            _ => {}
        },
        _ => panic!("unrecognized strategy"),
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -4114,20 +4114,7 @@ pub(crate) fn define(
        Inst::new(
            "uwiden_high",
            r#"
-        Lane-wise integer extended pairwise addition producing extended results
-        (twice wider results than the input)
-            "#,
-            &formats.unary,
-        )
-        .operands_in(vec![x])
-        .operands_out(vec![a]),
-    );
-
-    ig.push(
-        Inst::new(
-            "extended_pairwise_add_signed",
-            r#"
-        Widen the high lanes of `x` using signed extension.
+            Widen the high lanes of `x` using unsigned extension.

            This will double the lane width and halve the number of lanes.
            "#,
@@ -4137,17 +4124,24 @@ pub(crate) fn define(
        .operands_out(vec![a]),
    );

+    let x = &Operand::new("x", I8or16or32xN);
+    let y = &Operand::new("y", I8or16or32xN);
+    let a = &Operand::new("a", I8or16or32xN);
+
    ig.push(
        Inst::new(
-            "extended_pairwise_add_unsigned",
+            "iadd_pairwise",
            r#"
-        Widen the high lanes of `x` extending with zeros.
-
-        This will double the lane width and halve the number of lanes.
+        Does lane-wise integer pairwise addition on two operands, putting the
+        combined results into a single vector result. Here a pair refers to adjacent
+        lanes in a vector, i.e. i*2 + (i*2+1) for i == num_lanes/2. The first operand
+        pairwise add results will make up the low half of the resulting vector while
+        the second operand pairwise add results will make up the upper half of the
+        resulting vector.
            "#,
-            &formats.unary,
+            &formats.binary,
        )
-        .operands_in(vec![x])
+        .operands_in(vec![x, y])
        .operands_out(vec![a]),
    );

--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -3519,11 +3519,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            });
        }

-        Opcode::ExtendedPairwiseAddSigned
-        | Opcode::ExtendedPairwiseAddUnsigned
-        | Opcode::ConstAddr
-        | Opcode::Vconcat
-        | Opcode::Vsplit => unimplemented!("lowering {}", op),
+        Opcode::IaddPairwise | Opcode::ConstAddr | Opcode::Vconcat | Opcode::Vsplit => {
+            unimplemented!("lowering {}", op)
+        }
    }

    Ok(())
--- a/cranelift/codegen/src/isa/s390x/lower.rs
+++ b/cranelift/codegen/src/isa/s390x/lower.rs
@@ -2869,8 +2869,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
        | Opcode::SqmulRoundSat
        | Opcode::FvpromoteLow
        | Opcode::Fvdemote
-        | Opcode::ExtendedPairwiseAddSigned
-        | Opcode::ExtendedPairwiseAddUnsigned => {
+        | Opcode::IaddPairwise => {
            // TODO
            unimplemented!("Vector ops not implemented.");
        }
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -4927,18 +4927,33 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                }
            }
        }
-        Opcode::ExtendedPairwiseAddSigned | Opcode::ExtendedPairwiseAddUnsigned => {
-            // Extended pairwise addition instructions computes extended sums within adjacent
-            // pairs of lanes of a SIMD vector, producing a SIMD vector with half as many lanes.
-            // Instruction sequences taken from instruction SPEC PR https://github.com/WebAssembly/simd/pull/380
-            /*
-            let input_ty = ctx.input_ty(insn, 0);
+        Opcode::IaddPairwise => {
+            if let (Some(swiden_low), Some(swiden_high)) = (
+                matches_input(ctx, inputs[0], Opcode::SwidenLow),
+                matches_input(ctx, inputs[1], Opcode::SwidenHigh),
+            ) {
+                let swiden_input = &[
+                    InsnInput {
+                        insn: swiden_low,
+                        input: 0,
+                    },
+                    InsnInput {
+                        insn: swiden_high,
+                        input: 0,
+                    },
+                ];
+
+                let input_ty = ctx.input_ty(swiden_low, 0);
                let output_ty = ctx.output_ty(insn, 0);
-            let src = put_input_in_reg(ctx, inputs[0]);
+                let src0 = put_input_in_reg(ctx, swiden_input[0]);
+                let src1 = put_input_in_reg(ctx, swiden_input[1]);
                let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            unreachable!();
-            match op {
-                Opcode::ExtendedPairwiseAddSigned => match (input_ty, output_ty) {
+                if src0 != src1 {
+                    unimplemented!(
+                        "iadd_pairwise not implemented for general case with different inputs"
+                    );
+                }
+                match (input_ty, output_ty) {
                    (types::I8X16, types::I16X8) => {
                        static MUL_CONST: [u8; 16] = [0x01; 16];
                        let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
@@ -4949,7 +4964,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                            RegMem::reg(mul_const_reg.to_reg()),
                            dst,
                        ));
-                        ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddubsw, RegMem::reg(src), dst));
+                        ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddubsw, RegMem::reg(src0), dst));
                    }
                    (types::I16X8, types::I32X4) => {
                        static MUL_CONST: [u8; 16] = [
@@ -4959,25 +4974,49 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                        let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
                        let mul_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
                        ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I16X8));
-                        ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
+                        ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
                        ctx.emit(Inst::xmm_rm_r(
                            SseOpcode::Pmaddwd,
                            RegMem::reg(mul_const_reg.to_reg()),
                            dst,
                        ));
                    }
-                    _ => unreachable!(
-                        "Type pattern not supported {:?}-{:?} not supported for {:?}.",
-                        input_ty, output_ty, op
-                    ),
+                    _ => {
+                        unimplemented!("Type not supported for {:?}", op);
+                    }
+                }
+            } else if let (Some(uwiden_low), Some(uwiden_high)) = (
+                matches_input(ctx, inputs[0], Opcode::UwidenLow),
+                matches_input(ctx, inputs[1], Opcode::UwidenHigh),
+            ) {
+                let uwiden_input = &[
+                    InsnInput {
+                        insn: uwiden_low,
+                        input: 0,
                    },
-                Opcode::ExtendedPairwiseAddUnsigned => match (input_ty, output_ty) {
+                    InsnInput {
+                        insn: uwiden_high,
+                        input: 0,
+                    },
+                ];
+
+                let input_ty = ctx.input_ty(uwiden_low, 0);
+                let output_ty = ctx.output_ty(insn, 0);
+                let src0 = put_input_in_reg(ctx, uwiden_input[0]);
+                let src1 = put_input_in_reg(ctx, uwiden_input[1]);
+                let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+                if src0 != src1 {
+                    unimplemented!(
+                        "iadd_pairwise not implemented for general case with different inputs"
+                    );
+                }
+                match (input_ty, output_ty) {
                    (types::I8X16, types::I16X8) => {
                        static MUL_CONST: [u8; 16] = [0x01; 16];
                        let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
                        let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
                        ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16));
-                        ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
+                        ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
                        ctx.emit(Inst::xmm_rm_r(
                            SseOpcode::Pmaddubsw,
                            RegMem::reg(mul_const_reg.to_reg()),
@@ -4997,7 +5036,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                            pxor_const_reg,
                            types::I16X8,
                        ));
-                        ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
+                        ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
                        ctx.emit(Inst::xmm_rm_r(
                            SseOpcode::Pxor,
                            RegMem::reg(pxor_const_reg.to_reg()),
@@ -5021,7 +5060,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                            RegMem::reg(madd_const_reg.to_reg()),
                            dst,
                        ));
-
                        static ADDD_CONST2: [u8; 16] = [
                            0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00,
                            0x00, 0x00, 0x01, 0x00,
@@ -5040,14 +5078,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                            dst,
                        ));
                    }
-                    _ => unreachable!(
-                        "Type pattern not supported {:?}-{:?} not supported for {:?}.",
-                        input_ty, output_ty, op
-                    ),
-                },
-                _ => unreachable!("{:?} not supported.", op),
+                    _ => {
+                        unimplemented!("Type not supported for {:?}", op);
+                    }
+                }
+            } else {
+                unimplemented!("Operands not supported for {:?}", op);
            }
-            */
        }
        Opcode::UwidenHigh | Opcode::UwidenLow | Opcode::SwidenHigh | Opcode::SwidenLow => {
            let input_ty = ctx.input_ty(insn, 0);
--- a/cranelift/codegen/src/preopt.serialized
+++ b/cranelift/codegen/src/preopt.serialized
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -630,8 +630,7 @@ where
        Opcode::Fence => unimplemented!("Fence"),
        Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"),
        Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"),
-        Opcode::ExtendedPairwiseAddSigned => unimplemented!("ExtendedPairwiseAddSigned"),
-        Opcode::ExtendedPairwiseAddUnsigned => unimplemented!("ExtendedPairwiseAddUnsigned"),
+        Opcode::IaddPairwise => unimplemented!("IaddPairwise"),

        // TODO: these instructions should be removed once the new backend makes these obsolete
        // (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the
--- a/cranelift/wasm/src/code_translator.rs
+++ b/cranelift/wasm/src/code_translator.rs
@@ -1881,19 +1881,27 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
        }
        Operator::I16x8ExtAddPairwiseI8x16S => {
            let a = pop1_with_bitcast(state, I8X16, builder);
-            state.push1(builder.ins().extended_pairwise_add_signed(a))
+            let widen_low = builder.ins().swiden_low(a);
+            let widen_high = builder.ins().swiden_high(a);
+            state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
        }
        Operator::I32x4ExtAddPairwiseI16x8S => {
            let a = pop1_with_bitcast(state, I16X8, builder);
-            state.push1(builder.ins().extended_pairwise_add_signed(a))
+            let widen_low = builder.ins().swiden_low(a);
+            let widen_high = builder.ins().swiden_high(a);
+            state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
        }
        Operator::I16x8ExtAddPairwiseI8x16U => {
            let a = pop1_with_bitcast(state, I8X16, builder);
-            state.push1(builder.ins().extended_pairwise_add_unsigned(a))
+            let widen_low = builder.ins().uwiden_low(a);
+            let widen_high = builder.ins().uwiden_high(a);
+            state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
        }
        Operator::I32x4ExtAddPairwiseI16x8U => {
            let a = pop1_with_bitcast(state, I16X8, builder);
-            state.push1(builder.ins().extended_pairwise_add_unsigned(a))
+            let widen_low = builder.ins().uwiden_low(a);
+            let widen_high = builder.ins().uwiden_high(a);
+            state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
        }
        Operator::F32x4Ceil | Operator::F64x2Ceil => {
            // This is something of a misuse of `type_of`, because that produces the return type