Rebased and ran cargo fmt

2021-07-09 10:13:04 +01:00
parent 541a4ee428
commit f2806a9192
5 changed files with 176 additions and 153 deletions
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -287,13 +287,21 @@ fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -
        | machreg_to_vec(rd.to_reg())
 }
-fn enc_vec_rrr_long(q: u32, u: u32, size: u32, bit14: u32, rm: Reg, rn: Reg, rd: Writable<Reg>) -> u32 {
+fn enc_vec_rrr_long(
-  debug_assert_eq!(q & 0b1, q);
+    q: u32,
-  debug_assert_eq!(u & 0b1, u);
+    u: u32,
-  debug_assert_eq!(size & 0b11, size);
+    size: u32,
-  debug_assert_eq!(bit14 & 0b1, bit14);
+    bit14: u32,
    rm: Reg,
    rn: Reg,
    rd: Writable<Reg>,
 ) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(u & 0b1, u);
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(bit14 & 0b1, bit14);
-  0b0_0_0_01110_00_1_00000_100000_00000_00000
+    0b0_0_0_01110_00_1_00000_100000_00000_00000
        | q << 30
        | u << 29
        | size << 22
@@ -2207,7 +2215,15 @@ impl MachInstEmit for Inst {
                    VecRRRLongOp::Umlal16 => (0b1, 0b01, 0b0),
                    VecRRRLongOp::Umlal32 => (0b1, 0b10, 0b0),
                };
-                sink.put4(enc_vec_rrr_long(high_half as u32, u, size, bit14, rm, rn, rd));
+                sink.put4(enc_vec_rrr_long(
                    high_half as u32,
                    u,
                    size,
                    bit14,
                    rm,
                    rn,
                    rd,
                ));
            }
            &Inst::VecRRR {
                rd,
@@ -2289,9 +2305,9 @@ impl MachInstEmit for Inst {
                    }
                };
                let top11 = if is_float {
-                  top11 | enc_float_size << 1
+                    top11 | enc_float_size << 1
                } else {
-                  top11
+                    top11
                };
                sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
            }
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -3705,7 +3705,7 @@ fn test_aarch64_binemit() {
            rd: writable_vreg(16),
            rn: vreg(12),
            rm: vreg(1),
-            high_half: false
+            high_half: false,
        },
        "90C1210E",
        "smull v16.8h, v12.8b, v1.8b",
@@ -3717,7 +3717,7 @@ fn test_aarch64_binemit() {
            rd: writable_vreg(15),
            rn: vreg(11),
            rm: vreg(2),
-            high_half: false
+            high_half: false,
        },
        "6FC1222E",
        "umull v15.8h, v11.8b, v2.8b",
@@ -3729,7 +3729,7 @@ fn test_aarch64_binemit() {
            rd: writable_vreg(4),
            rn: vreg(8),
            rm: vreg(16),
-            high_half: false
+            high_half: false,
        },
        "0481302E",
        "umlal v4.8h, v8.8b, v16.8b",
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -412,7 +412,6 @@ pub enum VecRRRLongOp {
    Umlal32,
 }
 /// A vector operation on a pair of elements with one register.
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
 pub enum VecPairOp {
@@ -2159,9 +2158,9 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
            alu_op, rd, rn, rm, ..
        } => {
            match alu_op {
-                VecRRRLongOp::Umlal8
+                VecRRRLongOp::Umlal8 | VecRRRLongOp::Umlal16 | VecRRRLongOp::Umlal32 => {
-                | VecRRRLongOp::Umlal16
+                    collector.add_mod(rd)
-                | VecRRRLongOp::Umlal32 => collector.add_mod(rd),
+                }
                _ => collector.add_def(rd),
            };
            collector.add_use(rn);
@@ -2985,9 +2984,9 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
            ..
        } => {
            match alu_op {
-                VecRRRLongOp::Umlal8
+                VecRRRLongOp::Umlal8 | VecRRRLongOp::Umlal16 | VecRRRLongOp::Umlal32 => {
-                | VecRRRLongOp::Umlal16
+                    map_mod(mapper, rd)
-                | VecRRRLongOp::Umlal32 => map_mod(mapper, rd),
+                }
                _ => map_def(mapper, rd),
            };
            map_use(mapper, rn);
@@ -4212,42 +4211,60 @@ impl Inst {
                high_half,
            } => {
                let (op, dest_size, src_size) = match (alu_op, high_half) {
-                    (VecRRRLongOp::Smull8, false) =>
+                    (VecRRRLongOp::Smull8, false) => {
-                        ("smull", VectorSize::Size16x8, VectorSize::Size8x8),
+                        ("smull", VectorSize::Size16x8, VectorSize::Size8x8)
-                    (VecRRRLongOp::Smull8, true) =>
+                    }
-                        ("smull2", VectorSize::Size16x8, VectorSize::Size8x16),
+                    (VecRRRLongOp::Smull8, true) => {
-                    (VecRRRLongOp::Smull16, false) =>
+                        ("smull2", VectorSize::Size16x8, VectorSize::Size8x16)
-                        ("smull", VectorSize::Size32x4, VectorSize::Size16x4),
+                    }
-                    (VecRRRLongOp::Smull16, true) =>
+                    (VecRRRLongOp::Smull16, false) => {
-                        ("smull2", VectorSize::Size32x4, VectorSize::Size16x8),
+                        ("smull", VectorSize::Size32x4, VectorSize::Size16x4)
-                    (VecRRRLongOp::Smull32, false) =>
+                    }
-                        ("smull", VectorSize::Size64x2, VectorSize::Size32x2),
+                    (VecRRRLongOp::Smull16, true) => {
-                    (VecRRRLongOp::Smull32, true) =>
+                        ("smull2", VectorSize::Size32x4, VectorSize::Size16x8)
-                        ("smull2", VectorSize::Size64x2, VectorSize::Size32x4),
+                    }
-                    (VecRRRLongOp::Umull8, false) =>
+                    (VecRRRLongOp::Smull32, false) => {
-                        ("umull", VectorSize::Size16x8, VectorSize::Size8x8),
+                        ("smull", VectorSize::Size64x2, VectorSize::Size32x2)
-                    (VecRRRLongOp::Umull8, true) =>
+                    }
-                        ("umull2", VectorSize::Size16x8, VectorSize::Size8x16),
+                    (VecRRRLongOp::Smull32, true) => {
-                    (VecRRRLongOp::Umull16, false) =>
+                        ("smull2", VectorSize::Size64x2, VectorSize::Size32x4)
-                        ("umull", VectorSize::Size32x4, VectorSize::Size16x4),
+                    }
-                    (VecRRRLongOp::Umull16, true) =>
+                    (VecRRRLongOp::Umull8, false) => {
-                        ("umull2", VectorSize::Size32x4, VectorSize::Size16x8),
+                        ("umull", VectorSize::Size16x8, VectorSize::Size8x8)
-                    (VecRRRLongOp::Umull32, false) =>
+                    }
-                        ("umull", VectorSize::Size64x2, VectorSize::Size32x2),
+                    (VecRRRLongOp::Umull8, true) => {
-                    (VecRRRLongOp::Umull32, true) =>
+                        ("umull2", VectorSize::Size16x8, VectorSize::Size8x16)
-                        ("umull2", VectorSize::Size64x2, VectorSize::Size32x4),
+                    }
-                    (VecRRRLongOp::Umlal8, false) =>
+                    (VecRRRLongOp::Umull16, false) => {
-                        ("umlal", VectorSize::Size16x8, VectorSize::Size8x8),
+                        ("umull", VectorSize::Size32x4, VectorSize::Size16x4)
-                    (VecRRRLongOp::Umlal8, true) =>
+                    }
-                        ("umlal2", VectorSize::Size16x8, VectorSize::Size8x16),
+                    (VecRRRLongOp::Umull16, true) => {
-                    (VecRRRLongOp::Umlal16, false) =>
+                        ("umull2", VectorSize::Size32x4, VectorSize::Size16x8)
-                        ("umlal", VectorSize::Size32x4, VectorSize::Size16x4),
+                    }
-                    (VecRRRLongOp::Umlal16, true) =>
+                    (VecRRRLongOp::Umull32, false) => {
-                        ("umlal2", VectorSize::Size32x4, VectorSize::Size16x8),
+                        ("umull", VectorSize::Size64x2, VectorSize::Size32x2)
-                    (VecRRRLongOp::Umlal32, false) =>
+                    }
-                        ("umlal", VectorSize::Size64x2, VectorSize::Size32x2),
+                    (VecRRRLongOp::Umull32, true) => {
-                    (VecRRRLongOp::Umlal32, true) =>
+                        ("umull2", VectorSize::Size64x2, VectorSize::Size32x4)
-                        ("umlal2", VectorSize::Size64x2, VectorSize::Size32x4),
+                    }
                    (VecRRRLongOp::Umlal8, false) => {
                        ("umlal", VectorSize::Size16x8, VectorSize::Size8x8)
                    }
                    (VecRRRLongOp::Umlal8, true) => {
                        ("umlal2", VectorSize::Size16x8, VectorSize::Size8x16)
                    }
                    (VecRRRLongOp::Umlal16, false) => {
                        ("umlal", VectorSize::Size32x4, VectorSize::Size16x4)
                    }
                    (VecRRRLongOp::Umlal16, true) => {
                        ("umlal2", VectorSize::Size32x4, VectorSize::Size16x8)
                    }
                    (VecRRRLongOp::Umlal32, false) => {
                        ("umlal", VectorSize::Size64x2, VectorSize::Size32x2)
                    }
                    (VecRRRLongOp::Umlal32, true) => {
                        ("umlal2", VectorSize::Size64x2, VectorSize::Size32x4)
                    }
                };
                let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest_size);
                let rn = show_vreg_vector(rn, mb_rru, src_size);
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -1253,11 +1253,10 @@ pub(crate) fn maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>(
    None
 }
 pub(crate) fn match_vec_long_mul<C: LowerCtx<I = Inst>>(
    c: &mut C,
    insn: IRInst,
-    ext_op: Opcode
+    ext_op: Opcode,
 ) -> Option<(VecRRRLongOp, regalloc::Reg, regalloc::Reg, bool)> {
    let inputs = insn_inputs(c, insn);
    if let Some(lhs) = maybe_input_insn(c, inputs[0], ext_op) {
@@ -1268,41 +1267,26 @@ pub(crate) fn match_vec_long_mul<C: LowerCtx<I = Inst>>(
            let rm = put_input_in_reg(c, rhs_input, NarrowValueMode::None);
            let lane_type = c.output_ty(insn, 0).lane_type();
            match (lane_type, ext_op) {
-                (I16, Opcode::SwidenLow) =>
+                (I16, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull8, rn, rm, false)),
-                    return Some((VecRRRLongOp::Smull8, rn, rm, false)),
+                (I16, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull8, rn, rm, true)),
-                (I16, Opcode::SwidenHigh) =>
+                (I16, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull8, rn, rm, false)),
-                    return Some((VecRRRLongOp::Smull8, rn, rm, true)),
+                (I16, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull8, rn, rm, true)),
-                (I16, Opcode::UwidenLow) =>
+                (I32, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull16, rn, rm, false)),
-                    return Some((VecRRRLongOp::Umull8, rn, rm, false)),
+                (I32, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull16, rn, rm, true)),
-                (I16, Opcode::UwidenHigh) =>
+                (I32, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull16, rn, rm, false)),
-                    return Some((VecRRRLongOp::Umull8, rn, rm, true)),
+                (I32, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull16, rn, rm, true)),
-                (I32, Opcode::SwidenLow) =>
+                (I64, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull32, rn, rm, false)),
-                    return Some((VecRRRLongOp::Smull16, rn, rm, false)),
+                (I64, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull32, rn, rm, true)),
-                (I32, Opcode::SwidenHigh) =>
+                (I64, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull32, rn, rm, false)),
-                    return Some((VecRRRLongOp::Smull16, rn, rm, true)),
+                (I64, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull32, rn, rm, true)),
-                (I32, Opcode::UwidenLow) =>
+                _ => {}
-                    return Some((VecRRRLongOp::Umull16, rn, rm, false)),
+            };
-                (I32, Opcode::UwidenHigh) =>
+        }
                    return Some((VecRRRLongOp::Umull16, rn, rm, true)),
                (I64, Opcode::SwidenLow) =>
                    return Some((VecRRRLongOp::Smull32, rn, rm, false)),
                (I64, Opcode::SwidenHigh) =>
                    return Some((VecRRRLongOp::Smull32, rn, rm, true)),
                (I64, Opcode::UwidenLow) =>
                    return Some((VecRRRLongOp::Umull32, rn, rm, false)),
                (I64, Opcode::UwidenHigh) =>
                    return Some((VecRRRLongOp::Umull32, rn, rm, true)),
                _ => {},
             };
         }
    }
    None
 }
-pub(crate) fn lower_i64x2_mul<C: LowerCtx<I = Inst>>(
+pub(crate) fn lower_i64x2_mul<C: LowerCtx<I = Inst>>(c: &mut C, insn: IRInst) {
    c: &mut C,
    insn: IRInst,
 ) {
    let inputs = insn_inputs(c, insn);
    let outputs = insn_outputs(c, insn);
    let rd = get_output_reg(c, outputs[0]).regs()[0];
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -246,80 +246,86 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
        Opcode::Imul => {
            let ty = ty.unwrap();
            if ty == I128 {
-              let lhs = put_input_in_regs(ctx, inputs[0]);
+                let lhs = put_input_in_regs(ctx, inputs[0]);
-              let rhs = put_input_in_regs(ctx, inputs[1]);
+                let rhs = put_input_in_regs(ctx, inputs[1]);
-              let dst = get_output_reg(ctx, outputs[0]);
+                let dst = get_output_reg(ctx, outputs[0]);
-              assert_eq!(lhs.len(), 2);
+                assert_eq!(lhs.len(), 2);
-              assert_eq!(rhs.len(), 2);
+                assert_eq!(rhs.len(), 2);
-              assert_eq!(dst.len(), 2);
+                assert_eq!(dst.len(), 2);
-              // 128bit mul formula:
+                // 128bit mul formula:
-              //   dst_lo = lhs_lo * rhs_lo
+                //   dst_lo = lhs_lo * rhs_lo
-              //   dst_hi = umulhi(lhs_lo, rhs_lo) + (lhs_lo * rhs_hi) + (lhs_hi * rhs_lo)
+                //   dst_hi = umulhi(lhs_lo, rhs_lo) + (lhs_lo * rhs_hi) + (lhs_hi * rhs_lo)
-              //
+                //
-              // We can convert the above formula into the following
+                // We can convert the above formula into the following
-              // umulh   dst_hi, lhs_lo, rhs_lo
+                // umulh   dst_hi, lhs_lo, rhs_lo
-              // madd    dst_hi, lhs_lo, rhs_hi, dst_hi
+                // madd    dst_hi, lhs_lo, rhs_hi, dst_hi
-              // madd    dst_hi, lhs_hi, rhs_lo, dst_hi
+                // madd    dst_hi, lhs_hi, rhs_lo, dst_hi
-              // mul     dst_lo, lhs_lo, rhs_lo
+                // mul     dst_lo, lhs_lo, rhs_lo
-              ctx.emit(Inst::AluRRR {
+                ctx.emit(Inst::AluRRR {
-                  alu_op: ALUOp::UMulH,
+                    alu_op: ALUOp::UMulH,
-                  rd: dst.regs()[1],
+                    rd: dst.regs()[1],
-                  rn: lhs.regs()[0],
+                    rn: lhs.regs()[0],
-                  rm: rhs.regs()[0],
+                    rm: rhs.regs()[0],
-              });
+                });
-              ctx.emit(Inst::AluRRRR {
+                ctx.emit(Inst::AluRRRR {
-                  alu_op: ALUOp3::MAdd64,
+                    alu_op: ALUOp3::MAdd64,
-                  rd: dst.regs()[1],
+                    rd: dst.regs()[1],
-                  rn: lhs.regs()[0],
+                    rn: lhs.regs()[0],
-                  rm: rhs.regs()[1],
+                    rm: rhs.regs()[1],
-                  ra: dst.regs()[1].to_reg(),
+                    ra: dst.regs()[1].to_reg(),
-              });
+                });
-              ctx.emit(Inst::AluRRRR {
+                ctx.emit(Inst::AluRRRR {
-                  alu_op: ALUOp3::MAdd64,
+                    alu_op: ALUOp3::MAdd64,
-                  rd: dst.regs()[1],
+                    rd: dst.regs()[1],
-                  rn: lhs.regs()[1],
+                    rn: lhs.regs()[1],
-                  rm: rhs.regs()[0],
+                    rm: rhs.regs()[0],
-                  ra: dst.regs()[1].to_reg(),
+                    ra: dst.regs()[1].to_reg(),
-              });
+                });
-              ctx.emit(Inst::AluRRRR {
+                ctx.emit(Inst::AluRRRR {
-                  alu_op: ALUOp3::MAdd64,
+                    alu_op: ALUOp3::MAdd64,
-                  rd: dst.regs()[0],
+                    rd: dst.regs()[0],
-                  rn: lhs.regs()[0],
+                    rn: lhs.regs()[0],
-                  rm: rhs.regs()[0],
+                    rm: rhs.regs()[0],
-                  ra: zero_reg(),
+                    ra: zero_reg(),
-              });
+                });
            } else if ty.is_vector() {
-                for ext_op in &[Opcode::SwidenLow, Opcode::SwidenHigh,
+                for ext_op in &[
-                                Opcode::UwidenLow, Opcode::UwidenHigh] {
+                    Opcode::SwidenLow,
-                  if let Some((alu_op, rn, rm, high_half)) = match_vec_long_mul(ctx, insn, *ext_op) {
+                    Opcode::SwidenHigh,
                    Opcode::UwidenLow,
                    Opcode::UwidenHigh,
                ] {
                    if let Some((alu_op, rn, rm, high_half)) =
                        match_vec_long_mul(ctx, insn, *ext_op)
                    {
                        let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
                        ctx.emit(Inst::VecRRRLong {
                            alu_op,
                            rd,
                            rn,
                            rm,
                            high_half,
                        });
                        return Ok(());
                    }
                }
                if ty == I64X2 {
                    lower_i64x2_mul(ctx, insn);
                } else {
                    let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
                    let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
                    let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-                    ctx.emit(Inst::VecRRRLong {
+                    ctx.emit(Inst::VecRRR {
-                        alu_op,
+                        alu_op: VecALUOp::Mul,
                        rd,
                        rn,
                        rm,
-                        high_half,
+                        size: VectorSize::from_ty(ty),
                    });
                    return Ok(());
                  }
                }
-                if ty == I64X2 {
+            } else {
                  lower_i64x2_mul(ctx, insn);
                } else {
                  let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
                  let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
                  let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
                  ctx.emit(Inst::VecRRR {
                      alu_op: VecALUOp::Mul,
                      rd,
                      rn,
                      rm,
                      size: VectorSize::from_ty(ty),
                  });
                }
             } else {
                let alu_op = choose_32_64(ty, ALUOp3::MAdd32, ALUOp3::MAdd64);
                let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
                let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);