From f2806a919284feab01e04b5d041e61e1de715d38 Mon Sep 17 00:00:00 2001
From: Sam Parker <sam.parker@arm.com>
Date: Fri, 9 Jul 2021 10:13:04 +0100
Subject: [PATCH] Rebase and run cargo fmt

Copyright (c) 2021, Arm Limited.
---
 .../codegen/src/isa/aarch64/inst/emit.rs      |  34 +++--
 .../src/isa/aarch64/inst/emit_tests.rs        |   6 +-
 cranelift/codegen/src/isa/aarch64/inst/mod.rs | 103 +++++++------
 cranelift/codegen/src/isa/aarch64/lower.rs    |  50 +++----
 .../codegen/src/isa/aarch64/lower_inst.rs     | 136 +++++++++---------
 5 files changed, 176 insertions(+), 153 deletions(-)
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
index 91335e3bba..54886b010e 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -287,13 +287,21 @@ fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -
         | machreg_to_vec(rd.to_reg())
 }
 
-fn enc_vec_rrr_long(q: u32, u: u32, size: u32, bit14: u32, rm: Reg, rn: Reg, rd: Writable<Reg>) -> u32 {
-  debug_assert_eq!(q & 0b1, q);
-  debug_assert_eq!(u & 0b1, u);
-  debug_assert_eq!(size & 0b11, size);
-  debug_assert_eq!(bit14 & 0b1, bit14);
+fn enc_vec_rrr_long(
+    q: u32,
+    u: u32,
+    size: u32,
+    bit14: u32,
+    rm: Reg,
+    rn: Reg,
+    rd: Writable<Reg>,
+) -> u32 {
+    debug_assert_eq!(q & 0b1, q);
+    debug_assert_eq!(u & 0b1, u);
+    debug_assert_eq!(size & 0b11, size);
+    debug_assert_eq!(bit14 & 0b1, bit14);
 
-  0b0_0_0_01110_00_1_00000_100000_00000_00000
+    0b0_0_0_01110_00_1_00000_100000_00000_00000
         | q << 30
         | u << 29
         | size << 22
@@ -2207,7 +2215,15 @@ impl MachInstEmit for Inst {
                     VecRRRLongOp::Umlal16 => (0b1, 0b01, 0b0),
                     VecRRRLongOp::Umlal32 => (0b1, 0b10, 0b0),
                 };
-                sink.put4(enc_vec_rrr_long(high_half as u32, u, size, bit14, rm, rn, rd));
+                sink.put4(enc_vec_rrr_long(
+                    high_half as u32,
+                    u,
+                    size,
+                    bit14,
+                    rm,
+                    rn,
+                    rd,
+                ));
             }
             &Inst::VecRRR {
                 rd,
@@ -2289,9 +2305,9 @@ impl MachInstEmit for Inst {
                     }
                 };
                 let top11 = if is_float {
-                  top11 | enc_float_size << 1
+                    top11 | enc_float_size << 1
                 } else {
-                  top11
+                    top11
                 };
                 sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
             }
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
index 46d3fd6b69..d3afca2a77 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -3705,7 +3705,7 @@ fn test_aarch64_binemit() {
             rd: writable_vreg(16),
             rn: vreg(12),
             rm: vreg(1),
-            high_half: false
+            high_half: false,
         },
         "90C1210E",
         "smull v16.8h, v12.8b, v1.8b",
@@ -3717,7 +3717,7 @@ fn test_aarch64_binemit() {
             rd: writable_vreg(15),
             rn: vreg(11),
             rm: vreg(2),
-            high_half: false
+            high_half: false,
         },
         "6FC1222E",
         "umull v15.8h, v11.8b, v2.8b",
@@ -3729,7 +3729,7 @@ fn test_aarch64_binemit() {
             rd: writable_vreg(4),
             rn: vreg(8),
             rm: vreg(16),
-            high_half: false
+            high_half: false,
         },
         "0481302E",
         "umlal v4.8h, v8.8b, v16.8b",
diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
index 6ca551360b..8c993492bd 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -412,7 +412,6 @@ pub enum VecRRRLongOp {
     Umlal32,
 }
 
-
 /// A vector operation on a pair of elements with one register.
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
 pub enum VecPairOp {
@@ -2159,9 +2158,9 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
             alu_op, rd, rn, rm, ..
         } => {
             match alu_op {
-                VecRRRLongOp::Umlal8
-                | VecRRRLongOp::Umlal16
-                | VecRRRLongOp::Umlal32 => collector.add_mod(rd),
+                VecRRRLongOp::Umlal8 | VecRRRLongOp::Umlal16 | VecRRRLongOp::Umlal32 => {
+                    collector.add_mod(rd)
+                }
                 _ => collector.add_def(rd),
             };
             collector.add_use(rn);
@@ -2985,9 +2984,9 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
             ..
         } => {
             match alu_op {
-                VecRRRLongOp::Umlal8
-                | VecRRRLongOp::Umlal16
-                | VecRRRLongOp::Umlal32 => map_mod(mapper, rd),
+                VecRRRLongOp::Umlal8 | VecRRRLongOp::Umlal16 | VecRRRLongOp::Umlal32 => {
+                    map_mod(mapper, rd)
+                }
                 _ => map_def(mapper, rd),
             };
             map_use(mapper, rn);
@@ -4212,42 +4211,60 @@ impl Inst {
                 high_half,
             } => {
                 let (op, dest_size, src_size) = match (alu_op, high_half) {
-                    (VecRRRLongOp::Smull8, false) =>
-                        ("smull", VectorSize::Size16x8, VectorSize::Size8x8),
-                    (VecRRRLongOp::Smull8, true) =>
-                        ("smull2", VectorSize::Size16x8, VectorSize::Size8x16),
-                    (VecRRRLongOp::Smull16, false) =>
-                        ("smull", VectorSize::Size32x4, VectorSize::Size16x4),
-                    (VecRRRLongOp::Smull16, true) =>
-                        ("smull2", VectorSize::Size32x4, VectorSize::Size16x8),
-                    (VecRRRLongOp::Smull32, false) =>
-                        ("smull", VectorSize::Size64x2, VectorSize::Size32x2),
-                    (VecRRRLongOp::Smull32, true) =>
-                        ("smull2", VectorSize::Size64x2, VectorSize::Size32x4),
-                    (VecRRRLongOp::Umull8, false) =>
-                        ("umull", VectorSize::Size16x8, VectorSize::Size8x8),
-                    (VecRRRLongOp::Umull8, true) =>
-                        ("umull2", VectorSize::Size16x8, VectorSize::Size8x16),
-                    (VecRRRLongOp::Umull16, false) =>
-                        ("umull", VectorSize::Size32x4, VectorSize::Size16x4),
-                    (VecRRRLongOp::Umull16, true) =>
-                        ("umull2", VectorSize::Size32x4, VectorSize::Size16x8),
-                    (VecRRRLongOp::Umull32, false) =>
-                        ("umull", VectorSize::Size64x2, VectorSize::Size32x2),
-                    (VecRRRLongOp::Umull32, true) =>
-                        ("umull2", VectorSize::Size64x2, VectorSize::Size32x4),
-                    (VecRRRLongOp::Umlal8, false) =>
-                        ("umlal", VectorSize::Size16x8, VectorSize::Size8x8),
-                    (VecRRRLongOp::Umlal8, true) =>
-                        ("umlal2", VectorSize::Size16x8, VectorSize::Size8x16),
-                    (VecRRRLongOp::Umlal16, false) =>
-                        ("umlal", VectorSize::Size32x4, VectorSize::Size16x4),
-                    (VecRRRLongOp::Umlal16, true) =>
-                        ("umlal2", VectorSize::Size32x4, VectorSize::Size16x8),
-                    (VecRRRLongOp::Umlal32, false) =>
-                        ("umlal", VectorSize::Size64x2, VectorSize::Size32x2),
-                    (VecRRRLongOp::Umlal32, true) =>
-                        ("umlal2", VectorSize::Size64x2, VectorSize::Size32x4),
+                    (VecRRRLongOp::Smull8, false) => {
+                        ("smull", VectorSize::Size16x8, VectorSize::Size8x8)
+                    }
+                    (VecRRRLongOp::Smull8, true) => {
+                        ("smull2", VectorSize::Size16x8, VectorSize::Size8x16)
+                    }
+                    (VecRRRLongOp::Smull16, false) => {
+                        ("smull", VectorSize::Size32x4, VectorSize::Size16x4)
+                    }
+                    (VecRRRLongOp::Smull16, true) => {
+                        ("smull2", VectorSize::Size32x4, VectorSize::Size16x8)
+                    }
+                    (VecRRRLongOp::Smull32, false) => {
+                        ("smull", VectorSize::Size64x2, VectorSize::Size32x2)
+                    }
+                    (VecRRRLongOp::Smull32, true) => {
+                        ("smull2", VectorSize::Size64x2, VectorSize::Size32x4)
+                    }
+                    (VecRRRLongOp::Umull8, false) => {
+                        ("umull", VectorSize::Size16x8, VectorSize::Size8x8)
+                    }
+                    (VecRRRLongOp::Umull8, true) => {
+                        ("umull2", VectorSize::Size16x8, VectorSize::Size8x16)
+                    }
+                    (VecRRRLongOp::Umull16, false) => {
+                        ("umull", VectorSize::Size32x4, VectorSize::Size16x4)
+                    }
+                    (VecRRRLongOp::Umull16, true) => {
+                        ("umull2", VectorSize::Size32x4, VectorSize::Size16x8)
+                    }
+                    (VecRRRLongOp::Umull32, false) => {
+                        ("umull", VectorSize::Size64x2, VectorSize::Size32x2)
+                    }
+                    (VecRRRLongOp::Umull32, true) => {
+                        ("umull2", VectorSize::Size64x2, VectorSize::Size32x4)
+                    }
+                    (VecRRRLongOp::Umlal8, false) => {
+                        ("umlal", VectorSize::Size16x8, VectorSize::Size8x8)
+                    }
+                    (VecRRRLongOp::Umlal8, true) => {
+                        ("umlal2", VectorSize::Size16x8, VectorSize::Size8x16)
+                    }
+                    (VecRRRLongOp::Umlal16, false) => {
+                        ("umlal", VectorSize::Size32x4, VectorSize::Size16x4)
+                    }
+                    (VecRRRLongOp::Umlal16, true) => {
+                        ("umlal2", VectorSize::Size32x4, VectorSize::Size16x8)
+                    }
+                    (VecRRRLongOp::Umlal32, false) => {
+                        ("umlal", VectorSize::Size64x2, VectorSize::Size32x2)
+                    }
+                    (VecRRRLongOp::Umlal32, true) => {
+                        ("umlal2", VectorSize::Size64x2, VectorSize::Size32x4)
+                    }
                 };
                 let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest_size);
                 let rn = show_vreg_vector(rn, mb_rru, src_size);
diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs
index 68f745f0b5..cd4af48e30 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -1253,11 +1253,10 @@ pub(crate) fn maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>(
     None
 }
 
-
 pub(crate) fn match_vec_long_mul<C: LowerCtx<I = Inst>>(
     c: &mut C,
     insn: IRInst,
-    ext_op: Opcode
+    ext_op: Opcode,
 ) -> Option<(VecRRRLongOp, regalloc::Reg, regalloc::Reg, bool)> {
     let inputs = insn_inputs(c, insn);
     if let Some(lhs) = maybe_input_insn(c, inputs[0], ext_op) {
@@ -1268,41 +1267,26 @@ pub(crate) fn match_vec_long_mul<C: LowerCtx<I = Inst>>(
             let rm = put_input_in_reg(c, rhs_input, NarrowValueMode::None);
             let lane_type = c.output_ty(insn, 0).lane_type();
             match (lane_type, ext_op) {
-                (I16, Opcode::SwidenLow) =>
-                    return Some((VecRRRLongOp::Smull8, rn, rm, false)),
-                (I16, Opcode::SwidenHigh) =>
-                    return Some((VecRRRLongOp::Smull8, rn, rm, true)),
-                (I16, Opcode::UwidenLow) =>
-                    return Some((VecRRRLongOp::Umull8, rn, rm, false)),
-                (I16, Opcode::UwidenHigh) =>
-                    return Some((VecRRRLongOp::Umull8, rn, rm, true)),
-                (I32, Opcode::SwidenLow) =>
-                    return Some((VecRRRLongOp::Smull16, rn, rm, false)),
-                (I32, Opcode::SwidenHigh) =>
-                    return Some((VecRRRLongOp::Smull16, rn, rm, true)),
-                (I32, Opcode::UwidenLow) =>
-                    return Some((VecRRRLongOp::Umull16, rn, rm, false)),
-                (I32, Opcode::UwidenHigh) =>
-                    return Some((VecRRRLongOp::Umull16, rn, rm, true)),
-                (I64, Opcode::SwidenLow) =>
-                    return Some((VecRRRLongOp::Smull32, rn, rm, false)),
-                (I64, Opcode::SwidenHigh) =>
-                    return Some((VecRRRLongOp::Smull32, rn, rm, true)),
-                (I64, Opcode::UwidenLow) =>
-                    return Some((VecRRRLongOp::Umull32, rn, rm, false)),
-                (I64, Opcode::UwidenHigh) =>
-                    return Some((VecRRRLongOp::Umull32, rn, rm, true)),
-                _ => {},
-             };
-         }
+                (I16, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull8, rn, rm, false)),
+                (I16, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull8, rn, rm, true)),
+                (I16, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull8, rn, rm, false)),
+                (I16, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull8, rn, rm, true)),
+                (I32, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull16, rn, rm, false)),
+                (I32, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull16, rn, rm, true)),
+                (I32, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull16, rn, rm, false)),
+                (I32, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull16, rn, rm, true)),
+                (I64, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull32, rn, rm, false)),
+                (I64, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull32, rn, rm, true)),
+                (I64, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull32, rn, rm, false)),
+                (I64, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull32, rn, rm, true)),
+                _ => {}
+            };
+        }
     }
     None
 }
 
-pub(crate) fn lower_i64x2_mul<C: LowerCtx<I = Inst>>(
-    c: &mut C,
-    insn: IRInst,
-) {
+pub(crate) fn lower_i64x2_mul<C: LowerCtx<I = Inst>>(c: &mut C, insn: IRInst) {
     let inputs = insn_inputs(c, insn);
     let outputs = insn_outputs(c, insn);
     let rd = get_output_reg(c, outputs[0]).regs()[0];
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index a4c37fba1e..754e2f7b95 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -246,80 +246,86 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         Opcode::Imul => {
             let ty = ty.unwrap();
             if ty == I128 {
-              let lhs = put_input_in_regs(ctx, inputs[0]);
-              let rhs = put_input_in_regs(ctx, inputs[1]);
-              let dst = get_output_reg(ctx, outputs[0]);
-              assert_eq!(lhs.len(), 2);
-              assert_eq!(rhs.len(), 2);
-              assert_eq!(dst.len(), 2);
+                let lhs = put_input_in_regs(ctx, inputs[0]);
+                let rhs = put_input_in_regs(ctx, inputs[1]);
+                let dst = get_output_reg(ctx, outputs[0]);
+                assert_eq!(lhs.len(), 2);
+                assert_eq!(rhs.len(), 2);
+                assert_eq!(dst.len(), 2);
 
-              // 128bit mul formula:
-              //   dst_lo = lhs_lo * rhs_lo
-              //   dst_hi = umulhi(lhs_lo, rhs_lo) + (lhs_lo * rhs_hi) + (lhs_hi * rhs_lo)
-              //
-              // We can convert the above formula into the following
-              // umulh   dst_hi, lhs_lo, rhs_lo
-              // madd    dst_hi, lhs_lo, rhs_hi, dst_hi
-              // madd    dst_hi, lhs_hi, rhs_lo, dst_hi
-              // mul     dst_lo, lhs_lo, rhs_lo
+                // 128bit mul formula:
+                //   dst_lo = lhs_lo * rhs_lo
+                //   dst_hi = umulhi(lhs_lo, rhs_lo) + (lhs_lo * rhs_hi) + (lhs_hi * rhs_lo)
+                //
+                // We can convert the above formula into the following
+                // umulh   dst_hi, lhs_lo, rhs_lo
+                // madd    dst_hi, lhs_lo, rhs_hi, dst_hi
+                // madd    dst_hi, lhs_hi, rhs_lo, dst_hi
+                // mul     dst_lo, lhs_lo, rhs_lo
 
-              ctx.emit(Inst::AluRRR {
-                  alu_op: ALUOp::UMulH,
-                  rd: dst.regs()[1],
-                  rn: lhs.regs()[0],
-                  rm: rhs.regs()[0],
-              });
-              ctx.emit(Inst::AluRRRR {
-                  alu_op: ALUOp3::MAdd64,
-                  rd: dst.regs()[1],
-                  rn: lhs.regs()[0],
-                  rm: rhs.regs()[1],
-                  ra: dst.regs()[1].to_reg(),
-              });
-              ctx.emit(Inst::AluRRRR {
-                  alu_op: ALUOp3::MAdd64,
-                  rd: dst.regs()[1],
-                  rn: lhs.regs()[1],
-                  rm: rhs.regs()[0],
-                  ra: dst.regs()[1].to_reg(),
-              });
-              ctx.emit(Inst::AluRRRR {
-                  alu_op: ALUOp3::MAdd64,
-                  rd: dst.regs()[0],
-                  rn: lhs.regs()[0],
-                  rm: rhs.regs()[0],
-                  ra: zero_reg(),
-              });
+                ctx.emit(Inst::AluRRR {
+                    alu_op: ALUOp::UMulH,
+                    rd: dst.regs()[1],
+                    rn: lhs.regs()[0],
+                    rm: rhs.regs()[0],
+                });
+                ctx.emit(Inst::AluRRRR {
+                    alu_op: ALUOp3::MAdd64,
+                    rd: dst.regs()[1],
+                    rn: lhs.regs()[0],
+                    rm: rhs.regs()[1],
+                    ra: dst.regs()[1].to_reg(),
+                });
+                ctx.emit(Inst::AluRRRR {
+                    alu_op: ALUOp3::MAdd64,
+                    rd: dst.regs()[1],
+                    rn: lhs.regs()[1],
+                    rm: rhs.regs()[0],
+                    ra: dst.regs()[1].to_reg(),
+                });
+                ctx.emit(Inst::AluRRRR {
+                    alu_op: ALUOp3::MAdd64,
+                    rd: dst.regs()[0],
+                    rn: lhs.regs()[0],
+                    rm: rhs.regs()[0],
+                    ra: zero_reg(),
+                });
             } else if ty.is_vector() {
-                for ext_op in &[Opcode::SwidenLow, Opcode::SwidenHigh,
-                                Opcode::UwidenLow, Opcode::UwidenHigh] {
-                  if let Some((alu_op, rn, rm, high_half)) = match_vec_long_mul(ctx, insn, *ext_op) {
+                for ext_op in &[
+                    Opcode::SwidenLow,
+                    Opcode::SwidenHigh,
+                    Opcode::UwidenLow,
+                    Opcode::UwidenHigh,
+                ] {
+                    if let Some((alu_op, rn, rm, high_half)) =
+                        match_vec_long_mul(ctx, insn, *ext_op)
+                    {
+                        let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+                        ctx.emit(Inst::VecRRRLong {
+                            alu_op,
+                            rd,
+                            rn,
+                            rm,
+                            high_half,
+                        });
+                        return Ok(());
+                    }
+                }
+                if ty == I64X2 {
+                    lower_i64x2_mul(ctx, insn);
+                } else {
+                    let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+                    let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
                     let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-                    ctx.emit(Inst::VecRRRLong {
-                        alu_op,
+                    ctx.emit(Inst::VecRRR {
+                        alu_op: VecALUOp::Mul,
                         rd,
                         rn,
                         rm,
-                        high_half,
+                        size: VectorSize::from_ty(ty),
                     });
-                    return Ok(());
-                  }
                 }
-                if ty == I64X2 {
-                  lower_i64x2_mul(ctx, insn);
-                } else {
-                  let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-                  let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-                  let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-                  ctx.emit(Inst::VecRRR {
-                      alu_op: VecALUOp::Mul,
-                      rd,
-                      rn,
-                      rm,
-                      size: VectorSize::from_ty(ty),
-                  });
-                }
-             } else {
+            } else {
                 let alu_op = choose_32_64(ty, ALUOp3::MAdd32, ALUOp3::MAdd64);
                 let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
                 let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);