From f2806a919284feab01e04b5d041e61e1de715d38 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Fri, 9 Jul 2021 10:13:04 +0100 Subject: [PATCH] rebase and ran cargo fmt Copyright (c) 2021, Arm Limited. --- .../codegen/src/isa/aarch64/inst/emit.rs | 34 +++-- .../src/isa/aarch64/inst/emit_tests.rs | 6 +- cranelift/codegen/src/isa/aarch64/inst/mod.rs | 103 +++++++------ cranelift/codegen/src/isa/aarch64/lower.rs | 50 +++---- .../codegen/src/isa/aarch64/lower_inst.rs | 136 +++++++++--------- 5 files changed, 176 insertions(+), 153 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 91335e3bba..54886b010e 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -287,13 +287,21 @@ fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable) - | machreg_to_vec(rd.to_reg()) } -fn enc_vec_rrr_long(q: u32, u: u32, size: u32, bit14: u32, rm: Reg, rn: Reg, rd: Writable) -> u32 { - debug_assert_eq!(q & 0b1, q); - debug_assert_eq!(u & 0b1, u); - debug_assert_eq!(size & 0b11, size); - debug_assert_eq!(bit14 & 0b1, bit14); +fn enc_vec_rrr_long( + q: u32, + u: u32, + size: u32, + bit14: u32, + rm: Reg, + rn: Reg, + rd: Writable, +) -> u32 { + debug_assert_eq!(q & 0b1, q); + debug_assert_eq!(u & 0b1, u); + debug_assert_eq!(size & 0b11, size); + debug_assert_eq!(bit14 & 0b1, bit14); - 0b0_0_0_01110_00_1_00000_100000_00000_00000 + 0b0_0_0_01110_00_1_00000_100000_00000_00000 | q << 30 | u << 29 | size << 22 @@ -2207,7 +2215,15 @@ impl MachInstEmit for Inst { VecRRRLongOp::Umlal16 => (0b1, 0b01, 0b0), VecRRRLongOp::Umlal32 => (0b1, 0b10, 0b0), }; - sink.put4(enc_vec_rrr_long(high_half as u32, u, size, bit14, rm, rn, rd)); + sink.put4(enc_vec_rrr_long( + high_half as u32, + u, + size, + bit14, + rm, + rn, + rd, + )); } &Inst::VecRRR { rd, @@ -2289,9 +2305,9 @@ impl MachInstEmit for Inst { } }; let top11 = if is_float { - top11 | enc_float_size << 1 + top11 | enc_float_size << 1 } else { - top11 + top11 }; sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd)); } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index 46d3fd6b69..d3afca2a77 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -3705,7 +3705,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(16), rn: vreg(12), rm: vreg(1), - high_half: false + high_half: false, }, "90C1210E", "smull v16.8h, v12.8b, v1.8b", @@ -3717,7 +3717,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(15), rn: vreg(11), rm: vreg(2), - high_half: false + high_half: false, }, "6FC1222E", "umull v15.8h, v11.8b, v2.8b", @@ -3729,7 +3729,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(4), rn: vreg(8), rm: vreg(16), - high_half: false + high_half: false, }, "0481302E", "umlal v4.8h, v8.8b, v16.8b", diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 6ca551360b..8c993492bd 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -412,7 +412,6 @@ pub enum VecRRRLongOp { Umlal32, } - /// A vector operation on a pair of elements with one register. #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] pub enum VecPairOp { @@ -2159,9 +2158,9 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { alu_op, rd, rn, rm, .. } => { match alu_op { - VecRRRLongOp::Umlal8 - | VecRRRLongOp::Umlal16 - | VecRRRLongOp::Umlal32 => collector.add_mod(rd), + VecRRRLongOp::Umlal8 | VecRRRLongOp::Umlal16 | VecRRRLongOp::Umlal32 => { + collector.add_mod(rd) + } _ => collector.add_def(rd), }; collector.add_use(rn); @@ -2985,9 +2984,9 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RUM) { .. } => { match alu_op { - VecRRRLongOp::Umlal8 - | VecRRRLongOp::Umlal16 - | VecRRRLongOp::Umlal32 => map_mod(mapper, rd), + VecRRRLongOp::Umlal8 | VecRRRLongOp::Umlal16 | VecRRRLongOp::Umlal32 => { + map_mod(mapper, rd) + } _ => map_def(mapper, rd), }; map_use(mapper, rn); @@ -4212,42 +4211,60 @@ impl Inst { high_half, } => { let (op, dest_size, src_size) = match (alu_op, high_half) { - (VecRRRLongOp::Smull8, false) => - ("smull", VectorSize::Size16x8, VectorSize::Size8x8), - (VecRRRLongOp::Smull8, true) => - ("smull2", VectorSize::Size16x8, VectorSize::Size8x16), - (VecRRRLongOp::Smull16, false) => - ("smull", VectorSize::Size32x4, VectorSize::Size16x4), - (VecRRRLongOp::Smull16, true) => - ("smull2", VectorSize::Size32x4, VectorSize::Size16x8), - (VecRRRLongOp::Smull32, false) => - ("smull", VectorSize::Size64x2, VectorSize::Size32x2), - (VecRRRLongOp::Smull32, true) => - ("smull2", VectorSize::Size64x2, VectorSize::Size32x4), - (VecRRRLongOp::Umull8, false) => - ("umull", VectorSize::Size16x8, VectorSize::Size8x8), - (VecRRRLongOp::Umull8, true) => - ("umull2", VectorSize::Size16x8, VectorSize::Size8x16), - (VecRRRLongOp::Umull16, false) => - ("umull", VectorSize::Size32x4, VectorSize::Size16x4), - (VecRRRLongOp::Umull16, true) => - ("umull2", VectorSize::Size32x4, VectorSize::Size16x8), - (VecRRRLongOp::Umull32, false) => - ("umull", VectorSize::Size64x2, VectorSize::Size32x2), - (VecRRRLongOp::Umull32, true) => - ("umull2", VectorSize::Size64x2, VectorSize::Size32x4), - (VecRRRLongOp::Umlal8, false) => - ("umlal", VectorSize::Size16x8, VectorSize::Size8x8), - (VecRRRLongOp::Umlal8, true) => - ("umlal2", VectorSize::Size16x8, VectorSize::Size8x16), - (VecRRRLongOp::Umlal16, false) => - ("umlal", VectorSize::Size32x4, VectorSize::Size16x4), - (VecRRRLongOp::Umlal16, true) => - ("umlal2", VectorSize::Size32x4, VectorSize::Size16x8), - (VecRRRLongOp::Umlal32, false) => - ("umlal", VectorSize::Size64x2, VectorSize::Size32x2), - (VecRRRLongOp::Umlal32, true) => - ("umlal2", VectorSize::Size64x2, VectorSize::Size32x4), + (VecRRRLongOp::Smull8, false) => { + ("smull", VectorSize::Size16x8, VectorSize::Size8x8) + } + (VecRRRLongOp::Smull8, true) => { + ("smull2", VectorSize::Size16x8, VectorSize::Size8x16) + } + (VecRRRLongOp::Smull16, false) => { + ("smull", VectorSize::Size32x4, VectorSize::Size16x4) + } + (VecRRRLongOp::Smull16, true) => { + ("smull2", VectorSize::Size32x4, VectorSize::Size16x8) + } + (VecRRRLongOp::Smull32, false) => { + ("smull", VectorSize::Size64x2, VectorSize::Size32x2) + } + (VecRRRLongOp::Smull32, true) => { + ("smull2", VectorSize::Size64x2, VectorSize::Size32x4) + } + (VecRRRLongOp::Umull8, false) => { + ("umull", VectorSize::Size16x8, VectorSize::Size8x8) + } + (VecRRRLongOp::Umull8, true) => { + ("umull2", VectorSize::Size16x8, VectorSize::Size8x16) + } + (VecRRRLongOp::Umull16, false) => { + ("umull", VectorSize::Size32x4, VectorSize::Size16x4) + } + (VecRRRLongOp::Umull16, true) => { + ("umull2", VectorSize::Size32x4, VectorSize::Size16x8) + } + (VecRRRLongOp::Umull32, false) => { + ("umull", VectorSize::Size64x2, VectorSize::Size32x2) + } + (VecRRRLongOp::Umull32, true) => { + ("umull2", VectorSize::Size64x2, VectorSize::Size32x4) + } + (VecRRRLongOp::Umlal8, false) => { + ("umlal", VectorSize::Size16x8, VectorSize::Size8x8) + } + (VecRRRLongOp::Umlal8, true) => { + ("umlal2", VectorSize::Size16x8, VectorSize::Size8x16) + } + (VecRRRLongOp::Umlal16, false) => { + ("umlal", VectorSize::Size32x4, VectorSize::Size16x4) + } + (VecRRRLongOp::Umlal16, true) => { + ("umlal2", VectorSize::Size32x4, VectorSize::Size16x8) + } + (VecRRRLongOp::Umlal32, false) => { + ("umlal", VectorSize::Size64x2, VectorSize::Size32x2) + } + (VecRRRLongOp::Umlal32, true) => { + ("umlal2", VectorSize::Size64x2, VectorSize::Size32x4) + } }; let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest_size); let rn = show_vreg_vector(rn, mb_rru, src_size); diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 68f745f0b5..cd4af48e30 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -1253,11 +1253,10 @@ pub(crate) fn maybe_input_insn_via_conv>( None } - pub(crate) fn match_vec_long_mul>( c: &mut C, insn: IRInst, - ext_op: Opcode + ext_op: Opcode, ) -> Option<(VecRRRLongOp, regalloc::Reg, regalloc::Reg, bool)> { let inputs = insn_inputs(c, insn); if let Some(lhs) = maybe_input_insn(c, inputs[0], ext_op) { @@ -1268,41 +1267,26 @@ pub(crate) fn match_vec_long_mul>( let rm = put_input_in_reg(c, rhs_input, NarrowValueMode::None); let lane_type = c.output_ty(insn, 0).lane_type(); match (lane_type, ext_op) { - (I16, Opcode::SwidenLow) => - return Some((VecRRRLongOp::Smull8, rn, rm, false)), - (I16, Opcode::SwidenHigh) => - return Some((VecRRRLongOp::Smull8, rn, rm, true)), - (I16, Opcode::UwidenLow) => - return Some((VecRRRLongOp::Umull8, rn, rm, false)), - (I16, Opcode::UwidenHigh) => - return Some((VecRRRLongOp::Umull8, rn, rm, true)), - (I32, Opcode::SwidenLow) => - return Some((VecRRRLongOp::Smull16, rn, rm, false)), - (I32, Opcode::SwidenHigh) => - return Some((VecRRRLongOp::Smull16, rn, rm, true)), - (I32, Opcode::UwidenLow) => - return Some((VecRRRLongOp::Umull16, rn, rm, false)), - (I32, Opcode::UwidenHigh) => - return Some((VecRRRLongOp::Umull16, rn, rm, true)), - (I64, Opcode::SwidenLow) => - return Some((VecRRRLongOp::Smull32, rn, rm, false)), - (I64, Opcode::SwidenHigh) => - return Some((VecRRRLongOp::Smull32, rn, rm, true)), - (I64, Opcode::UwidenLow) => - return Some((VecRRRLongOp::Umull32, rn, rm, false)), - (I64, Opcode::UwidenHigh) => - return Some((VecRRRLongOp::Umull32, rn, rm, true)), - _ => {}, - }; - } + (I16, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull8, rn, rm, false)), + (I16, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull8, rn, rm, true)), + (I16, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull8, rn, rm, false)), + (I16, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull8, rn, rm, true)), + (I32, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull16, rn, rm, false)), + (I32, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull16, rn, rm, true)), + (I32, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull16, rn, rm, false)), + (I32, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull16, rn, rm, true)), + (I64, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull32, rn, rm, false)), + (I64, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull32, rn, rm, true)), + (I64, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull32, rn, rm, false)), + (I64, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull32, rn, rm, true)), + _ => {} + }; + } } None } -pub(crate) fn lower_i64x2_mul>( - c: &mut C, - insn: IRInst, -) { +pub(crate) fn lower_i64x2_mul>(c: &mut C, insn: IRInst) { let inputs = insn_inputs(c, insn); let outputs = insn_outputs(c, insn); let rd = get_output_reg(c, outputs[0]).regs()[0]; diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index a4c37fba1e..754e2f7b95 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -246,80 +246,86 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Imul => { let ty = ty.unwrap(); if ty == I128 { - let lhs = put_input_in_regs(ctx, inputs[0]); - let rhs = put_input_in_regs(ctx, inputs[1]); - let dst = get_output_reg(ctx, outputs[0]); - assert_eq!(lhs.len(), 2); - assert_eq!(rhs.len(), 2); - assert_eq!(dst.len(), 2); + let lhs = put_input_in_regs(ctx, inputs[0]); + let rhs = put_input_in_regs(ctx, inputs[1]); + let dst = get_output_reg(ctx, outputs[0]); + assert_eq!(lhs.len(), 2); + assert_eq!(rhs.len(), 2); + assert_eq!(dst.len(), 2); - // 128bit mul formula: - // dst_lo = lhs_lo * rhs_lo - // dst_hi = umulhi(lhs_lo, rhs_lo) + (lhs_lo * rhs_hi) + (lhs_hi * rhs_lo) - // - // We can convert the above formula into the following - // umulh dst_hi, lhs_lo, rhs_lo - // madd dst_hi, lhs_lo, rhs_hi, dst_hi - // madd dst_hi, lhs_hi, rhs_lo, dst_hi - // mul dst_lo, lhs_lo, rhs_lo + // 128bit mul formula: + // dst_lo = lhs_lo * rhs_lo + // dst_hi = umulhi(lhs_lo, rhs_lo) + (lhs_lo * rhs_hi) + (lhs_hi * rhs_lo) + // + // We can convert the above formula into the following + // umulh dst_hi, lhs_lo, rhs_lo + // madd dst_hi, lhs_lo, rhs_hi, dst_hi + // madd dst_hi, lhs_hi, rhs_lo, dst_hi + // mul dst_lo, lhs_lo, rhs_lo - ctx.emit(Inst::AluRRR { - alu_op: ALUOp::UMulH, - rd: dst.regs()[1], - rn: lhs.regs()[0], - rm: rhs.regs()[0], - }); - ctx.emit(Inst::AluRRRR { - alu_op: ALUOp3::MAdd64, - rd: dst.regs()[1], - rn: lhs.regs()[0], - rm: rhs.regs()[1], - ra: dst.regs()[1].to_reg(), - }); - ctx.emit(Inst::AluRRRR { - alu_op: ALUOp3::MAdd64, - rd: dst.regs()[1], - rn: lhs.regs()[1], - rm: rhs.regs()[0], - ra: dst.regs()[1].to_reg(), - }); - ctx.emit(Inst::AluRRRR { - alu_op: ALUOp3::MAdd64, - rd: dst.regs()[0], - rn: lhs.regs()[0], - rm: rhs.regs()[0], - ra: zero_reg(), - }); + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::UMulH, + rd: dst.regs()[1], + rn: lhs.regs()[0], + rm: rhs.regs()[0], + }); + ctx.emit(Inst::AluRRRR { + alu_op: ALUOp3::MAdd64, + rd: dst.regs()[1], + rn: lhs.regs()[0], + rm: rhs.regs()[1], + ra: dst.regs()[1].to_reg(), + }); + ctx.emit(Inst::AluRRRR { + alu_op: ALUOp3::MAdd64, + rd: dst.regs()[1], + rn: lhs.regs()[1], + rm: rhs.regs()[0], + ra: dst.regs()[1].to_reg(), + }); + ctx.emit(Inst::AluRRRR { + alu_op: ALUOp3::MAdd64, + rd: dst.regs()[0], + rn: lhs.regs()[0], + rm: rhs.regs()[0], + ra: zero_reg(), + }); } else if ty.is_vector() { - for ext_op in &[Opcode::SwidenLow, Opcode::SwidenHigh, - Opcode::UwidenLow, Opcode::UwidenHigh] { - if let Some((alu_op, rn, rm, high_half)) = match_vec_long_mul(ctx, insn, *ext_op) { + for ext_op in &[ + Opcode::SwidenLow, + Opcode::SwidenHigh, + Opcode::UwidenLow, + Opcode::UwidenHigh, + ] { + if let Some((alu_op, rn, rm, high_half)) = + match_vec_long_mul(ctx, insn, *ext_op) + { + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + ctx.emit(Inst::VecRRRLong { + alu_op, + rd, + rn, + rm, + high_half, + }); + return Ok(()); + } + } + if ty == I64X2 { + lower_i64x2_mul(ctx, insn); + } else { + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - ctx.emit(Inst::VecRRRLong { - alu_op, + ctx.emit(Inst::VecRRR { + alu_op: VecALUOp::Mul, rd, rn, rm, - high_half, + size: VectorSize::from_ty(ty), }); - return Ok(()); - } } - if ty == I64X2 { - lower_i64x2_mul(ctx, insn); - } else { - let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); - let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); - let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - ctx.emit(Inst::VecRRR { - alu_op: VecALUOp::Mul, - rd, - rn, - rm, - size: VectorSize::from_ty(ty), - }); - } - } else { + } else { let alu_op = choose_32_64(ty, ALUOp3::MAdd32, ALUOp3::MAdd64); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);