rebase and ran cargo fmt

Copyright (c) 2021, Arm Limited.
Author: Sam Parker
Date:   2021-07-09 10:13:04 +01:00
Parent: 541a4ee428
Commit: f2806a9192
5 changed files with 176 additions and 153 deletions


@@ -287,13 +287,21 @@ fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -
| machreg_to_vec(rd.to_reg())
}
fn enc_vec_rrr_long(q: u32, u: u32, size: u32, bit14: u32, rm: Reg, rn: Reg, rd: Writable<Reg>) -> u32 {
debug_assert_eq!(q & 0b1, q);
debug_assert_eq!(u & 0b1, u);
debug_assert_eq!(size & 0b11, size);
debug_assert_eq!(bit14 & 0b1, bit14);
fn enc_vec_rrr_long(
q: u32,
u: u32,
size: u32,
bit14: u32,
rm: Reg,
rn: Reg,
rd: Writable<Reg>,
) -> u32 {
debug_assert_eq!(q & 0b1, q);
debug_assert_eq!(u & 0b1, u);
debug_assert_eq!(size & 0b11, size);
debug_assert_eq!(bit14 & 0b1, bit14);
0b0_0_0_01110_00_1_00000_100000_00000_00000
0b0_0_0_01110_00_1_00000_100000_00000_00000
| q << 30
| u << 29
| size << 22
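
A minimal sketch of how this helper assembles an instruction word, for reference. Only the q, u and size fields appear in the hunk above; the rm, bit14, rn and rd positions below are assumptions based on the sibling enc_vec_rrr helper and the standard AArch64 three-register-different SIMD layout, and the sample value matches the smull test vector in the binemit tests further down.

// Sketch only: the rm/bit14/rn/rd field positions are assumed, not taken from the hunk.
fn enc_vec_rrr_long_sketch(q: u32, u: u32, size: u32, bit14: u32, rm: u32, rn: u32, rd: u32) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(u & 0b1, u);
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(bit14 & 0b1, bit14);
    0b0_0_0_01110_00_1_00000_100000_00000_00000
        | q << 30              // Q: operate on the high 64-bit source half ("2" variants)
        | u << 29              // U: unsigned (umull/umlal) vs signed (smull)
        | size << 22           // narrow element size
        | (rm & 0b11111) << 16
        | bit14 << 14          // 1 selects the mull opcode bits, 0 keeps mlal
        | (rn & 0b11111) << 5
        | (rd & 0b11111)
}

fn main() {
    // smull v16.8h, v12.8b, v1.8b: q=0, u=0, size=0b00, bit14=1, rm=v1, rn=v12, rd=v16.
    // Matches the expected bytes "90C1210E" (little-endian 0x0E21C190) in the test below.
    assert_eq!(enc_vec_rrr_long_sketch(0, 0, 0b00, 1, 1, 12, 16), 0x0E21C190);
}
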
@@ -2207,7 +2215,15 @@ impl MachInstEmit for Inst {
VecRRRLongOp::Umlal16 => (0b1, 0b01, 0b0),
VecRRRLongOp::Umlal32 => (0b1, 0b10, 0b0),
};
sink.put4(enc_vec_rrr_long(high_half as u32, u, size, bit14, rm, rn, rd));
sink.put4(enc_vec_rrr_long(
high_half as u32,
u,
size,
bit14,
rm,
rn,
rd,
));
}
&Inst::VecRRR {
rd,
@@ -2289,9 +2305,9 @@ impl MachInstEmit for Inst {
}
};
let top11 = if is_float {
top11 | enc_float_size << 1
top11 | enc_float_size << 1
} else {
top11
top11
};
sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
}


@@ -3705,7 +3705,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(16),
rn: vreg(12),
rm: vreg(1),
high_half: false
high_half: false,
},
"90C1210E",
"smull v16.8h, v12.8b, v1.8b",
@@ -3717,7 +3717,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(15),
rn: vreg(11),
rm: vreg(2),
high_half: false
high_half: false,
},
"6FC1222E",
"umull v15.8h, v11.8b, v2.8b",
@@ -3729,7 +3729,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(4),
rn: vreg(8),
rm: vreg(16),
high_half: false
high_half: false,
},
"0481302E",
"umlal v4.8h, v8.8b, v16.8b",


@@ -412,7 +412,6 @@ pub enum VecRRRLongOp {
Umlal32,
}
/// A vector operation on a pair of elements with one register.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecPairOp {
@@ -2159,9 +2158,9 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
alu_op, rd, rn, rm, ..
} => {
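// umlal accumulates into rd, so rd is read as well as written and must be
// recorded as a modified register; the plain mull forms only define rd.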
match alu_op {
VecRRRLongOp::Umlal8
| VecRRRLongOp::Umlal16
| VecRRRLongOp::Umlal32 => collector.add_mod(rd),
VecRRRLongOp::Umlal8 | VecRRRLongOp::Umlal16 | VecRRRLongOp::Umlal32 => {
collector.add_mod(rd)
}
_ => collector.add_def(rd),
};
collector.add_use(rn);
@@ -2985,9 +2984,9 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
..
} => {
match alu_op {
VecRRRLongOp::Umlal8
| VecRRRLongOp::Umlal16
| VecRRRLongOp::Umlal32 => map_mod(mapper, rd),
VecRRRLongOp::Umlal8 | VecRRRLongOp::Umlal16 | VecRRRLongOp::Umlal32 => {
map_mod(mapper, rd)
}
_ => map_def(mapper, rd),
};
map_use(mapper, rn);
@@ -4212,42 +4211,60 @@ impl Inst {
high_half,
} => {
let (op, dest_size, src_size) = match (alu_op, high_half) {
(VecRRRLongOp::Smull8, false) =>
("smull", VectorSize::Size16x8, VectorSize::Size8x8),
(VecRRRLongOp::Smull8, true) =>
("smull2", VectorSize::Size16x8, VectorSize::Size8x16),
(VecRRRLongOp::Smull16, false) =>
("smull", VectorSize::Size32x4, VectorSize::Size16x4),
(VecRRRLongOp::Smull16, true) =>
("smull2", VectorSize::Size32x4, VectorSize::Size16x8),
(VecRRRLongOp::Smull32, false) =>
("smull", VectorSize::Size64x2, VectorSize::Size32x2),
(VecRRRLongOp::Smull32, true) =>
("smull2", VectorSize::Size64x2, VectorSize::Size32x4),
(VecRRRLongOp::Umull8, false) =>
("umull", VectorSize::Size16x8, VectorSize::Size8x8),
(VecRRRLongOp::Umull8, true) =>
("umull2", VectorSize::Size16x8, VectorSize::Size8x16),
(VecRRRLongOp::Umull16, false) =>
("umull", VectorSize::Size32x4, VectorSize::Size16x4),
(VecRRRLongOp::Umull16, true) =>
("umull2", VectorSize::Size32x4, VectorSize::Size16x8),
(VecRRRLongOp::Umull32, false) =>
("umull", VectorSize::Size64x2, VectorSize::Size32x2),
(VecRRRLongOp::Umull32, true) =>
("umull2", VectorSize::Size64x2, VectorSize::Size32x4),
(VecRRRLongOp::Umlal8, false) =>
("umlal", VectorSize::Size16x8, VectorSize::Size8x8),
(VecRRRLongOp::Umlal8, true) =>
("umlal2", VectorSize::Size16x8, VectorSize::Size8x16),
(VecRRRLongOp::Umlal16, false) =>
("umlal", VectorSize::Size32x4, VectorSize::Size16x4),
(VecRRRLongOp::Umlal16, true) =>
("umlal2", VectorSize::Size32x4, VectorSize::Size16x8),
(VecRRRLongOp::Umlal32, false) =>
("umlal", VectorSize::Size64x2, VectorSize::Size32x2),
(VecRRRLongOp::Umlal32, true) =>
("umlal2", VectorSize::Size64x2, VectorSize::Size32x4),
(VecRRRLongOp::Smull8, false) => {
("smull", VectorSize::Size16x8, VectorSize::Size8x8)
}
(VecRRRLongOp::Smull8, true) => {
("smull2", VectorSize::Size16x8, VectorSize::Size8x16)
}
(VecRRRLongOp::Smull16, false) => {
("smull", VectorSize::Size32x4, VectorSize::Size16x4)
}
(VecRRRLongOp::Smull16, true) => {
("smull2", VectorSize::Size32x4, VectorSize::Size16x8)
}
(VecRRRLongOp::Smull32, false) => {
("smull", VectorSize::Size64x2, VectorSize::Size32x2)
}
(VecRRRLongOp::Smull32, true) => {
("smull2", VectorSize::Size64x2, VectorSize::Size32x4)
}
(VecRRRLongOp::Umull8, false) => {
("umull", VectorSize::Size16x8, VectorSize::Size8x8)
}
(VecRRRLongOp::Umull8, true) => {
("umull2", VectorSize::Size16x8, VectorSize::Size8x16)
}
(VecRRRLongOp::Umull16, false) => {
("umull", VectorSize::Size32x4, VectorSize::Size16x4)
}
(VecRRRLongOp::Umull16, true) => {
("umull2", VectorSize::Size32x4, VectorSize::Size16x8)
}
(VecRRRLongOp::Umull32, false) => {
("umull", VectorSize::Size64x2, VectorSize::Size32x2)
}
(VecRRRLongOp::Umull32, true) => {
("umull2", VectorSize::Size64x2, VectorSize::Size32x4)
}
(VecRRRLongOp::Umlal8, false) => {
("umlal", VectorSize::Size16x8, VectorSize::Size8x8)
}
(VecRRRLongOp::Umlal8, true) => {
("umlal2", VectorSize::Size16x8, VectorSize::Size8x16)
}
(VecRRRLongOp::Umlal16, false) => {
("umlal", VectorSize::Size32x4, VectorSize::Size16x4)
}
(VecRRRLongOp::Umlal16, true) => {
("umlal2", VectorSize::Size32x4, VectorSize::Size16x8)
}
(VecRRRLongOp::Umlal32, false) => {
("umlal", VectorSize::Size64x2, VectorSize::Size32x2)
}
(VecRRRLongOp::Umlal32, true) => {
("umlal2", VectorSize::Size64x2, VectorSize::Size32x4)
}
};
let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest_size);
let rn = show_vreg_vector(rn, mb_rru, src_size);


@@ -1253,11 +1253,10 @@ pub(crate) fn maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>(
None
}
pub(crate) fn match_vec_long_mul<C: LowerCtx<I = Inst>>(
c: &mut C,
insn: IRInst,
ext_op: Opcode
ext_op: Opcode,
) -> Option<(VecRRRLongOp, regalloc::Reg, regalloc::Reg, bool)> {
let inputs = insn_inputs(c, insn);
if let Some(lhs) = maybe_input_insn(c, inputs[0], ext_op) {
@@ -1268,41 +1267,26 @@ pub(crate) fn match_vec_long_mul<C: LowerCtx<I = Inst>>(
let rm = put_input_in_reg(c, rhs_input, NarrowValueMode::None);
let lane_type = c.output_ty(insn, 0).lane_type();
match (lane_type, ext_op) {
(I16, Opcode::SwidenLow) =>
return Some((VecRRRLongOp::Smull8, rn, rm, false)),
(I16, Opcode::SwidenHigh) =>
return Some((VecRRRLongOp::Smull8, rn, rm, true)),
(I16, Opcode::UwidenLow) =>
return Some((VecRRRLongOp::Umull8, rn, rm, false)),
(I16, Opcode::UwidenHigh) =>
return Some((VecRRRLongOp::Umull8, rn, rm, true)),
(I32, Opcode::SwidenLow) =>
return Some((VecRRRLongOp::Smull16, rn, rm, false)),
(I32, Opcode::SwidenHigh) =>
return Some((VecRRRLongOp::Smull16, rn, rm, true)),
(I32, Opcode::UwidenLow) =>
return Some((VecRRRLongOp::Umull16, rn, rm, false)),
(I32, Opcode::UwidenHigh) =>
return Some((VecRRRLongOp::Umull16, rn, rm, true)),
(I64, Opcode::SwidenLow) =>
return Some((VecRRRLongOp::Smull32, rn, rm, false)),
(I64, Opcode::SwidenHigh) =>
return Some((VecRRRLongOp::Smull32, rn, rm, true)),
(I64, Opcode::UwidenLow) =>
return Some((VecRRRLongOp::Umull32, rn, rm, false)),
(I64, Opcode::UwidenHigh) =>
return Some((VecRRRLongOp::Umull32, rn, rm, true)),
_ => {},
};
}
(I16, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull8, rn, rm, false)),
(I16, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull8, rn, rm, true)),
(I16, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull8, rn, rm, false)),
(I16, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull8, rn, rm, true)),
(I32, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull16, rn, rm, false)),
(I32, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull16, rn, rm, true)),
(I32, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull16, rn, rm, false)),
(I32, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull16, rn, rm, true)),
(I64, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull32, rn, rm, false)),
(I64, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull32, rn, rm, true)),
(I64, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull32, rn, rm, false)),
(I64, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull32, rn, rm, true)),
_ => {}
};
}
}
None
}
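
For intuition, a hand-written scalar model (not taken from the diff) of what the selected long multiply computes: each narrow lane of the chosen 64-bit half is widened and then multiplied, so the widen + imul pair from the IR collapses into a single instruction.

// Scalar model of smull vd.8h, vn.8b, vm.8b: lane-wise widening multiply of
// the low eight i8 lanes; the i16 result can never overflow.
fn smull_8h_model(a: [i8; 8], b: [i8; 8]) -> [i16; 8] {
    let mut out = [0i16; 8];
    for i in 0..8 {
        out[i] = (a[i] as i16) * (b[i] as i16);
    }
    out
}

fn main() {
    // Worst case: -128 * -128 = 16384 still fits in i16.
    assert_eq!(smull_8h_model([-128; 8], [-128; 8]), [16384; 8]);
}
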
pub(crate) fn lower_i64x2_mul<C: LowerCtx<I = Inst>>(
c: &mut C,
insn: IRInst,
) {
pub(crate) fn lower_i64x2_mul<C: LowerCtx<I = Inst>>(c: &mut C, insn: IRInst) {
let inputs = insn_inputs(c, insn);
let outputs = insn_outputs(c, insn);
let rd = get_output_reg(c, outputs[0]).regs()[0];


@@ -246,80 +246,86 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Imul => {
let ty = ty.unwrap();
if ty == I128 {
let lhs = put_input_in_regs(ctx, inputs[0]);
let rhs = put_input_in_regs(ctx, inputs[1]);
let dst = get_output_reg(ctx, outputs[0]);
assert_eq!(lhs.len(), 2);
assert_eq!(rhs.len(), 2);
assert_eq!(dst.len(), 2);
let lhs = put_input_in_regs(ctx, inputs[0]);
let rhs = put_input_in_regs(ctx, inputs[1]);
let dst = get_output_reg(ctx, outputs[0]);
assert_eq!(lhs.len(), 2);
assert_eq!(rhs.len(), 2);
assert_eq!(dst.len(), 2);
// 128bit mul formula:
// dst_lo = lhs_lo * rhs_lo
// dst_hi = umulhi(lhs_lo, rhs_lo) + (lhs_lo * rhs_hi) + (lhs_hi * rhs_lo)
//
// We can convert the above formula into the following
// umulh dst_hi, lhs_lo, rhs_lo
// madd dst_hi, lhs_lo, rhs_hi, dst_hi
// madd dst_hi, lhs_hi, rhs_lo, dst_hi
// mul dst_lo, lhs_lo, rhs_lo
// 128bit mul formula:
// dst_lo = lhs_lo * rhs_lo
// dst_hi = umulhi(lhs_lo, rhs_lo) + (lhs_lo * rhs_hi) + (lhs_hi * rhs_lo)
//
// We can convert the above formula into the following
// umulh dst_hi, lhs_lo, rhs_lo
// madd dst_hi, lhs_lo, rhs_hi, dst_hi
// madd dst_hi, lhs_hi, rhs_lo, dst_hi
// mul dst_lo, lhs_lo, rhs_lo
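// As a quick sanity check of the formula above (a hand-worked example, not
// part of the diff): take lhs = 2^64 + 3 and rhs = 2^64 + 5, so
//   lhs_lo = 3, lhs_hi = 1, rhs_lo = 5, rhs_hi = 1
//   dst_lo = lhs_lo * rhs_lo                      = 15
//   dst_hi = umulhi(3, 5) + 3 * 1 + 1 * 5         = 0 + 3 + 5 = 8
// which agrees with (2^64 + 3) * (2^64 + 5) = 2^128 + 8 * 2^64 + 15
// truncated to 128 bits.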
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::UMulH,
rd: dst.regs()[1],
rn: lhs.regs()[0],
rm: rhs.regs()[0],
});
ctx.emit(Inst::AluRRRR {
alu_op: ALUOp3::MAdd64,
rd: dst.regs()[1],
rn: lhs.regs()[0],
rm: rhs.regs()[1],
ra: dst.regs()[1].to_reg(),
});
ctx.emit(Inst::AluRRRR {
alu_op: ALUOp3::MAdd64,
rd: dst.regs()[1],
rn: lhs.regs()[1],
rm: rhs.regs()[0],
ra: dst.regs()[1].to_reg(),
});
ctx.emit(Inst::AluRRRR {
alu_op: ALUOp3::MAdd64,
rd: dst.regs()[0],
rn: lhs.regs()[0],
rm: rhs.regs()[0],
ra: zero_reg(),
});
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::UMulH,
rd: dst.regs()[1],
rn: lhs.regs()[0],
rm: rhs.regs()[0],
});
ctx.emit(Inst::AluRRRR {
alu_op: ALUOp3::MAdd64,
rd: dst.regs()[1],
rn: lhs.regs()[0],
rm: rhs.regs()[1],
ra: dst.regs()[1].to_reg(),
});
ctx.emit(Inst::AluRRRR {
alu_op: ALUOp3::MAdd64,
rd: dst.regs()[1],
rn: lhs.regs()[1],
rm: rhs.regs()[0],
ra: dst.regs()[1].to_reg(),
});
ctx.emit(Inst::AluRRRR {
alu_op: ALUOp3::MAdd64,
rd: dst.regs()[0],
rn: lhs.regs()[0],
rm: rhs.regs()[0],
ra: zero_reg(),
});
} else if ty.is_vector() {
for ext_op in &[Opcode::SwidenLow, Opcode::SwidenHigh,
Opcode::UwidenLow, Opcode::UwidenHigh] {
if let Some((alu_op, rn, rm, high_half)) = match_vec_long_mul(ctx, insn, *ext_op) {
for ext_op in &[
Opcode::SwidenLow,
Opcode::SwidenHigh,
Opcode::UwidenLow,
Opcode::UwidenHigh,
] {
if let Some((alu_op, rn, rm, high_half)) =
match_vec_long_mul(ctx, insn, *ext_op)
{
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
ctx.emit(Inst::VecRRRLong {
alu_op,
rd,
rn,
rm,
high_half,
});
return Ok(());
}
}
if ty == I64X2 {
lower_i64x2_mul(ctx, insn);
} else {
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
ctx.emit(Inst::VecRRRLong {
alu_op,
ctx.emit(Inst::VecRRR {
alu_op: VecALUOp::Mul,
rd,
rn,
rm,
high_half,
size: VectorSize::from_ty(ty),
});
return Ok(());
}
}
if ty == I64X2 {
lower_i64x2_mul(ctx, insn);
} else {
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
ctx.emit(Inst::VecRRR {
alu_op: VecALUOp::Mul,
rd,
rn,
rm,
size: VectorSize::from_ty(ty),
});
}
} else {
} else {
let alu_op = choose_32_64(ty, ALUOp3::MAdd32, ALUOp3::MAdd64);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);