rebase and ran cargo fmt
Copyright (c) 2021, Arm Limited.
This commit is contained in:
@@ -287,13 +287,21 @@ fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -
|
||||
| machreg_to_vec(rd.to_reg())
|
||||
}
|
||||
|
||||
fn enc_vec_rrr_long(q: u32, u: u32, size: u32, bit14: u32, rm: Reg, rn: Reg, rd: Writable<Reg>) -> u32 {
|
||||
debug_assert_eq!(q & 0b1, q);
|
||||
debug_assert_eq!(u & 0b1, u);
|
||||
debug_assert_eq!(size & 0b11, size);
|
||||
debug_assert_eq!(bit14 & 0b1, bit14);
|
||||
fn enc_vec_rrr_long(
|
||||
q: u32,
|
||||
u: u32,
|
||||
size: u32,
|
||||
bit14: u32,
|
||||
rm: Reg,
|
||||
rn: Reg,
|
||||
rd: Writable<Reg>,
|
||||
) -> u32 {
|
||||
debug_assert_eq!(q & 0b1, q);
|
||||
debug_assert_eq!(u & 0b1, u);
|
||||
debug_assert_eq!(size & 0b11, size);
|
||||
debug_assert_eq!(bit14 & 0b1, bit14);
|
||||
|
||||
0b0_0_0_01110_00_1_00000_100000_00000_00000
|
||||
0b0_0_0_01110_00_1_00000_100000_00000_00000
|
||||
| q << 30
|
||||
| u << 29
|
||||
| size << 22
|
||||
@@ -2207,7 +2215,15 @@ impl MachInstEmit for Inst {
|
||||
VecRRRLongOp::Umlal16 => (0b1, 0b01, 0b0),
|
||||
VecRRRLongOp::Umlal32 => (0b1, 0b10, 0b0),
|
||||
};
|
||||
sink.put4(enc_vec_rrr_long(high_half as u32, u, size, bit14, rm, rn, rd));
|
||||
sink.put4(enc_vec_rrr_long(
|
||||
high_half as u32,
|
||||
u,
|
||||
size,
|
||||
bit14,
|
||||
rm,
|
||||
rn,
|
||||
rd,
|
||||
));
|
||||
}
|
||||
&Inst::VecRRR {
|
||||
rd,
|
||||
@@ -2289,9 +2305,9 @@ impl MachInstEmit for Inst {
|
||||
}
|
||||
};
|
||||
let top11 = if is_float {
|
||||
top11 | enc_float_size << 1
|
||||
top11 | enc_float_size << 1
|
||||
} else {
|
||||
top11
|
||||
top11
|
||||
};
|
||||
sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
|
||||
}
|
||||
|
||||
@@ -3705,7 +3705,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(16),
|
||||
rn: vreg(12),
|
||||
rm: vreg(1),
|
||||
high_half: false
|
||||
high_half: false,
|
||||
},
|
||||
"90C1210E",
|
||||
"smull v16.8h, v12.8b, v1.8b",
|
||||
@@ -3717,7 +3717,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(15),
|
||||
rn: vreg(11),
|
||||
rm: vreg(2),
|
||||
high_half: false
|
||||
high_half: false,
|
||||
},
|
||||
"6FC1222E",
|
||||
"umull v15.8h, v11.8b, v2.8b",
|
||||
@@ -3729,7 +3729,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(4),
|
||||
rn: vreg(8),
|
||||
rm: vreg(16),
|
||||
high_half: false
|
||||
high_half: false,
|
||||
},
|
||||
"0481302E",
|
||||
"umlal v4.8h, v8.8b, v16.8b",
|
||||
|
||||
@@ -412,7 +412,6 @@ pub enum VecRRRLongOp {
|
||||
Umlal32,
|
||||
}
|
||||
|
||||
|
||||
/// A vector operation on a pair of elements with one register.
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum VecPairOp {
|
||||
@@ -2159,9 +2158,9 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
alu_op, rd, rn, rm, ..
|
||||
} => {
|
||||
match alu_op {
|
||||
VecRRRLongOp::Umlal8
|
||||
| VecRRRLongOp::Umlal16
|
||||
| VecRRRLongOp::Umlal32 => collector.add_mod(rd),
|
||||
VecRRRLongOp::Umlal8 | VecRRRLongOp::Umlal16 | VecRRRLongOp::Umlal32 => {
|
||||
collector.add_mod(rd)
|
||||
}
|
||||
_ => collector.add_def(rd),
|
||||
};
|
||||
collector.add_use(rn);
|
||||
@@ -2985,9 +2984,9 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
..
|
||||
} => {
|
||||
match alu_op {
|
||||
VecRRRLongOp::Umlal8
|
||||
| VecRRRLongOp::Umlal16
|
||||
| VecRRRLongOp::Umlal32 => map_mod(mapper, rd),
|
||||
VecRRRLongOp::Umlal8 | VecRRRLongOp::Umlal16 | VecRRRLongOp::Umlal32 => {
|
||||
map_mod(mapper, rd)
|
||||
}
|
||||
_ => map_def(mapper, rd),
|
||||
};
|
||||
map_use(mapper, rn);
|
||||
@@ -4212,42 +4211,60 @@ impl Inst {
|
||||
high_half,
|
||||
} => {
|
||||
let (op, dest_size, src_size) = match (alu_op, high_half) {
|
||||
(VecRRRLongOp::Smull8, false) =>
|
||||
("smull", VectorSize::Size16x8, VectorSize::Size8x8),
|
||||
(VecRRRLongOp::Smull8, true) =>
|
||||
("smull2", VectorSize::Size16x8, VectorSize::Size8x16),
|
||||
(VecRRRLongOp::Smull16, false) =>
|
||||
("smull", VectorSize::Size32x4, VectorSize::Size16x4),
|
||||
(VecRRRLongOp::Smull16, true) =>
|
||||
("smull2", VectorSize::Size32x4, VectorSize::Size16x8),
|
||||
(VecRRRLongOp::Smull32, false) =>
|
||||
("smull", VectorSize::Size64x2, VectorSize::Size32x2),
|
||||
(VecRRRLongOp::Smull32, true) =>
|
||||
("smull2", VectorSize::Size64x2, VectorSize::Size32x4),
|
||||
(VecRRRLongOp::Umull8, false) =>
|
||||
("umull", VectorSize::Size16x8, VectorSize::Size8x8),
|
||||
(VecRRRLongOp::Umull8, true) =>
|
||||
("umull2", VectorSize::Size16x8, VectorSize::Size8x16),
|
||||
(VecRRRLongOp::Umull16, false) =>
|
||||
("umull", VectorSize::Size32x4, VectorSize::Size16x4),
|
||||
(VecRRRLongOp::Umull16, true) =>
|
||||
("umull2", VectorSize::Size32x4, VectorSize::Size16x8),
|
||||
(VecRRRLongOp::Umull32, false) =>
|
||||
("umull", VectorSize::Size64x2, VectorSize::Size32x2),
|
||||
(VecRRRLongOp::Umull32, true) =>
|
||||
("umull2", VectorSize::Size64x2, VectorSize::Size32x4),
|
||||
(VecRRRLongOp::Umlal8, false) =>
|
||||
("umlal", VectorSize::Size16x8, VectorSize::Size8x8),
|
||||
(VecRRRLongOp::Umlal8, true) =>
|
||||
("umlal2", VectorSize::Size16x8, VectorSize::Size8x16),
|
||||
(VecRRRLongOp::Umlal16, false) =>
|
||||
("umlal", VectorSize::Size32x4, VectorSize::Size16x4),
|
||||
(VecRRRLongOp::Umlal16, true) =>
|
||||
("umlal2", VectorSize::Size32x4, VectorSize::Size16x8),
|
||||
(VecRRRLongOp::Umlal32, false) =>
|
||||
("umlal", VectorSize::Size64x2, VectorSize::Size32x2),
|
||||
(VecRRRLongOp::Umlal32, true) =>
|
||||
("umlal2", VectorSize::Size64x2, VectorSize::Size32x4),
|
||||
(VecRRRLongOp::Smull8, false) => {
|
||||
("smull", VectorSize::Size16x8, VectorSize::Size8x8)
|
||||
}
|
||||
(VecRRRLongOp::Smull8, true) => {
|
||||
("smull2", VectorSize::Size16x8, VectorSize::Size8x16)
|
||||
}
|
||||
(VecRRRLongOp::Smull16, false) => {
|
||||
("smull", VectorSize::Size32x4, VectorSize::Size16x4)
|
||||
}
|
||||
(VecRRRLongOp::Smull16, true) => {
|
||||
("smull2", VectorSize::Size32x4, VectorSize::Size16x8)
|
||||
}
|
||||
(VecRRRLongOp::Smull32, false) => {
|
||||
("smull", VectorSize::Size64x2, VectorSize::Size32x2)
|
||||
}
|
||||
(VecRRRLongOp::Smull32, true) => {
|
||||
("smull2", VectorSize::Size64x2, VectorSize::Size32x4)
|
||||
}
|
||||
(VecRRRLongOp::Umull8, false) => {
|
||||
("umull", VectorSize::Size16x8, VectorSize::Size8x8)
|
||||
}
|
||||
(VecRRRLongOp::Umull8, true) => {
|
||||
("umull2", VectorSize::Size16x8, VectorSize::Size8x16)
|
||||
}
|
||||
(VecRRRLongOp::Umull16, false) => {
|
||||
("umull", VectorSize::Size32x4, VectorSize::Size16x4)
|
||||
}
|
||||
(VecRRRLongOp::Umull16, true) => {
|
||||
("umull2", VectorSize::Size32x4, VectorSize::Size16x8)
|
||||
}
|
||||
(VecRRRLongOp::Umull32, false) => {
|
||||
("umull", VectorSize::Size64x2, VectorSize::Size32x2)
|
||||
}
|
||||
(VecRRRLongOp::Umull32, true) => {
|
||||
("umull2", VectorSize::Size64x2, VectorSize::Size32x4)
|
||||
}
|
||||
(VecRRRLongOp::Umlal8, false) => {
|
||||
("umlal", VectorSize::Size16x8, VectorSize::Size8x8)
|
||||
}
|
||||
(VecRRRLongOp::Umlal8, true) => {
|
||||
("umlal2", VectorSize::Size16x8, VectorSize::Size8x16)
|
||||
}
|
||||
(VecRRRLongOp::Umlal16, false) => {
|
||||
("umlal", VectorSize::Size32x4, VectorSize::Size16x4)
|
||||
}
|
||||
(VecRRRLongOp::Umlal16, true) => {
|
||||
("umlal2", VectorSize::Size32x4, VectorSize::Size16x8)
|
||||
}
|
||||
(VecRRRLongOp::Umlal32, false) => {
|
||||
("umlal", VectorSize::Size64x2, VectorSize::Size32x2)
|
||||
}
|
||||
(VecRRRLongOp::Umlal32, true) => {
|
||||
("umlal2", VectorSize::Size64x2, VectorSize::Size32x4)
|
||||
}
|
||||
};
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest_size);
|
||||
let rn = show_vreg_vector(rn, mb_rru, src_size);
|
||||
|
||||
@@ -1253,11 +1253,10 @@ pub(crate) fn maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>(
|
||||
None
|
||||
}
|
||||
|
||||
|
||||
pub(crate) fn match_vec_long_mul<C: LowerCtx<I = Inst>>(
|
||||
c: &mut C,
|
||||
insn: IRInst,
|
||||
ext_op: Opcode
|
||||
ext_op: Opcode,
|
||||
) -> Option<(VecRRRLongOp, regalloc::Reg, regalloc::Reg, bool)> {
|
||||
let inputs = insn_inputs(c, insn);
|
||||
if let Some(lhs) = maybe_input_insn(c, inputs[0], ext_op) {
|
||||
@@ -1268,41 +1267,26 @@ pub(crate) fn match_vec_long_mul<C: LowerCtx<I = Inst>>(
|
||||
let rm = put_input_in_reg(c, rhs_input, NarrowValueMode::None);
|
||||
let lane_type = c.output_ty(insn, 0).lane_type();
|
||||
match (lane_type, ext_op) {
|
||||
(I16, Opcode::SwidenLow) =>
|
||||
return Some((VecRRRLongOp::Smull8, rn, rm, false)),
|
||||
(I16, Opcode::SwidenHigh) =>
|
||||
return Some((VecRRRLongOp::Smull8, rn, rm, true)),
|
||||
(I16, Opcode::UwidenLow) =>
|
||||
return Some((VecRRRLongOp::Umull8, rn, rm, false)),
|
||||
(I16, Opcode::UwidenHigh) =>
|
||||
return Some((VecRRRLongOp::Umull8, rn, rm, true)),
|
||||
(I32, Opcode::SwidenLow) =>
|
||||
return Some((VecRRRLongOp::Smull16, rn, rm, false)),
|
||||
(I32, Opcode::SwidenHigh) =>
|
||||
return Some((VecRRRLongOp::Smull16, rn, rm, true)),
|
||||
(I32, Opcode::UwidenLow) =>
|
||||
return Some((VecRRRLongOp::Umull16, rn, rm, false)),
|
||||
(I32, Opcode::UwidenHigh) =>
|
||||
return Some((VecRRRLongOp::Umull16, rn, rm, true)),
|
||||
(I64, Opcode::SwidenLow) =>
|
||||
return Some((VecRRRLongOp::Smull32, rn, rm, false)),
|
||||
(I64, Opcode::SwidenHigh) =>
|
||||
return Some((VecRRRLongOp::Smull32, rn, rm, true)),
|
||||
(I64, Opcode::UwidenLow) =>
|
||||
return Some((VecRRRLongOp::Umull32, rn, rm, false)),
|
||||
(I64, Opcode::UwidenHigh) =>
|
||||
return Some((VecRRRLongOp::Umull32, rn, rm, true)),
|
||||
_ => {},
|
||||
};
|
||||
}
|
||||
(I16, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull8, rn, rm, false)),
|
||||
(I16, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull8, rn, rm, true)),
|
||||
(I16, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull8, rn, rm, false)),
|
||||
(I16, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull8, rn, rm, true)),
|
||||
(I32, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull16, rn, rm, false)),
|
||||
(I32, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull16, rn, rm, true)),
|
||||
(I32, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull16, rn, rm, false)),
|
||||
(I32, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull16, rn, rm, true)),
|
||||
(I64, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull32, rn, rm, false)),
|
||||
(I64, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull32, rn, rm, true)),
|
||||
(I64, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull32, rn, rm, false)),
|
||||
(I64, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull32, rn, rm, true)),
|
||||
_ => {}
|
||||
};
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
pub(crate) fn lower_i64x2_mul<C: LowerCtx<I = Inst>>(
|
||||
c: &mut C,
|
||||
insn: IRInst,
|
||||
) {
|
||||
pub(crate) fn lower_i64x2_mul<C: LowerCtx<I = Inst>>(c: &mut C, insn: IRInst) {
|
||||
let inputs = insn_inputs(c, insn);
|
||||
let outputs = insn_outputs(c, insn);
|
||||
let rd = get_output_reg(c, outputs[0]).regs()[0];
|
||||
|
||||
@@ -246,80 +246,86 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
Opcode::Imul => {
|
||||
let ty = ty.unwrap();
|
||||
if ty == I128 {
|
||||
let lhs = put_input_in_regs(ctx, inputs[0]);
|
||||
let rhs = put_input_in_regs(ctx, inputs[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]);
|
||||
assert_eq!(lhs.len(), 2);
|
||||
assert_eq!(rhs.len(), 2);
|
||||
assert_eq!(dst.len(), 2);
|
||||
let lhs = put_input_in_regs(ctx, inputs[0]);
|
||||
let rhs = put_input_in_regs(ctx, inputs[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]);
|
||||
assert_eq!(lhs.len(), 2);
|
||||
assert_eq!(rhs.len(), 2);
|
||||
assert_eq!(dst.len(), 2);
|
||||
|
||||
// 128bit mul formula:
|
||||
// dst_lo = lhs_lo * rhs_lo
|
||||
// dst_hi = umulhi(lhs_lo, rhs_lo) + (lhs_lo * rhs_hi) + (lhs_hi * rhs_lo)
|
||||
//
|
||||
// We can convert the above formula into the following
|
||||
// umulh dst_hi, lhs_lo, rhs_lo
|
||||
// madd dst_hi, lhs_lo, rhs_hi, dst_hi
|
||||
// madd dst_hi, lhs_hi, rhs_lo, dst_hi
|
||||
// mul dst_lo, lhs_lo, rhs_lo
|
||||
// 128bit mul formula:
|
||||
// dst_lo = lhs_lo * rhs_lo
|
||||
// dst_hi = umulhi(lhs_lo, rhs_lo) + (lhs_lo * rhs_hi) + (lhs_hi * rhs_lo)
|
||||
//
|
||||
// We can convert the above formula into the following
|
||||
// umulh dst_hi, lhs_lo, rhs_lo
|
||||
// madd dst_hi, lhs_lo, rhs_hi, dst_hi
|
||||
// madd dst_hi, lhs_hi, rhs_lo, dst_hi
|
||||
// mul dst_lo, lhs_lo, rhs_lo
|
||||
|
||||
ctx.emit(Inst::AluRRR {
|
||||
alu_op: ALUOp::UMulH,
|
||||
rd: dst.regs()[1],
|
||||
rn: lhs.regs()[0],
|
||||
rm: rhs.regs()[0],
|
||||
});
|
||||
ctx.emit(Inst::AluRRRR {
|
||||
alu_op: ALUOp3::MAdd64,
|
||||
rd: dst.regs()[1],
|
||||
rn: lhs.regs()[0],
|
||||
rm: rhs.regs()[1],
|
||||
ra: dst.regs()[1].to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::AluRRRR {
|
||||
alu_op: ALUOp3::MAdd64,
|
||||
rd: dst.regs()[1],
|
||||
rn: lhs.regs()[1],
|
||||
rm: rhs.regs()[0],
|
||||
ra: dst.regs()[1].to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::AluRRRR {
|
||||
alu_op: ALUOp3::MAdd64,
|
||||
rd: dst.regs()[0],
|
||||
rn: lhs.regs()[0],
|
||||
rm: rhs.regs()[0],
|
||||
ra: zero_reg(),
|
||||
});
|
||||
ctx.emit(Inst::AluRRR {
|
||||
alu_op: ALUOp::UMulH,
|
||||
rd: dst.regs()[1],
|
||||
rn: lhs.regs()[0],
|
||||
rm: rhs.regs()[0],
|
||||
});
|
||||
ctx.emit(Inst::AluRRRR {
|
||||
alu_op: ALUOp3::MAdd64,
|
||||
rd: dst.regs()[1],
|
||||
rn: lhs.regs()[0],
|
||||
rm: rhs.regs()[1],
|
||||
ra: dst.regs()[1].to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::AluRRRR {
|
||||
alu_op: ALUOp3::MAdd64,
|
||||
rd: dst.regs()[1],
|
||||
rn: lhs.regs()[1],
|
||||
rm: rhs.regs()[0],
|
||||
ra: dst.regs()[1].to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::AluRRRR {
|
||||
alu_op: ALUOp3::MAdd64,
|
||||
rd: dst.regs()[0],
|
||||
rn: lhs.regs()[0],
|
||||
rm: rhs.regs()[0],
|
||||
ra: zero_reg(),
|
||||
});
|
||||
} else if ty.is_vector() {
|
||||
for ext_op in &[Opcode::SwidenLow, Opcode::SwidenHigh,
|
||||
Opcode::UwidenLow, Opcode::UwidenHigh] {
|
||||
if let Some((alu_op, rn, rm, high_half)) = match_vec_long_mul(ctx, insn, *ext_op) {
|
||||
for ext_op in &[
|
||||
Opcode::SwidenLow,
|
||||
Opcode::SwidenHigh,
|
||||
Opcode::UwidenLow,
|
||||
Opcode::UwidenHigh,
|
||||
] {
|
||||
if let Some((alu_op, rn, rm, high_half)) =
|
||||
match_vec_long_mul(ctx, insn, *ext_op)
|
||||
{
|
||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
ctx.emit(Inst::VecRRRLong {
|
||||
alu_op,
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
high_half,
|
||||
});
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
if ty == I64X2 {
|
||||
lower_i64x2_mul(ctx, insn);
|
||||
} else {
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
ctx.emit(Inst::VecRRRLong {
|
||||
alu_op,
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op: VecALUOp::Mul,
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
high_half,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
if ty == I64X2 {
|
||||
lower_i64x2_mul(ctx, insn);
|
||||
} else {
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op: VecALUOp::Mul,
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
} else {
|
||||
} else {
|
||||
let alu_op = choose_32_64(ty, ALUOp3::MAdd32, ALUOp3::MAdd64);
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
|
||||
Reference in New Issue
Block a user