Merge pull request #2062 from akirilov-arm/extract_lane
AArch64: Improve code generation for Extractlane + Sextend / Uextend
This commit is contained in:
@@ -1272,6 +1272,38 @@ impl MachInstEmit for Inst {
|
|||||||
| machreg_to_gpr(rd.to_reg()),
|
| machreg_to_gpr(rd.to_reg()),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
&Inst::MovFromVecSigned {
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
idx,
|
||||||
|
size,
|
||||||
|
scalar_size,
|
||||||
|
} => {
|
||||||
|
let (imm5, shift, half) = match size {
|
||||||
|
VectorSize::Size8x8 => (0b00001, 1, true),
|
||||||
|
VectorSize::Size8x16 => (0b00001, 1, false),
|
||||||
|
VectorSize::Size16x4 => (0b00010, 2, true),
|
||||||
|
VectorSize::Size16x8 => (0b00010, 2, false),
|
||||||
|
VectorSize::Size32x2 => {
|
||||||
|
debug_assert_ne!(scalar_size, OperandSize::Size32);
|
||||||
|
(0b00100, 3, true)
|
||||||
|
}
|
||||||
|
VectorSize::Size32x4 => {
|
||||||
|
debug_assert_ne!(scalar_size, OperandSize::Size32);
|
||||||
|
(0b00100, 3, false)
|
||||||
|
}
|
||||||
|
_ => panic!("Unexpected vector operand size"),
|
||||||
|
};
|
||||||
|
debug_assert_eq!(idx & (0b11111 >> (half as u32 + shift)), idx);
|
||||||
|
let imm5 = imm5 | ((idx as u32) << shift);
|
||||||
|
sink.put4(
|
||||||
|
0b000_01110000_00000_0_0101_1_00000_00000
|
||||||
|
| (scalar_size.is64() as u32) << 30
|
||||||
|
| (imm5 << 16)
|
||||||
|
| (machreg_to_vec(rn) << 5)
|
||||||
|
| machreg_to_gpr(rd.to_reg()),
|
||||||
|
);
|
||||||
|
}
|
||||||
&Inst::VecDup { rd, rn, size } => {
|
&Inst::VecDup { rd, rn, size } => {
|
||||||
let imm5 = match size {
|
let imm5 = match size {
|
||||||
VectorSize::Size8x16 => 0b00001,
|
VectorSize::Size8x16 => 0b00001,
|
||||||
|
|||||||
@@ -1876,6 +1876,50 @@ fn test_aarch64_binemit() {
|
|||||||
"953E084E",
|
"953E084E",
|
||||||
"mov x21, v20.d[0]",
|
"mov x21, v20.d[0]",
|
||||||
));
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::MovFromVecSigned {
|
||||||
|
rd: writable_xreg(0),
|
||||||
|
rn: vreg(0),
|
||||||
|
idx: 15,
|
||||||
|
size: VectorSize::Size8x16,
|
||||||
|
scalar_size: OperandSize::Size32,
|
||||||
|
},
|
||||||
|
"002C1F0E",
|
||||||
|
"smov w0, v0.b[15]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::MovFromVecSigned {
|
||||||
|
rd: writable_xreg(12),
|
||||||
|
rn: vreg(13),
|
||||||
|
idx: 7,
|
||||||
|
size: VectorSize::Size8x8,
|
||||||
|
scalar_size: OperandSize::Size64,
|
||||||
|
},
|
||||||
|
"AC2D0F4E",
|
||||||
|
"smov x12, v13.b[7]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::MovFromVecSigned {
|
||||||
|
rd: writable_xreg(23),
|
||||||
|
rn: vreg(31),
|
||||||
|
idx: 7,
|
||||||
|
size: VectorSize::Size16x8,
|
||||||
|
scalar_size: OperandSize::Size32,
|
||||||
|
},
|
||||||
|
"F72F1E0E",
|
||||||
|
"smov w23, v31.h[7]",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::MovFromVecSigned {
|
||||||
|
rd: writable_xreg(24),
|
||||||
|
rn: vreg(5),
|
||||||
|
idx: 1,
|
||||||
|
size: VectorSize::Size32x2,
|
||||||
|
scalar_size: OperandSize::Size64,
|
||||||
|
},
|
||||||
|
"B82C0C4E",
|
||||||
|
"smov x24, v5.s[1]",
|
||||||
|
));
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::MovToNZCV { rn: xreg(13) },
|
Inst::MovToNZCV { rn: xreg(13) },
|
||||||
"0D421BD5",
|
"0D421BD5",
|
||||||
|
|||||||
@@ -745,7 +745,7 @@ pub enum Inst {
|
|||||||
rn: Reg,
|
rn: Reg,
|
||||||
},
|
},
|
||||||
|
|
||||||
/// Move to a GPR from a vector element.
|
/// Unsigned move from a vector element to a GPR.
|
||||||
MovFromVec {
|
MovFromVec {
|
||||||
rd: Writable<Reg>,
|
rd: Writable<Reg>,
|
||||||
rn: Reg,
|
rn: Reg,
|
||||||
@@ -753,6 +753,15 @@ pub enum Inst {
|
|||||||
size: VectorSize,
|
size: VectorSize,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/// Signed move from a vector element to a GPR.
|
||||||
|
MovFromVecSigned {
|
||||||
|
rd: Writable<Reg>,
|
||||||
|
rn: Reg,
|
||||||
|
idx: u8,
|
||||||
|
size: VectorSize,
|
||||||
|
scalar_size: OperandSize,
|
||||||
|
},
|
||||||
|
|
||||||
/// Duplicate general-purpose register to vector.
|
/// Duplicate general-purpose register to vector.
|
||||||
VecDup {
|
VecDup {
|
||||||
rd: Writable<Reg>,
|
rd: Writable<Reg>,
|
||||||
@@ -1319,7 +1328,7 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
|||||||
collector.add_def(rd);
|
collector.add_def(rd);
|
||||||
collector.add_use(rn);
|
collector.add_use(rn);
|
||||||
}
|
}
|
||||||
&Inst::MovFromVec { rd, rn, .. } => {
|
&Inst::MovFromVec { rd, rn, .. } | &Inst::MovFromVecSigned { rd, rn, .. } => {
|
||||||
collector.add_def(rd);
|
collector.add_def(rd);
|
||||||
collector.add_use(rn);
|
collector.add_use(rn);
|
||||||
}
|
}
|
||||||
@@ -1896,6 +1905,11 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
|||||||
ref mut rd,
|
ref mut rd,
|
||||||
ref mut rn,
|
ref mut rn,
|
||||||
..
|
..
|
||||||
|
}
|
||||||
|
| &mut Inst::MovFromVecSigned {
|
||||||
|
ref mut rd,
|
||||||
|
ref mut rn,
|
||||||
|
..
|
||||||
} => {
|
} => {
|
||||||
map_def(mapper, rd);
|
map_def(mapper, rd);
|
||||||
map_use(mapper, rn);
|
map_use(mapper, rn);
|
||||||
@@ -2726,6 +2740,17 @@ impl Inst {
|
|||||||
let rn = show_vreg_element(rn, mb_rru, idx, size);
|
let rn = show_vreg_element(rn, mb_rru, idx, size);
|
||||||
format!("{} {}, {}", op, rd, rn)
|
format!("{} {}, {}", op, rd, rn)
|
||||||
}
|
}
|
||||||
|
&Inst::MovFromVecSigned {
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
idx,
|
||||||
|
size,
|
||||||
|
scalar_size,
|
||||||
|
} => {
|
||||||
|
let rd = show_ireg_sized(rd.to_reg(), mb_rru, scalar_size);
|
||||||
|
let rn = show_vreg_element(rn, mb_rru, idx, size);
|
||||||
|
format!("smov {}, {}", rd, rn)
|
||||||
|
}
|
||||||
&Inst::VecDup { rd, rn, size } => {
|
&Inst::VecDup { rd, rn, size } => {
|
||||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
|
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
|
||||||
let rn = show_ireg_sized(rn, mb_rru, size.operand_size());
|
let rn = show_ireg_sized(rn, mb_rru, size.operand_size());
|
||||||
|
|||||||
@@ -392,11 +392,40 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
assert!(from_bits <= to_bits);
|
assert!(from_bits <= to_bits);
|
||||||
if from_bits < to_bits {
|
if from_bits < to_bits {
|
||||||
let signed = op == Opcode::Sextend;
|
let signed = op == Opcode::Sextend;
|
||||||
|
let rd = get_output_reg(ctx, outputs[0]);
|
||||||
|
|
||||||
|
if let Some(extract_insn) = maybe_input_insn(ctx, inputs[0], Opcode::Extractlane) {
|
||||||
|
let idx =
|
||||||
|
if let InstructionData::BinaryImm8 { imm, .. } = ctx.data(extract_insn) {
|
||||||
|
*imm
|
||||||
|
} else {
|
||||||
|
unreachable!();
|
||||||
|
};
|
||||||
|
let input = InsnInput {
|
||||||
|
insn: extract_insn,
|
||||||
|
input: 0,
|
||||||
|
};
|
||||||
|
let rn = put_input_in_reg(ctx, input, NarrowValueMode::None);
|
||||||
|
let size = VectorSize::from_ty(ctx.input_ty(extract_insn, 0));
|
||||||
|
|
||||||
|
if signed {
|
||||||
|
let scalar_size = OperandSize::from_ty(output_ty);
|
||||||
|
|
||||||
|
ctx.emit(Inst::MovFromVecSigned {
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
idx,
|
||||||
|
size,
|
||||||
|
scalar_size,
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
ctx.emit(Inst::MovFromVec { rd, rn, idx, size });
|
||||||
|
}
|
||||||
|
} else {
|
||||||
// If we reach this point, we weren't able to incorporate the extend as
|
// If we reach this point, we weren't able to incorporate the extend as
|
||||||
// a register-mode on another instruction, so we have a 'None'
|
// a register-mode on another instruction, so we have a 'None'
|
||||||
// narrow-value/extend mode here, and we emit the explicit instruction.
|
// narrow-value/extend mode here, and we emit the explicit instruction.
|
||||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||||
let rd = get_output_reg(ctx, outputs[0]);
|
|
||||||
ctx.emit(Inst::Extend {
|
ctx.emit(Inst::Extend {
|
||||||
rd,
|
rd,
|
||||||
rn,
|
rn,
|
||||||
@@ -406,6 +435,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Opcode::Bnot => {
|
Opcode::Bnot => {
|
||||||
let rd = get_output_reg(ctx, outputs[0]);
|
let rd = get_output_reg(ctx, outputs[0]);
|
||||||
|
|||||||
Reference in New Issue
Block a user