Merge pull request #1865 from cfallin/aarch64-amode-reg-reg-extend
AArch64: make use of reg-reg-extend amode.
This commit is contained in:
@@ -133,6 +133,9 @@ pub enum MemArg {
|
|||||||
/// first.
|
/// first.
|
||||||
RegScaledExtended(Reg, Reg, Type, ExtendOp),
|
RegScaledExtended(Reg, Reg, Type, ExtendOp),
|
||||||
|
|
||||||
|
/// Register plus register offset, with index sign- or zero-extended first.
|
||||||
|
RegExtended(Reg, Reg, ExtendOp),
|
||||||
|
|
||||||
/// Unscaled signed 9-bit immediate offset from reg.
|
/// Unscaled signed 9-bit immediate offset from reg.
|
||||||
Unscaled(Reg, SImm9),
|
Unscaled(Reg, SImm9),
|
||||||
|
|
||||||
@@ -412,6 +415,19 @@ impl ShowWithRRU for MemArg {
|
|||||||
shift
|
shift
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
&MemArg::RegExtended(r1, r2, op) => {
|
||||||
|
let size = match op {
|
||||||
|
ExtendOp::SXTW | ExtendOp::UXTW => InstSize::Size32,
|
||||||
|
_ => InstSize::Size64,
|
||||||
|
};
|
||||||
|
let op = op.show_rru(mb_rru);
|
||||||
|
format!(
|
||||||
|
"[{}, {}, {}]",
|
||||||
|
r1.show_rru(mb_rru),
|
||||||
|
show_ireg_sized(r2, mb_rru, size),
|
||||||
|
op,
|
||||||
|
)
|
||||||
|
}
|
||||||
&MemArg::Label(ref label) => label.show_rru(mb_rru),
|
&MemArg::Label(ref label) => label.show_rru(mb_rru),
|
||||||
&MemArg::PreIndexed(r, simm9) => format!(
|
&MemArg::PreIndexed(r, simm9) => format!(
|
||||||
"[{}, {}]!",
|
"[{}, {}]!",
|
||||||
|
|||||||
@@ -707,6 +707,16 @@ impl MachInstEmit for Inst {
|
|||||||
op, r1, r2, /* scaled = */ true, extendop, rd,
|
op, r1, r2, /* scaled = */ true, extendop, rd,
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
&MemArg::RegExtended(r1, r2, extendop) => {
|
||||||
|
sink.put4(enc_ldst_reg(
|
||||||
|
op,
|
||||||
|
r1,
|
||||||
|
r2,
|
||||||
|
/* scaled = */ false,
|
||||||
|
Some(extendop),
|
||||||
|
rd,
|
||||||
|
));
|
||||||
|
}
|
||||||
&MemArg::Label(ref label) => {
|
&MemArg::Label(ref label) => {
|
||||||
let offset = match label {
|
let offset = match label {
|
||||||
// cast i32 to u32 (two's-complement)
|
// cast i32 to u32 (two's-complement)
|
||||||
@@ -833,6 +843,16 @@ impl MachInstEmit for Inst {
|
|||||||
op, r1, r2, /* scaled = */ true, extendop, rd,
|
op, r1, r2, /* scaled = */ true, extendop, rd,
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
&MemArg::RegExtended(r1, r2, extendop) => {
|
||||||
|
sink.put4(enc_ldst_reg(
|
||||||
|
op,
|
||||||
|
r1,
|
||||||
|
r2,
|
||||||
|
/* scaled = */ false,
|
||||||
|
Some(extendop),
|
||||||
|
rd,
|
||||||
|
));
|
||||||
|
}
|
||||||
&MemArg::Label(..) => {
|
&MemArg::Label(..) => {
|
||||||
panic!("Store to a MemLabel not implemented!");
|
panic!("Store to a MemLabel not implemented!");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1281,6 +1281,15 @@ fn test_aarch64_binemit() {
|
|||||||
"41D863F8",
|
"41D863F8",
|
||||||
"ldr x1, [x2, w3, SXTW #3]",
|
"ldr x1, [x2, w3, SXTW #3]",
|
||||||
));
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::ULoad64 {
|
||||||
|
rd: writable_xreg(1),
|
||||||
|
mem: MemArg::RegExtended(xreg(2), xreg(3), ExtendOp::SXTW),
|
||||||
|
srcloc: None,
|
||||||
|
},
|
||||||
|
"41C863F8",
|
||||||
|
"ldr x1, [x2, w3, SXTW]",
|
||||||
|
));
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::ULoad64 {
|
Inst::ULoad64 {
|
||||||
rd: writable_xreg(1),
|
rd: writable_xreg(1),
|
||||||
@@ -1474,6 +1483,15 @@ fn test_aarch64_binemit() {
|
|||||||
"415823F8",
|
"415823F8",
|
||||||
"str x1, [x2, w3, UXTW #3]",
|
"str x1, [x2, w3, UXTW #3]",
|
||||||
));
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::Store64 {
|
||||||
|
rd: xreg(1),
|
||||||
|
mem: MemArg::RegExtended(xreg(2), xreg(3), ExtendOp::UXTW),
|
||||||
|
srcloc: None,
|
||||||
|
},
|
||||||
|
"414823F8",
|
||||||
|
"str x1, [x2, w3, UXTW]",
|
||||||
|
));
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::Store64 {
|
Inst::Store64 {
|
||||||
rd: xreg(1),
|
rd: xreg(1),
|
||||||
|
|||||||
@@ -1049,7 +1049,8 @@ fn memarg_regs(memarg: &MemArg, collector: &mut RegUsageCollector) {
|
|||||||
}
|
}
|
||||||
&MemArg::RegReg(r1, r2, ..)
|
&MemArg::RegReg(r1, r2, ..)
|
||||||
| &MemArg::RegScaled(r1, r2, ..)
|
| &MemArg::RegScaled(r1, r2, ..)
|
||||||
| &MemArg::RegScaledExtended(r1, r2, ..) => {
|
| &MemArg::RegScaledExtended(r1, r2, ..)
|
||||||
|
| &MemArg::RegExtended(r1, r2, ..) => {
|
||||||
collector.add_use(r1);
|
collector.add_use(r1);
|
||||||
collector.add_use(r2);
|
collector.add_use(r2);
|
||||||
}
|
}
|
||||||
@@ -1384,15 +1385,10 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
|||||||
match mem {
|
match mem {
|
||||||
&mut MemArg::Unscaled(ref mut reg, ..) => map_use(m, reg),
|
&mut MemArg::Unscaled(ref mut reg, ..) => map_use(m, reg),
|
||||||
&mut MemArg::UnsignedOffset(ref mut reg, ..) => map_use(m, reg),
|
&mut MemArg::UnsignedOffset(ref mut reg, ..) => map_use(m, reg),
|
||||||
&mut MemArg::RegReg(ref mut r1, ref mut r2) => {
|
&mut MemArg::RegReg(ref mut r1, ref mut r2)
|
||||||
map_use(m, r1);
|
| &mut MemArg::RegScaled(ref mut r1, ref mut r2, ..)
|
||||||
map_use(m, r2);
|
| &mut MemArg::RegScaledExtended(ref mut r1, ref mut r2, ..)
|
||||||
}
|
| &mut MemArg::RegExtended(ref mut r1, ref mut r2, ..) => {
|
||||||
&mut MemArg::RegScaled(ref mut r1, ref mut r2, ..) => {
|
|
||||||
map_use(m, r1);
|
|
||||||
map_use(m, r2);
|
|
||||||
}
|
|
||||||
&mut MemArg::RegScaledExtended(ref mut r1, ref mut r2, ..) => {
|
|
||||||
map_use(m, r1);
|
map_use(m, r1);
|
||||||
map_use(m, r2);
|
map_use(m, r2);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -550,7 +550,51 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
|
|||||||
return MemArg::RegOffset(reg, offset as i64, elem_ty);
|
return MemArg::RegOffset(reg, offset as i64, elem_ty);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle two regs and a zero offset, if possible.
|
// Handle two regs and a zero offset with built-in extend, if possible.
|
||||||
|
if addends.len() == 2 && offset == 0 {
|
||||||
|
// r1, r2 (to be extended), r2_bits, is_signed
|
||||||
|
let mut parts: Option<(Reg, Reg, usize, bool)> = None;
|
||||||
|
// Handle extension of either first or second addend.
|
||||||
|
for i in 0..2 {
|
||||||
|
if let Some((op, ext_insn)) =
|
||||||
|
maybe_input_insn_multi(ctx, addends[i], &[Opcode::Uextend, Opcode::Sextend])
|
||||||
|
{
|
||||||
|
// Non-extended addend.
|
||||||
|
let r1 = input_to_reg(ctx, addends[1 - i], NarrowValueMode::ZeroExtend64);
|
||||||
|
// Extended addend.
|
||||||
|
let r2 = input_to_reg(
|
||||||
|
ctx,
|
||||||
|
InsnInput {
|
||||||
|
insn: ext_insn,
|
||||||
|
input: 0,
|
||||||
|
},
|
||||||
|
NarrowValueMode::None,
|
||||||
|
);
|
||||||
|
let r2_bits = ty_bits(ctx.input_ty(ext_insn, 0));
|
||||||
|
parts = Some((
|
||||||
|
r1,
|
||||||
|
r2,
|
||||||
|
r2_bits,
|
||||||
|
/* is_signed = */ op == Opcode::Sextend,
|
||||||
|
));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some((r1, r2, r2_bits, is_signed)) = parts {
|
||||||
|
match (r2_bits, is_signed) {
|
||||||
|
(32, false) => {
|
||||||
|
return MemArg::RegExtended(r1, r2, ExtendOp::UXTW);
|
||||||
|
}
|
||||||
|
(32, true) => {
|
||||||
|
return MemArg::RegExtended(r1, r2, ExtendOp::SXTW);
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle two regs and a zero offset in the general case, if possible.
|
||||||
if addends.len() == 2 && offset == 0 {
|
if addends.len() == 2 && offset == 0 {
|
||||||
let ra = input_to_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64);
|
let ra = input_to_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64);
|
||||||
let rb = input_to_reg(ctx, addends[1], NarrowValueMode::ZeroExtend64);
|
let rb = input_to_reg(ctx, addends[1], NarrowValueMode::ZeroExtend64);
|
||||||
@@ -812,6 +856,20 @@ pub(crate) fn maybe_input_insn<C: LowerCtx<I = Inst>>(
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Checks for an instance of any one of `ops` feeding the given input.
|
||||||
|
pub(crate) fn maybe_input_insn_multi<C: LowerCtx<I = Inst>>(
|
||||||
|
c: &mut C,
|
||||||
|
input: InsnInput,
|
||||||
|
ops: &[Opcode],
|
||||||
|
) -> Option<(Opcode, IRInst)> {
|
||||||
|
for &op in ops {
|
||||||
|
if let Some(inst) = maybe_input_insn(c, input, op) {
|
||||||
|
return Some((op, inst));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
/// Checks for an instance of `op` feeding the given input, possibly via a conversion `conv` (e.g.,
|
/// Checks for an instance of `op` feeding the given input, possibly via a conversion `conv` (e.g.,
|
||||||
/// Bint or a bitcast).
|
/// Bint or a bitcast).
|
||||||
///
|
///
|
||||||
|
|||||||
58
cranelift/filetests/filetests/vcode/aarch64/amodes.clif
Normal file
58
cranelift/filetests/filetests/vcode/aarch64/amodes.clif
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
test compile
|
||||||
|
target aarch64
|
||||||
|
|
||||||
|
function %f0(i64, i32) -> i32 {
|
||||||
|
block0(v0: i64, v1: i32):
|
||||||
|
v2 = uextend.i64 v1
|
||||||
|
v3 = load_complex.i32 v0+v2
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: ldr w0, [x0, w1, UXTW]
|
||||||
|
; nextln: mov sp, fp
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %f1(i64, i32) -> i32 {
|
||||||
|
block0(v0: i64, v1: i32):
|
||||||
|
v2 = uextend.i64 v1
|
||||||
|
v3 = load_complex.i32 v2+v0
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: ldr w0, [x0, w1, UXTW]
|
||||||
|
; nextln: mov sp, fp
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %f1(i64, i32) -> i32 {
|
||||||
|
block0(v0: i64, v1: i32):
|
||||||
|
v2 = sextend.i64 v1
|
||||||
|
v3 = load_complex.i32 v0+v2
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: ldr w0, [x0, w1, SXTW]
|
||||||
|
; nextln: mov sp, fp
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %f1(i64, i32) -> i32 {
|
||||||
|
block0(v0: i64, v1: i32):
|
||||||
|
v2 = sextend.i64 v1
|
||||||
|
v3 = load_complex.i32 v2+v0
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: ldr w0, [x0, w1, SXTW]
|
||||||
|
; nextln: mov sp, fp
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
Reference in New Issue
Block a user