aarch64: Rework amode compilation to produce SSA code (#5369)
Rework the compilation of amodes in the aarch64 backend to stop reusing registers and instead generate fresh virtual registers for intermediates. This resolves some SSA checker violations with the aarch64 backend, and as a nice side-effect removes some unnecessary movs in the generated code.
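As an illustration of the pattern this change moves to, here is a minimal, self-contained sketch. The `Ctx`, `VReg`, and `Inst` types below are hypothetical stand-ins for Cranelift's `Lower<Inst>`, `Writable<Reg>`, and machine-instruction types, not the real API: rather than allocating one temporary, moving the base into it, and redefining that same register for every addend, the lowering folds over the addends and allocates a fresh virtual register for each intermediate sum, so every register has exactly one definition.

```rust
// Hypothetical stand-ins for Cranelift's lowering context and registers; this
// only illustrates the "fresh temp per intermediate" idea behind the commit.
#[derive(Clone, Copy, Debug)]
struct VReg(u32);

#[derive(Debug)]
enum Inst {
    // rd = rn + rm
    Add { rd: VReg, rn: VReg, rm: VReg },
}

#[derive(Default)]
struct Ctx {
    next_vreg: u32,
    insts: Vec<Inst>,
}

impl Ctx {
    // Allocate a fresh virtual register (analogous to `ctx.alloc_tmp(I64)`).
    fn alloc_tmp(&mut self) -> VReg {
        let r = VReg(self.next_vreg);
        self.next_vreg += 1;
        r
    }

    fn emit(&mut self, inst: Inst) {
        self.insts.push(inst);
    }
}

// SSA-style lowering of `init + a0 + a1 + ...`: every partial sum gets its own
// destination register, and the final register is returned to the caller.
fn lower_add_addends(ctx: &mut Ctx, init: VReg, addends: &[VReg]) -> VReg {
    addends.iter().fold(init, |prev, &reg| {
        let rd = ctx.alloc_tmp();
        ctx.emit(Inst::Add { rd, rn: prev, rm: reg });
        rd
    })
}

fn main() {
    let mut ctx = Ctx::default();
    let base = ctx.alloc_tmp();
    let a = ctx.alloc_tmp();
    let b = ctx.alloc_tmp();
    // No leading `mov` into a scratch register: the first add reads `base`
    // directly, and each intermediate sum is a new, singly-defined vreg.
    let addr = lower_add_addends(&mut ctx, base, &[a, b]);
    println!("address in {:?}", addr);
    for inst in &ctx.insts {
        println!("{:?}", inst);
    }
}
```

Because the base register is read directly by the first add instead of being copied into a scratch register first, the up-front `mov`s of the old scheme disappear, which is what the updated filetest expectations below reflect.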
@@ -342,19 +342,18 @@ pub(crate) fn lower_pair_address(ctx: &mut Lower<Inst>, addr: Value, offset: i32
         zero_reg()
     };
 
-    let addr = ctx.alloc_tmp(I64).only_reg().unwrap();
-    ctx.emit(Inst::gen_move(addr, base_reg, I64));
-
     // We have the base register, if we have any others, we need to add them
-    lower_add_addends(ctx, addr, addends64, addends32);
+    let addr = lower_add_addends(ctx, base_reg, addends64, addends32);
 
     // Figure out what offset we should emit
-    let imm7 = SImm7Scaled::maybe_from_i64(offset, I64).unwrap_or_else(|| {
-        lower_add_immediate(ctx, addr, addr.to_reg(), offset);
-        SImm7Scaled::maybe_from_i64(0, I64).unwrap()
-    });
+    let (addr, imm7) = if let Some(imm7) = SImm7Scaled::maybe_from_i64(offset, I64) {
+        (addr, imm7)
+    } else {
+        let res = lower_add_immediate(ctx, addr, offset);
+        (res, SImm7Scaled::maybe_from_i64(0, I64).unwrap())
+    };
 
-    PairAMode::SignedOffset(addr.to_reg(), imm7)
+    PairAMode::SignedOffset(addr, imm7)
 }
 
 /// Lower the address of a load or store.
@@ -454,63 +453,48 @@ pub(crate) fn lower_address(
         return memarg;
     }
 
-    // Allocate the temp and shoehorn it into the AMode.
-    let addr = ctx.alloc_tmp(I64).only_reg().unwrap();
-    let (reg, memarg) = match memarg {
-        AMode::RegExtended { rn, rm, extendop } => (
-            rn,
-            AMode::RegExtended {
-                rn: addr.to_reg(),
-                rm,
-                extendop,
-            },
-        ),
-        AMode::RegOffset { rn, off, ty } => (
-            rn,
-            AMode::RegOffset {
-                rn: addr.to_reg(),
-                off,
-                ty,
-            },
-        ),
-        AMode::RegReg { rn, rm } => (
-            rm,
-            AMode::RegReg {
-                rn: addr.to_reg(),
-                rm: rn,
-            },
-        ),
-        AMode::UnsignedOffset { rn, uimm12 } => (
-            rn,
-            AMode::UnsignedOffset {
-                rn: addr.to_reg(),
-                uimm12,
-            },
-        ),
+    // Extract the first register from the memarg so that we can add all the
+    // immediate values to it.
+    let addr = match memarg {
+        AMode::RegExtended { rn, .. } => rn,
+        AMode::RegOffset { rn, .. } => rn,
+        AMode::RegReg { rm, .. } => rm,
+        AMode::UnsignedOffset { rn, .. } => rn,
         _ => unreachable!(),
     };
 
     // If there is any offset, load that first into `addr`, and add the `reg`
     // that we kicked out of the `AMode`; otherwise, start with that reg.
-    if offset != 0 {
-        lower_add_immediate(ctx, addr, reg, offset)
+    let addr = if offset != 0 {
+        lower_add_immediate(ctx, addr, offset)
     } else {
-        ctx.emit(Inst::gen_move(addr, reg, I64));
-    }
+        addr
+    };
 
     // Now handle reg64 and reg32-extended components.
-    lower_add_addends(ctx, addr, addends64, addends32);
+    let addr = lower_add_addends(ctx, addr, addends64, addends32);
 
-    memarg
+    // Shoehorn addr into the AMode.
+    match memarg {
+        AMode::RegExtended { rm, extendop, .. } => AMode::RegExtended {
+            rn: addr,
+            rm,
+            extendop,
+        },
+        AMode::RegOffset { off, ty, .. } => AMode::RegOffset { rn: addr, off, ty },
+        AMode::RegReg { rn, .. } => AMode::RegReg { rn: addr, rm: rn },
+        AMode::UnsignedOffset { uimm12, .. } => AMode::UnsignedOffset { rn: addr, uimm12 },
+        _ => unreachable!(),
+    }
 }
 
 fn lower_add_addends(
     ctx: &mut Lower<Inst>,
-    rd: Writable<Reg>,
+    init: Reg,
     addends64: AddressAddend64List,
     addends32: AddressAddend32List,
-) {
-    for reg in addends64 {
+) -> Reg {
+    let init = addends64.into_iter().fold(init, |prev, reg| {
         // If the register is the stack reg, we must move it to another reg
         // before adding it.
         let reg = if reg == stack_reg() {
@@ -520,30 +504,43 @@ fn lower_add_addends(
         } else {
             reg
         };
+
+        let rd = ctx.alloc_tmp(I64).only_reg().unwrap();
+
         ctx.emit(Inst::AluRRR {
             alu_op: ALUOp::Add,
             size: OperandSize::Size64,
             rd,
-            rn: rd.to_reg(),
+            rn: prev,
             rm: reg,
         });
-    }
-    for (reg, extendop) in addends32 {
+
+        rd.to_reg()
+    });
+
+    addends32.into_iter().fold(init, |prev, (reg, extendop)| {
         assert!(reg != stack_reg());
+
+        let rd = ctx.alloc_tmp(I64).only_reg().unwrap();
+
         ctx.emit(Inst::AluRRRExtend {
             alu_op: ALUOp::Add,
             size: OperandSize::Size64,
             rd,
-            rn: rd.to_reg(),
+            rn: prev,
             rm: reg,
             extendop,
         });
-    }
+
+        rd.to_reg()
+    })
 }
 
 /// Adds into `rd` a signed imm pattern matching the best instruction for it.
 // TODO: This function is duplicated in ctx.gen_add_imm
-fn lower_add_immediate(ctx: &mut Lower<Inst>, dst: Writable<Reg>, src: Reg, imm: i64) {
+fn lower_add_immediate(ctx: &mut Lower<Inst>, src: Reg, imm: i64) -> Reg {
+    let dst = ctx.alloc_tmp(I64).only_reg().unwrap();
+
     // If we can fit offset or -offset in an imm12, use an add-imm
     // Otherwise, lower the constant first then add.
     if let Some(imm12) = Imm12::maybe_from_u64(imm as u64) {
@@ -563,15 +560,18 @@ fn lower_add_immediate(ctx: &mut Lower<Inst>, dst: Writable<Reg>, src: Reg, imm:
             imm12,
         });
     } else {
-        lower_constant_u64(ctx, dst, imm as u64);
+        let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
+        lower_constant_u64(ctx, tmp, imm as u64);
         ctx.emit(Inst::AluRRR {
             alu_op: ALUOp::Add,
             size: OperandSize::Size64,
             rd: dst,
-            rn: dst.to_reg(),
+            rn: tmp.to_reg(),
             rm: src,
         });
     }
+
+    dst.to_reg()
 }
 
 pub(crate) fn lower_constant_u64(ctx: &mut Lower<Inst>, rd: Writable<Reg>, value: u64) {
@@ -53,9 +53,9 @@ block0(v0: i64, v1: i32):
 
 ; block0:
 ; add x3, x0, #68
-; add x3, x3, x0
-; add x3, x3, x1, SXTW
-; ldr w0, [x3, w1, SXTW]
+; add x5, x3, x0
+; add x7, x5, x1, SXTW
+; ldr w0, [x7, w1, SXTW]
 ; ret
 
 function %f9(i64, i64, i64) -> i32 {
@@ -69,10 +69,9 @@ block0(v0: i64, v1: i64, v2: i64):
 }
 
 ; block0:
-; mov x5, x0
-; add x5, x5, x2
-; add x5, x5, x1
-; ldr w0, [x5, #48]
+; add x4, x0, x2
+; add x6, x4, x1
+; ldr w0, [x6, #48]
 ; ret
 
 function %f10(i64, i64, i64) -> i32 {
@@ -86,10 +85,10 @@ block0(v0: i64, v1: i64, v2: i64):
 }
 
 ; block0:
-; movz x4, #4100
-; add x4, x4, x1
-; add x4, x4, x2
-; ldr w0, [x4, x0]
+; movz x5, #4100
+; add x5, x5, x1
+; add x8, x5, x2
+; ldr w0, [x8, x0]
 ; ret
 
 function %f10() -> i32 {
@@ -139,10 +138,10 @@ block0(v0: i64):
 }
 
 ; block0:
-; movz w2, #51712
-; movk w2, w2, #15258, LSL #16
-; add x2, x2, x0
-; ldr w0, [x2]
+; movz w3, #51712
+; movk w3, w3, #15258, LSL #16
+; add x4, x3, x0
+; ldr w0, [x4]
 ; ret
 
 function %f14(i32) -> i32 {
@@ -233,10 +232,8 @@ block0(v0: i64):
 }
 
 ; block0:
-; mov x6, x0
-; mov x4, x6
-; ldp x0, x1, [x4]
-; mov x5, x6
+; mov x5, x0
+; ldp x0, x1, [x5]
 ; stp x0, x1, [x5]
 ; ret
 
@@ -248,10 +245,8 @@ block0(v0: i64):
 }
 
 ; block0:
-; mov x6, x0
-; mov x4, x6
-; ldp x0, x1, [x4, #16]
-; mov x5, x6
+; mov x5, x0
+; ldp x0, x1, [x5, #16]
 ; stp x0, x1, [x5, #16]
 ; ret
 
@@ -263,10 +258,8 @@ block0(v0: i64):
 }
 
 ; block0:
-; mov x6, x0
-; mov x4, x6
-; ldp x0, x1, [x4, #504]
-; mov x5, x6
+; mov x5, x0
+; ldp x0, x1, [x5, #504]
 ; stp x0, x1, [x5, #504]
 ; ret
 
@@ -278,10 +271,8 @@ block0(v0: i64):
 }
 
 ; block0:
-; mov x6, x0
-; mov x4, x6
-; ldp x0, x1, [x4, #-512]
-; mov x5, x6
+; mov x5, x0
+; ldp x0, x1, [x5, #-512]
 ; stp x0, x1, [x5, #-512]
 ; ret
 
@@ -294,10 +285,8 @@ block0(v0: i64):
 }
 
 ; block0:
-; mov x6, x0
-; mov x4, x6
-; ldp x0, x1, [x4, #32]
-; mov x5, x6
+; mov x5, x0
+; ldp x0, x1, [x5, #32]
 ; stp x0, x1, [x5, #32]
 ; ret
 
@@ -310,11 +299,11 @@ block0(v0: i32):
 }
 
 ; block0:
-; sxtw x4, w0
-; mov x11, x0
-; ldp x0, x1, [x4]
-; sxtw x5, w11
-; stp x0, x1, [x5]
+; sxtw x3, w0
+; mov x8, x0
+; ldp x0, x1, [x3]
+; sxtw x4, w8
+; stp x0, x1, [x4]
 ; ret
 
 function %i128_32bit_sextend(i64, i32) -> i128 {
@@ -328,13 +317,11 @@ block0(v0: i64, v1: i32):
 }
 
 ; block0:
-; mov x9, x0
-; mov x5, x9
-; add x5, x5, x1, SXTW
-; mov x11, x1
-; ldp x0, x1, [x5, #24]
-; mov x7, x9
-; add x7, x7, x11, SXTW
-; stp x0, x1, [x7, #24]
+; add x4, x0, x1, SXTW
+; mov x11, x0
+; mov x9, x1
+; ldp x0, x1, [x4, #24]
+; add x5, x11, x9, SXTW
+; stp x0, x1, [x5, #24]
 ; ret
 
@@ -442,8 +442,8 @@ block0(v0: i128):
 ; mov fp, sp
 ; sub sp, sp, #16
 ; block0:
-; mov x4, sp
-; stp x0, x1, [x4]
+; mov x3, sp
+; stp x0, x1, [x3]
 ; add sp, sp, #16
 ; ldp fp, lr, [sp], #16
 ; ret
@@ -461,8 +461,8 @@ block0(v0: i128):
 ; mov fp, sp
 ; sub sp, sp, #32
 ; block0:
-; add x4, sp, #32
-; stp x0, x1, [x4]
+; add x3, sp, #32
+; stp x0, x1, [x3]
 ; add sp, sp, #32
 ; ldp fp, lr, [sp], #16
 ; ret
@@ -482,8 +482,8 @@ block0(v0: i128):
 ; movk w16, w16, #1, LSL #16
 ; sub sp, sp, x16, UXTX
 ; block0:
-; mov x4, sp
-; stp x0, x1, [x4]
+; mov x3, sp
+; stp x0, x1, [x3]
 ; movz w16, #34480
 ; movk w16, w16, #1, LSL #16
 ; add sp, sp, x16, UXTX
@@ -502,8 +502,8 @@ block0:
 ; mov fp, sp
 ; sub sp, sp, #16
 ; block0:
-; mov x3, sp
-; ldp x0, x1, [x3]
+; mov x2, sp
+; ldp x0, x1, [x2]
 ; add sp, sp, #16
 ; ldp fp, lr, [sp], #16
 ; ret
@@ -521,8 +521,8 @@ block0:
 ; mov fp, sp
 ; sub sp, sp, #32
 ; block0:
-; add x3, sp, #32
-; ldp x0, x1, [x3]
+; add x2, sp, #32
+; ldp x0, x1, [x2]
 ; add sp, sp, #32
 ; ldp fp, lr, [sp], #16
 ; ret
@@ -542,8 +542,8 @@ block0:
 ; movk w16, w16, #1, LSL #16
 ; sub sp, sp, x16, UXTX
 ; block0:
-; mov x3, sp
-; ldp x0, x1, [x3]
+; mov x2, sp
+; ldp x0, x1, [x2]
 ; movz w16, #34480
 ; movk w16, w16, #1, LSL #16
 ; add sp, sp, x16, UXTX