Rework aarch64 stack frame implementation.

This PR changes the aarch64 ABI implementation to use positive offsets from SP, rather than negative offsets from FP, to refer to spill slots and stack-local storage. This allows for better addressing-mode options, and hence slightly better code: e.g., the unsigned scaled 12-bit offset mode can be used to reach anywhere in a 32KB frame without extra address-construction instructions, whereas negative offsets are limited to a signed 9-bit unscaled mode (-256 bytes). To enable this, the PR introduces a notion of "nominal SP offsets" as a virtual addressing mode, lowered during the emission pass. The offsets are relative to "SP after adjusting downward to allocate stack/spill slots", but before pushing clobbers. This allows the addressing-mode expressions to be generated before register allocation (or during it, for spill/reload sequences). To convert these offsets into *true* offsets from SP, we need to track how much further SP is moved downward, and compensate for this. We do so with "virtual SP offset adjustment" pseudo-instructions: these are seen by the emission pass, and result in no instruction (0 byte output), but update state that is now threaded through each instruction emission in turn. In this way, we can push e.g. stack args for a call and adjust the virtual SP offset, allowing reloads from nominal-SP-relative spillslots while we do the argument setup with "real SP offsets" at the same time.
2020-04-24 22:32:35 -07:00
parent 176b3a8382
commit a66724aafd
16 changed files with 496 additions and 320 deletions
--- a/cranelift/codegen/src/isa/aarch64/inst/regs.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/regs.rs
@@ -20,23 +20,21 @@ pub const PINNED_REG: u8 = 21;
 const XREG_INDICES: [u8; 31] = [
    // X0 - X7
    32, 33, 34, 35, 36, 37, 38, 39,
-    // X8 - X14
-    40, 41, 42, 43, 44, 45, 46,
-    // X15
-    59,
+    // X8 - X15
+    40, 41, 42, 43, 44, 45, 46, 47,
    // X16, X17
-    47, 48,
+    58, 59,
    // X18
    60,
    // X19, X20
-    49, 50,
+    48, 49,
    // X21, put aside because it's the pinned register.
-    58,
+    57,
    // X22 - X28
-    51, 52, 53, 54, 55, 56, 57,
-    // X29
+    50, 51, 52, 53, 54, 55, 56,
+    // X29 (FP)
    61,
-    // X30
+    // X30 (LR)
    62,
 ];

@@ -125,14 +123,17 @@ pub fn writable_fp_reg() -> Writable<Reg> {
    Writable::from_reg(fp_reg())
 }

-/// Get a reference to the "spill temp" register. This register is used to
-/// compute the address of a spill slot when a direct offset addressing mode from
-/// FP is not sufficient (+/- 2^11 words). We exclude this register from regalloc
-/// and reserve it for this purpose for simplicity; otherwise we need a
-/// multi-stage analysis where we first determine how many spill slots we have,
-/// then perhaps remove the reg from the pool and recompute regalloc.
+/// Get a reference to the first temporary register, sometimes called the "spill temporary". It is
+/// used to compute the address of a spill slot when a direct-offset addressing mode from SP cannot
+/// reach it (beyond the unsigned scaled 12-bit range). We exclude this register from regalloc and
+/// reserve it for this purpose for simplicity; otherwise we would need a multi-stage analysis that
+/// first counts the spill slots, then perhaps removes the reg from the pool and reruns regalloc.
+///
+/// We use x16 for this (aka IP0 in the AArch64 ABI) because it's a scratch register but is
+/// slightly special (used for linker veneers). We're free to use it as long as we don't expect it
+/// to live through call instructions.
 pub fn spilltmp_reg() -> Reg {
-    xreg(15)
+    xreg(16)
 }

 /// Get a writable reference to the spilltmp reg.
@@ -140,6 +141,20 @@ pub fn writable_spilltmp_reg() -> Writable<Reg> {
    Writable::from_reg(spilltmp_reg())
 }

+/// Get a reference to the second temp register. We need this in some edge cases
+/// where we need both the spilltmp and another temporary.
+///
+/// We use x17 (aka IP1), the other "intra-procedure-call"/linker-veneer scratch reg that is
+/// free to use otherwise.
+pub fn tmp2_reg() -> Reg {
+    xreg(17)
+}
+
+/// Get a writable reference to the tmp2 reg.
+pub fn writable_tmp2_reg() -> Writable<Reg> {
+    Writable::from_reg(tmp2_reg())
+}
+
 /// Create the register universe for AArch64.
 pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
    let mut regs = vec![];
@@ -173,7 +188,7 @@ pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {

    for i in 0u8..32u8 {
        // See above for excluded registers.
-        if i == 15 || i == 18 || i == 29 || i == 30 || i == 31 || i == PINNED_REG {
+        if i == 16 || i == 17 || i == 18 || i == 29 || i == 30 || i == 31 || i == PINNED_REG {
            continue;
        }
        let reg = Reg::new_real(
@@ -211,7 +226,8 @@ pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
        regs.len()
    };

-    regs.push((xreg(15).to_real_reg(), "x15".to_string()));
+    regs.push((xreg(16).to_real_reg(), "x16".to_string()));
+    regs.push((xreg(17).to_real_reg(), "x17".to_string()));
    regs.push((xreg(18).to_real_reg(), "x18".to_string()));
    regs.push((fp_reg().to_real_reg(), "fp".to_string()));
    regs.push((link_reg().to_real_reg(), "lr".to_string()));