Switch Cranelift over to regalloc2. (#3989)

This PR switches Cranelift over to the new register allocator, regalloc2. See [this document](https://gist.github.com/cfallin/08553421a91f150254fe878f67301801) for a summary of the design changes. This switchover has implications for core VCode/MachInst types and the lowering pass. Overall, this change brings improvements to both compile time and speed of generated code (runtime), as reported in #3942: ``` Benchmark Compilation (wallclock) Execution (wallclock) blake3-scalar 25% faster 28% faster blake3-simd no diff no diff meshoptimizer 19% faster 17% faster pulldown-cmark 17% faster no diff bz2 15% faster no diff SpiderMonkey, 21% faster 2% faster fib(30) clang.wasm 42% faster N/A ```
2022-04-14 10:28:21 -07:00
parent bfae6384aa
commit a0318f36f0
181 changed files with 16887 additions and 21587 deletions
--- a/cranelift/codegen/src/isa/aarch64/inst/regs.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/regs.rs
@@ -3,11 +3,13 @@
 use crate::isa::aarch64::inst::OperandSize;
 use crate::isa::aarch64::inst::ScalarSize;
 use crate::isa::aarch64::inst::VectorSize;
+use crate::machinst::AllocationConsumer;
+use crate::machinst::RealReg;
+use crate::machinst::{Reg, RegClass, Writable};
 use crate::settings;
-
-use regalloc::{
-    PrettyPrint, RealRegUniverse, Reg, RegClass, RegClassInfo, Writable, NUM_REG_CLASSES,
-};
+use regalloc2::MachineEnv;
+use regalloc2::PReg;
+use regalloc2::VReg;

 use std::string::{String, ToString};

@@ -19,40 +21,12 @@ use std::string::{String, ToString};
 /// https://searchfox.org/mozilla-central/source/js/src/jit/arm64/Assembler-arm64.h#103
 pub const PINNED_REG: u8 = 21;

-#[rustfmt::skip]
-const XREG_INDICES: [u8; 31] = [
-    // X0 - X7
-    32, 33, 34, 35, 36, 37, 38, 39,
-    // X8 - X15
-    40, 41, 42, 43, 44, 45, 46, 47,
-    // X16, X17
-    58, 59,
-    // X18
-    60,
-    // X19, X20
-    48, 49,
-    // X21, put aside because it's the pinned register.
-    57,
-    // X22 - X28
-    50, 51, 52, 53, 54, 55, 56,
-    // X29 (FP)
-    61,
-    // X30 (LR)
-    62,
-];
-
-const ZERO_REG_INDEX: u8 = 63;
-
-const SP_REG_INDEX: u8 = 64;
-
-/// Get a reference to an X-register (integer register).
+/// Get a reference to an X-register (integer register). Do not use
+/// this for xsp / xzr; we have two special registers for those.
 pub fn xreg(num: u8) -> Reg {
    assert!(num < 31);
-    Reg::new_real(
-        RegClass::I64,
-        /* enc = */ num,
-        /* index = */ XREG_INDICES[num as usize],
-    )
+    let preg = PReg::new(num as usize, RegClass::Int);
+    Reg::from(VReg::new(preg.index(), RegClass::Int))
 }

 /// Get a writable reference to an X-register.
@@ -63,7 +37,8 @@ pub fn writable_xreg(num: u8) -> Writable<Reg> {
 /// Get a reference to a V-register (vector/FP register).
 pub fn vreg(num: u8) -> Reg {
    assert!(num < 32);
-    Reg::new_real(RegClass::V128, /* enc = */ num, /* index = */ num)
+    let preg = PReg::new(num as usize, RegClass::Float);
+    Reg::from(VReg::new(preg.index(), RegClass::Float))
 }

 /// Get a writable reference to a V-register.
@@ -73,13 +48,8 @@ pub fn writable_vreg(num: u8) -> Writable<Reg> {

 /// Get a reference to the zero-register.
 pub fn zero_reg() -> Reg {
-    // This should be the same as what xreg(31) returns, except that
-    // we use the special index into the register index space.
-    Reg::new_real(
-        RegClass::I64,
-        /* enc = */ 31,
-        /* index = */ ZERO_REG_INDEX,
-    )
+    let preg = PReg::new(31, RegClass::Int);
+    Reg::from(VReg::new(preg.index(), RegClass::Int))
 }

 /// Get a writable reference to the zero-register (this discards a result).
@@ -89,16 +59,19 @@ pub fn writable_zero_reg() -> Writable<Reg> {

 /// Get a reference to the stack-pointer register.
 pub fn stack_reg() -> Reg {
-    // XSP (stack) and XZR (zero) are logically different registers which have
-    // the same hardware encoding, and whose meaning, in real aarch64
-    // instructions, is context-dependent.  For convenience of
-    // universe-construction and for correct printing, we make them be two
-    // different real registers.
-    Reg::new_real(
-        RegClass::I64,
-        /* enc = */ 31,
-        /* index = */ SP_REG_INDEX,
-    )
+    // XSP (stack) and XZR (zero) are logically different registers
+    // which have the same hardware encoding, and whose meaning, in
+    // real aarch64 instructions, is context-dependent. For extra
+    // correctness assurances and for correct printing, we make them
+    // be two different real registers from a regalloc perspective.
+    //
+    // We represent XZR as if it were xreg(31); XSP is xreg(31 +
+    // 32). The PReg bit-packing allows 6 bits (64 registers) so we
+    // make use of this extra space to distinguish xzr and xsp. We
+    // mask off the 6th bit (hw_enc & 31) to get the actual hardware
+    // register encoding.
+    let preg = PReg::new(31 + 32, RegClass::Int);
+    Reg::from(VReg::new(preg.index(), RegClass::Int))
 }

 /// Get a writable reference to the stack-pointer register.
@@ -159,158 +132,193 @@ pub fn writable_tmp2_reg() -> Writable<Reg> {
 }

 /// Create the register universe for AArch64.
-pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
-    let mut regs = vec![];
-    let mut allocable_by_class = [None; NUM_REG_CLASSES];
-
-    // Numbering Scheme: we put V-regs first, then X-regs. The X-regs exclude several registers:
-    // x18 (globally reserved for platform-specific purposes), x29 (frame pointer), x30 (link
-    // register), x31 (stack pointer or zero register, depending on context).
-
-    let v_reg_base = 0u8; // in contiguous real-register index space
-    let v_reg_count = 32;
-    for i in 0u8..v_reg_count {
-        let reg = Reg::new_real(
-            RegClass::V128,
-            /* enc = */ i,
-            /* index = */ v_reg_base + i,
-        )
-        .to_real_reg();
-        let name = format!("v{}", i);
-        regs.push((reg, name));
+pub fn create_reg_env(flags: &settings::Flags) -> MachineEnv {
+    fn preg(r: Reg) -> PReg {
+        r.to_real_reg().unwrap().into()
    }
-    let v_reg_last = v_reg_base + v_reg_count - 1;

-    // Add the X registers. N.B.: the order here must match the order implied
-    // by XREG_INDICES, ZERO_REG_INDEX, and SP_REG_INDEX above.
-
-    let x_reg_base = 32u8; // in contiguous real-register index space
-    let mut x_reg_count = 0;
-
-    let uses_pinned_reg = flags.enable_pinned_reg();
-
-    for i in 0u8..32u8 {
-        // See above for excluded registers.
-        if i == 16 || i == 17 || i == 18 || i == 29 || i == 30 || i == 31 || i == PINNED_REG {
-            continue;
-        }
-        let reg = Reg::new_real(
-            RegClass::I64,
-            /* enc = */ i,
-            /* index = */ x_reg_base + x_reg_count,
-        )
-        .to_real_reg();
-        let name = format!("x{}", i);
-        regs.push((reg, name));
-        x_reg_count += 1;
-    }
-    let x_reg_last = x_reg_base + x_reg_count - 1;
-
-    allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
-        first: x_reg_base as usize,
-        last: x_reg_last as usize,
-        suggested_scratch: Some(XREG_INDICES[19] as usize),
-    });
-    allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
-        first: v_reg_base as usize,
-        last: v_reg_last as usize,
-        suggested_scratch: Some(/* V31: */ 31),
-    });
-
-    // Other regs, not available to the allocator.
-    let allocable = if uses_pinned_reg {
-        // The pinned register is not allocatable in this case, so record the length before adding
-        // it.
-        let len = regs.len();
-        regs.push((xreg(PINNED_REG).to_real_reg(), "x21/pinned_reg".to_string()));
-        len
-    } else {
-        regs.push((xreg(PINNED_REG).to_real_reg(), "x21".to_string()));
-        regs.len()
+    let mut env = MachineEnv {
+        preferred_regs_by_class: [
+            vec![
+                preg(xreg(0)),
+                preg(xreg(1)),
+                preg(xreg(2)),
+                preg(xreg(3)),
+                preg(xreg(4)),
+                preg(xreg(5)),
+                preg(xreg(6)),
+                preg(xreg(7)),
+                preg(xreg(8)),
+                preg(xreg(9)),
+                preg(xreg(10)),
+                preg(xreg(11)),
+                preg(xreg(12)),
+                preg(xreg(13)),
+                preg(xreg(14)),
+                preg(xreg(15)),
+                // x16 and x17 are spilltmp and tmp2 (see above).
+                // x19-28 are callee-saved and so not preferred.
+                // x21 is the pinned register (if enabled) and not allocatable if so.
+                // x29 is FP, x30 is LR, x31 is SP/ZR.
+            ],
+            vec![
+                preg(vreg(0)),
+                preg(vreg(1)),
+                preg(vreg(2)),
+                preg(vreg(3)),
+                preg(vreg(4)),
+                preg(vreg(5)),
+                preg(vreg(6)),
+                preg(vreg(7)),
+                preg(vreg(8)),
+                preg(vreg(9)),
+                preg(vreg(10)),
+                preg(vreg(11)),
+                preg(vreg(12)),
+                preg(vreg(13)),
+                preg(vreg(14)),
+                preg(vreg(15)),
+            ],
+        ],
+        non_preferred_regs_by_class: [
+            vec![
+                preg(xreg(19)),
+                preg(xreg(20)),
+                // x21 is pinned reg if enabled; we add to this list below if not.
+                preg(xreg(22)),
+                preg(xreg(23)),
+                preg(xreg(24)),
+                preg(xreg(25)),
+                preg(xreg(26)),
+                preg(xreg(27)),
+                preg(xreg(28)),
+            ],
+            vec![
+                preg(vreg(16)),
+                preg(vreg(17)),
+                preg(vreg(18)),
+                preg(vreg(19)),
+                preg(vreg(20)),
+                preg(vreg(21)),
+                preg(vreg(22)),
+                preg(vreg(23)),
+                preg(vreg(24)),
+                preg(vreg(25)),
+                preg(vreg(26)),
+                preg(vreg(27)),
+                preg(vreg(28)),
+                preg(vreg(29)),
+                preg(vreg(30)),
+                // v31 is the scratch reg, to allow for parallel moves.
+            ],
+        ],
+        scratch_by_class: [
+            // We use tmp2 (x17) as the regalloc scratch register,
+            // used to resolve cyclic parallel moves. This is valid
+            // because tmp2 is never live between regalloc-visible
+            // instructions, only within them (i.e. in expansion into
+            // multiple machine instructions when that
+            // occurs). spilltmp is used for moves to/from spillslots,
+            // but tmp2 never is, so it is available for this
+            // purpose. (Its only other use is in prologue stack
+            // checks, and the prologue is prepended after regalloc
+            // runs.)
+            preg(tmp2_reg()),
+            // We use v31 for Float/Vec-class parallel moves.
+            preg(vreg(31)),
+        ],
+        fixed_stack_slots: vec![],
    };

-    regs.push((xreg(16).to_real_reg(), "x16".to_string()));
-    regs.push((xreg(17).to_real_reg(), "x17".to_string()));
-    regs.push((xreg(18).to_real_reg(), "x18".to_string()));
-    regs.push((fp_reg().to_real_reg(), "fp".to_string()));
-    regs.push((link_reg().to_real_reg(), "lr".to_string()));
-    regs.push((zero_reg().to_real_reg(), "xzr".to_string()));
-    regs.push((stack_reg().to_real_reg(), "sp".to_string()));
-
-    // FIXME JRS 2020Feb06: unfortunately this pushes the number of real regs
-    // to 65, which is potentially inconvenient from a compiler performance
-    // standpoint.  We could possibly drop back to 64 by "losing" a vector
-    // register in future.
-
-    // Assert sanity: the indices in the register structs must match their
-    // actual indices in the array.
-    for (i, reg) in regs.iter().enumerate() {
-        assert_eq!(i, reg.0.get_index());
+    if !flags.enable_pinned_reg() {
+        debug_assert_eq!(PINNED_REG, 21); // We assumed this above in hardcoded reg list.
+        env.non_preferred_regs_by_class[0].push(preg(xreg(PINNED_REG)));
    }

-    RealRegUniverse {
-        regs,
-        allocable,
-        allocable_by_class,
+    env
+}
+
+// PrettyPrint cannot be implemented for Reg; we need to invoke
+// backend-specific functions from higher level (inst, arg, ...)
+// types.
+
+fn show_ireg(reg: RealReg) -> String {
+    match reg.hw_enc() {
+        29 => "fp".to_string(),
+        30 => "lr".to_string(),
+        31 => "xzr".to_string(),
+        63 => "sp".to_string(),
+        x => {
+            debug_assert!(x < 29);
+            format!("x{}", x)
+        }
    }
 }

-/// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show
+fn show_vreg(reg: RealReg) -> String {
+    format!("v{}", reg.hw_enc() & 31)
+}
+
+fn show_reg(reg: Reg) -> String {
+    if let Some(rreg) = reg.to_real_reg() {
+        match rreg.class() {
+            RegClass::Int => show_ireg(rreg),
+            RegClass::Float => show_vreg(rreg),
+        }
+    } else {
+        format!("%{:?}", reg)
+    }
+}
+
+pub fn pretty_print_reg(reg: Reg, allocs: &mut AllocationConsumer<'_>) -> String {
+    let reg = allocs.next(reg);
+    show_reg(reg)
+}
+
+/// If `ireg` denotes an Int-classed reg, make a best-effort attempt to show
 /// its name at the 32-bit size.
-pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: OperandSize) -> String {
-    let mut s = reg.show_rru(mb_rru);
-    if reg.get_class() != RegClass::I64 || !size.is32() {
+pub fn show_ireg_sized(reg: Reg, size: OperandSize) -> String {
+    let mut s = show_reg(reg);
+    if reg.class() != RegClass::Int || !size.is32() {
        // We can't do any better.
        return s;
    }

-    if reg.is_real() {
-        // Change (eg) "x42" into "w42" as appropriate
-        if reg.get_class() == RegClass::I64 && size.is32() && s.starts_with("x") {
-            s = "w".to_string() + &s[1..];
-        }
-    } else {
-        // Add a "w" suffix to RegClass::I64 vregs used in a 32-bit role
-        if reg.get_class() == RegClass::I64 && size.is32() {
-            s.push('w');
-        }
+    // Change (eg) "x42" into "w42" as appropriate
+    if reg.class() == RegClass::Int && size.is32() && s.starts_with("x") {
+        s = "w".to_string() + &s[1..];
    }
+
    s
 }

 /// Show a vector register used in a scalar context.
-pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: ScalarSize) -> String {
-    let mut s = reg.show_rru(mb_rru);
-    if reg.get_class() != RegClass::V128 {
+pub fn show_vreg_scalar(reg: Reg, size: ScalarSize) -> String {
+    let mut s = show_reg(reg);
+    if reg.class() != RegClass::Float {
        // We can't do any better.
        return s;
    }

-    if reg.is_real() {
-        // Change (eg) "v0" into "d0".
-        if s.starts_with("v") {
-            let replacement = match size {
-                ScalarSize::Size8 => "b",
-                ScalarSize::Size16 => "h",
-                ScalarSize::Size32 => "s",
-                ScalarSize::Size64 => "d",
-                ScalarSize::Size128 => "q",
-            };
-            s.replace_range(0..1, replacement);
-        }
-    } else {
-        // Add a "d" suffix to RegClass::V128 vregs.
-        if reg.get_class() == RegClass::V128 {
-            s.push('d');
-        }
+    // Change (eg) "v0" into "d0".
+    if s.starts_with("v") {
+        let replacement = match size {
+            ScalarSize::Size8 => "b",
+            ScalarSize::Size16 => "h",
+            ScalarSize::Size32 => "s",
+            ScalarSize::Size64 => "d",
+            ScalarSize::Size128 => "q",
+        };
+        s.replace_range(0..1, replacement);
    }
+
    s
 }

 /// Show a vector register.
-pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: VectorSize) -> String {
-    assert_eq!(RegClass::V128, reg.get_class());
-    let mut s = reg.show_rru(mb_rru);
+pub fn show_vreg_vector(reg: Reg, size: VectorSize) -> String {
+    assert_eq!(RegClass::Float, reg.class());
+    let mut s = show_reg(reg);

    let suffix = match size {
        VectorSize::Size8x8 => ".8b",
@@ -327,25 +335,54 @@ pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: Vector
 }

 /// Show an indexed vector element.
-pub fn show_vreg_element(
-    reg: Reg,
-    mb_rru: Option<&RealRegUniverse>,
-    idx: u8,
-    size: VectorSize,
-) -> String {
-    assert_eq!(RegClass::V128, reg.get_class());
-    let mut s = reg.show_rru(mb_rru);
-
+pub fn show_vreg_element(reg: Reg, idx: u8, size: VectorSize) -> String {
+    assert_eq!(RegClass::Float, reg.class());
+    let s = show_reg(reg);
    let suffix = match size {
-        VectorSize::Size8x8 => "b",
-        VectorSize::Size8x16 => "b",
-        VectorSize::Size16x4 => "h",
-        VectorSize::Size16x8 => "h",
-        VectorSize::Size32x2 => "s",
-        VectorSize::Size32x4 => "s",
-        VectorSize::Size64x2 => "d",
+        VectorSize::Size8x8 => ".b",
+        VectorSize::Size8x16 => ".b",
+        VectorSize::Size16x4 => ".h",
+        VectorSize::Size16x8 => ".h",
+        VectorSize::Size32x2 => ".s",
+        VectorSize::Size32x4 => ".s",
+        VectorSize::Size64x2 => ".d",
    };
-
-    s.push_str(&format!(".{}[{}]", suffix, idx));
-    s
+    format!("{}{}[{}]", s, suffix, idx)
+}
+
+pub fn pretty_print_ireg(
+    reg: Reg,
+    size: OperandSize,
+    allocs: &mut AllocationConsumer<'_>,
+) -> String {
+    let reg = allocs.next(reg);
+    show_ireg_sized(reg, size)
+}
+
+pub fn pretty_print_vreg_scalar(
+    reg: Reg,
+    size: ScalarSize,
+    allocs: &mut AllocationConsumer<'_>,
+) -> String {
+    let reg = allocs.next(reg);
+    show_vreg_scalar(reg, size)
+}
+
+pub fn pretty_print_vreg_vector(
+    reg: Reg,
+    size: VectorSize,
+    allocs: &mut AllocationConsumer<'_>,
+) -> String {
+    let reg = allocs.next(reg);
+    show_vreg_vector(reg, size)
+}
+
+pub fn pretty_print_vreg_element(
+    reg: Reg,
+    idx: usize,
+    size: VectorSize,
+    allocs: &mut AllocationConsumer<'_>,
+) -> String {
+    let reg = allocs.next(reg);
+    show_vreg_element(reg, idx as u8, size)
 }