Switch Cranelift over to regalloc2. (#3989)

This PR switches Cranelift over to the new register allocator, regalloc2.

See [this document](https://gist.github.com/cfallin/08553421a91f150254fe878f67301801)
for a summary of the design changes. The switchover has implications for the
core `VCode`/`MachInst` types and for the lowering pass.
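
To give a concrete flavor of those implications, here is a minimal, self-contained
sketch of two patterns the diff below touches. `Ctx`, `Inst`, `Reg`, and `RegClass`
are hypothetical stand-ins for illustration only, not the real Cranelift or regalloc2
types: lowering no longer goes through a dedicated `emit_safepoint` call and simply
uses `emit`, and register classes are queried with `class()` against `RegClass::Int`
rather than the old `get_class()`/`RegClass::I64`.

```rust
// Hypothetical stand-ins for the lowering context and machine-instruction
// types; the real types live in cranelift-codegen and regalloc2.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[allow(dead_code)]
enum RegClass {
    Int,   // the old regalloc crate spelled this RegClass::I64
    Float,
}

#[derive(Clone, Copy, Debug)]
struct Reg {
    class: RegClass,
}

impl Reg {
    // The old API spelled this `get_class()`; regalloc2-style code uses `class()`.
    fn class(&self) -> RegClass {
        self.class
    }
}

#[derive(Debug)]
enum Inst {
    TrapIf { trap_code: u16 },
}

#[derive(Default)]
struct Ctx {
    emitted: Vec<Inst>,
}

impl Ctx {
    // No separate `emit_safepoint` entry point any more: lowering emits every
    // instruction the same way.
    fn emit(&mut self, inst: Inst) {
        self.emitted.push(inst);
    }
}

fn main() {
    let mut ctx = Ctx::default();

    // Before: ctx.emit_safepoint(Inst::TrapIf { .. });
    // After:  plain `emit`.
    ctx.emit(Inst::TrapIf { trap_code: 0 });

    let dst = Reg { class: RegClass::Int };
    // Before: dst.get_class() == RegClass::I64
    // After:  dst.class() == RegClass::Int
    debug_assert!(dst.class() == RegClass::Int);

    println!("emitted: {:?}", ctx.emitted);
}
```

The hunks further down show the same two substitutions in the actual lowering code.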

Overall, this change improves both compile time and the runtime performance of the
generated code, as reported in #3942:

```
Benchmark       Compilation (wallclock)     Execution (wallclock)
blake3-scalar   25% faster                  28% faster
blake3-simd     no diff                     no diff
meshoptimizer   19% faster                  17% faster
pulldown-cmark  17% faster                  no diff
bz2             15% faster                  no diff
SpiderMonkey,   21% faster                  2% faster
  fib(30)
clang.wasm      42% faster                  N/A
```
Commit a0318f36f0 (parent bfae6384aa), authored by Chris Fallin on
2022-04-14 10:28:21 -07:00 and committed by GitHub.
181 changed files with 16887 additions and 21587 deletions.


```diff
@@ -20,7 +20,6 @@ use crate::settings::{Flags, TlsModel};
 use alloc::boxed::Box;
 use alloc::vec::Vec;
 use log::trace;
-use regalloc::{Reg, RegClass, Writable};
 use smallvec::SmallVec;
 use std::convert::TryFrom;
 use target_lexicon::Triple;
@@ -1005,7 +1004,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             // simply use the flags here.
             let cc = CC::from_intcc(cond_code);
-            ctx.emit_safepoint(Inst::TrapIf { trap_code, cc });
+            ctx.emit(Inst::TrapIf { trap_code, cc });
         } else if op == Opcode::Trapif {
             let cond_code = ctx.data(insn).cond_code().unwrap();
@@ -1014,7 +1013,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let cond_code = emit_cmp(ctx, ifcmp, cond_code);
             let cc = CC::from_intcc(cond_code);
-            ctx.emit_safepoint(Inst::TrapIf { trap_code, cc });
+            ctx.emit(Inst::TrapIf { trap_code, cc });
         } else {
             let cond_code = ctx.data(insn).fp_cond_code().unwrap();
@@ -1022,9 +1021,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let ffcmp = matches_input(ctx, inputs[0], Opcode::Ffcmp).unwrap();
             match emit_fcmp(ctx, ffcmp, cond_code, FcmpSpec::Normal) {
-                FcmpCondResult::Condition(cc) => {
-                    ctx.emit_safepoint(Inst::TrapIf { trap_code, cc })
-                }
+                FcmpCondResult::Condition(cc) => ctx.emit(Inst::TrapIf { trap_code, cc }),
                 FcmpCondResult::AndConditions(cc1, cc2) => {
                     // A bit unfortunate, but materialize the flags in their own register, and
                     // check against this.
@@ -1038,14 +1035,14 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                         RegMemImm::reg(tmp.to_reg()),
                         tmp2,
                     ));
-                    ctx.emit_safepoint(Inst::TrapIf {
+                    ctx.emit(Inst::TrapIf {
                         trap_code,
                         cc: CC::NZ,
                     });
                 }
                 FcmpCondResult::OrConditions(cc1, cc2) => {
-                    ctx.emit_safepoint(Inst::TrapIf { trap_code, cc: cc1 });
-                    ctx.emit_safepoint(Inst::TrapIf { trap_code, cc: cc2 });
+                    ctx.emit(Inst::TrapIf { trap_code, cc: cc1 });
+                    ctx.emit(Inst::TrapIf { trap_code, cc: cc2 });
                 }
                 FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(),
             };
@@ -2917,7 +2914,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         let src_ty = ctx.input_ty(insn, 0);
         debug_assert!(src_ty.is_vector() && src_ty.bits() == 128);
         let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+        debug_assert!(dst.to_reg().class() == RegClass::Int);
         // The Intel specification allows using both 32-bit and 64-bit GPRs as destination for
         // the "move mask" instructions. This is controlled by the REX.R bit: "In 64-bit mode,
```