Switch Cranelift over to regalloc2. (#3989)

This PR switches Cranelift over to the new register allocator, regalloc2.

See [this document](https://gist.github.com/cfallin/08553421a91f150254fe878f67301801)
for a summary of the design changes. The switchover has implications for the
core `VCode`/`MachInst` types and for the lowering pass.
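
To give a concrete flavor of those implications, here is a minimal, self-contained
sketch of two patterns the diff below touches. `Ctx`, `Inst`, `Reg`, and `RegClass`
are hypothetical stand-ins for illustration only, not the real Cranelift or regalloc2
types: lowering no longer goes through a dedicated `emit_safepoint` call and simply
uses `emit`, and register classes are queried with `class()` against `RegClass::Int`
rather than the old `get_class()`/`RegClass::I64`.

```rust
// Hypothetical stand-ins for the lowering context and machine-instruction
// types; the real types live in cranelift-codegen and regalloc2.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[allow(dead_code)]
enum RegClass {
    Int,   // the old regalloc crate spelled this RegClass::I64
    Float,
}

#[derive(Clone, Copy, Debug)]
struct Reg {
    class: RegClass,
}

impl Reg {
    // The old API spelled this `get_class()`; regalloc2-style code uses `class()`.
    fn class(&self) -> RegClass {
        self.class
    }
}

#[derive(Debug)]
enum Inst {
    TrapIf { trap_code: u16 },
}

#[derive(Default)]
struct Ctx {
    emitted: Vec<Inst>,
}

impl Ctx {
    // No separate `emit_safepoint` entry point any more: lowering emits every
    // instruction the same way.
    fn emit(&mut self, inst: Inst) {
        self.emitted.push(inst);
    }
}

fn main() {
    let mut ctx = Ctx::default();

    // Before: ctx.emit_safepoint(Inst::TrapIf { .. });
    // After:  plain `emit`.
    ctx.emit(Inst::TrapIf { trap_code: 0 });

    let dst = Reg { class: RegClass::Int };
    // Before: dst.get_class() == RegClass::I64
    // After:  dst.class() == RegClass::Int
    debug_assert!(dst.class() == RegClass::Int);

    println!("emitted: {:?}", ctx.emitted);
}
```

The hunks further down show the same two substitutions in the actual lowering code.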

Overall, this change improves both compile time and the runtime performance of the
generated code, as reported in #3942:

```
Benchmark       Compilation (wallclock)     Execution (wallclock)
blake3-scalar   25% faster                  28% faster
blake3-simd     no diff                     no diff
meshoptimizer   19% faster                  17% faster
pulldown-cmark  17% faster                  no diff
bz2             15% faster                  no diff
SpiderMonkey,   21% faster                  2% faster
  fib(30)
clang.wasm      42% faster                  N/A
```
Commit a0318f36f0 (parent bfae6384aa), authored by Chris Fallin on
2022-04-14 10:28:21 -07:00 and committed by GitHub.
181 changed files with 16887 additions and 21587 deletions.


```diff
@@ -20,7 +20,6 @@ use crate::settings::{Flags, TlsModel};
 use alloc::boxed::Box;
 use alloc::vec::Vec;
 use log::trace;
-use regalloc::{Reg, RegClass, Writable};
 use smallvec::SmallVec;
 use std::convert::TryFrom;
 use target_lexicon::Triple;
@@ -1005,7 +1004,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             // simply use the flags here.
             let cc = CC::from_intcc(cond_code);
-            ctx.emit_safepoint(Inst::TrapIf { trap_code, cc });
+            ctx.emit(Inst::TrapIf { trap_code, cc });
         } else if op == Opcode::Trapif {
             let cond_code = ctx.data(insn).cond_code().unwrap();
@@ -1014,7 +1013,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let cond_code = emit_cmp(ctx, ifcmp, cond_code);
             let cc = CC::from_intcc(cond_code);
-            ctx.emit_safepoint(Inst::TrapIf { trap_code, cc });
+            ctx.emit(Inst::TrapIf { trap_code, cc });
         } else {
             let cond_code = ctx.data(insn).fp_cond_code().unwrap();
@@ -1022,9 +1021,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let ffcmp = matches_input(ctx, inputs[0], Opcode::Ffcmp).unwrap();
             match emit_fcmp(ctx, ffcmp, cond_code, FcmpSpec::Normal) {
-                FcmpCondResult::Condition(cc) => {
-                    ctx.emit_safepoint(Inst::TrapIf { trap_code, cc })
-                }
+                FcmpCondResult::Condition(cc) => ctx.emit(Inst::TrapIf { trap_code, cc }),
                 FcmpCondResult::AndConditions(cc1, cc2) => {
                     // A bit unfortunate, but materialize the flags in their own register, and
                     // check against this.
@@ -1038,14 +1035,14 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                         RegMemImm::reg(tmp.to_reg()),
                         tmp2,
                     ));
-                    ctx.emit_safepoint(Inst::TrapIf {
+                    ctx.emit(Inst::TrapIf {
                         trap_code,
                         cc: CC::NZ,
                     });
                 }
                 FcmpCondResult::OrConditions(cc1, cc2) => {
-                    ctx.emit_safepoint(Inst::TrapIf { trap_code, cc: cc1 });
-                    ctx.emit_safepoint(Inst::TrapIf { trap_code, cc: cc2 });
+                    ctx.emit(Inst::TrapIf { trap_code, cc: cc1 });
+                    ctx.emit(Inst::TrapIf { trap_code, cc: cc2 });
                 }
                 FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(),
             };
@@ -2917,7 +2914,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         let src_ty = ctx.input_ty(insn, 0);
         debug_assert!(src_ty.is_vector() && src_ty.bits() == 128);
         let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+        debug_assert!(dst.to_reg().class() == RegClass::Int);
         // The Intel specification allows using both 32-bit and 64-bit GPRs as destination for
         // the "move mask" instructions. This is controlled by the REX.R bit: "In 64-bit mode,
```