Switch Cranelift over to regalloc2. (#3989)

This PR switches Cranelift over to the new register allocator, regalloc2.

See [this document](https://gist.github.com/cfallin/08553421a91f150254fe878f67301801)
for a summary of the design changes. This switchover has implications for
core VCode/MachInst types and the lowering pass.

Overall, this change brings improvements to both compile time and speed of
generated code (runtime), as reported in #3942:

```
Benchmark       Compilation (wallclock)     Execution (wallclock)
blake3-scalar   25% faster                  28% faster
blake3-simd     no diff                     no diff
meshoptimizer   19% faster                  17% faster
pulldown-cmark  17% faster                  no diff
bz2             15% faster                  no diff
SpiderMonkey,   21% faster                  2% faster
  fib(30)
clang.wasm      42% faster                  N/A
```
This commit is contained in:
Chris Fallin
2022-04-14 10:28:21 -07:00
committed by GitHub
parent bfae6384aa
commit a0318f36f0
181 changed files with 16887 additions and 21587 deletions

View File

@@ -11,7 +11,7 @@ use crate::{CodegenError, CodegenResult};
use alloc::boxed::Box;
use alloc::vec::Vec;
use args::*;
use regalloc::{RealReg, Reg, RegClass, Set, Writable};
use regalloc2::VReg;
use smallvec::{smallvec, SmallVec};
use std::convert::TryFrom;
@@ -32,7 +32,7 @@ fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option<A
&ir::ArgumentPurpose::VMContext => {
// This is SpiderMonkey's `WasmTlsReg`.
Some(ABIArg::reg(
regs::r14().to_real_reg(),
regs::r14().to_real_reg().unwrap(),
types::I64,
param.extension,
param.purpose,
@@ -41,7 +41,7 @@ fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option<A
&ir::ArgumentPurpose::SignatureId => {
// This is SpiderMonkey's `WasmTableCallSigReg`.
Some(ABIArg::reg(
regs::r10().to_real_reg(),
regs::r10().to_real_reg().unwrap(),
types::I64,
param.extension,
param.purpose,
@@ -204,7 +204,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
let mut slots = vec![];
for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) {
let intreg = *rc == RegClass::I64;
let intreg = *rc == RegClass::Int;
let nextreg = if intreg {
match args_or_rets {
ArgsOrRets::Args => {
@@ -232,7 +232,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
next_vreg += 1;
}
slots.push(ABIArgSlot::Reg {
reg: reg.to_real_reg(),
reg: reg.to_real_reg().unwrap(),
ty: *reg_ty,
extension: param.extension,
});
@@ -277,7 +277,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
debug_assert!(args_or_rets == ArgsOrRets::Args);
if let Some(reg) = get_intreg_for_arg(&call_conv, next_gpr, next_param_idx) {
ret.push(ABIArg::reg(
reg.to_real_reg(),
reg.to_real_reg().unwrap(),
types::I64,
ir::ArgumentExtension::None,
ir::ArgumentPurpose::Normal,
@@ -357,8 +357,8 @@ impl ABIMachineSpec for X64ABIMachineSpec {
}
}
fn gen_ret() -> Self::I {
Inst::ret()
fn gen_ret(rets: Vec<Reg>) -> Self::I {
Inst::ret(rets)
}
fn gen_epilogue_placeholder() -> Self::I {
@@ -397,8 +397,8 @@ impl ABIMachineSpec for X64ABIMachineSpec {
fn get_stacklimit_reg() -> Reg {
debug_assert!(
!is_callee_save_systemv(regs::r10().to_real_reg())
&& !is_callee_save_baldrdash(regs::r10().to_real_reg())
!is_callee_save_systemv(regs::r10().to_real_reg().unwrap())
&& !is_callee_save_baldrdash(regs::r10().to_real_reg().unwrap())
);
// As per comment on trait definition, we must return a caller-save
@@ -499,7 +499,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
_call_conv: isa::CallConv,
setup_frame: bool,
flags: &settings::Flags,
clobbered_callee_saves: &Vec<Writable<RealReg>>,
clobbered_callee_saves: &[Writable<RealReg>],
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> (u64, SmallVec<[Self::I; 16]>) {
@@ -536,25 +536,24 @@ impl ABIMachineSpec for X64ABIMachineSpec {
for reg in clobbered_callee_saves {
let r_reg = reg.to_reg();
let off = cur_offset;
match r_reg.get_class() {
RegClass::I64 => {
match r_reg.class() {
RegClass::Int => {
insts.push(Inst::store(
types::I64,
r_reg.to_reg(),
r_reg.into(),
Amode::imm_reg(cur_offset, regs::rsp()),
));
cur_offset += 8;
}
RegClass::V128 => {
RegClass::Float => {
cur_offset = align_to(cur_offset, 16);
insts.push(Inst::store(
types::I8X16,
r_reg.to_reg(),
r_reg.into(),
Amode::imm_reg(cur_offset, regs::rsp()),
));
cur_offset += 16;
}
_ => unreachable!(),
};
if flags.unwind_info() {
insts.push(Inst::Unwind {
@@ -572,7 +571,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
fn gen_clobber_restore(
call_conv: isa::CallConv,
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
clobbers: &[Writable<RealReg>],
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> SmallVec<[Self::I; 16]> {
@@ -587,25 +586,24 @@ impl ABIMachineSpec for X64ABIMachineSpec {
let mut cur_offset = fixed_frame_storage_size;
for reg in &clobbered_callee_saves {
let rreg = reg.to_reg();
match rreg.get_class() {
RegClass::I64 => {
match rreg.class() {
RegClass::Int => {
insts.push(Inst::mov64_m_r(
Amode::imm_reg(cur_offset, regs::rsp()),
Writable::from_reg(rreg.to_reg()),
Writable::from_reg(rreg.into()),
));
cur_offset += 8;
}
RegClass::V128 => {
RegClass::Float => {
cur_offset = align_to(cur_offset, 16);
insts.push(Inst::load(
types::I8X16,
Amode::imm_reg(cur_offset, regs::rsp()),
Writable::from_reg(rreg.to_reg()),
Writable::from_reg(rreg.into()),
ExtKind::None,
));
cur_offset += 16;
}
_ => unreachable!(),
}
}
// Adjust RSP back upward.
@@ -641,34 +639,27 @@ impl ABIMachineSpec for X64ABIMachineSpec {
tmp: Writable<Reg>,
_callee_conv: isa::CallConv,
_caller_conv: isa::CallConv,
) -> SmallVec<[(InstIsSafepoint, Self::I); 2]> {
) -> SmallVec<[Self::I; 2]> {
let mut insts = SmallVec::new();
match dest {
&CallDest::ExtName(ref name, RelocDistance::Near) => {
insts.push((
InstIsSafepoint::Yes,
Inst::call_known(name.clone(), uses, defs, opcode),
));
insts.push(Inst::call_known(name.clone(), uses, defs, opcode));
}
&CallDest::ExtName(ref name, RelocDistance::Far) => {
insts.push((
InstIsSafepoint::No,
Inst::LoadExtName {
dst: tmp,
name: Box::new(name.clone()),
offset: 0,
},
));
insts.push((
InstIsSafepoint::Yes,
Inst::call_unknown(RegMem::reg(tmp.to_reg()), uses, defs, opcode),
insts.push(Inst::LoadExtName {
dst: tmp,
name: Box::new(name.clone()),
offset: 0,
});
insts.push(Inst::call_unknown(
RegMem::reg(tmp.to_reg()),
uses,
defs,
opcode,
));
}
&CallDest::Reg(reg) => {
insts.push((
InstIsSafepoint::Yes,
Inst::call_unknown(RegMem::reg(reg), uses, defs, opcode),
));
insts.push(Inst::call_unknown(RegMem::reg(reg), uses, defs, opcode));
}
}
insts
@@ -722,9 +713,8 @@ impl ABIMachineSpec for X64ABIMachineSpec {
fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 {
// We allocate in terms of 8-byte slots.
match rc {
RegClass::I64 => 1,
RegClass::V128 => 2,
_ => panic!("Unexpected register class!"),
RegClass::Int => 1,
RegClass::Float => 2,
}
}
@@ -798,7 +788,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
fn get_clobbered_callee_saves(
call_conv: CallConv,
regs: &Set<Writable<RealReg>>,
regs: &[Writable<RealReg>],
) -> Vec<Writable<RealReg>> {
let mut regs: Vec<Writable<RealReg>> = match call_conv {
CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => regs
@@ -824,7 +814,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
};
// Sort registers for deterministic code output. We can do an unstable sort because the
// registers will be unique (there are no dups).
regs.sort_unstable_by_key(|r| r.to_reg().get_index());
regs.sort_unstable_by_key(|r| VReg::from(r.to_reg()).vreg());
regs
}
@@ -981,21 +971,20 @@ fn get_fltreg_for_retval(
fn is_callee_save_systemv(r: RealReg) -> bool {
use regs::*;
match r.get_class() {
RegClass::I64 => match r.get_hw_encoding() as u8 {
match r.class() {
RegClass::Int => match r.hw_enc() {
ENC_RBX | ENC_RBP | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true,
_ => false,
},
RegClass::V128 => false,
_ => unimplemented!(),
RegClass::Float => false,
}
}
fn is_callee_save_baldrdash(r: RealReg) -> bool {
use regs::*;
match r.get_class() {
RegClass::I64 => {
if r.get_hw_encoding() as u8 == ENC_R14 {
match r.class() {
RegClass::Int => {
if r.hw_enc() == ENC_R14 {
// r14 is the WasmTlsReg and is preserved implicitly.
false
} else {
@@ -1003,38 +992,35 @@ fn is_callee_save_baldrdash(r: RealReg) -> bool {
is_callee_save_systemv(r)
}
}
RegClass::V128 => false,
_ => unimplemented!(),
RegClass::Float => false,
}
}
fn is_callee_save_fastcall(r: RealReg) -> bool {
use regs::*;
match r.get_class() {
RegClass::I64 => match r.get_hw_encoding() as u8 {
match r.class() {
RegClass::Int => match r.hw_enc() {
ENC_RBX | ENC_RBP | ENC_RSI | ENC_RDI | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true,
_ => false,
},
RegClass::V128 => match r.get_hw_encoding() as u8 {
RegClass::Float => match r.hw_enc() {
6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 => true,
_ => false,
},
_ => panic!("Unknown register class: {:?}", r.get_class()),
}
}
fn compute_clobber_size(clobbers: &Vec<Writable<RealReg>>) -> u32 {
fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 {
let mut clobbered_size = 0;
for reg in clobbers {
match reg.to_reg().get_class() {
RegClass::I64 => {
match reg.to_reg().class() {
RegClass::Int => {
clobbered_size += 8;
}
RegClass::V128 => {
RegClass::Float => {
clobbered_size = align_to(clobbered_size, 16);
clobbered_size += 16;
}
_ => unreachable!(),
}
}
align_to(clobbered_size, 16)

View File

@@ -369,8 +369,8 @@ mod tests {
.map(OpcodeMap::_0F38)
.w(true)
.opcode(0x1F)
.reg(dst.get_hw_encoding())
.rm(src.get_hw_encoding())
.reg(dst.to_real_reg().unwrap().hw_enc())
.rm(src.to_real_reg().unwrap().hw_enc())
.length(EvexVectorLength::V128)
.encode(&mut sink0);
@@ -393,8 +393,8 @@ mod tests {
.map(OpcodeMap::None)
.w(false)
.opcode(0x00)
.reg(regs::rax().get_hw_encoding())
.rm(regs::rax().get_hw_encoding())
.reg(regs::rax().to_real_reg().unwrap().hw_enc())
.rm(regs::rax().to_real_reg().unwrap().hw_enc())
.mask(EvexMasking::None)
.encode(&mut sink1);

View File

@@ -8,6 +8,7 @@
//! operand ("G" in Intelese), the order is always G first, then E. The term "enc" in the following
//! means "hardware register encoding number".
use crate::machinst::{Reg, RegClass};
use crate::{
ir::TrapCode,
isa::x64::inst::{
@@ -16,7 +17,6 @@ use crate::{
},
machinst::MachBuffer,
};
use regalloc::{Reg, RegClass};
pub(crate) fn low8_will_sign_extend_to_64(x: u32) -> bool {
let xs = (x as i32) as i64;
@@ -50,8 +50,8 @@ pub(crate) fn encode_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 {
pub(crate) fn int_reg_enc(reg: impl Into<Reg>) -> u8 {
let reg = reg.into();
debug_assert!(reg.is_real());
debug_assert_eq!(reg.get_class(), RegClass::I64);
reg.get_hw_encoding()
debug_assert_eq!(reg.class(), RegClass::Int);
reg.to_real_reg().unwrap().hw_enc()
}
/// Get the encoding number of any register.
@@ -59,7 +59,7 @@ pub(crate) fn int_reg_enc(reg: impl Into<Reg>) -> u8 {
pub(crate) fn reg_enc(reg: impl Into<Reg>) -> u8 {
let reg = reg.into();
debug_assert!(reg.is_real());
reg.get_hw_encoding()
reg.to_real_reg().unwrap().hw_enc()
}
/// A small bit field to record a REX prefix specification:

View File

@@ -41,7 +41,8 @@
(Div (size OperandSize) ;; 1, 2, 4, or 8
(signed bool)
(divisor GprMem)
(dividend Gpr)
(dividend_lo Gpr)
(dividend_hi Gpr)
(dst_quotient WritableGpr)
(dst_remainder WritableGpr))
@@ -69,7 +70,8 @@
;; regalloc failures where %rdx is live before its first def!
(CheckedDivOrRemSeq (kind DivOrRemKind)
(size OperandSize)
(dividend Gpr)
(dividend_lo Gpr)
(dividend_hi Gpr)
;; The divisor operand. Note it's marked as modified
;; so that it gets assigned a register different from
;; the temporary.
@@ -318,7 +320,7 @@
(opcode Opcode))
;; Return.
(Ret)
(Ret (rets VecReg))
;; A placeholder instruction, generating no code, meaning that a function
;; epilogue must be inserted there.
@@ -476,13 +478,12 @@
;; `rax`.
(MachOTlsGetAddr (symbol ExternalName))
;; A definition of a value label.
(ValueLabelMarker (reg Reg)
(label ValueLabel))
;; An unwind pseudoinstruction describing the state of the machine at
;; this program point.
(Unwind (inst UnwindInst))))
(Unwind (inst UnwindInst))
;; A pseudoinstruction that just keeps a value alive.
(DummyUse (reg Reg))))
(type OperandSize extern
(enum Size8

View File

@@ -1,14 +1,13 @@
//! Instruction operand sub-components (aka "parts"): definitions and printing.
use super::regs::{self, show_ireg_sized};
use super::regs::{self};
use super::EmitState;
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::{MemFlags, Type};
use crate::isa::x64::inst::regs::pretty_print_reg;
use crate::isa::x64::inst::Inst;
use crate::machinst::*;
use regalloc::{
PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector, Writable,
};
use regalloc2::VReg;
use smallvec::{smallvec, SmallVec};
use std::fmt;
use std::string::String;
@@ -23,25 +22,6 @@ pub trait FromWritableReg: Sized {
fn from_writable_reg(w: Writable<Reg>) -> Option<Self>;
}
/// An extension trait for mapping register uses on `{Xmm,Gpr}`.
pub trait MapUseExt {
fn map_use<RM>(&mut self, mapper: &RM)
where
RM: RegMapper;
}
/// An extension trait for mapping register mods and defs on
/// `Writable{Xmm,Gpr}`.
pub trait MapDefModExt {
fn map_def<RM>(&mut self, mapper: &RM)
where
RM: RegMapper;
fn map_mod<RM>(&mut self, mapper: &RM)
where
RM: RegMapper;
}
/// A macro for defining a newtype of `Reg` that enforces some invariant about
/// the wrapped `Reg` (such as that it is of a particular register class).
macro_rules! newtype_of_reg {
@@ -55,7 +35,7 @@ macro_rules! newtype_of_reg {
|$check_reg:ident| $check:expr
) => {
/// A newtype wrapper around `Reg`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct $newtype_reg(Reg);
impl PartialEq<Reg> for $newtype_reg {
@@ -70,12 +50,6 @@ macro_rules! newtype_of_reg {
}
}
impl PrettyPrint for $newtype_reg {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
self.0.show_rru(mb_rru)
}
}
impl $newtype_reg {
/// Create this newtype from the given register, or return `None` if the register
/// is not a valid instance of this newtype.
@@ -107,21 +81,6 @@ macro_rules! newtype_of_reg {
}
}
impl MapUseExt for $newtype_reg {
fn map_use<RM>(&mut self, mapper: &RM)
where
RM: RegMapper,
{
let mut reg = self.0;
mapper.map_use(&mut reg);
debug_assert!({
let $check_reg = reg;
$check
});
*self = $newtype_reg(reg);
}
}
pub type $newtype_writable_reg = Writable<$newtype_reg>;
#[allow(dead_code)] // Used by some newtypes and not others.
@@ -139,34 +98,6 @@ macro_rules! newtype_of_reg {
}
}
impl MapDefModExt for $newtype_writable_reg {
fn map_def<RM>(&mut self, mapper: &RM)
where
RM: RegMapper,
{
let mut reg = self.to_writable_reg();
mapper.map_def(&mut reg);
debug_assert!({
let $check_reg = reg.to_reg();
$check
});
*self = Writable::from_reg($newtype_reg(reg.to_reg()));
}
fn map_mod<RM>(&mut self, mapper: &RM)
where
RM: RegMapper,
{
let mut reg = self.to_writable_reg();
mapper.map_mod(&mut reg);
debug_assert!({
let $check_reg = reg.to_reg();
$check
});
*self = Writable::from_reg($newtype_reg(reg.to_reg()));
}
}
/// A newtype wrapper around `RegMem` for general-purpose registers.
#[derive(Clone, Debug)]
pub struct $newtype_reg_mem(RegMem);
@@ -201,44 +132,16 @@ macro_rules! newtype_of_reg {
}
#[allow(dead_code)] // Used by some newtypes and not others.
pub fn map_uses<RM>(&mut self, mapper: &RM)
where
RM: RegMapper,
{
self.0.map_uses(mapper);
debug_assert!(match self.0 {
RegMem::Reg { reg: $check_reg } => $check,
_ => true,
});
}
#[allow(dead_code)] // Used by some newtypes and not others.
pub fn map_as_def<RM>(&mut self, mapper: &RM)
where
RM: RegMapper,
{
self.0.map_as_def(mapper);
debug_assert!(match self.0 {
RegMem::Reg { reg: $check_reg } => $check,
_ => true,
});
}
#[allow(dead_code)] // Used by some newtypes and not others.
pub fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
self.0.get_regs_as_uses(collector);
pub fn get_operands<F: Fn(VReg) -> VReg>(
&self,
collector: &mut OperandCollector<'_, F>,
) {
self.0.get_operands(collector);
}
}
impl PrettyPrint for $newtype_reg_mem {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
self.0.show_rru(mb_rru)
}
}
impl PrettyPrintSized for $newtype_reg_mem {
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
self.0.show_rru_sized(mb_rru, size)
fn pretty_print(&self, size: u8, allocs: &mut AllocationConsumer<'_>) -> String {
self.0.pretty_print(size, allocs)
}
}
@@ -278,44 +181,17 @@ macro_rules! newtype_of_reg {
}
#[allow(dead_code)] // Used by some newtypes and not others.
pub fn map_uses<RM>(&mut self, mapper: &RM)
where
RM: RegMapper,
{
self.0.map_uses(mapper);
debug_assert!(match self.0 {
RegMemImm::Reg { reg: $check_reg } => $check,
_ => true,
});
}
#[allow(dead_code)] // Used by some newtypes and not others.
pub fn map_as_def<RM>(&mut self, mapper: &RM)
where
RM: RegMapper,
{
self.0.map_as_def(mapper);
debug_assert!(match self.0 {
RegMemImm::Reg { reg: $check_reg } => $check,
_ => true,
});
}
#[allow(dead_code)] // Used by some newtypes and not others.
pub fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
self.0.get_regs_as_uses(collector);
pub fn get_operands<F: Fn(VReg) -> VReg>(
&self,
collector: &mut OperandCollector<'_, F>,
) {
self.0.get_operands(collector);
}
}
impl PrettyPrint for $newtype_reg_mem_imm {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
self.0.show_rru(mb_rru)
}
}
impl PrettyPrintSized for $newtype_reg_mem_imm {
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
self.0.show_rru_sized(mb_rru, size)
fn pretty_print(&self, size: u8, allocs: &mut AllocationConsumer<'_>) -> String {
self.0.pretty_print(size, allocs)
}
}
@@ -359,7 +235,7 @@ newtype_of_reg!(
GprMem,
GprMemImm,
Imm8Gpr,
|reg| reg.get_class() == RegClass::I64
|reg| reg.class() == RegClass::Int
);
// Define a newtype of `Reg` for XMM registers.
@@ -370,7 +246,7 @@ newtype_of_reg!(
XmmMem,
XmmMemImm,
Imm8Xmm,
|reg| reg.get_class() == RegClass::V128
|reg| reg.class() == RegClass::Float
);
/// A possible addressing mode (amode) that can be used in instructions.
@@ -400,7 +276,7 @@ pub enum Amode {
impl Amode {
pub(crate) fn imm_reg(simm32: u32, base: Reg) -> Self {
debug_assert!(base.get_class() == RegClass::I64);
debug_assert!(base.class() == RegClass::Int);
Self::ImmReg {
simm32,
base,
@@ -409,8 +285,8 @@ impl Amode {
}
pub(crate) fn imm_reg_reg_shift(simm32: u32, base: Gpr, index: Gpr, shift: u8) -> Self {
debug_assert!(base.get_class() == RegClass::I64);
debug_assert!(index.get_class() == RegClass::I64);
debug_assert!(base.class() == RegClass::Int);
debug_assert!(index.class() == RegClass::Int);
debug_assert!(shift <= 3);
Self::ImmRegRegShift {
simm32,
@@ -450,14 +326,17 @@ impl Amode {
}
/// Add the regs mentioned by `self` to `collector`.
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
pub(crate) fn get_operands<F: Fn(VReg) -> VReg>(
&self,
collector: &mut OperandCollector<'_, F>,
) {
match self {
Amode::ImmReg { base, .. } => {
collector.add_use(*base);
collector.reg_use(*base);
}
Amode::ImmRegRegShift { base, index, .. } => {
collector.add_use(base.to_reg());
collector.add_use(index.to_reg());
collector.reg_use(base.to_reg());
collector.reg_use(index.to_reg());
}
Amode::RipRelative { .. } => {
// RIP isn't involved in regalloc.
@@ -476,13 +355,56 @@ impl Amode {
pub(crate) fn can_trap(&self) -> bool {
!self.get_flags().notrap()
}
/// Produce a copy of this amode in which every register has been replaced
/// by the next allocation taken from `allocs`.
///
/// Allocations must be consumed in exactly the order in which
/// `get_operands()` reports the registers: the base first, then the index.
pub(crate) fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self {
    match *self {
        Amode::ImmReg {
            simm32,
            base,
            flags,
        } => {
            let base = allocs.next(base);
            Amode::ImmReg {
                simm32,
                base,
                flags,
            }
        }
        Amode::ImmRegRegShift {
            simm32,
            base,
            index,
            shift,
            flags,
        } => {
            // Base is consumed before index; do not reorder these two lines.
            let base = Gpr::new(allocs.next(*base)).unwrap();
            let index = Gpr::new(allocs.next(*index)).unwrap();
            Amode::ImmRegRegShift {
                simm32,
                base,
                index,
                shift,
                flags,
            }
        }
        // No registers are mentioned, so no allocation is consumed.
        Amode::RipRelative { target } => Amode::RipRelative { target },
    }
}
/// Return a copy of this amode whose displacement (`simm32`) has been
/// increased by `offset`.
///
/// # Panics
///
/// Panics with "Cannot offset amode" for any addressing mode other than
/// `ImmReg` and `ImmRegRegShift`.
pub(crate) fn offset(&self, offset: u32) -> Self {
    let mut shifted = self.clone();
    match shifted {
        Amode::ImmReg { ref mut simm32, .. } | Amode::ImmRegRegShift { ref mut simm32, .. } => {
            *simm32 += offset;
        }
        _ => panic!("Cannot offset amode: {:?}", self),
    }
    shifted
}
}
impl PrettyPrint for Amode {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
fn pretty_print(&self, _size: u8, allocs: &mut AllocationConsumer<'_>) -> String {
match self {
Amode::ImmReg { simm32, base, .. } => {
format!("{}({})", *simm32 as i32, base.show_rru(mb_rru))
// Note: size is always 8; the address is 64 bits,
// even if the addressed operand is smaller.
format!("{}({})", *simm32 as i32, pretty_print_reg(*base, 8, allocs))
}
Amode::ImmRegRegShift {
simm32,
@@ -493,8 +415,8 @@ impl PrettyPrint for Amode {
} => format!(
"{}({},{},{})",
*simm32 as i32,
base.show_rru(mb_rru),
index.show_rru(mb_rru),
pretty_print_reg(base.to_reg(), 8, allocs),
pretty_print_reg(index.to_reg(), 8, allocs),
1 << shift
),
Amode::RipRelative { ref target } => format!("label{}(%rip)", target.get()),
@@ -524,9 +446,12 @@ impl SyntheticAmode {
}
/// Add the regs mentioned by `self` to `collector`.
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
pub(crate) fn get_operands<F: Fn(VReg) -> VReg>(
&self,
collector: &mut OperandCollector<'_, F>,
) {
match self {
SyntheticAmode::Real(addr) => addr.get_regs_as_uses(collector),
SyntheticAmode::Real(addr) => addr.get_operands(collector),
SyntheticAmode::NominalSPOffset { .. } => {
// Nothing to do; the base is SP and isn't involved in regalloc.
}
@@ -534,16 +459,6 @@ impl SyntheticAmode {
}
}
pub(crate) fn map_uses<RM: RegMapper>(&mut self, map: &RM) {
match self {
SyntheticAmode::Real(addr) => addr.map_uses(map),
SyntheticAmode::NominalSPOffset { .. } => {
// Nothing to do.
}
SyntheticAmode::ConstantOffset(_) => {}
}
}
pub(crate) fn finalize(&self, state: &mut EmitState, buffer: &MachBuffer<Inst>) -> Amode {
match self {
SyntheticAmode::Real(addr) => addr.clone(),
@@ -561,6 +476,15 @@ impl SyntheticAmode {
}
}
}
/// Produce a copy of this synthetic amode with its registers replaced by
/// allocations taken from `allocs`.
///
/// Only the `Real` variant forwards to the wrapped amode; the other
/// variants are returned as plain clones and consume nothing from `allocs`.
pub(crate) fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self {
    match self {
        SyntheticAmode::NominalSPOffset { .. } | SyntheticAmode::ConstantOffset { .. } => {
            self.clone()
        }
        SyntheticAmode::Real(amode) => {
            let resolved = amode.with_allocs(allocs);
            SyntheticAmode::Real(resolved)
        }
    }
}
}
impl Into<SyntheticAmode> for Amode {
@@ -570,9 +494,10 @@ impl Into<SyntheticAmode> for Amode {
}
impl PrettyPrint for SyntheticAmode {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
fn pretty_print(&self, _size: u8, allocs: &mut AllocationConsumer<'_>) -> String {
match self {
SyntheticAmode::Real(addr) => addr.show_rru(mb_rru),
// See note in `Amode` regarding constant size of `8`.
SyntheticAmode::Real(addr) => addr.pretty_print(8, allocs),
SyntheticAmode::NominalSPOffset { simm32 } => {
format!("rsp({} + virtual offset)", *simm32 as i32)
}
@@ -594,7 +519,7 @@ pub enum RegMemImm {
impl RegMemImm {
pub(crate) fn reg(reg: Reg) -> Self {
debug_assert!(reg.get_class() == RegClass::I64 || reg.get_class() == RegClass::V128);
debug_assert!(reg.class() == RegClass::Int || reg.class() == RegClass::Float);
Self::Reg { reg }
}
pub(crate) fn mem(addr: impl Into<SyntheticAmode>) -> Self {
@@ -607,15 +532,18 @@ impl RegMemImm {
/// Asserts that in register mode, the reg class is the one that's expected.
pub(crate) fn assert_regclass_is(&self, expected_reg_class: RegClass) {
if let Self::Reg { reg } = self {
debug_assert_eq!(reg.get_class(), expected_reg_class);
debug_assert_eq!(reg.class(), expected_reg_class);
}
}
/// Add the regs mentioned by `self` to `collector`.
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
pub(crate) fn get_operands<F: Fn(VReg) -> VReg>(
&self,
collector: &mut OperandCollector<'_, F>,
) {
match self {
Self::Reg { reg } => collector.add_use(*reg),
Self::Mem { addr } => addr.get_regs_as_uses(collector),
Self::Reg { reg } => collector.reg_use(*reg),
Self::Mem { addr } => addr.get_operands(collector),
Self::Imm { .. } => {}
}
}
@@ -626,19 +554,25 @@ impl RegMemImm {
_ => None,
}
}
}
impl PrettyPrint for RegMemImm {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
self.show_rru_sized(mb_rru, 8)
/// Produce a copy of this operand with its registers replaced by
/// allocations taken from `allocs`.
///
/// A register operand consumes one allocation, a memory operand defers to
/// its address's `with_allocs`, and an immediate is cloned unchanged.
pub(crate) fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self {
    match self {
        Self::Imm { .. } => self.clone(),
        Self::Mem { addr } => {
            let addr = addr.with_allocs(allocs);
            Self::Mem { addr }
        }
        Self::Reg { reg } => {
            let reg = allocs.next(*reg);
            Self::Reg { reg }
        }
    }
}
}
impl PrettyPrintSized for RegMemImm {
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
impl PrettyPrint for RegMemImm {
fn pretty_print(&self, size: u8, allocs: &mut AllocationConsumer<'_>) -> String {
match self {
Self::Reg { reg } => show_ireg_sized(*reg, mb_rru, size),
Self::Mem { addr } => addr.show_rru(mb_rru),
Self::Reg { reg } => pretty_print_reg(*reg, size, allocs),
Self::Mem { addr } => addr.pretty_print(size, allocs),
Self::Imm { simm32 } => format!("${}", *simm32 as i32),
}
}
@@ -673,7 +607,7 @@ pub enum RegMem {
impl RegMem {
pub(crate) fn reg(reg: Reg) -> Self {
debug_assert!(reg.get_class() == RegClass::I64 || reg.get_class() == RegClass::V128);
debug_assert!(reg.class() == RegClass::Int || reg.class() == RegClass::Float);
Self::Reg { reg }
}
pub(crate) fn mem(addr: impl Into<SyntheticAmode>) -> Self {
@@ -682,14 +616,17 @@ impl RegMem {
/// Asserts that in register mode, the reg class is the one that's expected.
pub(crate) fn assert_regclass_is(&self, expected_reg_class: RegClass) {
if let Self::Reg { reg } = self {
debug_assert_eq!(reg.get_class(), expected_reg_class);
debug_assert_eq!(reg.class(), expected_reg_class);
}
}
/// Add the regs mentioned by `self` to `collector`.
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
pub(crate) fn get_operands<F: Fn(VReg) -> VReg>(
&self,
collector: &mut OperandCollector<'_, F>,
) {
match self {
RegMem::Reg { reg } => collector.add_use(*reg),
RegMem::Mem { addr, .. } => addr.get_regs_as_uses(collector),
RegMem::Reg { reg } => collector.reg_use(*reg),
RegMem::Mem { addr, .. } => addr.get_operands(collector),
}
}
pub(crate) fn to_reg(&self) -> Option<Reg> {
@@ -698,6 +635,17 @@ impl RegMem {
_ => None,
}
}
/// Produce a copy of this operand with its registers replaced by
/// allocations taken from `allocs`.
///
/// A register operand consumes one allocation; a memory operand defers to
/// its address's `with_allocs`.
pub(crate) fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self {
    match self {
        RegMem::Mem { addr } => {
            let addr = addr.with_allocs(allocs);
            RegMem::Mem { addr }
        }
        RegMem::Reg { reg } => {
            let reg = allocs.next(*reg);
            RegMem::Reg { reg }
        }
    }
}
}
impl From<Writable<Reg>> for RegMem {
@@ -707,16 +655,10 @@ impl From<Writable<Reg>> for RegMem {
}
impl PrettyPrint for RegMem {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
self.show_rru_sized(mb_rru, 8)
}
}
impl PrettyPrintSized for RegMem {
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
fn pretty_print(&self, size: u8, allocs: &mut AllocationConsumer<'_>) -> String {
match self {
RegMem::Reg { reg } => show_ireg_sized(*reg, mb_rru, size),
RegMem::Mem { addr, .. } => addr.show_rru(mb_rru),
RegMem::Reg { reg } => pretty_print_reg(*reg, size, allocs),
RegMem::Mem { addr, .. } => addr.pretty_print(size, allocs),
}
}
}
@@ -1222,6 +1164,22 @@ impl SseOpcode {
_ => 8,
}
}
/// Does an `XmmRmRImm` instruction with this opcode use `src1`?
///
/// Returns `false` for the extract (`Pextr*`), `Pshufd`, and rounding
/// (`Round*`) opcodes listed below, and `true` for every other opcode.
///
/// NOTE(review): the original comment spelled the instruction name
/// `XmmRmmRImm`, presumably a typo; confirm against the `Inst` definition.
///
/// FIXME: split into separate instructions.
pub(crate) fn uses_src1(&self) -> bool {
    !matches!(
        self,
        SseOpcode::Pextrb
            | SseOpcode::Pextrw
            | SseOpcode::Pextrd
            | SseOpcode::Pshufd
            | SseOpcode::Roundss
            | SseOpcode::Roundsd
            | SseOpcode::Roundps
            | SseOpcode::Roundpd
    )
}
}
impl fmt::Debug for SseOpcode {

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,26 +1,14 @@
//! Registers, the Universe thereof, and printing.
//! Register definitions for regalloc2.
//!
//! These are ordered by sequence number, as required in the Universe.
//! We define 16 GPRs, with indices equal to the hardware encoding,
//! and 16 XMM registers.
//!
//! The caller-saved registers are placed first in order to prefer not to clobber (requiring
//! saves/restores in prologue/epilogue code) when possible. Note that there is no other heuristic
//! in the backend that will apply such pressure; the register allocator's cost heuristics are not
//! aware of the cost of clobber-save/restore code.
//!
//! One might worry that this pessimizes code with many callsites, where using caller-saves causes
//! us to have to save them (as we are the caller) frequently. However, the register allocator
//! *should be* aware of *this* cost, because it sees that the call instruction modifies all of the
//! caller-saved (i.e., callee-clobbered) registers.
//!
//! Hence, this ordering encodes pressure in one direction (prefer not to clobber registers that we
//! ourselves have to save) and this is balanced against the RA's pressure in the other direction
//! at callsites.
//! Note also that we make use of pinned VRegs to refer to PRegs.
use crate::machinst::{AllocationConsumer, RealReg, Reg};
use crate::settings;
use alloc::vec::Vec;
use regalloc::{
PrettyPrint, RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, NUM_REG_CLASSES,
};
use alloc::string::ToString;
use regalloc2::{MachineEnv, PReg, RegClass, VReg};
use std::string::String;
// Hardware encodings (note the special rax, rcx, rdx, rbx order).
@@ -42,53 +30,62 @@ pub const ENC_R13: u8 = 13;
pub const ENC_R14: u8 = 14;
pub const ENC_R15: u8 = 15;
fn gpr(enc: u8, index: u8) -> Reg {
Reg::new_real(RegClass::I64, enc, index)
// Constructors for Regs.
/// Build the `Reg` for the integer register with hardware encoding `enc`.
///
/// The result wraps a `VReg` in class `Int` whose number is the index of
/// the corresponding `PReg`.
fn gpr(enc: u8) -> Reg {
    let class = RegClass::Int;
    let physical = PReg::new(usize::from(enc), class);
    let pinned = VReg::new(physical.index(), class);
    Reg::from(pinned)
}
pub(crate) fn rsi() -> Reg {
gpr(ENC_RSI, 16)
gpr(ENC_RSI)
}
pub(crate) fn rdi() -> Reg {
gpr(ENC_RDI, 17)
gpr(ENC_RDI)
}
pub(crate) fn rax() -> Reg {
gpr(ENC_RAX, 18)
gpr(ENC_RAX)
}
pub(crate) fn rcx() -> Reg {
gpr(ENC_RCX, 19)
gpr(ENC_RCX)
}
pub(crate) fn rdx() -> Reg {
gpr(ENC_RDX, 20)
gpr(ENC_RDX)
}
pub(crate) fn r8() -> Reg {
gpr(ENC_R8, 21)
gpr(ENC_R8)
}
pub(crate) fn r9() -> Reg {
gpr(ENC_R9, 22)
gpr(ENC_R9)
}
pub(crate) fn r10() -> Reg {
gpr(ENC_R10, 23)
gpr(ENC_R10)
}
pub(crate) fn r11() -> Reg {
gpr(ENC_R11, 24)
gpr(ENC_R11)
}
pub(crate) fn r12() -> Reg {
gpr(ENC_R12, 25)
gpr(ENC_R12)
}
pub(crate) fn r13() -> Reg {
gpr(ENC_R13, 26)
gpr(ENC_R13)
}
pub(crate) fn r14() -> Reg {
gpr(ENC_R14, 27)
gpr(ENC_R14)
}
pub(crate) fn rbx() -> Reg {
gpr(ENC_RBX, 28)
gpr(ENC_RBX)
}
pub(crate) fn r15() -> Reg {
// r15 is put aside since this is the pinned register.
gpr(ENC_R15, 29)
gpr(ENC_R15)
}
pub(crate) fn rsp() -> Reg {
gpr(ENC_RSP)
}
pub(crate) fn rbp() -> Reg {
gpr(ENC_RBP)
}
/// The pinned register on this architecture.
@@ -98,163 +95,177 @@ pub(crate) fn pinned_reg() -> Reg {
r15()
}
fn fpr(enc: u8, index: u8) -> Reg {
Reg::new_real(RegClass::V128, enc, index)
/// Build the `Reg` for the XMM register with hardware encoding `enc`.
///
/// The result wraps a `VReg` in class `Float` whose number is the index of
/// the corresponding `PReg`.
fn fpr(enc: u8) -> Reg {
    let class = RegClass::Float;
    let physical = PReg::new(usize::from(enc), class);
    let pinned = VReg::new(physical.index(), class);
    Reg::from(pinned)
}
pub(crate) fn xmm0() -> Reg {
fpr(0, 0)
fpr(0)
}
pub(crate) fn xmm1() -> Reg {
fpr(1, 1)
fpr(1)
}
pub(crate) fn xmm2() -> Reg {
fpr(2, 2)
fpr(2)
}
pub(crate) fn xmm3() -> Reg {
fpr(3, 3)
fpr(3)
}
pub(crate) fn xmm4() -> Reg {
fpr(4, 4)
fpr(4)
}
pub(crate) fn xmm5() -> Reg {
fpr(5, 5)
fpr(5)
}
pub(crate) fn xmm6() -> Reg {
fpr(6, 6)
fpr(6)
}
pub(crate) fn xmm7() -> Reg {
fpr(7, 7)
fpr(7)
}
pub(crate) fn xmm8() -> Reg {
fpr(8, 8)
fpr(8)
}
pub(crate) fn xmm9() -> Reg {
fpr(9, 9)
fpr(9)
}
pub(crate) fn xmm10() -> Reg {
fpr(10, 10)
fpr(10)
}
pub(crate) fn xmm11() -> Reg {
fpr(11, 11)
fpr(11)
}
pub(crate) fn xmm12() -> Reg {
fpr(12, 12)
fpr(12)
}
pub(crate) fn xmm13() -> Reg {
fpr(13, 13)
fpr(13)
}
pub(crate) fn xmm14() -> Reg {
fpr(14, 14)
fpr(14)
}
pub(crate) fn xmm15() -> Reg {
fpr(15, 15)
fpr(15)
}
pub(crate) fn rsp() -> Reg {
gpr(ENC_RSP, 30)
}
pub(crate) fn rbp() -> Reg {
gpr(ENC_RBP, 31)
}
/// Create the register universe for X64.
///
/// The ordering of registers matters, as commented in the file doc comment: assumes the
/// calling-convention is SystemV, at the moment.
pub(crate) fn create_reg_universe_systemv(flags: &settings::Flags) -> RealRegUniverse {
let mut regs = Vec::<(RealReg, String)>::new();
let mut allocable_by_class = [None; NUM_REG_CLASSES];
let use_pinned_reg = flags.enable_pinned_reg();
// XMM registers
let first_fpr = regs.len();
regs.push((xmm0().to_real_reg(), "%xmm0".into()));
regs.push((xmm1().to_real_reg(), "%xmm1".into()));
regs.push((xmm2().to_real_reg(), "%xmm2".into()));
regs.push((xmm3().to_real_reg(), "%xmm3".into()));
regs.push((xmm4().to_real_reg(), "%xmm4".into()));
regs.push((xmm5().to_real_reg(), "%xmm5".into()));
regs.push((xmm6().to_real_reg(), "%xmm6".into()));
regs.push((xmm7().to_real_reg(), "%xmm7".into()));
regs.push((xmm8().to_real_reg(), "%xmm8".into()));
regs.push((xmm9().to_real_reg(), "%xmm9".into()));
regs.push((xmm10().to_real_reg(), "%xmm10".into()));
regs.push((xmm11().to_real_reg(), "%xmm11".into()));
regs.push((xmm12().to_real_reg(), "%xmm12".into()));
regs.push((xmm13().to_real_reg(), "%xmm13".into()));
regs.push((xmm14().to_real_reg(), "%xmm14".into()));
regs.push((xmm15().to_real_reg(), "%xmm15".into()));
let last_fpr = regs.len() - 1;
// Integer regs.
let first_gpr = regs.len();
// Caller-saved, in the SystemV x86_64 ABI.
regs.push((rsi().to_real_reg(), "%rsi".into()));
regs.push((rdi().to_real_reg(), "%rdi".into()));
regs.push((rax().to_real_reg(), "%rax".into()));
regs.push((rcx().to_real_reg(), "%rcx".into()));
regs.push((rdx().to_real_reg(), "%rdx".into()));
regs.push((r8().to_real_reg(), "%r8".into()));
regs.push((r9().to_real_reg(), "%r9".into()));
regs.push((r10().to_real_reg(), "%r10".into()));
regs.push((r11().to_real_reg(), "%r11".into()));
// Callee-saved, in the SystemV x86_64 ABI.
regs.push((r12().to_real_reg(), "%r12".into()));
regs.push((r13().to_real_reg(), "%r13".into()));
regs.push((r14().to_real_reg(), "%r14".into()));
regs.push((rbx().to_real_reg(), "%rbx".into()));
// Other regs, not available to the allocator.
debug_assert_eq!(r15(), pinned_reg());
let allocable = if use_pinned_reg {
// The pinned register is not allocatable in this case, so record the length before adding
// it.
let len = regs.len();
regs.push((r15().to_real_reg(), "%r15/pinned".into()));
len
} else {
regs.push((r15().to_real_reg(), "%r15".into()));
regs.len()
};
let last_gpr = allocable - 1;
regs.push((rsp().to_real_reg(), "%rsp".into()));
regs.push((rbp().to_real_reg(), "%rbp".into()));
allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
first: first_gpr,
last: last_gpr,
suggested_scratch: Some(r12().get_index()),
});
allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
first: first_fpr,
last: last_fpr,
suggested_scratch: Some(xmm15().get_index()),
});
// Sanity-check: the index passed to the Reg ctor must match the order in the register list.
for (i, reg) in regs.iter().enumerate() {
assert_eq!(i, reg.0.get_index());
/// Create the register environment for x64.
///
/// Builds the regalloc2 `MachineEnv` for this backend: per register class, a set of preferred
/// registers, a set of non-preferred registers, and one scratch register.
///
/// * Preferred GPRs are the SysV caller-saved registers, except `r10`, which is the
///   integer-class scratch register.
/// * Non-preferred GPRs are the SysV callee-saved registers `rbx` and `r12`-`r14`; `r15` is
///   added to this set only when `flags.enable_pinned_reg()` is false.
/// * `xmm0`-`xmm14` are all preferred; `xmm15` is the float-class scratch register.
/// * `rsp` and `rbp` are not placed in any set, and no fixed stack slots are declared.
pub(crate) fn create_reg_env_systemv(flags: &settings::Flags) -> MachineEnv {
// Convert a `Reg` into a `PReg`; panics (via `unwrap`) if `to_real_reg()` yields `None`.
fn preg(r: Reg) -> PReg {
r.to_real_reg().unwrap().into()
}
// NOTE(review): the next four lines look like leftovers from the tail of the removed
// `create_reg_universe_systemv` and do not belong to this function -- confirm and drop.
RealRegUniverse {
regs,
allocable,
allocable_by_class,
let mut env = MachineEnv {
preferred_regs_by_class: [
// Preferred GPRs: caller-saved in the SysV ABI.
vec![
preg(rsi()),
preg(rdi()),
preg(rax()),
preg(rcx()),
preg(rdx()),
preg(r8()),
preg(r9()),
// N.B.: not r10; it is our scratch reg.
preg(r11()),
],
// Preferred XMMs: all of them.
vec![
preg(xmm0()),
preg(xmm1()),
preg(xmm2()),
preg(xmm3()),
preg(xmm4()),
preg(xmm5()),
preg(xmm6()),
preg(xmm7()),
preg(xmm8()),
preg(xmm9()),
preg(xmm10()),
preg(xmm11()),
preg(xmm12()),
preg(xmm13()),
preg(xmm14()),
// N.B.: not xmm15; it is our scratch reg.
],
],
non_preferred_regs_by_class: [
// Non-preferred GPRs: callee-saved in the SysV ABI.
vec![preg(rbx()), preg(r12()), preg(r13()), preg(r14())],
// Non-preferred XMMs: none.
vec![],
],
// One scratch register per class, in the same (GPR, XMM) order as the arrays above.
scratch_by_class: [preg(r10()), preg(xmm15())],
fixed_stack_slots: vec![],
};
// r15 is the pinned register, so it only becomes allocatable when pinning is disabled.
debug_assert_eq!(r15(), pinned_reg());
if !flags.enable_pinned_reg() {
// Index 0 is the GPR entry (the first element of each per-class array above).
env.non_preferred_regs_by_class[0].push(preg(r15()));
}
env
}
/// Give the name of a RealReg.
pub fn realreg_name(reg: RealReg) -> &'static str {
let preg = PReg::from(reg);
match preg.class() {
RegClass::Int => match preg.hw_enc() as u8 {
ENC_RAX => "%rax",
ENC_RBX => "%rbx",
ENC_RCX => "%rcx",
ENC_RDX => "%rdx",
ENC_RSI => "%rsi",
ENC_RDI => "%rdi",
ENC_RBP => "%rbp",
ENC_RSP => "%rsp",
ENC_R8 => "%r8",
ENC_R9 => "%r9",
ENC_R10 => "%r10",
ENC_R11 => "%r11",
ENC_R12 => "%r12",
ENC_R13 => "%r13",
ENC_R14 => "%r14",
ENC_R15 => "%r15",
_ => panic!("Invalid PReg: {:?}", preg),
},
RegClass::Float => match preg.hw_enc() {
0 => "%xmm0",
1 => "%xmm1",
2 => "%xmm2",
3 => "%xmm3",
4 => "%xmm4",
5 => "%xmm5",
6 => "%xmm6",
7 => "%xmm7",
8 => "%xmm8",
9 => "%xmm9",
10 => "%xmm10",
11 => "%xmm11",
12 => "%xmm12",
13 => "%xmm13",
14 => "%xmm14",
15 => "%xmm15",
_ => panic!("Invalid PReg: {:?}", preg),
},
}
}
/// Render a register as a string.
///
/// A register that maps to a real register is shown by its `realreg_name`; any other register
/// is shown as `%` followed by its `Debug` representation.
pub fn show_reg(reg: Reg) -> String {
    match reg.to_real_reg() {
        Some(real) => realreg_name(real).to_string(),
        None => format!("%{:?}", reg),
    }
}
/// If `reg` denotes an integer-class (`RegClass::Int`) reg, make a best-effort attempt to show its
/// name at some smaller size (4, 2 or 1 bytes).
pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
let mut s = reg.show_rru(mb_rru);
pub fn show_ireg_sized(reg: Reg, size: u8) -> String {
let mut s = show_reg(reg);
if reg.get_class() != RegClass::I64 || size == 8 {
if reg.class() != RegClass::Int || size == 8 {
// We can't do any better.
return s;
}
@@ -302,3 +313,15 @@ pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: u8) ->
s
}
// N.B.: there is deliberately no `impl PrettyPrint for Reg`: register
// naming is x64-specific, and the other backends carry their own
// analogous functions. The right `pretty_print_reg` is selected
// statically, because it is the higher-level x64-specific types that
// call it. (Put differently: in a build that may contain several
// backends a bare `Reg` cannot be pretty-printed on its own, but it can
// be as part of an x64 Inst or x64 RegMemImm.)

/// Pretty-print `reg` at the given operand `size`.
///
/// The register that gets printed is whatever `allocs.next(reg)` returns (presumably the
/// allocation assigned to this operand -- confirm against `AllocationConsumer`); it is then
/// formatted by `show_ireg_sized`.
pub fn pretty_print_reg(reg: Reg, size: u8, allocs: &mut AllocationConsumer<'_>) -> String {
    show_ireg_sized(allocs.next(reg), size)
}

View File

@@ -1,8 +1,8 @@
//! Unwind information for System V ABI (x86-64).
use crate::isa::unwind::systemv::RegisterMappingError;
use crate::machinst::{Reg, RegClass};
use gimli::{write::CommonInformationEntry, Encoding, Format, Register, X86_64};
use regalloc::{Reg, RegClass};
/// Creates a new x86-64 common information entry (CIE).
pub fn create_cie() -> CommonInformationEntry {
@@ -69,14 +69,13 @@ pub fn map_reg(reg: Reg) -> Result<Register, RegisterMappingError> {
X86_64::XMM15,
];
match reg.get_class() {
RegClass::I64 => {
match reg.class() {
RegClass::Int => {
// x86 GP registers have a weird mapping to DWARF registers, so we use a
// lookup table.
Ok(X86_GP_REG_MAP[reg.get_hw_encoding() as usize])
Ok(X86_GP_REG_MAP[reg.to_real_reg().unwrap().hw_enc() as usize])
}
RegClass::V128 => Ok(X86_XMM_REG_MAP[reg.get_hw_encoding() as usize]),
_ => Err(RegisterMappingError::UnsupportedRegisterBank("class?")),
RegClass::Float => Ok(X86_XMM_REG_MAP[reg.to_real_reg().unwrap().hw_enc() as usize]),
}
}

View File

@@ -1,16 +1,15 @@
//! Unwind information for Windows x64 ABI.
use regalloc::{Reg, RegClass};
use crate::machinst::{Reg, RegClass};
pub(crate) struct RegisterMapper;
impl crate::isa::unwind::winx64::RegisterMapper<Reg> for RegisterMapper {
fn map(reg: Reg) -> crate::isa::unwind::winx64::MappedRegister {
use crate::isa::unwind::winx64::MappedRegister;
match reg.get_class() {
RegClass::I64 => MappedRegister::Int(reg.get_hw_encoding()),
RegClass::V128 => MappedRegister::Xmm(reg.get_hw_encoding()),
_ => unreachable!(),
match reg.class() {
RegClass::Int => MappedRegister::Int(reg.to_real_reg().unwrap().hw_enc()),
RegClass::Float => MappedRegister::Xmm(reg.to_real_reg().unwrap().hw_enc()),
}
}
}

View File

@@ -1432,12 +1432,12 @@
;;;; Rules for `trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (trap code))
(safepoint (x64_ud2 code)))
(side_effect (x64_ud2 code)))
;;;; Rules for `resumable_trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (resumable_trap code))
(safepoint (x64_ud2 code)))
(side_effect (x64_ud2 code)))
;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View File

@@ -20,7 +20,6 @@ use crate::settings::{Flags, TlsModel};
use alloc::boxed::Box;
use alloc::vec::Vec;
use log::trace;
use regalloc::{Reg, RegClass, Writable};
use smallvec::SmallVec;
use std::convert::TryFrom;
use target_lexicon::Triple;
@@ -1005,7 +1004,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// simply use the flags here.
let cc = CC::from_intcc(cond_code);
ctx.emit_safepoint(Inst::TrapIf { trap_code, cc });
ctx.emit(Inst::TrapIf { trap_code, cc });
} else if op == Opcode::Trapif {
let cond_code = ctx.data(insn).cond_code().unwrap();
@@ -1014,7 +1013,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let cond_code = emit_cmp(ctx, ifcmp, cond_code);
let cc = CC::from_intcc(cond_code);
ctx.emit_safepoint(Inst::TrapIf { trap_code, cc });
ctx.emit(Inst::TrapIf { trap_code, cc });
} else {
let cond_code = ctx.data(insn).fp_cond_code().unwrap();
@@ -1022,9 +1021,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let ffcmp = matches_input(ctx, inputs[0], Opcode::Ffcmp).unwrap();
match emit_fcmp(ctx, ffcmp, cond_code, FcmpSpec::Normal) {
FcmpCondResult::Condition(cc) => {
ctx.emit_safepoint(Inst::TrapIf { trap_code, cc })
}
FcmpCondResult::Condition(cc) => ctx.emit(Inst::TrapIf { trap_code, cc }),
FcmpCondResult::AndConditions(cc1, cc2) => {
// A bit unfortunate, but materialize the flags in their own register, and
// check against this.
@@ -1038,14 +1035,14 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
RegMemImm::reg(tmp.to_reg()),
tmp2,
));
ctx.emit_safepoint(Inst::TrapIf {
ctx.emit(Inst::TrapIf {
trap_code,
cc: CC::NZ,
});
}
FcmpCondResult::OrConditions(cc1, cc2) => {
ctx.emit_safepoint(Inst::TrapIf { trap_code, cc: cc1 });
ctx.emit_safepoint(Inst::TrapIf { trap_code, cc: cc2 });
ctx.emit(Inst::TrapIf { trap_code, cc: cc1 });
ctx.emit(Inst::TrapIf { trap_code, cc: cc2 });
}
FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(),
};
@@ -2917,7 +2914,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let src_ty = ctx.input_ty(insn, 0);
debug_assert!(src_ty.is_vector() && src_ty.bits() == 128);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
debug_assert!(dst.to_reg().class() == RegClass::Int);
// The Intel specification allows using both 32-bit and 64-bit GPRs as destination for
// the "move mask" instructions. This is controlled by the REX.R bit: "In 64-bit mode,

View File

@@ -2,23 +2,23 @@
// Pull in the ISLE generated code.
pub(crate) mod generated_code;
use crate::machinst::{Reg, Writable};
use generated_code::MInst;
use regalloc::Writable;
// Types that the generated ISLE code uses via `use super::*`.
use super::{is_int_or_ref_ty, is_mergeable_load, lower_to_amode, Reg};
use super::{is_int_or_ref_ty, is_mergeable_load, lower_to_amode};
use crate::{
ir::{
condcodes::{FloatCC, IntCC},
immediates::*,
types::*,
Inst, InstructionData, MemFlags, Opcode, TrapCode, Value, ValueLabel, ValueList,
Inst, InstructionData, MemFlags, Opcode, TrapCode, Value, ValueList,
},
isa::{
settings::Flags,
unwind::UnwindInst,
x64::{
inst::{args::*, regs, x64_map_regs},
inst::{args::*, regs},
settings::Flags as IsaFlags,
},
},
@@ -45,15 +45,9 @@ pub(crate) fn lower<C>(
where
C: LowerCtx<I = MInst>,
{
lower_common(
lower_ctx,
flags,
isa_flags,
outputs,
inst,
|cx, insn| generated_code::constructor_lower(cx, insn),
x64_map_regs,
)
lower_common(lower_ctx, flags, isa_flags, outputs, inst, |cx, insn| {
generated_code::constructor_lower(cx, insn)
})
}
impl<C> generated_code::Context for IsleContext<'_, C, Flags, IsaFlags, 6>
@@ -269,17 +263,7 @@ where
}
fn emit(&mut self, inst: &MInst) -> Unit {
for inst in inst.clone().mov_mitosis() {
self.emitted_insts.push((inst, false));
}
}
fn emit_safepoint(&mut self, inst: &MInst) -> Unit {
use crate::machinst::MachInst;
for inst in inst.clone().mov_mitosis() {
let is_safepoint = !inst.is_move().is_some();
self.emitted_insts.push((inst, is_safepoint));
}
self.lower_ctx.emit(inst.clone());
}
#[inline]

View File

@@ -1,4 +1,4 @@
src/clif.isle 443b34b797fc8ace
src/prelude.isle c0751050a11e2686
src/isa/x64/inst.isle 1a4206dba9fcf9d8
src/isa/x64/lower.isle 7e839e6b667bfe77
src/prelude.isle afd037c4d91c875c
src/isa/x64/inst.isle f3163ebadf210bb0
src/isa/x64/lower.isle fd63f3801d58180f

File diff suppressed because it is too large Load Diff

View File

@@ -6,8 +6,9 @@ use super::TargetIsa;
use crate::ir::{condcodes::IntCC, Function};
#[cfg(feature = "unwind")]
use crate::isa::unwind::systemv;
use crate::isa::x64::{inst::regs::create_reg_universe_systemv, settings as x64_settings};
use crate::isa::x64::{inst::regs::create_reg_env_systemv, settings as x64_settings};
use crate::isa::Builder as IsaBuilder;
use crate::machinst::Reg;
use crate::machinst::{
compile, MachCompileResult, MachTextSectionBuilder, TextSectionBuilder, VCode,
};
@@ -15,8 +16,7 @@ use crate::result::{CodegenError, CodegenResult};
use crate::settings::{self as shared_settings, Flags};
use alloc::{boxed::Box, vec::Vec};
use core::fmt;
use regalloc::{PrettyPrint, RealRegUniverse, Reg};
use regalloc2::MachineEnv;
use target_lexicon::Triple;
mod abi;
@@ -30,27 +30,31 @@ pub(crate) struct X64Backend {
triple: Triple,
flags: Flags,
x64_flags: x64_settings::Flags,
reg_universe: RealRegUniverse,
reg_env: MachineEnv,
}
impl X64Backend {
/// Create a new X64 backend with the given (shared) flags.
fn new_with_flags(triple: Triple, flags: Flags, x64_flags: x64_settings::Flags) -> Self {
let reg_universe = create_reg_universe_systemv(&flags);
let reg_env = create_reg_env_systemv(&flags);
Self {
triple,
flags,
x64_flags,
reg_universe,
reg_env,
}
}
fn compile_vcode(&self, func: &Function, flags: Flags) -> CodegenResult<VCode<inst::Inst>> {
fn compile_vcode(
&self,
func: &Function,
flags: Flags,
) -> CodegenResult<(VCode<inst::Inst>, regalloc2::Output)> {
// This performs lowering to VCode, register-allocates the code, computes
// block layout and finalizes branches. The result is ready for binary emission.
let emit_info = EmitInfo::new(flags.clone(), self.x64_flags.clone());
let abi = Box::new(abi::X64ABICallee::new(&func, flags, self.isa_flags())?);
compile::compile::<Self>(&func, self, abi, &self.reg_universe, emit_info)
compile::compile::<Self>(&func, self, abi, &self.reg_env, emit_info)
}
}
@@ -61,28 +65,27 @@ impl TargetIsa for X64Backend {
want_disasm: bool,
) -> CodegenResult<MachCompileResult> {
let flags = self.flags();
let vcode = self.compile_vcode(func, flags.clone())?;
let (vcode, regalloc_result) = self.compile_vcode(func, flags.clone())?;
let (buffer, bb_starts, bb_edges) = vcode.emit();
let buffer = buffer.finish();
let frame_size = vcode.frame_size();
let value_labels_ranges = vcode.value_labels_ranges();
let stackslot_offsets = vcode.stackslot_offsets().clone();
let want_disasm = want_disasm || log::log_enabled!(log::Level::Debug);
let emit_result = vcode.emit(&regalloc_result, want_disasm, flags.machine_code_cfg_info());
let frame_size = emit_result.frame_size;
let value_labels_ranges = emit_result.value_labels_ranges;
let buffer = emit_result.buffer.finish();
let stackslot_offsets = emit_result.stackslot_offsets;
let disasm = if want_disasm {
Some(vcode.show_rru(Some(&create_reg_universe_systemv(flags))))
} else {
None
};
if let Some(disasm) = emit_result.disasm.as_ref() {
log::debug!("disassembly:\n{}", disasm);
}
Ok(MachCompileResult {
buffer,
frame_size,
disasm,
disasm: emit_result.disasm,
value_labels_ranges,
stackslot_offsets,
bb_starts,
bb_edges,
bb_starts: emit_result.bb_offsets,
bb_edges: emit_result.bb_edges,
})
}
@@ -319,30 +322,29 @@ mod test {
// 00000000 55 push rbp
// 00000001 4889E5 mov rbp,rsp
// 00000004 4889FE mov rsi,rdi
// 00000007 81C634120000 add esi,0x1234
// 0000000D 85F6 test esi,esi
// 0000000F 0F841B000000 jz near 0x30
// 00000015 4889F7 mov rdi,rsi
// 00000018 4889F0 mov rax,rsi
// 0000001B 81E834120000 sub eax,0x1234
// 00000021 01F8 add eax,edi
// 00000023 85F6 test esi,esi
// 00000025 0F8505000000 jnz near 0x30
// 0000002B 4889EC mov rsp,rbp
// 0000002E 5D pop rbp
// 0000002F C3 ret
// 00000030 4889F7 mov rdi,rsi <--- cold block
// 00000033 81C734120000 add edi,0x1234
// 00000039 85FF test edi,edi
// 0000003B 0F85EFFFFFFF jnz near 0x30
// 00000041 E9D2FFFFFF jmp 0x18
// 00000004 81C734120000 add edi,0x1234
// 0000000A 85FF test edi,edi
// 0000000C 0F841C000000 jz near 0x2e
// 00000012 4989F8 mov r8,rdi
// 00000015 4889F8 mov rax,rdi
// 00000018 81E834120000 sub eax,0x1234
// 0000001E 4401C0 add eax,r8d
// 00000021 85FF test edi,edi
// 00000023 0F8505000000 jnz near 0x2e
// 00000029 4889EC mov rsp,rbp
// 0000002C 5D pop rbp
// 0000002D C3 ret
// 0000002E 4989F8 mov r8,rdi
// 00000031 4181C034120000 add r8d,0x1234
// 00000038 4585C0 test r8d,r8d
// 0000003B 0F85EDFFFFFF jnz near 0x2e
// 00000041 E9CFFFFFFF jmp 0x15
let golden = vec![
85, 72, 137, 229, 72, 137, 254, 129, 198, 52, 18, 0, 0, 133, 246, 15, 132, 27, 0, 0, 0,
72, 137, 247, 72, 137, 240, 129, 232, 52, 18, 0, 0, 1, 248, 133, 246, 15, 133, 5, 0, 0,
0, 72, 137, 236, 93, 195, 72, 137, 247, 129, 199, 52, 18, 0, 0, 133, 255, 15, 133, 239,
255, 255, 255, 233, 210, 255, 255, 255,
85, 72, 137, 229, 129, 199, 52, 18, 0, 0, 133, 255, 15, 132, 28, 0, 0, 0, 73, 137, 248,
72, 137, 248, 129, 232, 52, 18, 0, 0, 68, 1, 192, 133, 255, 15, 133, 5, 0, 0, 0, 72,
137, 236, 93, 195, 73, 137, 248, 65, 129, 192, 52, 18, 0, 0, 69, 133, 192, 15, 133,
237, 255, 255, 255, 233, 207, 255, 255, 255,
];
assert_eq!(code, &golden[..]);