Merge pull request #1718 from cfallin/machinst-codebuffer

Rework of MachInst isel, branch fixups and lowering, and block ordering.
Chris Fallin
2020-05-19 07:17:22 -07:00
committed by GitHub
32 changed files with 3465 additions and 2353 deletions

View File

@@ -227,7 +227,7 @@ impl Context {
let _tt = timing::binemit();
let mut sink = MemoryCodeSink::new(mem, relocs, traps, stackmaps);
if let Some(ref result) = &self.mach_compile_result {
result.sections.emit(&mut sink);
result.buffer.emit(&mut sink);
} else {
isa.emit_function_to_memory(&self.func, &mut sink);
}

View File

@@ -40,3 +40,24 @@ pub fn has_side_effect(func: &Function, inst: Inst) -> bool {
let opcode = data.opcode();
trivially_has_side_effects(opcode) || is_load_with_defined_trapping(opcode, data)
}
/// Does the given instruction have any side effect as per [has_side_effect], or is it a load?
pub fn has_side_effect_or_load(func: &Function, inst: Inst) -> bool {
has_side_effect(func, inst) || func.dfg[inst].opcode().can_load()
}
/// Is the given instruction a constant value (`iconst`, `f32const`, `f64const`, `bconst`, or
/// `null`) that can be represented in 64 bits?
pub fn is_constant_64bit(func: &Function, inst: Inst) -> Option<u64> {
let data = &func.dfg[inst];
if data.opcode() == Opcode::Null {
return Some(0);
}
match data {
&InstructionData::UnaryImm { imm, .. } => Some(imm.bits() as u64),
&InstructionData::UnaryIeee32 { imm, .. } => Some(imm.bits() as u64),
&InstructionData::UnaryIeee64 { imm, .. } => Some(imm.bits()),
&InstructionData::UnaryBool { imm, .. } => Some(if imm { 1 } else { 0 }),
_ => None,
}
}
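A small sketch of how a caller might combine these two predicates; the wrapper name is hypothetical and assumes this module's existing imports:

/// Hypothetical caller: a value may be rematerialized at each use site only if
/// it is a pure constant expressible in 64 bits, with no side effects and no
/// load behavior.
pub fn rematerializable_const(func: &Function, inst: Inst) -> Option<u64> {
    if has_side_effect_or_load(func, inst) {
        return None;
    }
    is_constant_64bit(func, inst)
}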

View File

@@ -504,7 +504,7 @@ impl AArch64ABIBody {
rn: stack_reg(),
rm: stack_limit,
});
insts.push(Inst::CondBrLowered {
insts.push(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(8),
// Here `Hs` == "higher or same" when interpreting the two
// operands as unsigned integers.

View File

@@ -3,14 +3,14 @@
// Some variants are never constructed, but we still want them as options in the future.
#![allow(dead_code)]
use crate::binemit::CodeOffset;
use crate::ir::Type;
use crate::isa::aarch64::inst::*;
use crate::isa::aarch64::lower::ty_bits;
use crate::machinst::MachLabel;
use regalloc::{RealRegUniverse, Reg, Writable};
use core::convert::{Into, TryFrom};
use core::convert::Into;
use std::string::String;
/// A shift operator for a register or immediate.
@@ -303,78 +303,44 @@ impl CondBrKind {
/// A branch target. Either unresolved (basic-block index) or resolved (offset
/// from end of current instruction).
#[derive(Clone, Copy, Debug)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum BranchTarget {
/// An unresolved reference to a BlockIndex, as passed into
/// An unresolved reference to a Label, as passed into
/// `lower_branch_group()`.
Block(BlockIndex),
/// A resolved reference to another instruction, after
/// `Inst::with_block_offsets()`.
Label(MachLabel),
/// A fixed PC offset.
ResolvedOffset(isize),
}
impl BranchTarget {
/// Lower the branch target given offsets of each block.
pub fn lower(&mut self, targets: &[CodeOffset], my_offset: CodeOffset) {
/// Return the target's label, if it is a label-based target.
pub fn as_label(self) -> Option<MachLabel> {
match self {
&mut BranchTarget::Block(bix) => {
let bix = usize::try_from(bix).unwrap();
assert!(bix < targets.len());
let block_offset_in_func = targets[bix];
let branch_offset = (block_offset_in_func as isize) - (my_offset as isize);
*self = BranchTarget::ResolvedOffset(branch_offset);
}
&mut BranchTarget::ResolvedOffset(..) => {}
}
}
/// Get the block index.
pub fn as_block_index(&self) -> Option<BlockIndex> {
match self {
&BranchTarget::Block(bix) => Some(bix),
BranchTarget::Label(l) => Some(l),
_ => None,
}
}
/// Get the offset as 4-byte words. Returns `0` if not
/// yet resolved (in that case, we're only computing
/// size and the offset doesn't matter).
pub fn as_offset_words(&self) -> isize {
match self {
&BranchTarget::ResolvedOffset(off) => off >> 2,
/// Return the target's offset, if specified, or zero if label-based.
pub fn as_offset19_or_zero(self) -> u32 {
let off = match self {
BranchTarget::ResolvedOffset(off) => off >> 2,
_ => 0,
}
};
assert!(off <= 0x3ffff);
assert!(off >= -0x40000);
(off as u32) & 0x7ffff
}
/// Get the offset as a 26-bit offset suitable for a 26-bit jump, or `None` if overflow.
pub fn as_off26(&self) -> Option<u32> {
let off = self.as_offset_words();
if (off < (1 << 25)) && (off >= -(1 << 25)) {
Some((off as u32) & ((1 << 26) - 1))
} else {
None
}
}
/// Get the offset as a 19-bit offset, or `None` if overflow.
pub fn as_off19(&self) -> Option<u32> {
let off = self.as_offset_words();
if (off < (1 << 18)) && (off >= -(1 << 18)) {
Some((off as u32) & ((1 << 19) - 1))
} else {
None
}
}
/// Map the block index given a transform map.
pub fn map(&mut self, block_index_map: &[BlockIndex]) {
match self {
&mut BranchTarget::Block(ref mut bix) => {
let n = block_index_map[usize::try_from(*bix).unwrap()];
*bix = n;
}
&mut BranchTarget::ResolvedOffset(_) => {}
}
/// Return the target's offset, if specified, or zero if label-based.
pub fn as_offset26_or_zero(self) -> u32 {
let off = match self {
BranchTarget::ResolvedOffset(off) => off >> 2,
_ => 0,
};
assert!(off <= 0x1ffffff);
assert!(off >= -0x2000000);
(off as u32) & 0x3ffffff
}
}
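For reference, a quick sketch of the values these accessors produce, checked against the code above (the test name is illustrative):

#[test]
fn branch_target_offset_encoding() {
    let fwd = BranchTarget::ResolvedOffset(64);
    assert_eq!(fwd.as_offset19_or_zero(), 16); // 64 bytes => 16 four-byte words
    assert_eq!(fwd.as_offset26_or_zero(), 16);

    let back = BranchTarget::ResolvedOffset(-8);
    // Negative word offsets are truncated, two's-complement, to the field width.
    assert_eq!(back.as_offset19_or_zero(), 0x7fffe);
    assert_eq!(back.as_offset26_or_zero(), 0x3fffffe);

    // Label-based targets encode as zero here; the MachBuffer patches in the
    // real offset later via label fixups.
}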
@@ -507,7 +473,7 @@ impl ShowWithRRU for Cond {
impl ShowWithRRU for BranchTarget {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
match self {
&BranchTarget::Block(block) => format!("block{}", block),
&BranchTarget::Label(label) => format!("label{:?}", label.get()),
&BranchTarget::ResolvedOffset(off) => format!("{}", off),
}
}

View File

@@ -4,7 +4,7 @@ use crate::binemit::{CodeOffset, Reloc};
use crate::ir::constant::ConstantData;
use crate::ir::types::*;
use crate::ir::TrapCode;
use crate::isa::aarch64::{inst::regs::PINNED_REG, inst::*};
use crate::isa::aarch64::inst::*;
use regalloc::{Reg, RegClass, Writable};
@@ -149,6 +149,14 @@ fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 {
(op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond
}
fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 {
match kind {
CondBrKind::Zero(reg) => enc_cmpbr(0b1_011010_0, taken.as_offset19_or_zero(), reg),
CondBrKind::NotZero(reg) => enc_cmpbr(0b1_011010_1, taken.as_offset19_or_zero(), reg),
CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()),
}
}
const MOVE_WIDE_FIXED: u32 = 0x92800000;
#[repr(u32)]
@@ -340,10 +348,17 @@ pub struct EmitState {
virtual_sp_offset: i64,
}
impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
impl MachInstEmit for Inst {
type State = EmitState;
fn emit(&self, sink: &mut O, flags: &settings::Flags, state: &mut EmitState) {
fn emit(&self, sink: &mut MachBuffer<Inst>, flags: &settings::Flags, state: &mut EmitState) {
// N.B.: we *must* not exceed the "worst-case size" used to compute
// where to insert islands, except when islands are explicitly triggered
// (with an `EmitIsland`). We check this in debug builds. This is `mut`
// to allow disabling the check for `JTSequence`, which is always
// emitted following an `EmitIsland`.
let mut start_off = sink.cur_offset();
match self {
&Inst::AluRRR { alu_op, rd, rn, rm } => {
let top11 = match alu_op {
@@ -616,7 +631,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
ref mem,
srcloc,
} => {
let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem, state);
let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
for inst in mem_insts.into_iter() {
inst.emit(sink, flags, state);
@@ -759,7 +774,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
ref mem,
srcloc,
} => {
let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem, state);
let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
for inst in mem_insts.into_iter() {
inst.emit(sink, flags, state);
@@ -1147,10 +1162,18 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
panic!("Unsupported extend variant");
}
&Inst::Jump { ref dest } => {
// TODO: differentiate between as_off26() returning `None` for
// out-of-range vs. not-yet-finalized. The latter happens when we
// do early (fake) emission for size computation.
sink.put4(enc_jump26(0b000101, dest.as_off26().unwrap()));
let off = sink.cur_offset();
// Emit the jump itself.
sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero()));
// After the jump has been emitted, record that it uses a label
// (if it does) so that a fixup can occur later. This happens
// after we emit the bytes because the fixup might occur right
// away (so the bytes must actually exist now).
if let Some(l) = dest.as_label() {
sink.use_label_at_offset(off, l, LabelUse::Branch26);
let cur_off = sink.cur_offset();
sink.add_uncond_branch(off, cur_off, l);
}
}
&Inst::Ret => {
sink.put4(0xd65f03c0);
@@ -1178,51 +1201,35 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
sink.add_call_site(loc, opcode);
}
}
&Inst::CondBr { .. } => panic!("Unlowered CondBr during binemit!"),
&Inst::CondBrLowered { target, kind } => match kind {
// TODO: handle >2^19 case by emitting a compound sequence with
// an unconditional (26-bit) branch. We need branch-relaxation
// adjustment machinery to enable this (because we don't want to
// always emit the long form).
CondBrKind::Zero(reg) => {
sink.put4(enc_cmpbr(0b1_011010_0, target.as_off19().unwrap(), reg));
}
CondBrKind::NotZero(reg) => {
sink.put4(enc_cmpbr(0b1_011010_1, target.as_off19().unwrap(), reg));
}
CondBrKind::Cond(c) => {
sink.put4(enc_cbr(
0b01010100,
target.as_off19().unwrap_or(0),
0b0,
c.bits(),
));
}
},
&Inst::CondBrLoweredCompound {
&Inst::CondBr {
taken,
not_taken,
kind,
} => {
// Conditional part first.
match kind {
CondBrKind::Zero(reg) => {
sink.put4(enc_cmpbr(0b1_011010_0, taken.as_off19().unwrap(), reg));
}
CondBrKind::NotZero(reg) => {
sink.put4(enc_cmpbr(0b1_011010_1, taken.as_off19().unwrap(), reg));
}
CondBrKind::Cond(c) => {
sink.put4(enc_cbr(
0b01010100,
taken.as_off19().unwrap_or(0),
0b0,
c.bits(),
));
}
let cond_off = sink.cur_offset();
sink.put4(enc_conditional_br(taken, kind));
if let Some(l) = taken.as_label() {
sink.use_label_at_offset(cond_off, l, LabelUse::Branch19);
let cur_off = sink.cur_offset();
let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes();
sink.add_cond_branch(cond_off, cur_off, l, &inverted[..]);
}
// Unconditional part.
sink.put4(enc_jump26(0b000101, not_taken.as_off26().unwrap_or(0)));
let uncond_off = sink.cur_offset();
sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
if let Some(l) = not_taken.as_label() {
sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
let cur_off = sink.cur_offset();
sink.add_uncond_branch(uncond_off, cur_off, l);
}
}
&Inst::OneWayCondBr { target, kind } => {
let off = sink.cur_offset();
sink.put4(enc_conditional_br(target, kind));
if let Some(l) = target.as_label() {
sink.use_label_at_offset(off, l, LabelUse::Branch19);
}
}
&Inst::IndirectBr { rn, .. } => {
sink.put4(enc_br(rn));
@@ -1239,8 +1246,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
sink.add_trap(srcloc, code);
sink.put4(0xd4a00000);
}
&Inst::Adr { rd, ref label } => {
let off = memlabel_finalize(sink.cur_offset_from_start(), label);
&Inst::Adr { rd, off } => {
assert!(off > -(1 << 20));
assert!(off < (1 << 20));
sink.put4(enc_adr(off, rd));
@@ -1261,19 +1267,13 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
// This sequence is *one* instruction in the vcode, and is expanded only here at
// emission time, because we cannot allow the regalloc to insert spills/reloads in
// the middle; we depend on hardcoded PC-rel addressing below.
//
// N.B.: if PC-rel addressing on ADR below is changed, also update
// `Inst::with_block_offsets()` in aarch64/inst/mod.rs.
// Save index in a tmp (the live range of ridx only goes to start of this
// sequence; rtmp1 or rtmp2 may overwrite it).
let inst = Inst::gen_move(rtmp2, ridx, I64);
inst.emit(sink, flags, state);
// Load address of jump table
let inst = Inst::Adr {
rd: rtmp1,
label: MemLabel::PCRel(16),
};
let inst = Inst::Adr { rd: rtmp1, off: 16 };
inst.emit(sink, flags, state);
// Load value out of jump table
let inst = Inst::SLoad32 {
@@ -1303,13 +1303,21 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
};
inst.emit(sink, flags, state);
// Emit jump table (table of 32-bit offsets).
for target in targets.iter() {
let off = target.as_offset_words() * 4;
let off = i32::try_from(off).unwrap();
// cast i32 to u32 (two's-complement)
let off = off as u32;
sink.put4(off);
let jt_off = sink.cur_offset();
for &target in targets.iter() {
let word_off = sink.cur_offset();
let off_into_table = word_off - jt_off;
sink.put4(off_into_table);
sink.use_label_at_offset(
word_off,
target.as_label().unwrap(),
LabelUse::PCRel32,
);
}
// Lowering produces an EmitIsland before using a JTSequence, so we can safely
// disable the worst-case-size check in this case.
start_off = sink.cur_offset();
}
&Inst::LoadConst64 { rd, const_data } => {
let inst = Inst::ULoad64 {
@@ -1348,7 +1356,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
}
}
&Inst::LoadAddr { rd, ref mem } => {
let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem, state);
let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
for inst in mem_insts.into_iter() {
inst.emit(sink, flags, state);
}
@@ -1401,20 +1409,6 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
add.emit(sink, flags, state);
}
}
&Inst::GetPinnedReg { rd } => {
let inst = Inst::Mov {
rd,
rm: xreg(PINNED_REG),
};
inst.emit(sink, flags, state);
}
&Inst::SetPinnedReg { rm } => {
let inst = Inst::Mov {
rd: Writable::from_reg(xreg(PINNED_REG)),
rm,
};
inst.emit(sink, flags, state);
}
&Inst::VirtualSPOffsetAdj { offset } => {
debug!(
"virtual sp offset adjusted by {} -> {}",
@@ -1423,6 +1417,20 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
);
state.virtual_sp_offset += offset;
}
&Inst::EmitIsland { needed_space } => {
if sink.island_needed(needed_space + 4) {
let jump_around_label = sink.get_label();
let jmp = Inst::Jump {
dest: BranchTarget::Label(jump_around_label),
};
jmp.emit(sink, flags, state);
sink.emit_island();
sink.bind_label(jump_around_label);
}
}
}
let end_off = sink.cur_offset();
debug_assert!((end_off - start_off) <= Inst::worst_case_size());
}
}
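A hedged sketch of the emission flow this implementation assumes, using only APIs that appear elsewhere in this commit (MachBuffer, get_label/bind_label, finish, and the test code sink); the particular instruction sequence is illustrative:

#[cfg(test)]
fn emit_with_label_fixup_sketch() {
    let flags = settings::Flags::new(settings::builder());
    let mut buffer = MachBuffer::new();
    let mut state = EmitState::default();

    // Reserve a label, emit a conditional branch to it, and bind it after some
    // code. Emission records a `LabelUse::Branch19` fixup at the branch's
    // offset; binding (or `finish()`) resolves it, emitting a veneer first if
    // the target were out of range.
    let skip = buffer.get_label();
    Inst::OneWayCondBr {
        target: BranchTarget::Label(skip),
        kind: CondBrKind::Zero(xreg(8)),
    }
    .emit(&mut buffer, &flags, &mut state);
    Inst::Brk.emit(&mut buffer, &flags, &mut state);
    buffer.bind_label(skip);
    Inst::Ret.emit(&mut buffer, &flags, &mut state);

    // Finalize and write out: cbz x8 / brk #0 / ret (three 4-byte words).
    let buffer = buffer.finish();
    let mut sink = test_utils::TestCodeSink::new();
    buffer.emit(&mut sink);
}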

View File

@@ -1956,7 +1956,7 @@ fn test_aarch64_binemit() {
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Zero(xreg(8)),
},
@@ -1964,7 +1964,7 @@ fn test_aarch64_binemit() {
"cbz x8, 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::NotZero(xreg(8)),
},
@@ -1972,7 +1972,7 @@ fn test_aarch64_binemit() {
"cbnz x8, 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Eq),
},
@@ -1980,7 +1980,7 @@ fn test_aarch64_binemit() {
"b.eq 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Ne),
},
@@ -1989,7 +1989,7 @@ fn test_aarch64_binemit() {
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Hs),
},
@@ -1997,7 +1997,7 @@ fn test_aarch64_binemit() {
"b.hs 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Lo),
},
@@ -2005,7 +2005,7 @@ fn test_aarch64_binemit() {
"b.lo 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Mi),
},
@@ -2013,7 +2013,7 @@ fn test_aarch64_binemit() {
"b.mi 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Pl),
},
@@ -2021,7 +2021,7 @@ fn test_aarch64_binemit() {
"b.pl 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Vs),
},
@@ -2029,7 +2029,7 @@ fn test_aarch64_binemit() {
"b.vs 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Vc),
},
@@ -2037,7 +2037,7 @@ fn test_aarch64_binemit() {
"b.vc 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Hi),
},
@@ -2045,7 +2045,7 @@ fn test_aarch64_binemit() {
"b.hi 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Ls),
},
@@ -2053,7 +2053,7 @@ fn test_aarch64_binemit() {
"b.ls 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Ge),
},
@@ -2061,7 +2061,7 @@ fn test_aarch64_binemit() {
"b.ge 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Lt),
},
@@ -2069,7 +2069,7 @@ fn test_aarch64_binemit() {
"b.lt 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Gt),
},
@@ -2077,7 +2077,7 @@ fn test_aarch64_binemit() {
"b.gt 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Le),
},
@@ -2085,7 +2085,7 @@ fn test_aarch64_binemit() {
"b.le 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Al),
},
@@ -2093,7 +2093,7 @@ fn test_aarch64_binemit() {
"b.al 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Nv),
},
@@ -2102,7 +2102,7 @@ fn test_aarch64_binemit() {
));
insns.push((
Inst::CondBrLoweredCompound {
Inst::CondBr {
taken: BranchTarget::ResolvedOffset(64),
not_taken: BranchTarget::ResolvedOffset(128),
kind: CondBrKind::Cond(Cond::Le),
@@ -2138,7 +2138,7 @@ fn test_aarch64_binemit() {
insns.push((
Inst::IndirectBr {
rn: xreg(3),
targets: vec![1, 2, 3],
targets: vec![],
},
"60001FD6",
"br x3",
@@ -2149,7 +2149,7 @@ fn test_aarch64_binemit() {
insns.push((
Inst::Adr {
rd: writable_xreg(15),
label: MemLabel::PCRel((1 << 20) - 4),
off: (1 << 20) - 4,
},
"EFFF7F10",
"adr x15, pc+1048572",
@@ -2792,19 +2792,11 @@ fn test_aarch64_binemit() {
let actual_printing = insn.show_rru(Some(&rru));
assert_eq!(expected_printing, actual_printing);
// Check the encoding is as expected.
let text_size = {
let mut code_sec = MachSectionSize::new(0);
insn.emit(&mut code_sec, &flags, &mut Default::default());
code_sec.size()
};
let mut sink = test_utils::TestCodeSink::new();
let mut sections = MachSections::new();
let code_idx = sections.add_section(0, text_size);
let code_sec = sections.get_section(code_idx);
insn.emit(code_sec, &flags, &mut Default::default());
sections.emit(&mut sink);
let mut buffer = MachBuffer::new();
insn.emit(&mut buffer, &flags, &mut Default::default());
let buffer = buffer.finish();
buffer.emit(&mut sink);
let actual_encoding = &sink.stringify();
assert_eq!(expected_encoding, actual_encoding);
}

View File

@@ -645,35 +645,37 @@ pub enum Inst {
dest: BranchTarget,
},
/// A conditional branch.
/// A conditional branch. Contains two targets; at emission time, both are emitted, but
/// the MachBuffer knows to truncate the trailing unconditional branch if it targets the
/// fallthrough block. We optimize the choice of taken/not_taken (inverting the branch
/// polarity as needed) based on the known fallthrough block at lowering time.
CondBr {
taken: BranchTarget,
not_taken: BranchTarget,
kind: CondBrKind,
},
/// Lowered conditional branch: contains the original branch kind (or the
/// inverse), but only one BranchTarget is retained. The other is
/// implicitly the next instruction, given the final basic-block layout.
CondBrLowered {
/// A one-way conditional branch, invisible to the CFG processing; used *only* as part of
/// straight-line sequences in code to be emitted.
///
/// In more detail:
/// - This branch is lowered to a branch at the machine-code level, but does not end a basic
/// block, and does not create edges in the CFG seen by regalloc.
/// - Thus, it is *only* valid to use as part of a single-in, single-out sequence that is
/// lowered from a single CLIF instruction. For example, certain arithmetic operations may
/// use these branches to handle certain conditions, such as overflows, traps, etc.
///
/// See, e.g., the lowering of `trapif` (conditional trap) for an example.
OneWayCondBr {
target: BranchTarget,
kind: CondBrKind,
},
/// As for `CondBrLowered`, but represents a condbr/uncond-br sequence (two
/// actual machine instructions). Needed when the final block layout implies
/// that neither arm of a conditional branch targets the fallthrough block.
CondBrLoweredCompound {
taken: BranchTarget,
not_taken: BranchTarget,
kind: CondBrKind,
},
/// An indirect branch through a register, augmented with set of all
/// possible successors.
IndirectBr {
rn: Reg,
targets: Vec<BlockIndex>,
targets: Vec<MachLabel>,
},
/// A "break" instruction, used for e.g. traps and debug breakpoints.
@@ -685,11 +687,14 @@ pub enum Inst {
trap_info: (SourceLoc, TrapCode),
},
/// Load the address (using a PC-relative offset) of a MemLabel, using the
/// `ADR` instruction.
/// Compute the address (using a PC-relative offset) of a memory location, using the `ADR`
/// instruction. Note that we take a simple offset, not a `MemLabel`, here, because `Adr` is,
/// for now, only used in fixed lowering sequences with hardcoded offsets. In the future we may
/// need full `MemLabel` support.
Adr {
rd: Writable<Reg>,
label: MemLabel,
/// Offset in range -2^20 .. 2^20.
off: i32,
},
/// Raw 32-bit word, used for inline constants and jump-table entries.
@@ -706,7 +711,7 @@ pub enum Inst {
/// for rationale).
JTSequence {
targets: Box<[BranchTarget]>,
targets_for_term: Box<[BlockIndex]>, // needed for MachTerminator.
targets_for_term: Box<[MachLabel]>, // needed for MachTerminator.
ridx: Reg,
rtmp1: Writable<Reg>,
rtmp2: Writable<Reg>,
@@ -732,21 +737,36 @@ pub enum Inst {
mem: MemArg,
},
/// Sets the value of the pinned register to the given register target.
GetPinnedReg {
rd: Writable<Reg>,
},
/// Writes the value of the given source register to the pinned register.
SetPinnedReg {
rm: Reg,
},
/// Marker, no-op in generated code: SP "virtual offset" is adjusted. This
/// controls how MemArg::NominalSPOffset args are lowered.
VirtualSPOffsetAdj {
offset: i64,
},
/// Meta-insn, no-op in generated code: emit constant/branch veneer island
/// at this point (with a guard jump around it) if less than the needed
/// space is available before the next branch deadline. See the `MachBuffer`
/// implementation in `machinst/buffer.rs` for the overall algorithm. In
/// brief, we retain a set of "pending/unresolved label references" from
/// branches as we scan forward through instructions to emit machine code;
/// if we notice we're about to go out of range on an unresolved reference,
/// we stop, emit a bunch of "veneers" (branches in a form that has a longer
/// range, e.g. a 26-bit-offset unconditional jump), and point the original
/// label references to those. This is an "island" because it comes in the
/// middle of the code.
///
/// This meta-instruction is a necessary part of the logic that determines
/// where to place islands. Ordinarily, we want to place them between basic
/// blocks, so we compute the worst-case size of each block, and emit the
/// island before starting a block if we would exceed a deadline before the
/// end of the block. However, some sequences (such as an inline jumptable)
/// are variable-length and not accounted for by this logic; so these
/// lowered sequences include an `EmitIsland` to trigger island generation
/// where necessary.
EmitIsland {
/// The needed space before the next deadline.
needed_space: CodeOffset,
},
}
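As the `EmitIsland` documentation notes, variable-length lowered sequences request their own island. A sketch of how a lowering might pair `EmitIsland` with a `JTSequence`; `ctx`, the registers, and the targets are assumed bindings from a surrounding lowering, and the size bound is a hypothetical stand-in for the backend's actual formula:

// Ask for an island *before* the variable-length jump-table sequence, so any
// pending veneers are emitted around the table rather than inside it.
// `jt_size` is a hypothetical worst-case bound: the fixed instructions of the
// sequence plus one 32-bit word per table entry.
let jt_size = (32 + 4 * jt_targets.len()) as CodeOffset;
ctx.emit(Inst::EmitIsland { needed_space: jt_size });
ctx.emit(Inst::JTSequence {
    ridx,
    rtmp1,
    rtmp2,
    targets: jt_targets,
    targets_for_term: jt_labels,
});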
fn count_zero_half_words(mut value: u64) -> usize {
@@ -1111,9 +1131,7 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_defs(&*defs);
collector.add_use(rn);
}
&Inst::CondBr { ref kind, .. }
| &Inst::CondBrLowered { ref kind, .. }
| &Inst::CondBrLoweredCompound { ref kind, .. } => match kind {
&Inst::CondBr { ref kind, .. } | &Inst::OneWayCondBr { ref kind, .. } => match kind {
CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => {
collector.add_use(*rt);
}
@@ -1142,13 +1160,8 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
&Inst::LoadAddr { rd, mem: _ } => {
collector.add_def(rd);
}
&Inst::GetPinnedReg { rd } => {
collector.add_def(rd);
}
&Inst::SetPinnedReg { rm } => {
collector.add_use(rm);
}
&Inst::VirtualSPOffsetAdj { .. } => {}
&Inst::EmitIsland { .. } => {}
}
}
@@ -1676,13 +1689,7 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) {
*defs = Box::new(new_defs);
map_use(mapper, rn);
}
&mut Inst::CondBr { ref mut kind, .. } => {
map_br(mapper, kind);
}
&mut Inst::CondBrLowered { ref mut kind, .. } => {
map_br(mapper, kind);
}
&mut Inst::CondBrLoweredCompound { ref mut kind, .. } => {
&mut Inst::CondBr { ref mut kind, .. } | &mut Inst::OneWayCondBr { ref mut kind, .. } => {
map_br(mapper, kind);
}
&mut Inst::IndirectBr { ref mut rn, .. } => {
@@ -1716,13 +1723,8 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) {
map_def(mapper, rd);
map_mem(mapper, mem);
}
&mut Inst::GetPinnedReg { ref mut rd } => {
map_def(mapper, rd);
}
&mut Inst::SetPinnedReg { ref mut rm } => {
map_use(mapper, rm);
}
&mut Inst::VirtualSPOffsetAdj { .. } => {}
&mut Inst::EmitIsland { .. } => {}
}
}
@@ -1730,6 +1732,8 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) {
// Instructions: misc functions and external interface
impl MachInst for Inst {
type LabelUse = LabelUse;
fn get_regs(&self, collector: &mut RegUsageCollector) {
aarch64_get_regs(self, collector)
}
@@ -1757,24 +1761,14 @@ impl MachInst for Inst {
fn is_term<'a>(&'a self) -> MachTerminator<'a> {
match self {
&Inst::Ret | &Inst::EpiloguePlaceholder => MachTerminator::Ret,
&Inst::Jump { dest } => MachTerminator::Uncond(dest.as_block_index().unwrap()),
&Inst::Jump { dest } => MachTerminator::Uncond(dest.as_label().unwrap()),
&Inst::CondBr {
taken, not_taken, ..
} => MachTerminator::Cond(
taken.as_block_index().unwrap(),
not_taken.as_block_index().unwrap(),
),
&Inst::CondBrLowered { .. } => {
// When this is used prior to branch finalization for branches
// within an open-coded sequence, i.e. with ResolvedOffsets,
// do not consider it a terminator. From the point of view of CFG analysis,
// it is part of a black-box single-in single-out region, hence is not
// denoted a terminator.
} => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()),
&Inst::OneWayCondBr { .. } => {
// Explicitly invisible to CFG processing.
MachTerminator::None
}
&Inst::CondBrLoweredCompound { .. } => {
panic!("is_term() called after lowering branches");
}
&Inst::IndirectBr { ref targets, .. } => MachTerminator::Indirect(&targets[..]),
&Inst::JTSequence {
ref targets_for_term,
@@ -1789,6 +1783,35 @@ impl MachInst for Inst {
Inst::mov(to_reg, from_reg)
}
fn gen_constant(to_reg: Writable<Reg>, value: u64, ty: Type) -> SmallVec<[Inst; 4]> {
if ty == F64 {
let mut ret = SmallVec::new();
ret.push(Inst::load_fp_constant64(to_reg, f64::from_bits(value)));
ret
} else if ty == F32 {
let mut ret = SmallVec::new();
ret.push(Inst::load_fp_constant32(
to_reg,
f32::from_bits(value as u32),
));
ret
} else {
// Must be an integer type.
debug_assert!(
ty == B1
|| ty == I8
|| ty == B8
|| ty == I16
|| ty == B16
|| ty == I32
|| ty == B32
|| ty == I64
|| ty == B64
);
Inst::load_constant(to_reg, value)
}
}
fn gen_zero_len_nop() -> Inst {
Inst::Nop0
}
@@ -1815,101 +1838,25 @@ impl MachInst for Inst {
}
}
fn gen_jump(blockindex: BlockIndex) -> Inst {
fn gen_jump(target: MachLabel) -> Inst {
Inst::Jump {
dest: BranchTarget::Block(blockindex),
dest: BranchTarget::Label(target),
}
}
fn with_block_rewrites(&mut self, block_target_map: &[BlockIndex]) {
match self {
&mut Inst::Jump { ref mut dest } => {
dest.map(block_target_map);
}
&mut Inst::CondBr {
ref mut taken,
ref mut not_taken,
..
} => {
taken.map(block_target_map);
not_taken.map(block_target_map);
}
&mut Inst::CondBrLowered { .. } => {
// See note in `is_term()`: this is used in open-coded sequences
// within blocks and should be left alone.
}
&mut Inst::CondBrLoweredCompound { .. } => {
panic!("with_block_rewrites called after branch lowering!");
}
_ => {}
}
fn reg_universe(flags: &settings::Flags) -> RealRegUniverse {
create_reg_universe(flags)
}
fn with_fallthrough_block(&mut self, fallthrough: Option<BlockIndex>) {
match self {
&mut Inst::CondBr {
taken,
not_taken,
kind,
} => {
if taken.as_block_index() == fallthrough
&& not_taken.as_block_index() == fallthrough
{
*self = Inst::Nop0;
} else if taken.as_block_index() == fallthrough {
*self = Inst::CondBrLowered {
target: not_taken,
kind: kind.invert(),
};
} else if not_taken.as_block_index() == fallthrough {
*self = Inst::CondBrLowered {
target: taken,
kind,
};
} else {
// We need a compound sequence (condbr / uncond-br).
*self = Inst::CondBrLoweredCompound {
taken,
not_taken,
kind,
};
}
}
&mut Inst::Jump { dest } => {
if dest.as_block_index() == fallthrough {
*self = Inst::Nop0;
}
}
_ => {}
}
}
fn with_block_offsets(&mut self, my_offset: CodeOffset, targets: &[CodeOffset]) {
match self {
&mut Inst::CondBrLowered { ref mut target, .. } => {
target.lower(targets, my_offset);
}
&mut Inst::CondBrLoweredCompound {
ref mut taken,
ref mut not_taken,
..
} => {
taken.lower(targets, my_offset);
not_taken.lower(targets, my_offset + 4);
}
&mut Inst::Jump { ref mut dest } => {
dest.lower(targets, my_offset);
}
&mut Inst::JTSequence {
targets: ref mut t, ..
} => {
for target in t.iter_mut() {
// offset+20: jumptable is 20 bytes into compound sequence.
target.lower(targets, my_offset + 20);
}
}
_ => {}
}
fn worst_case_size() -> CodeOffset {
// The maximum size, in bytes, of any `Inst`'s emitted code. We have at least one case of
// an 8-instruction sequence (saturating int-to-float conversions) with three embedded
// 64-bit f64 constants.
//
// Note that inline jump-tables handle island/pool insertion separately, so we do not need
// to account for them here (otherwise the worst case would be 2^31 * 4, clearly not
// feasible for other reasons).
44
}
}
@@ -2550,12 +2497,12 @@ impl ShowWithRRU for Inst {
}
}
}
&Inst::CondBrLowered {
&Inst::OneWayCondBr {
ref target,
ref kind,
} => {
let target = target.show_rru(mb_rru);
match &kind {
match kind {
&CondBrKind::Zero(reg) => {
let reg = reg.show_rru(mb_rru);
format!("cbz {}, {}", reg, target)
@@ -2570,30 +2517,15 @@ impl ShowWithRRU for Inst {
}
}
}
&Inst::CondBrLoweredCompound {
ref taken,
ref not_taken,
ref kind,
} => {
let first = Inst::CondBrLowered {
target: taken.clone(),
kind: kind.clone(),
};
let second = Inst::Jump {
dest: not_taken.clone(),
};
first.show_rru(mb_rru) + " ; " + &second.show_rru(mb_rru)
}
&Inst::IndirectBr { rn, .. } => {
let rn = rn.show_rru(mb_rru);
format!("br {}", rn)
}
&Inst::Brk => "brk #0".to_string(),
&Inst::Udf { .. } => "udf".to_string(),
&Inst::Adr { rd, ref label } => {
&Inst::Adr { rd, off } => {
let rd = rd.show_rru(mb_rru);
let label = label.show_rru(mb_rru);
format!("adr {}, {}", rd, label)
format!("adr {}, pc+{}", rd, off)
}
&Inst::Word4 { data } => format!("data.i32 {}", data),
&Inst::Word8 { data } => format!("data.i64 {}", data),
@@ -2683,15 +2615,135 @@ impl ShowWithRRU for Inst {
}
ret
}
&Inst::GetPinnedReg { rd } => {
let rd = rd.show_rru(mb_rru);
format!("get_pinned_reg {}", rd)
}
&Inst::SetPinnedReg { rm } => {
let rm = rm.show_rru(mb_rru);
format!("set_pinned_reg {}", rm)
}
&Inst::VirtualSPOffsetAdj { offset } => format!("virtual_sp_offset_adjust {}", offset),
&Inst::EmitIsland { needed_space } => format!("emit_island {}", needed_space),
}
}
}
//=============================================================================
// Label fixups and jump veneers.
/// Different forms of label references for different instruction formats.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LabelUse {
/// 19-bit branch offset (conditional branches). PC-rel, offset is imm << 2. Immediate is 19
/// signed bits, in bits 23:5. Used by cbz, cbnz, b.cond.
Branch19,
/// 26-bit branch offset (unconditional branches). PC-rel, offset is imm << 2. Immediate is 26
/// signed bits, in bits 25:0. Used by b, bl.
Branch26,
/// 19-bit offset for LDR (load literal). PC-rel, offset is imm << 2. Immediate is 19 signed bits,
/// in bits 23:5.
Ldr19,
/// 21-bit offset for ADR (get address of label). PC-rel, offset is not shifted. Immediate is
/// 21 signed bits, with high 19 bits in bits 23:5 and low 2 bits in bits 30:29.
Adr21,
/// 32-bit PC relative constant offset (from address of constant itself),
/// signed. Used in jump tables.
PCRel32,
}
impl MachInstLabelUse for LabelUse {
/// Alignment for veneer code. Every AArch64 instruction must be 4-byte-aligned.
const ALIGN: CodeOffset = 4;
/// Maximum PC-relative range (positive), inclusive.
fn max_pos_range(self) -> CodeOffset {
match self {
// 19-bit immediate, left-shifted by 2, for 21 bits of total range. Signed, so +2^20
// from zero. Likewise for two other shifted cases below.
LabelUse::Branch19 => (1 << 20) - 1,
LabelUse::Branch26 => (1 << 27) - 1,
LabelUse::Ldr19 => (1 << 20) - 1,
// Adr does not shift its immediate, so the 21-bit immediate gives 21 bits of total
// range.
LabelUse::Adr21 => (1 << 20) - 1,
LabelUse::PCRel32 => 0x7fffffff,
}
}
/// Maximum PC-relative range (negative).
fn max_neg_range(self) -> CodeOffset {
// All forms are two's-complement signed offsets, so the negative limit is one more than
// the positive limit.
self.max_pos_range() + 1
}
/// Size of window into code needed to do the patch.
fn patch_size(self) -> CodeOffset {
// Patch is on one instruction only for all of these label reference types.
4
}
/// Perform the patch.
fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
let pc_rel = (label_offset as i64) - (use_offset as i64);
debug_assert!(pc_rel <= self.max_pos_range() as i64);
debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
let pc_rel = pc_rel as u32;
let insn_word = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
let mask = match self {
LabelUse::Branch19 => 0x00ffffe0, // bits 23..5 inclusive
LabelUse::Branch26 => 0x03ffffff, // bits 25..0 inclusive
LabelUse::Ldr19 => 0x00ffffe0, // bits 23..5 inclusive
LabelUse::Adr21 => 0x60ffffe0, // bits 30..29, 23..5 inclusive
LabelUse::PCRel32 => 0xffffffff,
};
let pc_rel_shifted = match self {
LabelUse::Adr21 | LabelUse::PCRel32 => pc_rel,
_ => {
debug_assert!(pc_rel & 3 == 0);
pc_rel >> 2
}
};
let pc_rel_inserted = match self {
LabelUse::Branch19 | LabelUse::Ldr19 => (pc_rel_shifted & 0x7ffff) << 5,
LabelUse::Branch26 => pc_rel_shifted & 0x3ffffff,
LabelUse::Adr21 => (pc_rel_shifted & 0x7ffff) << 5 | (pc_rel_shifted & 0x180000) << 10,
LabelUse::PCRel32 => pc_rel_shifted,
};
let is_add = match self {
LabelUse::PCRel32 => true,
_ => false,
};
let insn_word = if is_add {
insn_word.wrapping_add(pc_rel_inserted)
} else {
(insn_word & !mask) | pc_rel_inserted
};
buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
}
/// Is a veneer supported for this label reference type?
fn supports_veneer(self) -> bool {
match self {
LabelUse::Branch19 => true, // veneer is a Branch26
_ => false,
}
}
/// How large is the veneer, if supported?
fn veneer_size(self) -> CodeOffset {
4
}
/// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return
/// an offset and label-use for the veneer's use of the original label.
fn generate_veneer(
self,
buffer: &mut [u8],
veneer_offset: CodeOffset,
) -> (CodeOffset, LabelUse) {
match self {
LabelUse::Branch19 => {
// veneer is a Branch26 (unconditional branch). Just encode directly here -- don't
// bother with constructing an Inst.
let insn_word = 0b000101 << 26;
buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
(veneer_offset, LabelUse::Branch26)
}
_ => panic!("Unsupported label-reference type for veneer generation!"),
}
}
}
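A worked example of this patching arithmetic, as a sketch against the methods above (offsets are illustrative; constants come from the code):

#[test]
fn branch26_patch_example() {
    // A `b` (LabelUse::Branch26) emitted at offset 0x40 with a zero offset,
    // patched once its label is bound at offset 0x100.
    let mut bytes = (0b000101u32 << 26).to_le_bytes();
    LabelUse::Branch26.patch(&mut bytes, 0x40, 0x100);
    // pc_rel = 0xc0 bytes = 0x30 words, inserted into bits 25:0.
    assert_eq!(u32::from_le_bytes(bytes), (0b000101u32 << 26) | 0x30);

    // Branch19 reaches only +/- 1 MiB; past that, the MachBuffer calls
    // `generate_veneer`, which rewrites the use as a Branch26 (~128 MiB reach).
    assert_eq!(LabelUse::Branch19.max_pos_range(), (1 << 20) - 1);
    assert!(LabelUse::Branch19.supports_veneer());
}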

View File

@@ -14,12 +14,14 @@ use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, TrapCode, Type};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::CodegenResult;
use crate::isa::aarch64::inst::*;
use crate::isa::aarch64::AArch64Backend;
use super::lower_inst;
use log::debug;
use regalloc::{Reg, RegClass, Writable};
//============================================================================
@@ -104,18 +106,11 @@ pub(crate) enum ResultRegImmShift {
}
//============================================================================
// Instruction input and output "slots".
// Instruction input "slots".
//
// We use these types to refer to operand numbers, and result numbers, together
// with the associated instruction, in a type-safe way.
/// Identifier for a particular output of an instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct InsnOutput {
pub(crate) insn: IRInst,
pub(crate) output: usize,
}
/// Identifier for a particular input of an instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct InsnInput {
@@ -123,93 +118,28 @@ pub(crate) struct InsnInput {
pub(crate) input: usize,
}
/// Producer of a value: either a previous instruction's output, or a register that will be
/// codegen'd separately.
/// Identifier for a particular output of an instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum InsnInputSource {
Output(InsnOutput),
Reg(Reg),
}
impl InsnInputSource {
fn as_output(self) -> Option<InsnOutput> {
match self {
InsnInputSource::Output(o) => Some(o),
_ => None,
}
}
}
fn get_input<C: LowerCtx<I = Inst>>(ctx: &mut C, output: InsnOutput, num: usize) -> InsnInput {
assert!(num <= ctx.num_inputs(output.insn));
InsnInput {
insn: output.insn,
input: num,
}
}
/// Convert an instruction input to a producing instruction's output if possible (in same BB), or a
/// register otherwise.
fn input_source<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> InsnInputSource {
if let Some((input_inst, result_num)) = ctx.input_inst(input.insn, input.input) {
let out = InsnOutput {
insn: input_inst,
output: result_num,
};
InsnInputSource::Output(out)
} else {
let reg = ctx.input(input.insn, input.input);
InsnInputSource::Reg(reg)
}
pub(crate) struct InsnOutput {
pub(crate) insn: IRInst,
pub(crate) output: usize,
}
//============================================================================
// Lowering: convert instruction outputs to result types.
// Lowering: convert instruction inputs to forms that we can use.
/// Lower an instruction output to a 64-bit constant, if possible.
pub(crate) fn output_to_const<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Option<u64> {
if out.output > 0 {
None
} else {
let inst_data = ctx.data(out.insn);
if inst_data.opcode() == Opcode::Null {
Some(0)
} else {
match inst_data {
&InstructionData::UnaryImm { opcode: _, imm } => {
// Only has Into for i64; we use u64 elsewhere, so we cast.
let imm: i64 = imm.into();
Some(imm as u64)
}
&InstructionData::UnaryBool { opcode: _, imm } => Some(u64::from(imm)),
&InstructionData::UnaryIeee32 { opcode: _, imm } => Some(u64::from(imm.bits())),
&InstructionData::UnaryIeee64 { opcode: _, imm } => Some(imm.bits()),
_ => None,
}
}
}
/// Lower an instruction input to a 64-bit constant, if possible.
pub(crate) fn input_to_const<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<u64> {
let input = ctx.get_input(input.insn, input.input);
input.constant
}
pub(crate) fn output_to_const_f32<C: LowerCtx<I = Inst>>(
/// Lower an instruction input to a constant register-shift amount, if possible.
pub(crate) fn input_to_shiftimm<C: LowerCtx<I = Inst>>(
ctx: &mut C,
out: InsnOutput,
) -> Option<f32> {
output_to_const(ctx, out).map(|value| f32::from_bits(value as u32))
}
pub(crate) fn output_to_const_f64<C: LowerCtx<I = Inst>>(
ctx: &mut C,
out: InsnOutput,
) -> Option<f64> {
output_to_const(ctx, out).map(|value| f64::from_bits(value))
}
/// Lower an instruction output to a constant register-shift amount, if possible.
pub(crate) fn output_to_shiftimm<C: LowerCtx<I = Inst>>(
ctx: &mut C,
out: InsnOutput,
input: InsnInput,
) -> Option<ShiftOpShiftImm> {
output_to_const(ctx, out).and_then(ShiftOpShiftImm::maybe_from_shift)
input_to_const(ctx, input).and_then(ShiftOpShiftImm::maybe_from_shift)
}
/// How to handle narrow values loaded into registers; see note on `narrow_mode`
@@ -237,9 +167,9 @@ impl NarrowValueMode {
}
}
/// Lower an instruction output to a reg.
/// Allocate a register for an instruction output and return it.
pub(crate) fn output_to_reg<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Writable<Reg> {
ctx.output(out.insn, out.output)
ctx.get_output(out.insn, out.output)
}
/// Lower an instruction input to a reg.
@@ -252,13 +182,26 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
input: InsnInput,
narrow_mode: NarrowValueMode,
) -> Reg {
debug!("input_to_reg: input {:?}", input);
let ty = ctx.input_ty(input.insn, input.input);
let from_bits = ty_bits(ty) as u8;
let in_reg = ctx.input(input.insn, input.input);
let inputs = ctx.get_input(input.insn, input.input);
let in_reg = if let Some(c) = inputs.constant {
// Generate constants fresh at each use to minimize long-range register pressure.
let to_reg = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty);
for inst in Inst::gen_constant(to_reg, c, ty).into_iter() {
ctx.emit(inst);
}
to_reg.to_reg()
} else {
ctx.use_input_reg(inputs);
inputs.reg
};
match (narrow_mode, from_bits) {
(NarrowValueMode::None, _) => in_reg,
(NarrowValueMode::ZeroExtend32, n) if n < 32 => {
let tmp = ctx.tmp(RegClass::I64, I32);
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
@@ -269,7 +212,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
tmp.to_reg()
}
(NarrowValueMode::SignExtend32, n) if n < 32 => {
let tmp = ctx.tmp(RegClass::I64, I32);
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
@@ -282,18 +225,23 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
(NarrowValueMode::ZeroExtend32, 32) | (NarrowValueMode::SignExtend32, 32) => in_reg,
(NarrowValueMode::ZeroExtend64, n) if n < 64 => {
let tmp = ctx.tmp(RegClass::I64, I32);
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
signed: false,
from_bits,
to_bits: 64,
});
tmp.to_reg()
if inputs.constant.is_some() {
// Constants are zero-extended to full 64-bit width on load already.
in_reg
} else {
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
signed: false,
from_bits,
to_bits: 64,
});
tmp.to_reg()
}
}
(NarrowValueMode::SignExtend64, n) if n < 64 => {
let tmp = ctx.tmp(RegClass::I64, I32);
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
@@ -313,8 +261,6 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
}
/// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
/// This does not actually codegen the source instruction; it just uses the
/// vreg into which the source instruction will generate its value.
///
/// The `narrow_mode` flag indicates whether the consumer of this value needs
/// the high bits clear. For many operations, such as an add/sub/mul or any
@@ -330,23 +276,18 @@ fn input_to_rs<C: LowerCtx<I = Inst>>(
input: InsnInput,
narrow_mode: NarrowValueMode,
) -> ResultRS {
if let InsnInputSource::Output(out) = input_source(ctx, input) {
let insn = out.insn;
assert!(out.output <= ctx.num_outputs(insn));
let inputs = ctx.get_input(input.insn, input.input);
if let Some((insn, 0)) = inputs.inst {
let op = ctx.data(insn).opcode();
if op == Opcode::Ishl {
let shiftee = get_input(ctx, out, 0);
let shift_amt = get_input(ctx, out, 1);
let shiftee = InsnInput { insn, input: 0 };
let shift_amt = InsnInput { insn, input: 1 };
// Can we get the shift amount as an immediate?
if let Some(shift_amt_out) = input_source(ctx, shift_amt).as_output() {
if let Some(shiftimm) = output_to_shiftimm(ctx, shift_amt_out) {
let reg = input_to_reg(ctx, shiftee, narrow_mode);
ctx.merged(insn);
ctx.merged(shift_amt_out.insn);
return ResultRS::RegShift(reg, ShiftOpAndAmt::new(ShiftOp::LSL, shiftimm));
}
if let Some(shiftimm) = input_to_shiftimm(ctx, shift_amt) {
let reg = input_to_reg(ctx, shiftee, narrow_mode);
return ResultRS::RegShift(reg, ShiftOpAndAmt::new(ShiftOp::LSL, shiftimm));
}
}
}
@@ -364,11 +305,10 @@ fn input_to_rse<C: LowerCtx<I = Inst>>(
input: InsnInput,
narrow_mode: NarrowValueMode,
) -> ResultRSE {
if let InsnInputSource::Output(out) = input_source(ctx, input) {
let insn = out.insn;
assert!(out.output <= ctx.num_outputs(insn));
let inputs = ctx.get_input(input.insn, input.input);
if let Some((insn, 0)) = inputs.inst {
let op = ctx.data(insn).opcode();
let out_ty = ctx.output_ty(insn, out.output);
let out_ty = ctx.output_ty(insn, 0);
let out_bits = ty_bits(out_ty);
// If `out_ty` is smaller than 32 bits and we need to zero- or sign-extend,
@@ -378,7 +318,7 @@ fn input_to_rse<C: LowerCtx<I = Inst>>(
&& ((narrow_mode.is_32bit() && out_bits < 32)
|| (!narrow_mode.is_32bit() && out_bits < 64))
{
let reg = output_to_reg(ctx, out);
let reg = input_to_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
let extendop = match (narrow_mode, out_bits) {
(NarrowValueMode::SignExtend32, 1) | (NarrowValueMode::SignExtend64, 1) => {
ExtendOp::SXTB
@@ -402,15 +342,14 @@ fn input_to_rse<C: LowerCtx<I = Inst>>(
(NarrowValueMode::ZeroExtend64, 32) => ExtendOp::UXTW,
_ => unreachable!(),
};
return ResultRSE::RegExtend(reg.to_reg(), extendop);
return ResultRSE::RegExtend(reg, extendop);
}
// Is this a zero-extend or sign-extend and can we handle that with a register-mode operator?
if op == Opcode::Uextend || op == Opcode::Sextend {
assert!(out_bits == 32 || out_bits == 64);
let sign_extend = op == Opcode::Sextend;
let extendee = get_input(ctx, out, 0);
let inner_ty = ctx.input_ty(extendee.insn, extendee.input);
let inner_ty = ctx.input_ty(insn, 0);
let inner_bits = ty_bits(inner_ty);
assert!(inner_bits < out_bits);
let extendop = match (sign_extend, inner_bits) {
@@ -424,8 +363,7 @@ fn input_to_rse<C: LowerCtx<I = Inst>>(
(false, 32) => ExtendOp::UXTW,
_ => unreachable!(),
};
let reg = input_to_reg(ctx, extendee, NarrowValueMode::None);
ctx.merged(insn);
let reg = input_to_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
return ResultRSE::RegExtend(reg, extendop);
}
}
@@ -438,12 +376,9 @@ pub(crate) fn input_to_rse_imm12<C: LowerCtx<I = Inst>>(
input: InsnInput,
narrow_mode: NarrowValueMode,
) -> ResultRSEImm12 {
if let InsnInputSource::Output(out) = input_source(ctx, input) {
if let Some(imm_value) = output_to_const(ctx, out) {
if let Some(i) = Imm12::maybe_from_u64(imm_value) {
ctx.merged(out.insn);
return ResultRSEImm12::Imm12(i);
}
if let Some(imm_value) = input_to_const(ctx, input) {
if let Some(i) = Imm12::maybe_from_u64(imm_value) {
return ResultRSEImm12::Imm12(i);
}
}
@@ -455,14 +390,11 @@ pub(crate) fn input_to_rs_immlogic<C: LowerCtx<I = Inst>>(
input: InsnInput,
narrow_mode: NarrowValueMode,
) -> ResultRSImmLogic {
if let InsnInputSource::Output(out) = input_source(ctx, input) {
if let Some(imm_value) = output_to_const(ctx, out) {
let ty = ctx.output_ty(out.insn, out.output);
let ty = if ty_bits(ty) < 32 { I32 } else { ty };
if let Some(i) = ImmLogic::maybe_from_u64(imm_value, ty) {
ctx.merged(out.insn);
return ResultRSImmLogic::ImmLogic(i);
}
if let Some(imm_value) = input_to_const(ctx, input) {
let ty = ctx.input_ty(input.insn, input.input);
let ty = if ty_bits(ty) < 32 { I32 } else { ty };
if let Some(i) = ImmLogic::maybe_from_u64(imm_value, ty) {
return ResultRSImmLogic::ImmLogic(i);
}
}
@@ -473,12 +405,9 @@ pub(crate) fn input_to_reg_immshift<C: LowerCtx<I = Inst>>(
ctx: &mut C,
input: InsnInput,
) -> ResultRegImmShift {
if let InsnInputSource::Output(out) = input_source(ctx, input) {
if let Some(imm_value) = output_to_const(ctx, out) {
if let Some(immshift) = ImmShift::maybe_from_u64(imm_value) {
ctx.merged(out.insn);
return ResultRegImmShift::ImmShift(immshift);
}
if let Some(imm_value) = input_to_const(ctx, input) {
if let Some(immshift) = ImmShift::maybe_from_u64(imm_value) {
return ResultRegImmShift::ImmShift(immshift);
}
}
@@ -600,7 +529,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
}
// Otherwise, generate add instructions.
let addr = ctx.tmp(RegClass::I64, I64);
let addr = ctx.alloc_tmp(RegClass::I64, I64);
// Get the const into a reg.
lower_constant_u64(ctx, addr.clone(), offset as u64);
@@ -612,7 +541,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
// In an addition, the stack register is the zero register, so divert it to another
// register just before doing the actual add.
let reg = if reg == stack_reg() {
let tmp = ctx.tmp(RegClass::I64, I64);
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
ctx.emit(Inst::Mov {
rd: tmp,
rm: stack_reg(),
@@ -823,24 +752,29 @@ pub(crate) fn inst_trapcode(data: &InstructionData) -> Option<TrapCode> {
}
}
/// Checks for an instance of `op` feeding the given input. Marks as merged (decrementing refcount) if so.
/// Checks for an instance of `op` feeding the given input.
pub(crate) fn maybe_input_insn<C: LowerCtx<I = Inst>>(
c: &mut C,
input: InsnInput,
op: Opcode,
) -> Option<IRInst> {
if let InsnInputSource::Output(out) = input_source(c, input) {
let data = c.data(out.insn);
let inputs = c.get_input(input.insn, input.input);
debug!(
"maybe_input_insn: input {:?} has options {:?}; looking for op {:?}",
input, inputs, op
);
if let Some((src_inst, _)) = inputs.inst {
let data = c.data(src_inst);
debug!(" -> input inst {:?}", data);
if data.opcode() == op {
c.merged(out.insn);
return Some(out.insn);
return Some(src_inst);
}
}
None
}
/// Checks for an instance of `op` feeding the given input, possibly via a conversion `conv` (e.g.,
/// Bint or a bitcast). Marks one or both as merged if so, as appropriate.
/// Bint or a bitcast).
///
/// FIXME cfallin 2020-03-30: this is really ugly. Factor out tree-matching stuff and make it
/// a bit more generic.
@@ -850,21 +784,19 @@ pub(crate) fn maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>(
op: Opcode,
conv: Opcode,
) -> Option<IRInst> {
if let Some(ret) = maybe_input_insn(c, input, op) {
return Some(ret);
}
if let InsnInputSource::Output(out) = input_source(c, input) {
let data = c.data(out.insn);
let inputs = c.get_input(input.insn, input.input);
if let Some((src_inst, _)) = inputs.inst {
let data = c.data(src_inst);
if data.opcode() == op {
return Some(src_inst);
}
if data.opcode() == conv {
let conv_insn = out.insn;
let conv_input = InsnInput {
insn: conv_insn,
input: 0,
};
if let Some(inner) = maybe_input_insn(c, conv_input, op) {
c.merged(conv_insn);
return Some(inner);
let inputs = c.get_input(src_inst, 0);
if let Some((src_inst, _)) = inputs.inst {
let data = c.data(src_inst);
if data.opcode() == op {
return Some(src_inst);
}
}
}
}
@@ -876,6 +808,7 @@ pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
insn: IRInst,
is_signed: bool,
) {
debug!("lower_icmp_or_ifcmp_to_flags: insn {}", insn);
let ty = ctx.input_ty(insn, 0);
let bits = ty_bits(ty);
let narrow_mode = match (bits <= 32, is_signed) {
@@ -897,6 +830,7 @@ pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
let ty = ctx.input_ty(insn, 0);
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
debug!("lower_icmp_or_ifcmp_to_flags: rn = {:?} rm = {:?}", rn, rm);
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
let rd = writable_zero_reg();
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
@@ -934,17 +868,21 @@ pub(crate) fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, i
impl LowerBackend for AArch64Backend {
type MInst = Inst;
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) {
lower_inst::lower_insn_to_regs(ctx, ir_inst);
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
lower_inst::lower_insn_to_regs(ctx, ir_inst)
}
fn lower_branch_group<C: LowerCtx<I = Inst>>(
&self,
ctx: &mut C,
branches: &[IRInst],
targets: &[BlockIndex],
fallthrough: Option<BlockIndex>,
) {
targets: &[MachLabel],
fallthrough: Option<MachLabel>,
) -> CodegenResult<()> {
lower_inst::lower_branch(ctx, branches, targets, fallthrough)
}
fn maybe_pinned_reg(&self) -> Option<Reg> {
Some(xreg(PINNED_REG))
}
}
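A minimal sketch of the constant-folding pattern these helpers now share, in the style of `input_to_shiftimm`; the helper name is hypothetical:

/// Hypothetical helper: fold a 12-bit arithmetic immediate directly from a
/// constant input, if the producing value is a known constant.
pub(crate) fn input_to_imm12<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<Imm12> {
    input_to_const(ctx, input).and_then(Imm12::maybe_from_u64)
}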

View File

@@ -1,11 +1,13 @@
//! Lower a single Cranelift instruction into vcode.
use crate::binemit::CodeOffset;
use crate::ir::condcodes::FloatCC;
use crate::ir::types::*;
use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, TrapCode};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::CodegenResult;
use crate::isa::aarch64::abi::*;
use crate::isa::aarch64::inst::*;
@@ -19,7 +21,10 @@ use smallvec::SmallVec;
use super::lower::*;
/// Actually codegen an instruction's results into registers.
pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx: &mut C,
insn: IRInst,
) -> CodegenResult<()> {
let op = ctx.data(insn).opcode();
let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn))
.map(|i| InsnInput { insn, input: i })
@@ -35,17 +40,17 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
match op {
Opcode::Iconst | Opcode::Bconst | Opcode::Null => {
let value = output_to_const(ctx, outputs[0]).unwrap();
let value = ctx.get_constant(insn).unwrap();
let rd = output_to_reg(ctx, outputs[0]);
lower_constant_u64(ctx, rd, value);
}
Opcode::F32const => {
let value = output_to_const_f32(ctx, outputs[0]).unwrap();
let value = f32::from_bits(ctx.get_constant(insn).unwrap() as u32);
let rd = output_to_reg(ctx, outputs[0]);
lower_constant_f32(ctx, rd, value);
}
Opcode::F64const => {
let value = output_to_const_f64(ctx, outputs[0]).unwrap();
let value = f64::from_bits(ctx.get_constant(insn).unwrap());
let rd = output_to_reg(ctx, outputs[0]);
lower_constant_f64(ctx, rd, value);
}
@@ -79,8 +84,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
} else {
VecALUOp::UQAddScalar
};
let va = ctx.tmp(RegClass::V128, I128);
let vb = ctx.tmp(RegClass::V128, I128);
let va = ctx.alloc_tmp(RegClass::V128, I128);
let vb = ctx.alloc_tmp(RegClass::V128, I128);
let ra = input_to_reg(ctx, inputs[0], narrow_mode);
let rb = input_to_reg(ctx, inputs[1], narrow_mode);
let rd = output_to_reg(ctx, outputs[0]);
@@ -110,8 +115,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
} else {
VecALUOp::UQSubScalar
};
let va = ctx.tmp(RegClass::V128, I128);
let vb = ctx.tmp(RegClass::V128, I128);
let va = ctx.alloc_tmp(RegClass::V128, I128);
let vb = ctx.alloc_tmp(RegClass::V128, I128);
let ra = input_to_reg(ctx, inputs[0], narrow_mode);
let rb = input_to_reg(ctx, inputs[1], narrow_mode);
let rd = output_to_reg(ctx, outputs[0]);
@@ -271,7 +276,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
// Check for divide by 0.
let branch_size = 8;
ctx.emit(Inst::CondBrLowered {
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(branch_size),
kind: CondBrKind::NotZero(rm),
});
@@ -297,7 +302,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
// Check for divide by 0.
let branch_size = 20;
ctx.emit(Inst::CondBrLowered {
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(branch_size),
kind: CondBrKind::Zero(rm),
});
@@ -324,7 +329,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
nzcv: NZCV::new(false, false, false, false),
cond: Cond::Eq,
});
ctx.emit(Inst::CondBrLowered {
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(12),
kind: CondBrKind::Cond(Cond::Vc),
});
@@ -337,7 +342,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
// Check for divide by 0.
let branch_size = 8;
ctx.emit(Inst::CondBrLowered {
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(branch_size),
kind: CondBrKind::NotZero(rm),
});
@@ -493,7 +498,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
// ignored (because of the implicit masking done by the instruction),
// so this is equivalent to negating the input.
let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
let tmp = ctx.tmp(RegClass::I64, ty);
let tmp = ctx.alloc_tmp(RegClass::I64, ty);
ctx.emit(Inst::AluRRR {
alu_op,
rd: tmp,
@@ -516,7 +521,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
// Really ty_bits_size - rn, but the upper bits of the result are
// ignored (because of the implicit masking done by the instruction),
// so this is equivalent to negating the input.
let tmp = ctx.tmp(RegClass::I64, I32);
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::Sub32,
rd: tmp,
@@ -529,7 +534,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
};
// Explicitly mask the rotation count.
let tmp_masked_rm = ctx.tmp(RegClass::I64, I32);
let tmp_masked_rm = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::AluRRImmLogic {
alu_op: ALUOp::And32,
rd: tmp_masked_rm,
@@ -538,8 +543,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
});
let tmp_masked_rm = tmp_masked_rm.to_reg();
let tmp1 = ctx.tmp(RegClass::I64, I32);
let tmp2 = ctx.tmp(RegClass::I64, I32);
let tmp1 = ctx.alloc_tmp(RegClass::I64, I32);
let tmp2 = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::AluRRImm12 {
alu_op: ALUOp::Sub32,
rd: tmp1,
@@ -578,7 +583,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
}
immshift.imm &= ty_bits_size - 1;
let tmp1 = ctx.tmp(RegClass::I64, I32);
let tmp1 = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::AluRRImmShift {
alu_op: ALUOp::Lsr32,
rd: tmp1,
@@ -683,7 +688,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
// and fix the sequence below to work properly for this.
let narrow_mode = NarrowValueMode::ZeroExtend64;
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
let tmp = ctx.tmp(RegClass::I64, I64);
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
// If this is a 32-bit Popcnt, use Lsr32 to clear the top 32 bits of the register, then
// the rest of the code is identical to the 64-bit version.
@@ -992,7 +997,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
}
Opcode::Bitselect => {
let tmp = ctx.tmp(RegClass::I64, I64);
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
let rd = output_to_reg(ctx, outputs[0]);
let rcond = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rn = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
@@ -1211,7 +1216,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
// Branch around the break instruction with inverted cond. Go straight to lowered
// one-target form; this is logically part of a single-in single-out template lowering.
let cond = cond.invert();
ctx.emit(Inst::CondBrLowered {
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(8),
kind: CondBrKind::Cond(cond),
});
@@ -1301,11 +1306,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
Opcode::GetPinnedReg => {
let rd = output_to_reg(ctx, outputs[0]);
ctx.emit(Inst::GetPinnedReg { rd });
ctx.emit(Inst::mov(rd, xreg(PINNED_REG)));
}
Opcode::SetPinnedReg => {
let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
ctx.emit(Inst::SetPinnedReg { rm });
ctx.emit(Inst::mov(writable_xreg(PINNED_REG), rm));
}
Opcode::Spill
@@ -1469,8 +1475,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = output_to_reg(ctx, outputs[0]);
let tmp1 = ctx.tmp(RegClass::I64, I64);
let tmp2 = ctx.tmp(RegClass::I64, I64);
let tmp1 = ctx.alloc_tmp(RegClass::I64, I64);
let tmp2 = ctx.alloc_tmp(RegClass::I64, I64);
ctx.emit(Inst::MovFromVec64 { rd: tmp1, rn: rn });
ctx.emit(Inst::MovFromVec64 { rd: tmp2, rn: rm });
let imml = if bits == 32 {
@@ -1533,14 +1539,14 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
} else {
ctx.emit(Inst::FpuCmp64 { rn, rm: rn });
}
ctx.emit(Inst::CondBrLowered {
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(8),
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::Ordered)),
});
let trap_info = (ctx.srcloc(insn), TrapCode::BadConversionToInteger);
ctx.emit(Inst::Udf { trap_info });
let tmp = ctx.tmp(RegClass::V128, I128);
let tmp = ctx.alloc_tmp(RegClass::V128, I128);
// Check that the input is in range, with "truncate towards zero" semantics. This means
// we allow values that are slightly out of range:
@@ -1574,7 +1580,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
rn,
rm: tmp.to_reg(),
});
ctx.emit(Inst::CondBrLowered {
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(8),
kind: CondBrKind::Cond(lower_fp_condcode(low_cond)),
});
@@ -1587,7 +1593,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
rn,
rm: tmp.to_reg(),
});
ctx.emit(Inst::CondBrLowered {
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(8),
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan)),
});
@@ -1617,7 +1623,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
rn,
rm: tmp.to_reg(),
});
ctx.emit(Inst::CondBrLowered {
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(8),
kind: CondBrKind::Cond(lower_fp_condcode(low_cond)),
});
@@ -1630,7 +1636,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
rn,
rm: tmp.to_reg(),
});
ctx.emit(Inst::CondBrLowered {
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(8),
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan)),
});
@@ -1706,8 +1712,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
_ => unreachable!(),
};
let rtmp1 = ctx.tmp(RegClass::V128, in_ty);
let rtmp2 = ctx.tmp(RegClass::V128, in_ty);
let rtmp1 = ctx.alloc_tmp(RegClass::V128, in_ty);
let rtmp2 = ctx.alloc_tmp(RegClass::V128, in_ty);
if in_bits == 32 {
ctx.emit(Inst::LoadFpuConst32 {
@@ -1862,14 +1868,16 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
Opcode::AvgRound => unimplemented!(),
Opcode::TlsValue => unimplemented!(),
}
Ok(())
}
pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
ctx: &mut C,
branches: &[IRInst],
targets: &[BlockIndex],
fallthrough: Option<BlockIndex>,
) {
targets: &[MachLabel],
fallthrough: Option<MachLabel>,
) -> CodegenResult<()> {
// A block should end with at most two branches. The first may be a
// conditional branch; a conditional branch can be followed only by an
// unconditional branch or fallthrough. Otherwise, if only one branch,
@@ -1883,18 +1891,14 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
let op0 = ctx.data(branches[0]).opcode();
let op1 = ctx.data(branches[1]).opcode();
//println!(
// "lowering two-branch group: opcodes are {:?} and {:?}",
// op0, op1
//);
assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough);
let taken = BranchTarget::Block(targets[0]);
let taken = BranchTarget::Label(targets[0]);
let not_taken = match op1 {
Opcode::Jump => BranchTarget::Block(targets[1]),
Opcode::Fallthrough => BranchTarget::Block(fallthrough.unwrap()),
Opcode::Jump => BranchTarget::Label(targets[1]),
Opcode::Fallthrough => BranchTarget::Label(fallthrough.unwrap()),
_ => unreachable!(), // assert above.
};
match op0 {
Opcode::Brz | Opcode::Brnz => {
let flag_input = InsnInput {
@@ -1954,6 +1958,8 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
Opcode::BrIcmp => {
let condcode = inst_condcode(ctx.data(branches[0])).unwrap();
let cond = lower_condcode(condcode);
let kind = CondBrKind::Cond(cond);
let is_signed = condcode_is_signed(condcode);
let ty = ctx.input_ty(branches[0], 0);
let bits = ty_bits(ty);
@@ -1986,13 +1992,15 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::CondBr {
taken,
not_taken,
kind: CondBrKind::Cond(cond),
kind,
});
}
Opcode::Brif => {
let condcode = inst_condcode(ctx.data(branches[0])).unwrap();
let cond = lower_condcode(condcode);
let kind = CondBrKind::Cond(cond);
let is_signed = condcode_is_signed(condcode);
let flag_input = InsnInput {
insn: branches[0],
@@ -2003,7 +2011,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::CondBr {
taken,
not_taken,
kind: CondBrKind::Cond(cond),
kind,
});
} else {
// If the ifcmp result is actually placed in a
@@ -2013,7 +2021,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::CondBr {
taken,
not_taken,
kind: CondBrKind::Cond(cond),
kind,
});
}
}
@@ -2021,6 +2029,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
Opcode::Brff => {
let condcode = inst_fp_condcode(ctx.data(branches[0])).unwrap();
let cond = lower_fp_condcode(condcode);
let kind = CondBrKind::Cond(cond);
let flag_input = InsnInput {
insn: branches[0],
input: 0,
@@ -2030,7 +2039,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::CondBr {
taken,
not_taken,
kind: CondBrKind::Cond(cond),
kind,
});
} else {
// If the ffcmp result is actually placed in a
@@ -2040,7 +2049,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::CondBr {
taken,
not_taken,
kind: CondBrKind::Cond(cond),
kind,
});
}
}
@@ -2057,12 +2066,15 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
// fills in `targets[0]` with our fallthrough block, so this
// is valid for both Jump and Fallthrough.
ctx.emit(Inst::Jump {
dest: BranchTarget::Block(targets[0]),
dest: BranchTarget::Label(targets[0]),
});
}
Opcode::BrTable => {
// Expand `br_table index, default, JT` to:
//
// emit_island // this forces an island at this point
// // if the jumptable would push us past
// // the deadline
// subs idx, #jt_size
// b.hs default
// adr vTmp1, PC+16
@@ -2072,6 +2084,11 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
// [jumptable offsets relative to JT base]
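// Note: `targets[0]` is the default target and `targets[1..]` are the
// jump-table entries (see `default_target` and the `.skip(1)` below), hence
// `jt_size = targets.len() - 1`.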
let jt_size = targets.len() - 1;
assert!(jt_size <= std::u32::MAX as usize);
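// Every AArch64 instruction and every 32-bit table entry is 4 bytes, so
// reserve room for the six fixed instructions of the expansion above plus
// one word per jump-table entry.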
ctx.emit(Inst::EmitIsland {
needed_space: 4 * (6 + jt_size) as CodeOffset,
});
let ridx = input_to_reg(
ctx,
InsnInput {
@@ -2081,8 +2098,8 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
NarrowValueMode::ZeroExtend32,
);
let rtmp1 = ctx.tmp(RegClass::I64, I32);
let rtmp2 = ctx.tmp(RegClass::I64, I32);
let rtmp1 = ctx.alloc_tmp(RegClass::I64, I32);
let rtmp2 = ctx.alloc_tmp(RegClass::I64, I32);
// Bounds-check and branch to default.
if let Some(imm12) = Imm12::maybe_from_u64(jt_size as u64) {
@@ -2101,10 +2118,10 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
rm: rtmp1.to_reg(),
});
}
let default_target = BranchTarget::Block(targets[0]);
ctx.emit(Inst::CondBrLowered {
kind: CondBrKind::Cond(Cond::Hs), // unsigned >=
let default_target = BranchTarget::Label(targets[0]);
ctx.emit(Inst::OneWayCondBr {
target: default_target.clone(),
kind: CondBrKind::Cond(Cond::Hs), // unsigned >=
});
// Emit the compound instruction that does:
@@ -2125,9 +2142,9 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
let jt_targets: Vec<BranchTarget> = targets
.iter()
.skip(1)
.map(|bix| BranchTarget::Block(*bix))
.map(|bix| BranchTarget::Label(*bix))
.collect();
let targets_for_term: Vec<BlockIndex> = targets.to_vec();
let targets_for_term: Vec<MachLabel> = targets.to_vec();
ctx.emit(Inst::JTSequence {
ridx,
rtmp1,
@@ -2140,4 +2157,6 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
_ => panic!("Unknown branch type!"),
}
}
Ok(())
}


@@ -15,7 +15,7 @@ use target_lexicon::{Aarch64Architecture, Architecture, Triple};
// New backend:
mod abi;
mod inst;
pub(crate) mod inst;
mod lower;
mod lower_inst;
@@ -59,7 +59,7 @@ impl MachBackend for AArch64Backend {
) -> CodegenResult<MachCompileResult> {
let flags = self.flags();
let vcode = self.compile_vcode(func, flags.clone())?;
let sections = vcode.emit();
let buffer = vcode.emit();
let frame_size = vcode.frame_size();
let disasm = if want_disasm {
@@ -68,8 +68,10 @@ impl MachBackend for AArch64Backend {
None
};
let buffer = buffer.finish();
Ok(MachCompileResult {
sections,
buffer,
frame_size,
disasm,
})
@@ -140,8 +142,8 @@ mod test {
Triple::from_str("aarch64").unwrap(),
settings::Flags::new(shared_flags),
);
let sections = backend.compile_function(&mut func, false).unwrap().sections;
let code = &sections.sections[0].data;
let buffer = backend.compile_function(&mut func, false).unwrap().buffer;
let code = &buffer.data[..];
// stp x29, x30, [sp, #-16]!
// mov x29, sp
@@ -155,7 +157,7 @@ mod test {
0x01, 0x0b, 0xbf, 0x03, 0x00, 0x91, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6,
];
assert_eq!(code, &golden);
assert_eq!(code, &golden[..]);
}
#[test]
@@ -198,34 +200,32 @@ mod test {
let result = backend
.compile_function(&mut func, /* want_disasm = */ false)
.unwrap();
let code = &result.sections.sections[0].data;
let code = &result.buffer.data[..];
// stp x29, x30, [sp, #-16]!
// mov x29, sp
// mov x1, x0
// mov x0, #0x1234
// add w1, w1, w0
// mov w2, w1
// cbz x2, ...
// mov w2, w1
// cbz x2, ...
// sub w0, w1, w0
// mov x1, #0x1234 // #4660
// add w0, w0, w1
// mov w1, w0
// cbnz x1, 0x28
// mov x1, #0x1234 // #4660
// add w1, w0, w1
// mov w1, w1
// cbnz x1, 0x18
// mov w1, w0
// cbnz x1, 0x18
// mov x1, #0x1234 // #4660
// sub w0, w0, w1
// mov sp, x29
// ldp x29, x30, [sp], #16
// ret
// add w2, w1, w0
// mov w2, w2
// cbnz x2, ... <---- compound branch (cond / uncond)
// b ... <----
let golden = vec![
0xfd, 0x7b, 0xbf, 0xa9, 0xfd, 0x03, 0x00, 0x91, 0xe1, 0x03, 0x00, 0xaa, 0x80, 0x46,
0x82, 0xd2, 0x21, 0x00, 0x00, 0x0b, 0xe2, 0x03, 0x01, 0x2a, 0xe2, 0x00, 0x00, 0xb4,
0xe2, 0x03, 0x01, 0x2a, 0xa2, 0x00, 0x00, 0xb5, 0x20, 0x00, 0x00, 0x4b, 0xbf, 0x03,
0x00, 0x91, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6, 0x22, 0x00, 0x00, 0x0b,
0xe2, 0x03, 0x02, 0x2a, 0xc2, 0xff, 0xff, 0xb5, 0xf7, 0xff, 0xff, 0x17,
253, 123, 191, 169, 253, 3, 0, 145, 129, 70, 130, 210, 0, 0, 1, 11, 225, 3, 0, 42, 161,
0, 0, 181, 129, 70, 130, 210, 1, 0, 1, 11, 225, 3, 1, 42, 161, 255, 255, 181, 225, 3,
0, 42, 97, 255, 255, 181, 129, 70, 130, 210, 0, 0, 1, 75, 191, 3, 0, 145, 253, 123,
193, 168, 192, 3, 95, 214,
];
assert_eq!(code, &golden);
assert_eq!(code, &golden[..]);
}
}


@@ -84,7 +84,7 @@ mod x64;
mod arm32;
#[cfg(feature = "arm64")]
mod aarch64;
pub(crate) mod aarch64;
#[cfg(feature = "unwind")]
pub mod unwind;


@@ -5,7 +5,6 @@ use std::string::{String, ToString};
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector};
use crate::binemit::CodeOffset;
use crate::machinst::*;
use super::regs::show_ireg_sized;
@@ -375,43 +374,27 @@ impl fmt::Debug for CC {
/// from end of current instruction).
#[derive(Clone, Copy, Debug)]
pub enum BranchTarget {
/// An unresolved reference to a BlockIndex, as passed into
/// `lower_branch_group()`.
Block(BlockIndex),
/// An unresolved reference to a MachLabel.
Label(MachLabel),
/// A resolved reference to another instruction, after
/// `Inst::with_block_offsets()`. This offset is in bytes.
ResolvedOffset(BlockIndex, isize),
/// A resolved reference to another instruction, in bytes.
ResolvedOffset(isize),
}
impl ShowWithRRU for BranchTarget {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
match self {
BranchTarget::Block(bix) => format!("(Block {})", bix),
BranchTarget::ResolvedOffset(bix, offs) => format!("(Block {}, offset {})", bix, offs),
BranchTarget::Label(l) => format!("{:?}", l),
BranchTarget::ResolvedOffset(offs) => format!("(offset {})", offs),
}
}
}
impl BranchTarget {
/// Lower the branch target given offsets of each block.
pub fn lower(&mut self, targets: &[CodeOffset], my_offset: CodeOffset) {
/// Get the label.
pub fn as_label(&self) -> Option<MachLabel> {
match self {
&mut BranchTarget::Block(bix) => {
let bix = bix as usize;
assert!(bix < targets.len());
let block_offset_in_func = targets[bix];
let branch_offset = (block_offset_in_func as isize) - (my_offset as isize);
*self = BranchTarget::ResolvedOffset(bix as BlockIndex, branch_offset);
}
&mut BranchTarget::ResolvedOffset(..) => {}
}
}
/// Get the block index.
pub fn as_block_index(&self) -> Option<BlockIndex> {
match self {
&BranchTarget::Block(bix) => Some(bix),
&BranchTarget::Label(l) => Some(l),
_ => None,
}
}
@@ -421,31 +404,17 @@ impl BranchTarget {
/// byte of the target. It does not take into account the Intel-specific
/// rule that a branch offset is encoded as relative to the start of the
/// following instruction. That is a problem for the emitter to deal
/// with.
pub fn as_offset_i32(&self) -> Option<i32> {
/// with. If a label, returns zero.
pub fn as_offset32_or_zero(&self) -> i32 {
match self {
&BranchTarget::ResolvedOffset(_, off) => {
&BranchTarget::ResolvedOffset(off) => {
// Leave a bit of slack so that the emitter is guaranteed to
// be able to add the length of the jump instruction encoding
// to this value and still have a value in signed-32 range.
if off >= -0x7FFF_FF00isize && off <= 0x7FFF_FF00isize {
Some(off as i32)
} else {
None
}
assert!(off >= -0x7FFF_FF00 && off <= 0x7FFF_FF00);
off as i32
}
_ => None,
}
}
/// Map the block index given a transform map.
pub fn map(&mut self, block_index_map: &[BlockIndex]) {
match self {
&mut BranchTarget::Block(ref mut bix) => {
let n = block_index_map[*bix as usize];
*bix = n;
}
_ => panic!("BranchTarget::map() called on already-lowered BranchTarget!"),
_ => 0,
}
}
}


@@ -80,8 +80,8 @@ const F_PREFIX_66: u32 = 4;
/// deleted if it is redundant (0x40). Note that for a 64-bit operation, the
/// REX prefix will normally never be redundant, since REX.W must be 1 to
/// indicate a 64-bit operation.
fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE<O: MachSectionOutput>(
sink: &mut O,
fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
sink: &mut MachBuffer<Inst>,
opcodes: u32,
mut numOpcodes: usize,
encG: u8,
@@ -199,8 +199,8 @@ fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE<O: MachSectionOutput>(
/// emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE, except it is for the case
/// where the E operand is a register rather than memory. Hence it is much
/// simpler.
fn emit_REX_OPCODES_MODRM_encG_encE<O: MachSectionOutput>(
sink: &mut O,
fn emit_REX_OPCODES_MODRM_encG_encE(
sink: &mut MachBuffer<Inst>,
opcodes: u32,
mut numOpcodes: usize,
encG: u8,
@@ -240,8 +240,8 @@ fn emit_REX_OPCODES_MODRM_encG_encE<O: MachSectionOutput>(
// These are merely wrappers for the above two functions that facilitate passing
// actual `Reg`s rather than their encodings.
fn emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE<O: MachSectionOutput>(
sink: &mut O,
fn emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink: &mut MachBuffer<Inst>,
opcodes: u32,
numOpcodes: usize,
regG: Reg,
@@ -253,8 +253,8 @@ fn emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE<O: MachSectionOutput>(
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(sink, opcodes, numOpcodes, encG, memE, flags);
}
fn emit_REX_OPCODES_MODRM_regG_regE<O: MachSectionOutput>(
sink: &mut O,
fn emit_REX_OPCODES_MODRM_regG_regE(
sink: &mut MachBuffer<Inst>,
opcodes: u32,
numOpcodes: usize,
regG: Reg,
@@ -268,7 +268,7 @@ fn emit_REX_OPCODES_MODRM_regG_regE<O: MachSectionOutput>(
}
/// Write a suitable number of bits from an imm64 to the sink.
fn emit_simm<O: MachSectionOutput>(sink: &mut O, size: u8, simm32: u32) {
fn emit_simm(sink: &mut MachBuffer<Inst>, size: u8, simm32: u32) {
match size {
8 | 4 => sink.put4(simm32),
2 => sink.put2(simm32 as u16),
@@ -329,7 +329,7 @@ fn emit_simm<O: MachSectionOutput>(sink: &mut O, size: u8, simm32: u32) {
///
/// * there's a shorter encoding for shl/shr/sar by a 1-bit immediate. (Do we
/// care?)
pub(crate) fn emit<O: MachSectionOutput>(inst: &Inst, sink: &mut O) {
pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
match inst {
Inst::Nop { len: 0 } => {}
Inst::Alu_RMI_R {
@@ -808,55 +808,59 @@ pub(crate) fn emit<O: MachSectionOutput>(inst: &Inst, sink: &mut O) {
}
Inst::Ret {} => sink.put1(0xC3),
Inst::JmpKnown {
dest: BranchTarget::Block(..),
} => {
// Computation of block offsets/sizes.
sink.put1(0);
sink.put4(0);
}
Inst::JmpKnown {
dest: BranchTarget::ResolvedOffset(_bix, offset),
} if *offset >= -0x7FFF_FF00 && *offset <= 0x7FFF_FF00 => {
// And now for real
let mut offs_i32 = *offset as i32;
offs_i32 -= 5;
let offs_u32 = offs_i32 as u32;
Inst::JmpKnown { dest } => {
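// `jmp rel32` is 5 bytes (0xE9 + imm32). A resolved offset is relative to the
// start of this instruction, while x86 encodes the displacement relative to the
// next instruction, hence the -5 adjustment. For a label target the offset is
// zero here, and the bytes we emit act as the addend for the Rel32 fixup
// registered below.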
let disp = dest.as_offset32_or_zero() - 5;
let disp = disp as u32;
let br_start = sink.cur_offset();
sink.put1(0xE9);
sink.put4(offs_u32);
let br_disp_off = sink.cur_offset();
sink.put4(disp);
let br_end = sink.cur_offset();
if let Some(l) = dest.as_label() {
sink.use_label_at_offset(br_disp_off, l, LabelUse::Rel32);
sink.add_uncond_branch(br_start, br_end, l);
}
}
//
// ** Inst::JmpCondSymm XXXX should never happen
//
Inst::JmpCond {
cc: _,
target: BranchTarget::Block(..),
} => {
// This case occurs when we are computing block offsets / sizes,
// prior to lowering block-index targets to concrete-offset targets.
// Only the size matters, so let's emit 6 bytes, as below.
sink.put1(0);
sink.put1(0);
sink.put4(0);
}
Inst::JmpCond {
Inst::JmpCondSymm {
cc,
target: BranchTarget::ResolvedOffset(_bix, offset),
} if *offset >= -0x7FFF_FF00 && *offset <= 0x7FFF_FF00 => {
taken,
not_taken,
} => {
// Conditional part.
// This insn is 6 bytes long. Currently `offset` is relative to
// the start of this insn, but the Intel encoding requires it to
// be relative to the start of the next instruction. Hence the
// adjustment.
let mut offs_i32 = *offset as i32;
offs_i32 -= 6;
let offs_u32 = offs_i32 as u32;
let taken_disp = taken.as_offset32_or_zero() - 6;
let taken_disp = taken_disp as u32;
let cond_start = sink.cur_offset();
sink.put1(0x0F);
sink.put1(0x80 + cc.get_enc());
sink.put4(offs_u32);
let cond_disp_off = sink.cur_offset();
sink.put4(taken_disp);
let cond_end = sink.cur_offset();
if let Some(l) = taken.as_label() {
sink.use_label_at_offset(cond_disp_off, l, LabelUse::Rel32);
let inverted: [u8; 6] =
[0x0F, 0x80 + (cc.invert().get_enc()), 0xFA, 0xFF, 0xFF, 0xFF];
sink.add_cond_branch(cond_start, cond_end, l, &inverted[..]);
}
// Unconditional part.
let nt_disp = not_taken.as_offset32_or_zero() - 5;
let nt_disp = nt_disp as u32;
let uncond_start = sink.cur_offset();
sink.put1(0xE9);
let uncond_disp_off = sink.cur_offset();
sink.put4(nt_disp);
let uncond_end = sink.cur_offset();
if let Some(l) = not_taken.as_label() {
sink.use_label_at_offset(uncond_disp_off, l, LabelUse::Rel32);
sink.add_uncond_branch(uncond_start, uncond_end, l);
}
}
//
// ** Inst::JmpCondCompound XXXX should never happen
//
Inst::JmpUnknown { target } => {
match target {
RM::R { reg } => {


@@ -2180,19 +2180,11 @@ fn test_x64_emit() {
let actual_printing = insn.show_rru(Some(&rru));
assert_eq!(expected_printing, actual_printing);
// Check the encoding is as expected.
let text_size = {
let mut code_sec = MachSectionSize::new(0);
insn.emit(&mut code_sec, &flags, &mut Default::default());
code_sec.size()
};
let mut sink = test_utils::TestCodeSink::new();
let mut sections = MachSections::new();
let code_idx = sections.add_section(0, text_size);
let code_sec = sections.get_section(code_idx);
insn.emit(code_sec, &flags, &mut Default::default());
sections.emit(&mut sink);
let mut buffer = MachBuffer::new();
insn.emit(&mut buffer, &flags, &mut Default::default());
let buffer = buffer.finish();
buffer.emit(&mut sink);
let actual_encoding = &sink.stringify();
assert_eq!(expected_encoding, actual_encoding);
}


@@ -4,6 +4,8 @@
#![allow(non_snake_case)]
#![allow(non_camel_case_types)]
use core::convert::TryFrom;
use smallvec::SmallVec;
use std::fmt;
use std::string::{String, ToString};
@@ -16,6 +18,7 @@ use crate::ir::types::{B1, B128, B16, B32, B64, B8, F32, F64, I128, I16, I32, I6
use crate::ir::ExternalName;
use crate::ir::Type;
use crate::machinst::*;
use crate::settings::Flags;
use crate::{settings, CodegenError, CodegenResult};
pub mod args;
@@ -25,7 +28,7 @@ mod emit_tests;
pub mod regs;
use args::*;
use regs::show_ireg_sized;
use regs::{create_reg_universe_systemv, show_ireg_sized};
//=============================================================================
// Instructions (top level): definition
@@ -136,34 +139,15 @@ pub(crate) enum Inst {
JmpKnown { dest: BranchTarget },
/// jcond cond target target
// Symmetrical two-way conditional branch.
// Should never reach the emitter.
/// Symmetrical two-way conditional branch.
/// Emitted as a compound sequence; the MachBuffer will shrink it
/// as appropriate.
JmpCondSymm {
cc: CC,
taken: BranchTarget,
not_taken: BranchTarget,
},
/// Lowered conditional branch: contains the original instruction, and a
/// flag indicating whether to invert the taken-condition or not. Only one
/// BranchTarget is retained, and the other is implicitly the next
/// instruction, given the final basic-block layout.
JmpCond {
cc: CC,
//inverted: bool, is this needed?
target: BranchTarget,
},
/// As for `CondBrLowered`, but represents a condbr/uncond-br sequence (two
/// actual machine instructions). Needed when the final block layout implies
/// that neither arm of a conditional branch targets the fallthrough block.
// Should never reach the emitter
JmpCondCompound {
cc: CC,
taken: BranchTarget,
not_taken: BranchTarget,
},
/// jmpq (reg mem)
JmpUnknown { target: RM },
}
@@ -298,18 +282,6 @@ impl Inst {
}
}
pub(crate) fn jmp_cond(cc: CC, target: BranchTarget) -> Inst {
Inst::JmpCond { cc, target }
}
pub(crate) fn jmp_cond_compound(cc: CC, taken: BranchTarget, not_taken: BranchTarget) -> Inst {
Inst::JmpCondCompound {
cc,
taken,
not_taken,
}
}
pub(crate) fn jmp_unknown(target: RM) -> Inst {
Inst::JmpUnknown { target }
}
@@ -485,13 +457,6 @@ impl ShowWithRRU for Inst {
not_taken.show_rru(mb_rru)
),
//
Inst::JmpCond { cc, ref target } => format!(
"{} {}",
ljustify2("j".to_string(), cc.to_string()),
target.show_rru(None)
),
//
Inst::JmpCondCompound { .. } => "**JmpCondCompound**".to_string(),
Inst::JmpUnknown { target } => format!(
"{} *{}",
ljustify("jmp".to_string()),
@@ -601,18 +566,10 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
taken: _,
not_taken: _,
} => {}
//
// ** JmpCond
//
// ** JmpCondCompound
//
//Inst::JmpUnknown { target } => {
// target.get_regs_as_uses(collector);
//}
Inst::Nop { .. }
| Inst::JmpCond { .. }
| Inst::JmpCondCompound { .. }
| Inst::JmpUnknown { .. } => unimplemented!("x64_get_regs inst"),
Inst::Nop { .. } | Inst::JmpUnknown { .. } => unimplemented!("x64_get_regs inst"),
}
}
@@ -767,18 +724,10 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) {
taken: _,
not_taken: _,
} => {}
//
// ** JmpCond
//
// ** JmpCondCompound
//
//Inst::JmpUnknown { target } => {
// target.apply_map(mapper);
//}
Inst::Nop { .. }
| Inst::JmpCond { .. }
| Inst::JmpCondCompound { .. }
| Inst::JmpUnknown { .. } => unimplemented!("x64_map_regs opcode"),
Inst::Nop { .. } | Inst::JmpUnknown { .. } => unimplemented!("x64_map_regs opcode"),
}
}
@@ -817,18 +766,12 @@ impl MachInst for Inst {
match self {
// Interesting cases.
&Self::Ret | &Self::EpiloguePlaceholder => MachTerminator::Ret,
&Self::JmpKnown { dest } => MachTerminator::Uncond(dest.as_block_index().unwrap()),
&Self::JmpKnown { dest } => MachTerminator::Uncond(dest.as_label().unwrap()),
&Self::JmpCondSymm {
cc: _,
taken,
not_taken,
} => MachTerminator::Cond(
taken.as_block_index().unwrap(),
not_taken.as_block_index().unwrap(),
),
&Self::JmpCond { .. } | &Self::JmpCondCompound { .. } => {
panic!("is_term() called after lowering branches");
}
} => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()),
// All other cases are boring.
_ => MachTerminator::None,
}
@@ -868,87 +811,95 @@ impl MachInst for Inst {
}
}
fn gen_jump(blockindex: BlockIndex) -> Inst {
Inst::jmp_known(BranchTarget::Block(blockindex))
fn gen_jump(label: MachLabel) -> Inst {
Inst::jmp_known(BranchTarget::Label(label))
}
fn with_block_rewrites(&mut self, block_target_map: &[BlockIndex]) {
// This is identical (modulo renaming) to the arm64 version.
match self {
&mut Inst::JmpKnown { ref mut dest } => {
dest.map(block_target_map);
}
&mut Inst::JmpCondSymm {
cc: _,
ref mut taken,
ref mut not_taken,
} => {
taken.map(block_target_map);
not_taken.map(block_target_map);
}
&mut Inst::JmpCond { .. } | &mut Inst::JmpCondCompound { .. } => {
panic!("with_block_rewrites called after branch lowering!");
}
_ => {}
}
fn gen_constant(to_reg: Writable<Reg>, value: u64, _: Type) -> SmallVec<[Self; 4]> {
let mut ret = SmallVec::new();
let is64 = value > 0xffff_ffff;
ret.push(Inst::imm_r(is64, value, to_reg));
ret
}
fn with_fallthrough_block(&mut self, fallthrough: Option<BlockIndex>) {
// This is identical (modulo renaming) to the arm64 version.
match self {
&mut Inst::JmpCondSymm {
cc,
taken,
not_taken,
} => {
if taken.as_block_index() == fallthrough {
*self = Inst::jmp_cond(cc.invert(), not_taken);
} else if not_taken.as_block_index() == fallthrough {
*self = Inst::jmp_cond(cc, taken);
} else {
// We need a compound sequence (condbr / uncond-br).
*self = Inst::jmp_cond_compound(cc, taken, not_taken);
}
}
&mut Inst::JmpKnown { dest } => {
if dest.as_block_index() == fallthrough {
*self = Inst::nop(0);
}
}
_ => {}
}
fn reg_universe(flags: &Flags) -> RealRegUniverse {
create_reg_universe_systemv(flags)
}
fn with_block_offsets(&mut self, my_offset: CodeOffset, targets: &[CodeOffset]) {
// This is identical (modulo renaming) to the arm64 version.
match self {
&mut Self::JmpCond {
cc: _,
ref mut target,
} => {
target.lower(targets, my_offset);
}
&mut Self::JmpCondCompound {
cc: _,
ref mut taken,
ref mut not_taken,
..
} => {
taken.lower(targets, my_offset);
not_taken.lower(targets, my_offset);
}
&mut Self::JmpKnown { ref mut dest } => {
dest.lower(targets, my_offset);
}
_ => {}
}
fn worst_case_size() -> CodeOffset {
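// An x86-64 instruction is at most 15 bytes; this also covers the
// two-instruction conditional-branch sequence (6 + 5 bytes) emitted for
// `JmpCondSymm`.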
15
}
type LabelUse = LabelUse;
}
impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
impl MachInstEmit for Inst {
type State = ();
fn emit(&self, sink: &mut O, _flags: &settings::Flags, _: &mut Self::State) {
fn emit(&self, sink: &mut MachBuffer<Inst>, _flags: &settings::Flags, _: &mut Self::State) {
emit::emit(self, sink);
}
}
/// A label-use (internal relocation) in generated code.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum LabelUse {
/// A 32-bit offset from location of relocation itself, added to the
/// existing value at that location.
Rel32,
}
impl MachInstLabelUse for LabelUse {
const ALIGN: CodeOffset = 1;
fn max_pos_range(self) -> CodeOffset {
match self {
LabelUse::Rel32 => 0x7fff_ffff,
}
}
fn max_neg_range(self) -> CodeOffset {
match self {
LabelUse::Rel32 => 0x8000_0000,
}
}
fn patch_size(self) -> CodeOffset {
match self {
LabelUse::Rel32 => 4,
}
}
fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
match self {
LabelUse::Rel32 => {
let addend = i32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
let value = i32::try_from(label_offset)
.unwrap()
.wrapping_sub(i32::try_from(use_offset).unwrap())
.wrapping_add(addend);
buffer.copy_from_slice(&value.to_le_bytes()[..]);
}
}
}
fn supports_veneer(self) -> bool {
match self {
LabelUse::Rel32 => false,
}
}
fn veneer_size(self) -> CodeOffset {
match self {
LabelUse::Rel32 => 0,
}
}
fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
match self {
LabelUse::Rel32 => {
panic!("Veneer not supported for Rel32 label-use.");
}
}
}
}


@@ -12,6 +12,7 @@ use crate::ir::{InstructionData, Opcode, Type};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::result::CodegenResult;
use crate::isa::x64::inst::args::*;
use crate::isa::x64::inst::*;
@@ -94,6 +95,16 @@ fn intCC_to_x64_CC(cc: IntCC) -> CC {
}
}
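/// Get the register holding input `input` of instruction `iri`, recording the
/// register use with the lowering context (`use_input_reg`).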
fn input_to_reg<'a>(ctx: Ctx<'a>, iri: IRInst, input: usize) -> Reg {
let inputs = ctx.get_input(iri, input);
ctx.use_input_reg(inputs);
inputs.reg
}
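/// Get the writable register for output `output` of instruction `iri`.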
fn output_to_reg<'a>(ctx: Ctx<'a>, iri: IRInst, output: usize) -> Writable<Reg> {
ctx.get_output(iri, output)
}
//=============================================================================
// Top-level instruction lowering entry point, for one instruction.
@@ -114,7 +125,7 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
// Get exactly the bit pattern in 'w64' into the dest. No
// monkeying with sign extension etc.
let dstIs64 = w64 > 0xFFFF_FFFF;
let regD = ctx.output(iri, 0);
let regD = output_to_reg(ctx, iri, 0);
ctx.emit(Inst::imm_r(dstIs64, w64, regD));
} else {
unimplemented!();
@@ -122,9 +133,9 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
}
Opcode::Iadd | Opcode::Isub => {
let regD = ctx.output(iri, 0);
let regL = ctx.input(iri, 0);
let regR = ctx.input(iri, 1);
let regD = output_to_reg(ctx, iri, 0);
let regL = input_to_reg(ctx, iri, 0);
let regR = input_to_reg(ctx, iri, 1);
let is64 = int_ty_to_is64(ty.unwrap());
let how = if op == Opcode::Iadd {
RMI_R_Op::Add
@@ -139,9 +150,9 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
// TODO: implement imm shift value into insn
let tySL = ctx.input_ty(iri, 0);
let tyD = ctx.output_ty(iri, 0); // should be the same as tySL
let regSL = ctx.input(iri, 0);
let regSR = ctx.input(iri, 1);
let regD = ctx.output(iri, 0);
let regSL = input_to_reg(ctx, iri, 0);
let regSR = input_to_reg(ctx, iri, 1);
let regD = output_to_reg(ctx, iri, 0);
if tyD == tySL && (tyD == types::I32 || tyD == types::I64) {
let how = match op {
Opcode::Ishl => ShiftKind::Left,
@@ -168,8 +179,8 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
let isZX = op == Opcode::Uextend;
let tyS = ctx.input_ty(iri, 0);
let tyD = ctx.output_ty(iri, 0);
let regS = ctx.input(iri, 0);
let regD = ctx.output(iri, 0);
let regS = input_to_reg(ctx, iri, 0);
let regD = output_to_reg(ctx, iri, 0);
ctx.emit(Inst::mov_r_r(true, regS, regD));
match (tyS, tyD, isZX) {
(types::I8, types::I64, false) => {
@@ -182,7 +193,7 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
Opcode::FallthroughReturn | Opcode::Return => {
for i in 0..ctx.num_inputs(iri) {
let src_reg = ctx.input(iri, i);
let src_reg = input_to_reg(ctx, iri, i);
let retval_reg = ctx.retval(i);
ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg));
}
@@ -219,35 +230,6 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
panic!("ALU+imm and ALU+carry ops should not appear here!");
}
Opcode::X86Udivmodx
| Opcode::X86Sdivmodx
| Opcode::X86Umulx
| Opcode::X86Smulx
| Opcode::X86Cvtt2si
| Opcode::X86Fmin
| Opcode::X86Fmax
| Opcode::X86Push
| Opcode::X86Pop
| Opcode::X86Bsr
| Opcode::X86Bsf
| Opcode::X86Pshufd
| Opcode::X86Pshufb
| Opcode::X86Pextr
| Opcode::X86Pinsr
| Opcode::X86Insertps
| Opcode::X86Movsd
| Opcode::X86Movlhps
| Opcode::X86Psll
| Opcode::X86Psrl
| Opcode::X86Psra
| Opcode::X86Ptest
| Opcode::X86Pmaxs
| Opcode::X86Pmaxu
| Opcode::X86Pmins
| Opcode::X86Pminu => {
panic!("x86-specific opcode in supposedly arch-neutral IR!");
}
_ => unimplemented!("unimplemented lowering for opcode {:?}", op),
}
}
@@ -258,17 +240,18 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
impl LowerBackend for X64Backend {
type MInst = Inst;
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) {
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
lower_insn_to_regs(ctx, ir_inst);
Ok(())
}
fn lower_branch_group<C: LowerCtx<I = Inst>>(
&self,
ctx: &mut C,
branches: &[IRInst],
targets: &[BlockIndex],
fallthrough: Option<BlockIndex>,
) {
targets: &[MachLabel],
fallthrough: Option<MachLabel>,
) -> CodegenResult<()> {
// A block should end with at most two branches. The first may be a
// conditional branch; a conditional branch can be followed only by an
// unconditional branch or fallthrough. Otherwise, if only one branch,
@@ -290,17 +273,17 @@ impl LowerBackend for X64Backend {
);
assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough);
let taken = BranchTarget::Block(targets[0]);
let taken = BranchTarget::Label(targets[0]);
let not_taken = match op1 {
Opcode::Jump => BranchTarget::Block(targets[1]),
Opcode::Fallthrough => BranchTarget::Block(fallthrough.unwrap()),
Opcode::Jump => BranchTarget::Label(targets[1]),
Opcode::Fallthrough => BranchTarget::Label(fallthrough.unwrap()),
_ => unreachable!(), // assert above.
};
match op0 {
Opcode::Brz | Opcode::Brnz => {
let tyS = ctx.input_ty(branches[0], 0);
if is_int_ty(tyS) {
let rS = ctx.input(branches[0], 0);
let rS = input_to_reg(ctx, branches[0], 0);
let cc = match op0 {
Opcode::Brz => CC::Z,
Opcode::Brnz => CC::NZ,
@@ -316,8 +299,8 @@ impl LowerBackend for X64Backend {
Opcode::BrIcmp => {
let tyS = ctx.input_ty(branches[0], 0);
if is_int_ty(tyS) {
let rSL = ctx.input(branches[0], 0);
let rSR = ctx.input(branches[0], 1);
let rSL = input_to_reg(ctx, branches[0], 0);
let rSR = input_to_reg(ctx, branches[0], 1);
let cc = intCC_to_x64_CC(inst_condcode(ctx.data(branches[0])));
let sizeB = int_ty_to_sizeB(tyS);
// FIXME verify rSR vs rSL ordering
@@ -339,10 +322,10 @@ impl LowerBackend for X64Backend {
let op = ctx.data(branches[0]).opcode();
match op {
Opcode::Jump => {
ctx.emit(Inst::jmp_known(BranchTarget::Block(targets[0])));
ctx.emit(Inst::jmp_known(BranchTarget::Label(targets[0])));
}
Opcode::Fallthrough => {
ctx.emit(Inst::jmp_known(BranchTarget::Block(targets[0])));
ctx.emit(Inst::jmp_known(BranchTarget::Label(targets[0])));
}
Opcode::Trap => {
unimplemented = true;
@@ -354,5 +337,7 @@ impl LowerBackend for X64Backend {
if unimplemented {
unimplemented!("lower_branch_group(x64): can't handle: {:?}", branches);
}
Ok(())
}
}


@@ -52,7 +52,8 @@ impl MachBackend for X64Backend {
) -> CodegenResult<MachCompileResult> {
let flags = self.flags();
let vcode = self.compile_vcode(func, flags.clone())?;
let sections = vcode.emit();
let buffer = vcode.emit();
let buffer = buffer.finish();
let frame_size = vcode.frame_size();
let disasm = if want_disasm {
@@ -62,7 +63,7 @@ impl MachBackend for X64Backend {
};
Ok(MachCompileResult {
sections,
buffer,
frame_size,
disasm,
})


@@ -99,7 +99,6 @@ mod iterators;
mod legalizer;
mod licm;
mod nan_canonicalization;
mod num_uses;
mod partition_slice;
mod postopt;
mod predicates;


@@ -1,49 +1,624 @@
//! Computation of basic block order in emitted code.
//!
//! This module handles the translation from CLIF BBs to VCode BBs.
//!
//! The basic idea is that we compute a sequence of "lowered blocks" that
//! correspond to one or more blocks in the graph: (CLIF CFG) `union` (implicit
//! block on *every* edge). Conceptually, the lowering pipeline wants to insert
//! moves for phi-nodes on every block-to-block transfer; these blocks always
//! conceptually exist, but may be merged with an "original" CLIF block (and
//! hence not actually exist; this is equivalent to inserting the blocks only on
//! critical edges).
//!
//! In other words, starting from a CFG like this (where each "CLIF block" and
//! "(edge N->M)" is a separate basic block):
//!
//! ```plain
//!
//!           CLIF block 0
//!            /        \
//!   (edge 0->1)      (edge 0->2)
//!        |                |
//!   CLIF block 1      CLIF block 2
//!            \        /
//!      (edge 1->3)  (edge 2->3)
//!              \    /
//!           CLIF block 3
//! ```
//!
//! We can produce a CFG of lowered blocks like so:
//!
//! ```plain
//!              +--------------+
//!              | CLIF block 0 |
//!              +--------------+
//!               /            \
//!   +--------------+      +--------------+
//!   | (edge 0->1)  |      | (edge 0->2)  |
//!   | CLIF block 1 |      | CLIF block 2 |
//!   +--------------+      +--------------+
//!               \            /
//!        +-----------+  +-----------+
//!        |(edge 1->3)|  |(edge 2->3)|
//!        +-----------+  +-----------+
//!                \        /
//!              +------------+
//!              |CLIF block 3|
//!              +------------+
//! ```
//!
//! (note that the edges into CLIF blocks 1 and 2 could be merged with those
//! blocks' original bodies, but the out-edges could not because for simplicity
//! in the successor-function definition, we only ever merge an edge onto one
//! side of an original CLIF block.)
//!
//! Each `LoweredBlock` names just an original CLIF block, an original CLIF
//! block prepended or appended with an edge block (never both, though), or just
//! an edge block.
//!
//! To compute this lowering, we do a DFS over the CLIF-plus-edge-block graph
//! (never actually materialized, just defined by a "successors" function), and
//! compute the reverse postorder.
//!
//! This algorithm isn't perfect w.r.t. generated code quality: we don't, for
//! example, consider any information about whether edge blocks will actually
//! have content, because this computation happens as part of lowering *before*
//! regalloc, and regalloc may or may not insert moves/spills/reloads on any
//! particular edge. But it works relatively well and is conceptually simple.
//! Furthermore, the [MachBuffer] machine-code sink performs final peephole-like
//! branch editing that in practice elides empty blocks and simplifies some of
//! the other redundancies that this scheme produces.
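//!
//! For the diamond example above, the lowered order that results (and that
//! `test_blockorder_diamond` below checks) is:
//!
//! ```plain
//! [ CLIF block 0 ]
//! [ (edge 0->1) CLIF block 1 ]
//! [ (edge 1->3) ]
//! [ (edge 0->2) CLIF block 2 ]
//! [ (edge 2->3) ]
//! [ CLIF block 3 ]
//! ```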
use crate::entity::SecondaryMap;
use crate::fx::{FxHashMap, FxHashSet};
use crate::ir::{Block, Function, Inst, Opcode};
use crate::machinst::lower::visit_block_succs;
use crate::machinst::*;
/// Simple reverse postorder-based block order emission.
///
/// TODO: use a proper algorithm, such as the bottom-up straight-line-section
/// construction algorithm.
struct BlockRPO {
visited: Vec<bool>,
postorder: Vec<BlockIndex>,
use log::debug;
use smallvec::SmallVec;
/// Mapping from CLIF BBs to VCode BBs.
#[derive(Debug)]
pub struct BlockLoweringOrder {
/// Lowered blocks, in BlockIndex order. Each block is some combination of
/// (i) a CLIF block, and (ii) inserted crit-edge blocks before or after;
/// see [LoweredBlock] for details.
lowered_order: Vec<LoweredBlock>,
/// Successors for all lowered blocks, in one serialized vector. Indexed by
/// the ranges in `lowered_succ_ranges`.
lowered_succs: Vec<(Inst, LoweredBlock)>,
/// BlockIndex values for successors for all lowered blocks, in the same
/// order as `lowered_succs`.
lowered_succ_indices: Vec<(Inst, BlockIndex)>,
/// Ranges in `lowered_succs` giving the successor lists for each lowered
/// block. Indexed by lowering-order index (`BlockIndex`).
lowered_succ_ranges: Vec<(usize, usize)>,
/// Mapping from CLIF BB to BlockIndex (index in lowered order). Note that
/// some CLIF BBs may not be lowered; in particular, we skip unreachable
/// blocks.
orig_map: SecondaryMap<Block, Option<BlockIndex>>,
}
impl BlockRPO {
fn new<I: VCodeInst>(vcode: &VCode<I>) -> BlockRPO {
BlockRPO {
visited: vec![false; vcode.num_blocks()],
postorder: Vec::with_capacity(vcode.num_blocks()),
/// The origin of a block in the lowered block-order: either an original CLIF
/// block, or an inserted edge-block, or a combination of the two if an edge is
/// non-critical.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum LoweredBlock {
/// Block in original CLIF, with no merged edge-blocks.
Orig {
/// Original CLIF block.
block: Block,
},
/// Block in the original CLIF, plus edge-block to one succ (which is the
/// one successor of the original block).
OrigAndEdge {
/// The original CLIF block contained in this lowered block.
block: Block,
/// The edge (jump) instruction transitioning from this block
/// to the next, i.e., corresponding to the included edge-block. This
/// will be an instruction in `block`.
edge_inst: Inst,
/// The successor CLIF block.
succ: Block,
},
/// Block in the original CLIF, preceded by edge-block from one pred (which
/// is the one pred of the original block).
EdgeAndOrig {
/// The previous CLIF block, i.e., the edge block's predecessor.
pred: Block,
/// The edge (jump) instruction corresponding to the included
/// edge-block. This will be an instruction in `pred`.
edge_inst: Inst,
/// The original CLIF block included in this lowered block.
block: Block,
},
/// Split critical edge between two CLIF blocks. This lowered block does not
/// correspond to any original CLIF blocks; it only serves as an insertion
/// point for work to happen on the transition from `pred` to `succ`.
Edge {
/// The predecessor CLIF block.
pred: Block,
/// The edge (jump) instruction corresponding to this edge's transition.
/// This will be an instruction in `pred`.
edge_inst: Inst,
/// The successor CLIF block.
succ: Block,
},
}
impl LoweredBlock {
/// The associated original (CLIF) block included in this lowered block, if
/// any.
pub fn orig_block(self) -> Option<Block> {
match self {
LoweredBlock::Orig { block, .. }
| LoweredBlock::OrigAndEdge { block, .. }
| LoweredBlock::EdgeAndOrig { block, .. } => Some(block),
LoweredBlock::Edge { .. } => None,
}
}
fn visit<I: VCodeInst>(&mut self, vcode: &VCode<I>, block: BlockIndex) {
self.visited[block as usize] = true;
for succ in vcode.succs(block) {
if !self.visited[succ.get() as usize] {
self.visit(vcode, succ.get());
/// The associated in-edge, if any.
pub fn in_edge(self) -> Option<(Block, Inst, Block)> {
match self {
LoweredBlock::EdgeAndOrig {
pred,
edge_inst,
block,
} => Some((pred, edge_inst, block)),
_ => None,
}
}
/// The associated out-edge, if any. Also includes edge-only blocks.
pub fn out_edge(self) -> Option<(Block, Inst, Block)> {
match self {
LoweredBlock::OrigAndEdge {
block,
edge_inst,
succ,
} => Some((block, edge_inst, succ)),
LoweredBlock::Edge {
pred,
edge_inst,
succ,
} => Some((pred, edge_inst, succ)),
_ => None,
}
}
}
impl BlockLoweringOrder {
/// Compute and return a lowered block order for `f`.
pub fn new(f: &Function) -> BlockLoweringOrder {
debug!("BlockLoweringOrder: function body {:?}", f);
// Step 1: compute the in-edge and out-edge count of every block.
let mut block_in_count = SecondaryMap::with_default(0);
let mut block_out_count = SecondaryMap::with_default(0);
// Cache the block successors to avoid re-examining branches below.
let mut block_succs: SmallVec<[(Inst, Block); 128]> = SmallVec::new();
let mut block_succ_range = SecondaryMap::with_default((0, 0));
let mut fallthrough_return_block = None;
for block in f.layout.blocks() {
let block_succ_start = block_succs.len();
visit_block_succs(f, block, |inst, succ| {
block_out_count[block] += 1;
block_in_count[succ] += 1;
block_succs.push((inst, succ));
});
let block_succ_end = block_succs.len();
block_succ_range[block] = (block_succ_start, block_succ_end);
for inst in f.layout.block_likely_branches(block) {
if f.dfg[inst].opcode() == Opcode::Return {
// Implicit output edge for any return.
block_out_count[block] += 1;
}
if f.dfg[inst].opcode() == Opcode::FallthroughReturn {
// Fallthrough return block must come last.
debug_assert!(fallthrough_return_block == None);
fallthrough_return_block = Some(block);
}
}
}
if Some(block) != vcode.fallthrough_return_block {
self.postorder.push(block);
// Implicit input edge for entry block.
if let Some(entry) = f.layout.entry_block() {
block_in_count[entry] += 1;
}
// Here we define the implicit CLIF-plus-edges graph. There are
// conceptually two such graphs: the original, with every edge explicit,
// and the merged one, with blocks (represented by `LoweredBlock`
// values) that contain original CLIF blocks, edges, or both. This
// function returns a lowered block's successors as per the latter, with
// consideration to edge-block merging.
//
// Note that there is a property of the block-merging rules below
// that is very important to ensure we don't miss any lowered blocks:
// any block in the implicit CLIF-plus-edges graph will *only* be
// included in one block in the merged graph.
//
// This, combined with the property that every edge block is reachable
// only from one predecessor (and hence cannot be reached by a DFS
// backedge), means that it is sufficient in our DFS below to track
// visited-bits per original CLIF block only, not per edge. This greatly
// simplifies the data structures (no need to keep a sparse hash-set of
// (block, block) tuples).
let compute_lowered_succs = |ret: &mut Vec<(Inst, LoweredBlock)>, block: LoweredBlock| {
let start_idx = ret.len();
match block {
LoweredBlock::Orig { block } | LoweredBlock::EdgeAndOrig { block, .. } => {
// At an orig block; successors are always edge blocks,
// possibly with orig blocks following.
let range = block_succ_range[block];
for &(edge_inst, succ) in &block_succs[range.0..range.1] {
if block_in_count[succ] == 1 {
ret.push((
edge_inst,
LoweredBlock::EdgeAndOrig {
pred: block,
edge_inst,
block: succ,
},
));
} else {
ret.push((
edge_inst,
LoweredBlock::Edge {
pred: block,
edge_inst,
succ,
},
));
}
}
}
LoweredBlock::Edge {
succ, edge_inst, ..
}
| LoweredBlock::OrigAndEdge {
succ, edge_inst, ..
} => {
// At an edge block; successors are always orig blocks,
// possibly with edge blocks following.
if block_out_count[succ] == 1 {
let range = block_succ_range[succ];
// check if the one succ is a real CFG edge (vs.
// implicit return succ).
if range.1 - range.0 > 0 {
debug_assert!(range.1 - range.0 == 1);
let (succ_edge_inst, succ_succ) = block_succs[range.0];
ret.push((
edge_inst,
LoweredBlock::OrigAndEdge {
block: succ,
edge_inst: succ_edge_inst,
succ: succ_succ,
},
));
} else {
ret.push((edge_inst, LoweredBlock::Orig { block: succ }));
}
} else {
ret.push((edge_inst, LoweredBlock::Orig { block: succ }));
}
}
}
let end_idx = ret.len();
(start_idx, end_idx)
};
// Build the explicit LoweredBlock-to-LoweredBlock successors list.
let mut lowered_succs = vec![];
let mut lowered_succ_indices = vec![];
// Step 2: Compute RPO traversal of the implicit CLIF-plus-edge-block graph. Use an
// explicit stack so we don't overflow the real stack with a deep DFS.
#[derive(Debug)]
struct StackEntry {
this: LoweredBlock,
succs: (usize, usize), // range in lowered_succs
cur_succ: usize, // index in lowered_succs
}
let mut stack: SmallVec<[StackEntry; 16]> = SmallVec::new();
let mut visited = FxHashSet::default();
let mut postorder = vec![];
if let Some(entry) = f.layout.entry_block() {
// FIXME(cfallin): we might be able to use OrigAndEdge. Find a way
// to not special-case the entry block here.
let block = LoweredBlock::Orig { block: entry };
visited.insert(block);
let range = compute_lowered_succs(&mut lowered_succs, block);
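// Keep the indices vector the same length as `lowered_succs`; the real
// BlockIndex values are computed later, once the LoweredBlock -> BlockIndex
// map exists.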
lowered_succ_indices.resize(lowered_succs.len(), 0);
stack.push(StackEntry {
this: block,
succs: range,
cur_succ: range.1,
});
}
let mut deferred_last = None;
while !stack.is_empty() {
let stack_entry = stack.last_mut().unwrap();
let range = stack_entry.succs;
if stack_entry.cur_succ == range.0 {
let orig_block = stack_entry.this.orig_block();
if orig_block.is_some() && orig_block == fallthrough_return_block {
deferred_last = Some((stack_entry.this, range));
} else {
postorder.push((stack_entry.this, range));
}
stack.pop();
} else {
// Heuristic: chase the children in reverse. This puts the first
// successor block first in RPO, all other things being equal,
// which tends to prioritize loop backedges over out-edges,
// putting the edge-block closer to the loop body and minimizing
// live-ranges in linear instruction space.
let next = lowered_succs[stack_entry.cur_succ - 1].1;
stack_entry.cur_succ -= 1;
if visited.contains(&next) {
continue;
}
visited.insert(next);
let range = compute_lowered_succs(&mut lowered_succs, next);
lowered_succ_indices.resize(lowered_succs.len(), 0);
stack.push(StackEntry {
this: next,
succs: range,
cur_succ: range.1,
});
}
}
postorder.reverse();
let mut rpo = postorder;
if let Some(d) = deferred_last {
rpo.push(d);
}
// Step 3: now that we have RPO, build the BlockIndex/BB fwd/rev maps.
let mut lowered_order = vec![];
let mut lowered_succ_ranges = vec![];
let mut lb_to_bindex = FxHashMap::default();
for (block, succ_range) in rpo.into_iter() {
lb_to_bindex.insert(block, lowered_order.len() as BlockIndex);
lowered_order.push(block);
lowered_succ_ranges.push(succ_range);
}
let lowered_succ_indices = lowered_succs
.iter()
.map(|&(inst, succ)| (inst, lb_to_bindex.get(&succ).cloned().unwrap()))
.collect();
let mut orig_map = SecondaryMap::with_default(None);
for (i, lb) in lowered_order.iter().enumerate() {
let i = i as BlockIndex;
if let Some(b) = lb.orig_block() {
orig_map[b] = Some(i);
}
}
let result = BlockLoweringOrder {
lowered_order,
lowered_succs,
lowered_succ_indices,
lowered_succ_ranges,
orig_map,
};
debug!("BlockLoweringOrder: {:?}", result);
result
}
fn rpo<I: VCodeInst>(self, vcode: &VCode<I>) -> Vec<BlockIndex> {
let mut rpo = self.postorder;
rpo.reverse();
if let Some(block) = vcode.fallthrough_return_block {
rpo.push(block);
}
rpo
/// Get the lowered order of blocks.
pub fn lowered_order(&self) -> &[LoweredBlock] {
&self.lowered_order[..]
}
/// Get the successors for a lowered block, by index in `lowered_order()`'s
/// returned slice. Each successor is paired with the edge-instruction
/// (branch) corresponding to this edge.
pub fn succs(&self, block: BlockIndex) -> &[(Inst, LoweredBlock)] {
let range = self.lowered_succ_ranges[block as usize];
&self.lowered_succs[range.0..range.1]
}
/// Get the successor indices for a lowered block.
pub fn succ_indices(&self, block: BlockIndex) -> &[(Inst, BlockIndex)] {
let range = self.lowered_succ_ranges[block as usize];
&self.lowered_succ_indices[range.0..range.1]
}
/// Get the lowered block index containing a CLIF block, if any. (May not be
/// present if the original CLIF block was unreachable.)
pub fn lowered_block_for_bb(&self, bb: Block) -> Option<BlockIndex> {
self.orig_map[bb]
}
}
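// Illustrative sketch only (not part of this change): exercising the API
// above, assuming a `Function` `f` is already in scope.
//
//     let order = BlockLoweringOrder::new(&f);
//     for (i, lb) in order.lowered_order().iter().enumerate() {
//         let succs = order.succ_indices(i as BlockIndex);
//         println!("lowered block {} = {:?} -> {:?}", i, lb, succs);
//     }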
/// Compute the final block order.
pub fn compute_final_block_order<I: VCodeInst>(vcode: &VCode<I>) -> Vec<BlockIndex> {
let mut rpo = BlockRPO::new(vcode);
rpo.visit(vcode, vcode.entry());
rpo.rpo(vcode)
#[cfg(test)]
mod test {
use super::*;
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::types::*;
use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
use crate::isa::CallConv;
fn build_test_func(n_blocks: usize, edges: &[(usize, usize)]) -> Function {
assert!(n_blocks > 0);
let name = ExternalName::testcase("test0");
let mut sig = Signature::new(CallConv::SystemV);
sig.params.push(AbiParam::new(I32));
let mut func = Function::with_name_signature(name, sig);
let blocks = (0..n_blocks)
.map(|i| {
let bb = func.dfg.make_block();
assert!(bb.as_u32() == i as u32);
bb
})
.collect::<Vec<_>>();
let arg0 = func.dfg.append_block_param(blocks[0], I32);
let mut pos = FuncCursor::new(&mut func);
let mut edge = 0;
for i in 0..n_blocks {
pos.insert_block(blocks[i]);
let mut succs = vec![];
while edge < edges.len() && edges[edge].0 == i {
succs.push(edges[edge].1);
edge += 1;
}
if succs.len() == 0 {
pos.ins().return_(&[arg0]);
} else if succs.len() == 1 {
pos.ins().jump(blocks[succs[0]], &[]);
} else if succs.len() == 2 {
pos.ins().brnz(arg0, blocks[succs[0]], &[]);
pos.ins().jump(blocks[succs[1]], &[]);
} else {
panic!("Too many successors");
}
}
func
}
#[test]
fn test_blockorder_diamond() {
let func = build_test_func(4, &[(0, 1), (0, 2), (1, 3), (2, 3)]);
let order = BlockLoweringOrder::new(&func);
assert_eq!(order.lowered_order.len(), 6);
assert!(order.lowered_order[0].orig_block().unwrap().as_u32() == 0);
assert!(order.lowered_order[0].in_edge().is_none());
assert!(order.lowered_order[0].out_edge().is_none());
assert!(order.lowered_order[1].orig_block().unwrap().as_u32() == 1);
assert!(order.lowered_order[1].in_edge().unwrap().0.as_u32() == 0);
assert!(order.lowered_order[1].in_edge().unwrap().2.as_u32() == 1);
assert!(order.lowered_order[2].orig_block().is_none());
assert!(order.lowered_order[2].in_edge().is_none());
assert!(order.lowered_order[2].out_edge().unwrap().0.as_u32() == 1);
assert!(order.lowered_order[2].out_edge().unwrap().2.as_u32() == 3);
assert!(order.lowered_order[3].orig_block().unwrap().as_u32() == 2);
assert!(order.lowered_order[3].in_edge().unwrap().0.as_u32() == 0);
assert!(order.lowered_order[3].in_edge().unwrap().2.as_u32() == 2);
assert!(order.lowered_order[3].out_edge().is_none());
assert!(order.lowered_order[4].orig_block().is_none());
assert!(order.lowered_order[4].in_edge().is_none());
assert!(order.lowered_order[4].out_edge().unwrap().0.as_u32() == 2);
assert!(order.lowered_order[4].out_edge().unwrap().2.as_u32() == 3);
assert!(order.lowered_order[5].orig_block().unwrap().as_u32() == 3);
assert!(order.lowered_order[5].in_edge().is_none());
assert!(order.lowered_order[5].out_edge().is_none());
}
#[test]
fn test_blockorder_critedge() {
// CFG under test (blocks and edges):
//
//   0 -> 1, 2
//   1 -> 3, 4
//   2 -> 5
//   3 -> 5, 6
//   4 -> 6
//
// (3 -> 5, 3 -> 6, 4 -> 6 are critical edges and must be split)
//
let func = build_test_func(
7,
&[
(0, 1),
(0, 2),
(1, 3),
(1, 4),
(2, 5),
(3, 5),
(3, 6),
(4, 6),
],
);
let order = BlockLoweringOrder::new(&func);
assert_eq!(order.lowered_order.len(), 11);
println!("ordered = {:?}", order.lowered_order);
// block 0
assert!(order.lowered_order[0].orig_block().unwrap().as_u32() == 0);
assert!(order.lowered_order[0].in_edge().is_none());
assert!(order.lowered_order[0].out_edge().is_none());
// edge 0->1 + block 1
assert!(order.lowered_order[1].orig_block().unwrap().as_u32() == 1);
assert!(order.lowered_order[1].in_edge().unwrap().0.as_u32() == 0);
assert!(order.lowered_order[1].in_edge().unwrap().2.as_u32() == 1);
assert!(order.lowered_order[1].out_edge().is_none());
// edge 1->3 + block 3
assert!(order.lowered_order[2].orig_block().unwrap().as_u32() == 3);
assert!(order.lowered_order[2].in_edge().unwrap().0.as_u32() == 1);
assert!(order.lowered_order[2].in_edge().unwrap().2.as_u32() == 3);
assert!(order.lowered_order[2].out_edge().is_none());
// edge 3->5
assert!(order.lowered_order[3].orig_block().is_none());
assert!(order.lowered_order[3].in_edge().is_none());
assert!(order.lowered_order[3].out_edge().unwrap().0.as_u32() == 3);
assert!(order.lowered_order[3].out_edge().unwrap().2.as_u32() == 5);
// edge 3->6
assert!(order.lowered_order[4].orig_block().is_none());
assert!(order.lowered_order[4].in_edge().is_none());
assert!(order.lowered_order[4].out_edge().unwrap().0.as_u32() == 3);
assert!(order.lowered_order[4].out_edge().unwrap().2.as_u32() == 6);
// edge 1->4 + block 4
assert!(order.lowered_order[5].orig_block().unwrap().as_u32() == 4);
assert!(order.lowered_order[5].in_edge().unwrap().0.as_u32() == 1);
assert!(order.lowered_order[5].in_edge().unwrap().2.as_u32() == 4);
assert!(order.lowered_order[5].out_edge().is_none());
// edge 4->6
assert!(order.lowered_order[6].orig_block().is_none());
assert!(order.lowered_order[6].in_edge().is_none());
assert!(order.lowered_order[6].out_edge().unwrap().0.as_u32() == 4);
assert!(order.lowered_order[6].out_edge().unwrap().2.as_u32() == 6);
// block 6
assert!(order.lowered_order[7].orig_block().unwrap().as_u32() == 6);
assert!(order.lowered_order[7].in_edge().is_none());
assert!(order.lowered_order[7].out_edge().is_none());
// edge 0->2 + block 2
assert!(order.lowered_order[8].orig_block().unwrap().as_u32() == 2);
assert!(order.lowered_order[8].in_edge().unwrap().0.as_u32() == 0);
assert!(order.lowered_order[8].in_edge().unwrap().2.as_u32() == 2);
assert!(order.lowered_order[8].out_edge().is_none());
// edge 2->5
assert!(order.lowered_order[9].orig_block().is_none());
assert!(order.lowered_order[9].in_edge().is_none());
assert!(order.lowered_order[9].out_edge().unwrap().0.as_u32() == 2);
assert!(order.lowered_order[9].out_edge().unwrap().2.as_u32() == 5);
// block 5
assert!(order.lowered_order[10].orig_block().unwrap().as_u32() == 5);
assert!(order.lowered_order[10].in_edge().is_none());
assert!(order.lowered_order[10].out_edge().is_none());
}
}

File diff suppressed because it is too large

View File

@@ -18,8 +18,12 @@ pub fn compile<B: LowerBackend + MachBackend>(
where
B::MInst: ShowWithRRU,
{
// This lowers the CL IR.
let mut vcode = Lower::new(f, abi)?.lower(b)?;
// Compute lowered block order.
let block_order = BlockLoweringOrder::new(f);
// Build the lowering context.
let lower = Lower::new(f, abi, block_order)?;
// Lower the IR.
let mut vcode = lower.lower(b)?;
debug!(
"vcode from lowering: \n{}",
@@ -65,11 +69,6 @@ where
// all at once. This also inserts prologues/epilogues.
vcode.replace_insns_from_regalloc(result);
vcode.remove_redundant_branches();
// Do final passes over code to finalize branches.
vcode.finalize_branches();
debug!(
"vcode after regalloc: final version:\n{}",
vcode.show_rru(Some(b.reg_universe()))
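Condensed, the reworked pipeline in `compile()` now reads roughly as below. This is a sketch for orientation only: `run_regalloc` is a stand-in name for the register-allocation step elided here, not an actual function in this change.
// Sketch: block order is computed up front, lowering consumes it, and the old
// finalize_branches pass is gone because MachBuffer resolves labels at emission.
let block_order = BlockLoweringOrder::new(f);                // lowered order, incl. edge blocks
let mut vcode = Lower::new(f, abi, block_order)?.lower(b)?;  // lower CLIF to VCode
let result = run_regalloc(&vcode);                           // placeholder for the regalloc step
vcode.replace_insns_from_regalloc(result);
vcode.remove_redundant_branches();
let buffer = vcode.emit();                                   // branches fixed up here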

File diff suppressed because it is too large

View File

@@ -109,6 +109,7 @@ use regalloc::RegUsageCollector;
use regalloc::{
RealReg, RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable,
};
use smallvec::SmallVec;
use std::string::String;
use target_lexicon::Triple;
@@ -124,8 +125,8 @@ pub mod abi;
pub use abi::*;
pub mod pretty_print;
pub use pretty_print::*;
pub mod sections;
pub use sections::*;
pub mod buffer;
pub use buffer::*;
pub mod adapter;
pub use adapter::*;
@@ -152,6 +153,9 @@ pub trait MachInst: Clone + Debug {
/// Generate a move.
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self;
/// Generate a constant into a reg.
fn gen_constant(to_reg: Writable<Reg>, value: u64, ty: Type) -> SmallVec<[Self; 4]>;
/// Generate a zero-length no-op.
fn gen_zero_len_nop() -> Self;
@@ -166,7 +170,7 @@ pub trait MachInst: Clone + Debug {
/// Generate a jump to another target. Used during lowering of
/// control flow.
fn gen_jump(target: BlockIndex) -> Self;
fn gen_jump(target: MachLabel) -> Self;
/// Generate a NOP. The `preferred_size` parameter allows the caller to
/// request a NOP of that size, or as close to it as possible. The machine
@@ -175,22 +179,62 @@ pub trait MachInst: Clone + Debug {
/// the instruction must have a nonzero size.
fn gen_nop(preferred_size: usize) -> Self;
/// Rewrite block targets using the block-target map.
fn with_block_rewrites(&mut self, block_target_map: &[BlockIndex]);
/// Finalize branches once the block order (fallthrough) is known.
fn with_fallthrough_block(&mut self, fallthrough_block: Option<BlockIndex>);
/// Update instruction once block offsets are known. These offsets are
/// relative to the beginning of the function. `targets` is indexed by
/// BlockIndex.
fn with_block_offsets(&mut self, my_offset: CodeOffset, targets: &[CodeOffset]);
/// Get the register universe for this backend.
fn reg_universe(flags: &Flags) -> RealRegUniverse;
/// Align a basic block offset (from start of function). By default, no
/// alignment occurs.
fn align_basic_block(offset: CodeOffset) -> CodeOffset {
offset
}
/// What is the worst-case instruction size emitted by this instruction type?
fn worst_case_size() -> CodeOffset;
/// A label-use kind: a type that describes the types of label references that
/// can occur in an instruction.
type LabelUse: MachInstLabelUse;
}
/// A descriptor of a label reference (use) in an instruction set.
pub trait MachInstLabelUse: Clone + Copy + Debug + Eq {
/// Required alignment for any veneer. Usually the required instruction
/// alignment (e.g., 4 for a RISC with 32-bit instructions, or 1 for x86).
const ALIGN: CodeOffset;
/// What is the maximum PC-relative range (positive)? E.g., if `1024`, a
/// label-reference fixup at offset `x` is valid if the label resolves to an
/// offset no greater than `x + 1024`.
fn max_pos_range(self) -> CodeOffset;
/// What is the maximum PC-relative range (negative)? This is the absolute
/// value; i.e., if `1024`, then a label-reference fixup at offset `x` is
/// valid if the label resolves to an offset no less than `x - 1024`.
fn max_neg_range(self) -> CodeOffset;
/// What is the size of code-buffer slice this label-use needs to patch in
/// the label's value?
fn patch_size(self) -> CodeOffset;
/// Perform a code-patch, given the offset into the buffer of this label use
/// and the offset into the buffer of the label's definition.
/// It is guaranteed that, given `delta = label_offset - use_offset`, we will
/// have `delta >= -self.max_neg_range()` and `delta <=
/// self.max_pos_range()`.
fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset);
/// Can the label-use be patched to a veneer that supports a longer range?
/// Usually valid for jumps (a short-range jump can jump to a longer-range
/// jump), but not for e.g. constant pool references, because the constant
/// load would require different code (one more level of indirection).
fn supports_veneer(self) -> bool;
/// How many bytes are needed for a veneer?
fn veneer_size(self) -> CodeOffset;
/// Generate a veneer. The given code-buffer slice is `self.veneer_size()`
/// bytes long at offset `veneer_offset` in the buffer. The original
/// label-use will be patched to refer to this veneer's offset. A new
/// (offset, LabelUse) is returned that allows the veneer to use the actual
/// label. For veneers to work properly, it is expected that the new veneer
/// has a larger range; on most platforms this probably means either a
/// "long-range jump" (e.g., on ARM, the 26-bit form), or if already at that
/// stage, a jump that supports a full 32-bit range, for example.
fn generate_veneer(self, buffer: &mut [u8], veneer_offset: CodeOffset) -> (CodeOffset, Self);
}
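To make the trait concrete, here is a minimal sketch of an implementation for a hypothetical RISC-style backend with a 19-bit conditional-branch field and a 26-bit jump field. Everything here (the enum, field layouts, and opcode bits) is invented for illustration and is not part of this change; it assumes `CodeOffset` and `MachInstLabelUse` are in scope.
// Purely illustrative label-use kinds: a 19-bit conditional-branch field and a
// 26-bit unconditional-jump field, both PC-relative and scaled by 4 bytes, as
// on a typical RISC with 32-bit instructions. Opcode bits below are invented.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ExampleLabelUse {
    Branch19,
    Jump26,
}
impl MachInstLabelUse for ExampleLabelUse {
    const ALIGN: CodeOffset = 4;
    fn max_pos_range(self) -> CodeOffset {
        match self {
            ExampleLabelUse::Branch19 => (1 << 20) - 4, // +/- 1 MiB
            ExampleLabelUse::Jump26 => (1 << 27) - 4,   // +/- 128 MiB
        }
    }
    fn max_neg_range(self) -> CodeOffset {
        self.max_pos_range() + 4
    }
    fn patch_size(self) -> CodeOffset {
        4 // one 32-bit instruction word in both cases
    }
    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
        // Write the scaled, signed word offset into the instruction's immediate field.
        let delta = ((label_offset as i64) - (use_offset as i64)) >> 2;
        let (mask, shift) = match self {
            ExampleLabelUse::Branch19 => (0x7ffff, 5),
            ExampleLabelUse::Jump26 => (0x3ff_ffff, 0),
        };
        let mut word = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
        word |= ((delta as u32) & mask) << shift;
        buffer[0..4].copy_from_slice(&word.to_le_bytes());
    }
    fn supports_veneer(self) -> bool {
        // A short conditional branch can be redirected through a longer jump;
        // the 26-bit jump has no longer form in this sketch.
        self == ExampleLabelUse::Branch19
    }
    fn veneer_size(self) -> CodeOffset {
        4
    }
    fn generate_veneer(self, buffer: &mut [u8], veneer_offset: CodeOffset) -> (CodeOffset, Self) {
        // Emit an unconditional jump as the veneer (opcode bits invented) and
        // hand back a label-use with the larger 26-bit range at the veneer's offset.
        buffer[0..4].copy_from_slice(&0x1400_0000u32.to_le_bytes());
        (veneer_offset, ExampleLabelUse::Jump26)
    }
}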
/// Describes a block terminator (not call) in the vcode, when its branches
@@ -202,26 +246,26 @@ pub enum MachTerminator<'a> {
/// A return instruction.
Ret,
/// An unconditional branch to another block.
Uncond(BlockIndex),
Uncond(MachLabel),
/// A conditional branch to one of two other blocks.
Cond(BlockIndex, BlockIndex),
Cond(MachLabel, MachLabel),
/// An indirect branch with known possible targets.
Indirect(&'a [BlockIndex]),
Indirect(&'a [MachLabel]),
}
/// A trait describing the ability to encode a MachInst into binary machine code.
pub trait MachInstEmit<O: MachSectionOutput> {
pub trait MachInstEmit: MachInst {
/// Persistent state carried across `emit` invocations.
type State: Default + Clone + Debug;
/// Emit the instruction.
fn emit(&self, code: &mut O, flags: &Flags, state: &mut Self::State);
fn emit(&self, code: &mut MachBuffer<Self>, flags: &Flags, state: &mut Self::State);
}
/// The result of a `MachBackend::compile_function()` call. Contains machine
/// code (as bytes) and a disassembly, if requested.
pub struct MachCompileResult {
/// Machine code.
pub sections: MachSections,
pub buffer: MachBufferFinalized,
/// Size of stack frame, in bytes.
pub frame_size: u32,
/// Disassembly, if requested.
@@ -231,7 +275,7 @@ pub struct MachCompileResult {
impl MachCompileResult {
/// Get a `CodeInfo` describing section sizes from this compilation result.
pub fn code_info(&self) -> CodeInfo {
let code_size = self.sections.total_size();
let code_size = self.buffer.total_size();
CodeInfo {
code_size,
jumptables_size: 0,

View File

@@ -1,460 +0,0 @@
//! In-memory representation of compiled machine code, in multiple sections
//! (text, constant pool / rodata, etc). Emission occurs into multiple sections
//! simultaneously, so we buffer the result in memory and hand off to the
//! caller at the end of compilation.
use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc};
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode};
use alloc::vec::Vec;
/// A collection of sections with defined start-offsets.
pub struct MachSections {
/// Sections, in offset order.
pub sections: Vec<MachSection>,
}
impl MachSections {
/// New, empty set of sections.
pub fn new() -> MachSections {
MachSections { sections: vec![] }
}
/// Add a section with a known offset and size. Returns the index.
pub fn add_section(&mut self, start: CodeOffset, length: CodeOffset) -> usize {
let idx = self.sections.len();
self.sections.push(MachSection::new(start, length));
idx
}
/// Mutably borrow the given section by index.
pub fn get_section<'a>(&'a mut self, idx: usize) -> &'a mut MachSection {
&mut self.sections[idx]
}
/// Get mutable borrows of two sections simultaneously. Used during
/// instruction emission to provide references to the .text and .rodata
/// (constant pool) sections.
pub fn two_sections<'a>(
&'a mut self,
idx1: usize,
idx2: usize,
) -> (&'a mut MachSection, &'a mut MachSection) {
assert!(idx1 < idx2);
assert!(idx1 < self.sections.len());
assert!(idx2 < self.sections.len());
let (first, rest) = self.sections.split_at_mut(idx2);
(&mut first[idx1], &mut rest[0])
}
/// Emit this set of sections to a set of sinks for the code,
/// relocations, traps, and stackmap.
pub fn emit<CS: CodeSink>(&self, sink: &mut CS) {
// N.B.: we emit every section into the .text section as far as
// the `CodeSink` is concerned; we do not bother to segregate
// the contents into the actual program text, the jumptable and the
// rodata (constant pool). This allows us to generate code assuming
// that these will not be relocated relative to each other, and avoids
// having to designate each section as belonging in one of the three
// fixed categories defined by `CodeSink`. If this becomes a problem
// later (e.g. because of memory permissions or similar), we can
// add this designation and segregate the output; take care, however,
// to add the appropriate relocations in this case.
for section in &self.sections {
if section.data.len() > 0 {
while sink.offset() < section.start_offset {
sink.put1(0);
}
section.emit(sink);
}
}
sink.begin_jumptables();
sink.begin_rodata();
sink.end_codegen();
}
/// Get a list of source location mapping tuples in sorted-by-start-offset order.
pub fn get_srclocs_sorted<'a>(&'a self) -> MachSectionsSrcLocs<'a> {
MachSectionsSrcLocs::new(&self.sections)
}
/// Get the total required size for these sections.
pub fn total_size(&self) -> CodeOffset {
if self.sections.len() == 0 {
0
} else {
// Find the last non-empty section.
self.sections
.iter()
.rev()
.find(|s| s.data.len() > 0)
.map(|s| s.cur_offset_from_start())
.unwrap_or(0)
}
}
}
/// An iterator over the srclocs in each section.
/// Returns MachSrcLocs in an order sorted by start location.
pub struct MachSectionsSrcLocs<'a> {
sections: &'a [MachSection],
cur_section: usize,
cur_srcloc: usize,
// For validation:
last_offset: CodeOffset,
}
impl<'a> MachSectionsSrcLocs<'a> {
fn new(sections: &'a [MachSection]) -> MachSectionsSrcLocs<'a> {
MachSectionsSrcLocs {
sections,
cur_section: 0,
cur_srcloc: 0,
last_offset: 0,
}
}
}
impl<'a> Iterator for MachSectionsSrcLocs<'a> {
type Item = &'a MachSrcLoc;
fn next(&mut self) -> Option<&'a MachSrcLoc> {
// We simply iterate through sections and srcloc records in order. This produces a
// sorted order naturally because sections are in starting-offset-order, and srclocs
// are produced as a section is emitted into, so are in order as well.
// If we're out of sections, we're done.
if self.cur_section >= self.sections.len() {
return None;
}
// Otherwise, make sure we have a srcloc in the current section left to return, and
// advance to the next section if not. Done if we run out of sections.
while self.cur_srcloc >= self.sections[self.cur_section].srclocs.len() {
self.cur_srcloc = 0;
self.cur_section += 1;
if self.cur_section >= self.sections.len() {
return None;
}
}
let loc = &self.sections[self.cur_section].srclocs[self.cur_srcloc];
self.cur_srcloc += 1;
debug_assert!(loc.start >= self.last_offset);
self.last_offset = loc.start;
Some(loc)
}
}
/// An abstraction over MachSection and MachSectionSize: some
/// receiver of section data.
pub trait MachSectionOutput {
/// Get the current offset from the start of all sections.
fn cur_offset_from_start(&self) -> CodeOffset;
/// Get the start offset of this section.
fn start_offset(&self) -> CodeOffset;
/// Add 1 byte to the section.
fn put1(&mut self, _: u8);
/// Add 2 bytes to the section.
fn put2(&mut self, value: u16) {
let [b0, b1] = value.to_le_bytes();
self.put1(b0);
self.put1(b1);
}
/// Add 4 bytes to the section.
fn put4(&mut self, value: u32) {
let [b0, b1, b2, b3] = value.to_le_bytes();
self.put1(b0);
self.put1(b1);
self.put1(b2);
self.put1(b3);
}
/// Add 8 bytes to the section.
fn put8(&mut self, value: u64) {
let [b0, b1, b2, b3, b4, b5, b6, b7] = value.to_le_bytes();
self.put1(b0);
self.put1(b1);
self.put1(b2);
self.put1(b3);
self.put1(b4);
self.put1(b5);
self.put1(b6);
self.put1(b7);
}
/// Add a slice of bytes to the section.
fn put_data(&mut self, data: &[u8]);
/// Add a relocation at the current offset.
fn add_reloc(&mut self, loc: SourceLoc, kind: Reloc, name: &ExternalName, addend: Addend);
/// Add a trap record at the current offset.
fn add_trap(&mut self, loc: SourceLoc, code: TrapCode);
/// Add a call return address record at the current offset.
fn add_call_site(&mut self, loc: SourceLoc, opcode: Opcode);
/// Start the output for the given source-location at the current offset.
fn start_srcloc(&mut self, loc: SourceLoc);
/// End the output for the previously-given source-location at the current offset.
fn end_srcloc(&mut self);
/// Align up to the given alignment.
fn align_to(&mut self, align_to: CodeOffset) {
assert!(align_to.is_power_of_two());
while self.cur_offset_from_start() & (align_to - 1) != 0 {
self.put1(0);
}
}
}
/// A section of output to be emitted to a CodeSink / RelocSink in bulk.
/// Multiple sections may be created with known start offsets in advance; the
/// usual use-case is to create the .text (code) and .rodata (constant pool) at
/// once, after computing the length of the code, so that constant references
/// can use known offsets as instructions are emitted.
pub struct MachSection {
/// The starting offset of this section.
pub start_offset: CodeOffset,
/// The limit of this section, defined by the start of the next section.
pub length_limit: CodeOffset,
/// The section contents, as raw bytes.
pub data: Vec<u8>,
/// Any relocations referring to this section.
pub relocs: Vec<MachReloc>,
/// Any trap records referring to this section.
pub traps: Vec<MachTrap>,
/// Any call site records referring to this section.
pub call_sites: Vec<MachCallSite>,
/// Any source location mappings referring to this section.
pub srclocs: Vec<MachSrcLoc>,
/// The current source location in progress (after `start_srcloc()` and before `end_srcloc()`).
/// This is a (start_offset, src_loc) tuple.
pub cur_srcloc: Option<(CodeOffset, SourceLoc)>,
}
impl MachSection {
/// Create a new section, known to start at `start_offset` and with a size limited to `length_limit`.
pub fn new(start_offset: CodeOffset, length_limit: CodeOffset) -> MachSection {
MachSection {
start_offset,
length_limit,
data: vec![],
relocs: vec![],
traps: vec![],
call_sites: vec![],
srclocs: vec![],
cur_srcloc: None,
}
}
/// Emit this section to the CodeSink and other associated sinks. The
/// current offset of the CodeSink must match the starting offset of this
/// section.
pub fn emit<CS: CodeSink>(&self, sink: &mut CS) {
assert!(sink.offset() == self.start_offset);
let mut next_reloc = 0;
let mut next_trap = 0;
let mut next_call_site = 0;
for (idx, byte) in self.data.iter().enumerate() {
if next_reloc < self.relocs.len() {
let reloc = &self.relocs[next_reloc];
if reloc.offset == idx as CodeOffset {
sink.reloc_external(reloc.srcloc, reloc.kind, &reloc.name, reloc.addend);
next_reloc += 1;
}
}
if next_trap < self.traps.len() {
let trap = &self.traps[next_trap];
if trap.offset == idx as CodeOffset {
sink.trap(trap.code, trap.srcloc);
next_trap += 1;
}
}
if next_call_site < self.call_sites.len() {
let call_site = &self.call_sites[next_call_site];
if call_site.ret_addr == idx as CodeOffset {
sink.add_call_site(call_site.opcode, call_site.srcloc);
next_call_site += 1;
}
}
sink.put1(*byte);
}
}
}
impl MachSectionOutput for MachSection {
fn cur_offset_from_start(&self) -> CodeOffset {
self.start_offset + self.data.len() as CodeOffset
}
fn start_offset(&self) -> CodeOffset {
self.start_offset
}
fn put1(&mut self, value: u8) {
assert!(((self.data.len() + 1) as CodeOffset) <= self.length_limit);
self.data.push(value);
}
fn put_data(&mut self, data: &[u8]) {
assert!(((self.data.len() + data.len()) as CodeOffset) <= self.length_limit);
self.data.extend_from_slice(data);
}
fn add_reloc(&mut self, srcloc: SourceLoc, kind: Reloc, name: &ExternalName, addend: Addend) {
let name = name.clone();
self.relocs.push(MachReloc {
offset: self.data.len() as CodeOffset,
srcloc,
kind,
name,
addend,
});
}
fn add_trap(&mut self, srcloc: SourceLoc, code: TrapCode) {
self.traps.push(MachTrap {
offset: self.data.len() as CodeOffset,
srcloc,
code,
});
}
fn add_call_site(&mut self, srcloc: SourceLoc, opcode: Opcode) {
self.call_sites.push(MachCallSite {
ret_addr: self.data.len() as CodeOffset,
srcloc,
opcode,
});
}
fn start_srcloc(&mut self, loc: SourceLoc) {
self.cur_srcloc = Some((self.cur_offset_from_start(), loc));
}
fn end_srcloc(&mut self) {
let (start, loc) = self
.cur_srcloc
.take()
.expect("end_srcloc() called without start_srcloc()");
let end = self.cur_offset_from_start();
// Skip zero-length extends.
debug_assert!(end >= start);
if end > start {
self.srclocs.push(MachSrcLoc { start, end, loc });
}
}
}
/// A MachSectionOutput implementation that records only size.
pub struct MachSectionSize {
/// The starting offset of this section.
pub start_offset: CodeOffset,
/// The current offset of this section.
pub offset: CodeOffset,
}
impl MachSectionSize {
/// Create a new size-counting dummy section.
pub fn new(start_offset: CodeOffset) -> MachSectionSize {
MachSectionSize {
start_offset,
offset: start_offset,
}
}
/// Return the size this section would take if emitted with a real sink.
pub fn size(&self) -> CodeOffset {
self.offset - self.start_offset
}
}
impl MachSectionOutput for MachSectionSize {
fn cur_offset_from_start(&self) -> CodeOffset {
// All size-counting sections conceptually start at offset 0; this doesn't
// matter when counting code size.
self.offset
}
fn start_offset(&self) -> CodeOffset {
self.start_offset
}
fn put1(&mut self, _: u8) {
self.offset += 1;
}
fn put_data(&mut self, data: &[u8]) {
self.offset += data.len() as CodeOffset;
}
fn add_reloc(&mut self, _: SourceLoc, _: Reloc, _: &ExternalName, _: Addend) {}
fn add_trap(&mut self, _: SourceLoc, _: TrapCode) {}
fn add_call_site(&mut self, _: SourceLoc, _: Opcode) {}
fn start_srcloc(&mut self, _: SourceLoc) {}
fn end_srcloc(&mut self) {}
}
/// A relocation resulting from a compilation.
pub struct MachReloc {
/// The offset at which the relocation applies, *relative to the
/// containing section*.
pub offset: CodeOffset,
/// The original source location.
pub srcloc: SourceLoc,
/// The kind of relocation.
pub kind: Reloc,
/// The external symbol / name to which this relocation refers.
pub name: ExternalName,
/// The addend to add to the symbol value.
pub addend: i64,
}
/// A trap record resulting from a compilation.
pub struct MachTrap {
/// The offset at which the trap instruction occurs, *relative to the
/// containing section*.
pub offset: CodeOffset,
/// The original source location.
pub srcloc: SourceLoc,
/// The trap code.
pub code: TrapCode,
}
/// A call site record resulting from a compilation.
pub struct MachCallSite {
/// The offset of the call's return address, *relative to the containing section*.
pub ret_addr: CodeOffset,
/// The original source location.
pub srcloc: SourceLoc,
/// The call's opcode.
pub opcode: Opcode,
}
/// A source-location mapping resulting from a compilation.
#[derive(Clone, Debug)]
pub struct MachSrcLoc {
/// The start of the region of code corresponding to a source location.
/// This is relative to the start of the function, not to the start of the
/// section.
pub start: CodeOffset,
/// The end of the region of code corresponding to a source location.
/// This is relative to the start of the function, not to the start of the
/// section.
pub end: CodeOffset,
/// The source location.
pub loc: SourceLoc,
}

View File

@@ -17,8 +17,7 @@
//! See the main module comment in `mod.rs` for more details on the VCode-based
//! backend pipeline.
use crate::entity::SecondaryMap;
use crate::ir::{self, Block, SourceLoc};
use crate::ir::{self, SourceLoc};
use crate::machinst::*;
use crate::settings;
@@ -30,8 +29,6 @@ use regalloc::{
use alloc::boxed::Box;
use alloc::{borrow::Cow, vec::Vec};
use log::debug;
use smallvec::SmallVec;
use std::fmt;
use std::iter;
use std::string::String;
@@ -43,8 +40,8 @@ pub type BlockIndex = u32;
/// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
/// a `MachInst` and it must implement `MachInstEmit` so it can emit itself into
/// a `MachBuffer`.
pub trait VCodeInst: MachInst + MachInstEmit<MachSection> + MachInstEmit<MachSectionSize> {}
impl<I: MachInst + MachInstEmit<MachSection> + MachInstEmit<MachSectionSize>> VCodeInst for I {}
pub trait VCodeInst: MachInst + MachInstEmit {}
impl<I: MachInst + MachInstEmit> VCodeInst for I {}
/// A function in "VCode" (virtualized-register code) form, after lowering.
/// This is essentially a standard CFG of basic blocks, where each basic block
@@ -80,29 +77,11 @@ pub struct VCode<I: VCodeInst> {
/// correspond to each basic block's successors.
block_succs: Vec<BlockIx>,
/// Block indices by IR block.
block_by_bb: SecondaryMap<ir::Block, BlockIndex>,
/// IR block for each VCode Block. The length of this Vec will likely be
/// less than the total number of Blocks, because new Blocks (for edge
/// splits, for example) are appended during lowering.
bb_by_block: Vec<ir::Block>,
/// Order of block IDs in final generated code.
final_block_order: Vec<BlockIndex>,
/// Final block offsets. Computed during branch finalization and used
/// during emission.
final_block_offsets: Vec<CodeOffset>,
/// Size of code, accounting for block layout / alignment.
code_size: CodeOffset,
/// Block-order information.
block_order: BlockLoweringOrder,
/// ABI object.
abi: Box<dyn ABIBody<I = I>>,
/// The block targeted by fallthrough_returns, if there's one.
pub fallthrough_return_block: Option<BlockIndex>,
}
/// A builder for a VCode function body. This builder is designed for the
@@ -123,12 +102,8 @@ pub struct VCodeBuilder<I: VCodeInst> {
/// In-progress VCode.
vcode: VCode<I>,
/// Current basic block instructions, in reverse order (because blocks are
/// built bottom-to-top).
bb_insns: SmallVec<[(I, SourceLoc); 32]>,
/// Current IR-inst instructions, in forward order.
ir_inst_insns: SmallVec<[(I, SourceLoc); 4]>,
/// Index of the last block-start in the vcode.
block_start: InsnIndex,
/// Start of succs for the current block in the concatenated succs list.
succ_start: usize,
@@ -139,12 +114,11 @@ pub struct VCodeBuilder<I: VCodeInst> {
impl<I: VCodeInst> VCodeBuilder<I> {
/// Create a new VCodeBuilder.
pub fn new(abi: Box<dyn ABIBody<I = I>>) -> VCodeBuilder<I> {
let vcode = VCode::new(abi);
pub fn new(abi: Box<dyn ABIBody<I = I>>, block_order: BlockLoweringOrder) -> VCodeBuilder<I> {
let vcode = VCode::new(abi, block_order);
VCodeBuilder {
vcode,
bb_insns: SmallVec::new(),
ir_inst_insns: SmallVec::new(),
block_start: 0,
succ_start: 0,
cur_srcloc: SourceLoc::default(),
}
@@ -155,14 +129,9 @@ impl<I: VCodeInst> VCodeBuilder<I> {
&mut *self.vcode.abi
}
/// Set the fallthrough_return target block for this function. This must be set at most once
/// per function.
pub fn set_fallthrough_return_block(&mut self, bb: Block) {
debug_assert!(
self.vcode.fallthrough_return_block.is_none(),
"a function must have at most one fallthrough-return instruction"
);
self.vcode.fallthrough_return_block = Some(self.bb_to_bindex(bb));
/// Access to the BlockLoweringOrder object.
pub fn block_order(&self) -> &BlockLoweringOrder {
&self.vcode.block_order
}
/// Set the type of a VReg.
@@ -173,53 +142,17 @@ impl<I: VCodeInst> VCodeBuilder<I> {
self.vcode.vreg_types[vreg.get_index()] = ty;
}
/// Return the underlying bb-to-BlockIndex map.
pub fn blocks_by_bb(&self) -> &SecondaryMap<ir::Block, BlockIndex> {
&self.vcode.block_by_bb
}
/// Initialize the bb-to-BlockIndex map. Returns the first free
/// BlockIndex.
pub fn init_bb_map(&mut self, blocks: &[ir::Block]) -> BlockIndex {
let mut bindex: BlockIndex = 0;
for bb in blocks.iter() {
self.vcode.block_by_bb[*bb] = bindex;
self.vcode.bb_by_block.push(*bb);
bindex += 1;
}
bindex
}
/// Get the BlockIndex for an IR block.
pub fn bb_to_bindex(&self, bb: ir::Block) -> BlockIndex {
self.vcode.block_by_bb[bb]
}
/// Set the current block as the entry block.
pub fn set_entry(&mut self, block: BlockIndex) {
self.vcode.entry = block;
}
/// End the current IR instruction. Must be called after pushing any
/// instructions and prior to ending the basic block.
pub fn end_ir_inst(&mut self) {
while let Some(pair) = self.ir_inst_insns.pop() {
self.bb_insns.push(pair);
}
}
/// End the current basic block. Must be called after emitting vcode insts
/// for IR insts and prior to ending the function (building the VCode).
pub fn end_bb(&mut self) -> BlockIndex {
assert!(self.ir_inst_insns.is_empty());
let block_num = self.vcode.block_ranges.len() as BlockIndex;
// Push the instructions.
let start_idx = self.vcode.insts.len() as InsnIndex;
while let Some((i, loc)) = self.bb_insns.pop() {
self.vcode.insts.push(i);
self.vcode.srclocs.push(loc);
}
pub fn end_bb(&mut self) {
let start_idx = self.block_start;
let end_idx = self.vcode.insts.len() as InsnIndex;
self.block_start = end_idx;
// Add the instruction index range to the list of blocks.
self.vcode.block_ranges.push((start_idx, end_idx));
// End the successors list.
@@ -228,8 +161,6 @@ impl<I: VCodeInst> VCodeBuilder<I> {
.block_succ_range
.push((self.succ_start, succ_end));
self.succ_start = succ_end;
block_num
}
/// Push an instruction for the current BB and current IR inst within the BB.
@@ -237,19 +168,27 @@ impl<I: VCodeInst> VCodeBuilder<I> {
match insn.is_term() {
MachTerminator::None | MachTerminator::Ret => {}
MachTerminator::Uncond(target) => {
self.vcode.block_succs.push(BlockIx::new(target));
self.vcode.block_succs.push(BlockIx::new(target.get()));
}
MachTerminator::Cond(true_branch, false_branch) => {
self.vcode.block_succs.push(BlockIx::new(true_branch));
self.vcode.block_succs.push(BlockIx::new(false_branch));
self.vcode.block_succs.push(BlockIx::new(true_branch.get()));
self.vcode
.block_succs
.push(BlockIx::new(false_branch.get()));
}
MachTerminator::Indirect(targets) => {
for target in targets {
self.vcode.block_succs.push(BlockIx::new(*target));
self.vcode.block_succs.push(BlockIx::new(target.get()));
}
}
}
self.ir_inst_insns.push((insn, self.cur_srcloc));
self.vcode.insts.push(insn);
self.vcode.srclocs.push(self.cur_srcloc);
}
/// Get the current source location.
pub fn get_srcloc(&self) -> SourceLoc {
self.cur_srcloc
}
/// Set the current source location.
@@ -259,8 +198,6 @@ impl<I: VCodeInst> VCodeBuilder<I> {
/// Build the final VCode.
pub fn build(self) -> VCode<I> {
assert!(self.ir_inst_insns.is_empty());
assert!(self.bb_insns.is_empty());
self.vcode
}
}
@@ -282,35 +219,9 @@ fn is_redundant_move<I: VCodeInst>(insn: &I) -> bool {
}
}
fn is_trivial_jump_block<I: VCodeInst>(vcode: &VCode<I>, block: BlockIndex) -> Option<BlockIndex> {
let range = vcode.block_insns(BlockIx::new(block));
debug!(
"is_trivial_jump_block: block {} has len {}",
block,
range.len()
);
if range.len() != 1 {
return None;
}
let insn = range.first();
debug!(
" -> only insn is: {:?} with terminator {:?}",
vcode.get_insn(insn),
vcode.get_insn(insn).is_term()
);
match vcode.get_insn(insn).is_term() {
MachTerminator::Uncond(target) => Some(target),
_ => None,
}
}
impl<I: VCodeInst> VCode<I> {
/// New empty VCode.
fn new(abi: Box<dyn ABIBody<I = I>>) -> VCode<I> {
fn new(abi: Box<dyn ABIBody<I = I>>, block_order: BlockLoweringOrder) -> VCode<I> {
VCode {
liveins: abi.liveins(),
liveouts: abi.liveouts(),
@@ -321,13 +232,8 @@ impl<I: VCodeInst> VCode<I> {
block_ranges: vec![],
block_succ_range: vec![],
block_succs: vec![],
block_by_bb: SecondaryMap::with_default(0),
bb_by_block: vec![],
final_block_order: vec![],
final_block_offsets: vec![],
code_size: 0,
block_order,
abi,
fallthrough_return_block: None,
}
}
@@ -367,8 +273,6 @@ impl<I: VCodeInst> VCode<I> {
/// instructions including spliced fill/reload/move instructions, and replace
/// the VCode with them.
pub fn replace_insns_from_regalloc(&mut self, result: RegAllocResult<Self>) {
self.final_block_order = compute_final_block_order(self);
// Record the spillslot count and clobbered registers for the ABI/stack
// setup code.
self.abi.set_num_spillslots(result.num_spill_slots as usize);
@@ -383,11 +287,12 @@ impl<I: VCodeInst> VCode<I> {
let mut final_block_ranges = vec![(0, 0); self.num_blocks()];
let mut final_srclocs = vec![];
for block in &self.final_block_order {
let (start, end) = block_ranges[*block as usize];
for block in 0..self.num_blocks() {
let block = block as BlockIndex;
let (start, end) = block_ranges[block as usize];
let final_start = final_insns.len() as InsnIndex;
if *block == self.entry {
if block == self.entry {
// Start with the prologue.
let prologue = self.abi.gen_prologue();
let len = prologue.len();
@@ -429,7 +334,7 @@ impl<I: VCodeInst> VCode<I> {
}
let final_end = final_insns.len() as InsnIndex;
final_block_ranges[*block as usize] = (final_start, final_end);
final_block_ranges[block as usize] = (final_start, final_end);
}
debug_assert!(final_insns.len() == final_srclocs.len());
@@ -439,175 +344,68 @@ impl<I: VCodeInst> VCode<I> {
self.block_ranges = final_block_ranges;
}
/// Removes redundant branches, rewriting targets to point directly to the
/// ultimate block at the end of a chain of trivial one-target jumps.
pub fn remove_redundant_branches(&mut self) {
// For each block, compute the actual target block, looking through up to one
// block with single-target jumps (this will remove empty edge blocks inserted
// by phi-lowering).
let block_rewrites: Vec<BlockIndex> = (0..self.num_blocks() as u32)
.map(|bix| is_trivial_jump_block(self, bix).unwrap_or(bix))
.collect();
let mut refcounts: Vec<usize> = vec![0; self.num_blocks()];
debug!(
"remove_redundant_branches: block_rewrites = {:?}",
block_rewrites
);
refcounts[self.entry as usize] = 1;
for block in 0..self.num_blocks() as u32 {
for insn in self.block_insns(BlockIx::new(block)) {
self.get_insn_mut(insn)
.with_block_rewrites(&block_rewrites[..]);
match self.get_insn(insn).is_term() {
MachTerminator::Uncond(bix) => {
refcounts[bix as usize] += 1;
}
MachTerminator::Cond(bix1, bix2) => {
refcounts[bix1 as usize] += 1;
refcounts[bix2 as usize] += 1;
}
MachTerminator::Indirect(blocks) => {
for block in blocks {
refcounts[*block as usize] += 1;
}
}
_ => {}
}
}
}
let deleted: Vec<bool> = refcounts.iter().map(|r| *r == 0).collect();
let block_order = std::mem::replace(&mut self.final_block_order, vec![]);
self.final_block_order = block_order
.into_iter()
.filter(|b| !deleted[*b as usize])
.collect();
// Rewrite successor information based on the block-rewrite map.
for succ in &mut self.block_succs {
let new_succ = block_rewrites[succ.get() as usize];
*succ = BlockIx::new(new_succ);
}
}
/// Mutate branch instructions to (i) lower two-way condbrs to one-way,
/// depending on fallthrough; and (ii) use concrete offsets.
pub fn finalize_branches(&mut self)
/// Emit the instructions to a `MachBuffer`, containing fixed-up code and external
/// reloc/trap/etc. records ready for use.
pub fn emit(&self) -> MachBuffer<I>
where
I: MachInstEmit<MachSectionSize>,
I: MachInstEmit,
{
// Compute fallthrough block, indexed by block.
let num_final_blocks = self.final_block_order.len();
let mut block_fallthrough: Vec<Option<BlockIndex>> = vec![None; self.num_blocks()];
for i in 0..(num_final_blocks - 1) {
let from = self.final_block_order[i];
let to = self.final_block_order[i + 1];
block_fallthrough[from as usize] = Some(to);
}
// Pass over VCode instructions and finalize two-way branches into
// one-way branches with fallthrough.
for block in 0..self.num_blocks() {
let next_block = block_fallthrough[block];
let (start, end) = self.block_ranges[block];
for iix in start..end {
let insn = &mut self.insts[iix as usize];
insn.with_fallthrough_block(next_block);
}
}
let flags = self.abi.flags();
// Compute block offsets.
let mut code_section = MachSectionSize::new(0);
let mut block_offsets = vec![0; self.num_blocks()];
let mut buffer = MachBuffer::new();
let mut state = Default::default();
for &block in &self.final_block_order {
code_section.offset = I::align_basic_block(code_section.offset);
block_offsets[block as usize] = code_section.offset;
let (start, end) = self.block_ranges[block as usize];
for iix in start..end {
self.insts[iix as usize].emit(&mut code_section, flags, &mut state);
}
}
// We now have the section layout.
self.final_block_offsets = block_offsets;
self.code_size = code_section.size();
// Update branches with known block offsets. This looks like the
// traversal above, but (i) does not update block_offsets, rather uses
// it (so forward references are now possible), and (ii) mutates the
// instructions.
let mut code_section = MachSectionSize::new(0);
let mut state = Default::default();
for &block in &self.final_block_order {
code_section.offset = I::align_basic_block(code_section.offset);
let (start, end) = self.block_ranges[block as usize];
for iix in start..end {
self.insts[iix as usize]
.with_block_offsets(code_section.offset, &self.final_block_offsets[..]);
self.insts[iix as usize].emit(&mut code_section, flags, &mut state);
}
}
}
/// Emit the instructions to a list of sections.
pub fn emit(&self) -> MachSections
where
I: MachInstEmit<MachSection>,
{
let mut sections = MachSections::new();
let code_idx = sections.add_section(0, self.code_size);
let code_section = sections.get_section(code_idx);
let mut state = Default::default();
buffer.reserve_labels_for_blocks(self.num_blocks() as BlockIndex); // first N MachLabels are simply block indices.
let flags = self.abi.flags();
let mut cur_srcloc = None;
for &block in &self.final_block_order {
let new_offset = I::align_basic_block(code_section.cur_offset_from_start());
while new_offset > code_section.cur_offset_from_start() {
for block in 0..self.num_blocks() {
let block = block as BlockIndex;
let new_offset = I::align_basic_block(buffer.cur_offset());
while new_offset > buffer.cur_offset() {
// Pad with NOPs up to the aligned block offset.
let nop = I::gen_nop((new_offset - code_section.cur_offset_from_start()) as usize);
nop.emit(code_section, flags, &mut Default::default());
let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);
nop.emit(&mut buffer, flags, &mut Default::default());
}
assert_eq!(code_section.cur_offset_from_start(), new_offset);
assert_eq!(buffer.cur_offset(), new_offset);
let (start, end) = self.block_ranges[block as usize];
buffer.bind_label(MachLabel::from_block(block));
for iix in start..end {
let srcloc = self.srclocs[iix as usize];
if cur_srcloc != Some(srcloc) {
if cur_srcloc.is_some() {
code_section.end_srcloc();
buffer.end_srcloc();
}
code_section.start_srcloc(srcloc);
buffer.start_srcloc(srcloc);
cur_srcloc = Some(srcloc);
}
self.insts[iix as usize].emit(code_section, flags, &mut state);
self.insts[iix as usize].emit(&mut buffer, flags, &mut state);
}
if cur_srcloc.is_some() {
code_section.end_srcloc();
buffer.end_srcloc();
cur_srcloc = None;
}
// Do we need an island? Get the worst-case size of the next BB and see if, having
// emitted that many bytes, we will be beyond the deadline.
if block < (self.num_blocks() - 1) as BlockIndex {
let next_block = block + 1;
let next_block_range = self.block_ranges[next_block as usize];
let next_block_size = next_block_range.1 - next_block_range.0;
let worst_case_next_bb = I::worst_case_size() * next_block_size;
if buffer.island_needed(worst_case_next_bb) {
buffer.emit_island();
}
}
}
sections
buffer
}
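For intuition, the island check at the end of the block loop boils down to simple deadline arithmetic. A self-contained sketch with invented numbers (it mirrors the logic conceptually and does not touch the real `MachBuffer` internals):
// Hypothetical numbers only. Suppose the nearest pending fixup is a branch
// emitted at offset 0 with a +1 MiB positive range, so it must be resolved by
// offset 1_048_576. If we are at offset 1_046_000 and the next block could add
// up to 16 * 200 = 3_200 bytes in the worst case, we might overshoot that
// deadline, so an island (veneers / constants) is emitted before the block.
let deadline: u32 = 1_048_576;
let cur_offset: u32 = 1_046_000;
let worst_case_next_bb: u32 = 16 * 200;
let island_needed = cur_offset + worst_case_next_bb > deadline; // true here
assert!(island_needed);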
/// Get the IR block for a BlockIndex, if one exists.
pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
if (block as usize) < self.bb_by_block.len() {
Some(self.bb_by_block[block as usize])
} else {
None
}
self.block_order.lowered_order()[block as usize].orig_block()
}
}
@@ -712,7 +510,6 @@ impl<I: VCodeInst> fmt::Debug for VCode<I> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
writeln!(f, "VCode_Debug {{")?;
writeln!(f, " Entry block: {}", self.entry)?;
writeln!(f, " Final block order: {:?}", self.final_block_order)?;
for block in 0..self.num_blocks() {
writeln!(f, "Block {}:", block,)?;
@@ -736,52 +533,21 @@ impl<I: VCodeInst + ShowWithRRU> ShowWithRRU for VCode<I> {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
use std::fmt::Write;
// Calculate an order in which to display the blocks. This is the same
// as final_block_order, but also includes blocks which are in the
// representation but not in final_block_order.
let mut display_order = Vec::<usize>::new();
// First display blocks in `final_block_order`
for bix in &self.final_block_order {
assert!((*bix as usize) < self.num_blocks());
display_order.push(*bix as usize);
}
// Now also take care of those not listed in `final_block_order`.
// This is quadratic, but it's also debug-only code.
for bix in 0..self.num_blocks() {
if display_order.contains(&bix) {
continue;
}
display_order.push(bix);
}
let mut s = String::new();
write!(&mut s, "VCode_ShowWithRRU {{{{\n").unwrap();
write!(&mut s, " Entry block: {}\n", self.entry).unwrap();
write!(
&mut s,
" Final block order: {:?}\n",
self.final_block_order
)
.unwrap();
for i in 0..self.num_blocks() {
let block = display_order[i];
let block = i as BlockIndex;
let omitted = if !self.final_block_order.is_empty() && i >= self.final_block_order.len()
{
"** OMITTED **"
} else {
""
};
write!(&mut s, "Block {}: {}\n", block, omitted).unwrap();
if let Some(bb) = self.bindex_to_bb(block as BlockIndex) {
write!(&mut s, "Block {}:\n", block).unwrap();
if let Some(bb) = self.bindex_to_bb(block) {
write!(&mut s, " (original IR block: {})\n", bb).unwrap();
}
for succ in self.succs(block as BlockIndex) {
for succ in self.succs(block) {
write!(&mut s, " (successor: Block {})\n", succ.get()).unwrap();
}
let (start, end) = self.block_ranges[block];
let (start, end) = self.block_ranges[block as usize];
write!(&mut s, " (instruction range: {} .. {})\n", start, end).unwrap();
for inst in start..end {
write!(

View File

@@ -1,52 +0,0 @@
//! A pass that computes the number of uses of any given instruction.
use crate::entity::SecondaryMap;
use crate::ir::dfg::ValueDef;
use crate::ir::Value;
use crate::ir::{DataFlowGraph, Function, Inst};
/// Auxiliary data structure that counts the number of uses of any given
/// instruction in a Function. This is used during instruction selection
/// to essentially do incremental DCE: when an instruction is no longer
/// needed because its computation has been isel'd into another machine
/// instruction at every use site, we can skip it.
#[derive(Clone, Debug)]
pub struct NumUses {
uses: SecondaryMap<Inst, u32>,
}
impl NumUses {
fn new() -> NumUses {
NumUses {
uses: SecondaryMap::with_default(0),
}
}
/// Compute the NumUses analysis result for a function.
pub fn compute(func: &Function) -> NumUses {
let mut uses = NumUses::new();
for bb in func.layout.blocks() {
for inst in func.layout.block_insts(bb) {
for arg in func.dfg.inst_args(inst) {
let v = func.dfg.resolve_aliases(*arg);
uses.add_value(&func.dfg, v);
}
}
}
uses
}
fn add_value(&mut self, dfg: &DataFlowGraph, v: Value) {
match dfg.value_def(v) {
ValueDef::Result(inst, _) => {
self.uses[inst] += 1;
}
_ => {}
}
}
/// Take the complete uses map, consuming this analysis result.
pub fn take_uses(self) -> SecondaryMap<Inst, u32> {
self.uses
}
}

View File

@@ -1,7 +1,7 @@
test vcode
target aarch64
function %f(i64, i64) -> i64 {
function %f1(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iadd.i64 v0, v1
return v2
@@ -15,7 +15,7 @@ block0(v0: i64, v1: i64):
; nextln: ret
function %f(i64, i64) -> i64 {
function %f2(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = isub.i64 v0, v1
return v2
@@ -28,7 +28,7 @@ block0(v0: i64, v1: i64):
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f(i64, i64) -> i64 {
function %f3(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = imul.i64 v0, v1
return v2
@@ -41,7 +41,7 @@ block0(v0: i64, v1: i64):
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f(i64, i64) -> i64 {
function %f4(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = umulhi.i64 v0, v1
return v2
@@ -54,7 +54,7 @@ block0(v0: i64, v1: i64):
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f(i64, i64) -> i64 {
function %f5(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = smulhi.i64 v0, v1
return v2
@@ -67,7 +67,7 @@ block0(v0: i64, v1: i64):
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f(i64, i64) -> i64 {
function %f6(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = sdiv.i64 v0, v1
return v2
@@ -87,7 +87,7 @@ block0(v0: i64, v1: i64):
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f(i64) -> i64 {
function %f7(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 2
v2 = sdiv.i64 v0, v1
@@ -109,7 +109,7 @@ block0(v0: i64):
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f(i64, i64) -> i64 {
function %f8(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = udiv.i64 v0, v1
return v2
@@ -124,7 +124,7 @@ block0(v0: i64, v1: i64):
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f(i64) -> i64 {
function %f9(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 2
v2 = udiv.i64 v0, v1
@@ -141,7 +141,7 @@ block0(v0: i64):
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f(i64, i64) -> i64 {
function %f10(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = srem.i64 v0, v1
return v2
@@ -157,7 +157,7 @@ block0(v0: i64, v1: i64):
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f(i64, i64) -> i64 {
function %f11(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = urem.i64 v0, v1
return v2
@@ -174,7 +174,7 @@ block0(v0: i64, v1: i64):
; nextln: ret
function %f(i32, i32) -> i32 {
function %f12(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = sdiv.i32 v0, v1
return v2
@@ -195,48 +195,48 @@ block0(v0: i32, v1: i32):
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f(i32) -> i32 {
function %f13(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 2
v2 = sdiv.i32 v0, v1
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov x1, x0
; nextln: movz x0, #2
; nextln: sxtw x1, w1
; nextln: sxtw x2, w0
; nextln: sdiv x0, x1, x2
; nextln: cbz x2, 20
; nextln: adds wzr, w2, #1
; nextln: ccmp w1, #1, #nzcv, eq
; nextln: b.vc 12
; nextln: udf
; nextln: udf
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxtw x1, w0
; nextln: movz x0, #2
; nextln: sxtw x2, w0
; nextln: sdiv x0, x1, x2
; nextln: cbz x2, 20
; nextln: adds wzr, w2, #1
; nextln: ccmp w1, #1, #nzcv, eq
; nextln: b.vc 12
; nextln: udf
; nextln: udf
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f(i32, i32) -> i32 {
function %f14(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = udiv.i32 v0, v1
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov w0, w0
; nextln: mov w1, w1
; nextln: udiv x0, x0, x1
; nextln: cbnz x1, 8
; nextln: udf
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov w0, w0
; nextln: mov w1, w1
; nextln: udiv x0, x0, x1
; nextln: cbnz x1, 8
; nextln: udf
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f(i32) -> i32 {
function %f15(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 2
v2 = udiv.i32 v0, v1
@@ -245,9 +245,8 @@ block0(v0: i32):
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x1, #2
; nextln: mov w0, w0
; nextln: mov w1, w1
; nextln: movz x1, #2
; nextln: udiv x0, x0, x1
; nextln: cbnz x1, 8
; nextln: udf
@@ -255,7 +254,7 @@ block0(v0: i32):
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f(i32, i32) -> i32 {
function %f16(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = srem.i32 v0, v1
return v2
@@ -273,7 +272,7 @@ block0(v0: i32, v1: i32):
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f(i32, i32) -> i32 {
function %f17(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = urem.i32 v0, v1
return v2
@@ -291,7 +290,7 @@ block0(v0: i32, v1: i32):
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f(i64, i64) -> i64 {
function %f18(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = band.i64 v0, v1
return v2
@@ -304,7 +303,7 @@ block0(v0: i64, v1: i64):
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f(i64, i64) -> i64 {
function %f19(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bor.i64 v0, v1
return v2
@@ -317,7 +316,7 @@ block0(v0: i64, v1: i64):
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f(i64, i64) -> i64 {
function %f20(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bxor.i64 v0, v1
return v2
@@ -330,7 +329,7 @@ block0(v0: i64, v1: i64):
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f(i64, i64) -> i64 {
function %f21(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = band_not.i64 v0, v1
return v2
@@ -343,7 +342,7 @@ block0(v0: i64, v1: i64):
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f(i64, i64) -> i64 {
function %f22(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bor_not.i64 v0, v1
return v2
@@ -356,7 +355,7 @@ block0(v0: i64, v1: i64):
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f(i64, i64) -> i64 {
function %f23(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bxor_not.i64 v0, v1
return v2
@@ -369,7 +368,7 @@ block0(v0: i64, v1: i64):
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f(i64, i64) -> i64 {
function %f24(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bnot.i64 v0
return v2

View File

@@ -30,17 +30,18 @@ block2:
return v5
}
; check: Block 0:
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x1
; nextln: b.eq 20
; check: Block 2:
; check: movz x0, #2
; nextln: b.eq label1 ; b label2
; check: Block 1:
; check: movz x0, #1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
; check: Block 1:
; check: movz x0, #1
; check: Block 2:
; check: movz x0, #2
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

View File

@@ -30,15 +30,15 @@ block5(v5: i64):
; check: subs wzr, w0, #3
; nextln: b.hs
; nextln: adr x2, pc+16 ; ldrsw x1, [x2, x0, LSL 2] ; add x2, x2, x1 ; br x2 ; jt_entries
; nextln: adr x1, pc+16 ; ldrsw x2, [x1, x0, LSL 2] ; add x1, x1, x2 ; br x1 ; jt_entries
; check: movz x1, #3
; check: movz x1, #1
; nextln: b
; check: movz x1, #2
; nextln: b
; check: movz x1, #1
; check: movz x1, #3
; check: add x0, x0, x1

View File

@@ -25,10 +25,10 @@ block0(v0: i8, v1: i8):
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxtb x0, w0
; nextln: uxtb x1, w1
; nextln: mov v0.d[0], x0
; nextln: mov v1.d[0], x1
; nextln: uxtb x2, w0
; nextln: uxtb x0, w1
; nextln: mov v0.d[0], x2
; nextln: mov v1.d[0], x0
; nextln: uqadd d0, d0, d1
; nextln: mov x0, v0.d[0]
; nextln: mov sp, fp

View File

@@ -366,15 +366,15 @@ block0(v0: i16):
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxth w0, w0
; nextln: lsr w1, w0, #6
; nextln: lsl w0, w0, #10
; nextln: orr w0, w0, w1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxth w1, w0
; nextln: lsr w0, w1, #6
; nextln: lsl w1, w1, #10
; nextln: orr w0, w1, w0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f24(i8) -> i8 {
block0(v0: i8):
@@ -385,10 +385,10 @@ block0(v0: i8):
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxtb w0, w0
; nextln: lsr w1, w0, #5
; nextln: lsl w0, w0, #3
; nextln: orr w0, w0, w1
; nextln: uxtb w1, w0
; nextln: lsr w0, w1, #5
; nextln: lsl w1, w1, #3
; nextln: orr w0, w1, w0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

View File

@@ -93,7 +93,7 @@ use crate::compilation::{
use crate::func_environ::{get_func_name, FuncEnvironment};
use crate::{CacheConfig, FunctionBodyData, ModuleLocal, ModuleTranslation, Tunables};
use cranelift_codegen::ir::{self, ExternalName};
use cranelift_codegen::machinst::sections::MachSrcLoc;
use cranelift_codegen::machinst::buffer::MachSrcLoc;
use cranelift_codegen::print_errors::pretty_error;
use cranelift_codegen::{binemit, isa, Context};
use cranelift_entity::PrimaryMap;
@@ -215,7 +215,7 @@ fn get_function_address_map<'data>(
if let Some(ref mcr) = &context.mach_compile_result {
// New-style backend: we have a `MachCompileResult` that will give us `MachSrcLoc` mapping
// tuples.
for &MachSrcLoc { start, end, loc } in mcr.sections.get_srclocs_sorted() {
for &MachSrcLoc { start, end, loc } in mcr.buffer.get_srclocs_sorted() {
instructions.push(InstructionAddressMap {
srcloc: loc,
code_offset: start as usize,