Reduce arm64 Inst enum size
This reduces the size of the Inst enum from 112 bytes to 48 bytes. Using DHAT on a regex-rs.wasm benchmark (`valgrind --tool=dhat clif-util compile --target aarch64`), the total number of allocated bytes drops by around 170 MB, and the bytes live at t-gmax (the point of peak heap usage) drop by 3 MB. Using `perf stat clif-util compile --target aarch64`, the instruction count drops by 0.6%, cache misses drop by 6%, and cycles drop by 2.3%.
This commit is contained in:
@@ -68,6 +68,7 @@ use crate::isa::aarch64::{self, inst::*};
|
||||
use crate::machinst::*;
|
||||
use crate::settings;
|
||||
|
||||
use alloc::boxed::Box;
|
||||
use alloc::vec::Vec;
|
||||
|
||||
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
|
||||
@@ -1275,9 +1276,9 @@ impl ABICall for AArch64ABICall {
|
||||
);
|
||||
match &self.dest {
|
||||
&CallDest::ExtName(ref name, RelocDistance::Near) => ctx.emit(Inst::Call {
|
||||
dest: name.clone(),
|
||||
uses,
|
||||
defs,
|
||||
dest: Box::new(name.clone()),
|
||||
uses: Box::new(uses),
|
||||
defs: Box::new(defs),
|
||||
loc: self.loc,
|
||||
opcode: self.opcode,
|
||||
}),
|
||||
@@ -1290,16 +1291,16 @@ impl ABICall for AArch64ABICall {
|
||||
});
|
||||
ctx.emit(Inst::CallInd {
|
||||
rn: spilltmp_reg(),
|
||||
uses,
|
||||
defs,
|
||||
uses: Box::new(uses),
|
||||
defs: Box::new(defs),
|
||||
loc: self.loc,
|
||||
opcode: self.opcode,
|
||||
});
|
||||
}
|
||||
&CallDest::Reg(reg) => ctx.emit(Inst::CallInd {
|
||||
rn: reg,
|
||||
uses,
|
||||
defs,
|
||||
uses: Box::new(uses),
|
||||
defs: Box::new(defs),
|
||||
loc: self.loc,
|
||||
opcode: self.opcode,
|
||||
}),
|
||||
|
||||
@@ -1303,7 +1303,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
// Emit jump table (table of 32-bit offsets).
|
||||
for target in targets {
|
||||
for target in targets.iter() {
|
||||
let off = target.as_offset_words() * 4;
|
||||
let off = i32::try_from(off).unwrap();
|
||||
// cast i32 to u32 (two's-complement)
|
||||
|
||||
@@ -3,6 +3,7 @@ use crate::isa::aarch64::inst::*;
|
||||
use crate::isa::test_utils;
|
||||
use crate::settings;
|
||||
|
||||
use alloc::boxed::Box;
|
||||
use alloc::vec::Vec;
|
||||
|
||||
#[test]
|
||||
@@ -2112,9 +2113,9 @@ fn test_aarch64_binemit() {
|
||||
|
||||
insns.push((
|
||||
Inst::Call {
|
||||
dest: ExternalName::testcase("test0"),
|
||||
uses: Set::empty(),
|
||||
defs: Set::empty(),
|
||||
dest: Box::new(ExternalName::testcase("test0")),
|
||||
uses: Box::new(Set::empty()),
|
||||
defs: Box::new(Set::empty()),
|
||||
loc: SourceLoc::default(),
|
||||
opcode: Opcode::Call,
|
||||
},
|
||||
@@ -2125,8 +2126,8 @@ fn test_aarch64_binemit() {
|
||||
insns.push((
|
||||
Inst::CallInd {
|
||||
rn: xreg(10),
|
||||
uses: Set::empty(),
|
||||
defs: Set::empty(),
|
||||
uses: Box::new(Set::empty()),
|
||||
defs: Box::new(Set::empty()),
|
||||
loc: SourceLoc::default(),
|
||||
opcode: Opcode::CallIndirect,
|
||||
},
|
||||
|
||||
@@ -12,6 +12,7 @@ use crate::{settings, CodegenError, CodegenResult};
|
||||
use regalloc::{RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
|
||||
use regalloc::{RegUsageCollector, RegUsageMapper, Set};
|
||||
|
||||
use alloc::boxed::Box;
|
||||
use alloc::vec::Vec;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::string::{String, ToString};
|
||||
@@ -616,17 +617,17 @@ pub enum Inst {
|
||||
/// code should use a `LoadExtName` / `CallInd` sequence instead, allowing an arbitrary 64-bit
|
||||
/// target.
|
||||
Call {
|
||||
dest: ExternalName,
|
||||
uses: Set<Reg>,
|
||||
defs: Set<Writable<Reg>>,
|
||||
dest: Box<ExternalName>,
|
||||
uses: Box<Set<Reg>>,
|
||||
defs: Box<Set<Writable<Reg>>>,
|
||||
loc: SourceLoc,
|
||||
opcode: Opcode,
|
||||
},
|
||||
/// A machine indirect-call instruction.
|
||||
CallInd {
|
||||
rn: Reg,
|
||||
uses: Set<Reg>,
|
||||
defs: Set<Writable<Reg>>,
|
||||
uses: Box<Set<Reg>>,
|
||||
defs: Box<Set<Writable<Reg>>>,
|
||||
loc: SourceLoc,
|
||||
opcode: Opcode,
|
||||
},
|
||||
@@ -704,8 +705,8 @@ pub enum Inst {
|
||||
/// Jump-table sequence, as one compound instruction (see note in lower.rs
|
||||
/// for rationale).
|
||||
JTSequence {
|
||||
targets: Vec<BranchTarget>,
|
||||
targets_for_term: Vec<BlockIndex>, // needed for MachTerminator.
|
||||
targets: Box<[BranchTarget]>,
|
||||
targets_for_term: Box<[BlockIndex]>, // needed for MachTerminator.
|
||||
ridx: Reg,
|
||||
rtmp1: Writable<Reg>,
|
||||
rtmp2: Writable<Reg>,
|
||||
@@ -760,6 +761,13 @@ fn count_zero_half_words(mut value: u64) -> usize {
|
||||
count
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn inst_size_test() {
|
||||
// This test helps catch unintentional growth in the size
|
||||
// of the Inst enum; 48 bytes is the currently expected size.
|
||||
assert_eq!(48, std::mem::size_of::<Inst>());
|
||||
}
|
||||
|
||||
impl Inst {
|
||||
/// Create a move instruction.
|
||||
pub fn mov(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
|
||||
@@ -1090,8 +1098,8 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
&Inst::Call {
|
||||
ref uses, ref defs, ..
|
||||
} => {
|
||||
collector.add_uses(uses);
|
||||
collector.add_defs(defs);
|
||||
collector.add_uses(&*uses);
|
||||
collector.add_defs(&*defs);
|
||||
}
|
||||
&Inst::CallInd {
|
||||
ref uses,
|
||||
@@ -1099,8 +1107,8 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
rn,
|
||||
..
|
||||
} => {
|
||||
collector.add_uses(uses);
|
||||
collector.add_defs(defs);
|
||||
collector.add_uses(&*uses);
|
||||
collector.add_defs(&*defs);
|
||||
collector.add_use(rn);
|
||||
}
|
||||
&Inst::CondBr { ref kind, .. }
|
||||
@@ -1643,8 +1651,8 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) {
|
||||
map_def(mapper, &mut r);
|
||||
r
|
||||
});
|
||||
*uses = new_uses;
|
||||
*defs = new_defs;
|
||||
*uses = Box::new(new_uses);
|
||||
*defs = Box::new(new_defs);
|
||||
}
|
||||
&mut Inst::Ret | &mut Inst::EpiloguePlaceholder => {}
|
||||
&mut Inst::CallInd {
|
||||
@@ -1664,8 +1672,8 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) {
|
||||
map_def(mapper, &mut r);
|
||||
r
|
||||
});
|
||||
*uses = new_uses;
|
||||
*defs = new_defs;
|
||||
*uses = Box::new(new_uses);
|
||||
*defs = Box::new(new_defs);
|
||||
map_use(mapper, rn);
|
||||
}
|
||||
&mut Inst::CondBr { ref mut kind, .. } => {
|
||||
@@ -1895,7 +1903,7 @@ impl MachInst for Inst {
|
||||
&mut Inst::JTSequence {
|
||||
targets: ref mut t, ..
|
||||
} => {
|
||||
for target in t {
|
||||
for target in t.iter_mut() {
|
||||
// offset+20: jumptable is 20 bytes into compound sequence.
|
||||
target.lower(targets, my_offset + 20);
|
||||
}
|
||||
|
||||
@@ -2132,8 +2132,8 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
||||
ridx,
|
||||
rtmp1,
|
||||
rtmp2,
|
||||
targets: jt_targets,
|
||||
targets_for_term,
|
||||
targets: jt_targets.into_boxed_slice(),
|
||||
targets_for_term: targets_for_term.into_boxed_slice(),
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user