Reduce arm64 Inst enum size

This reduces the size of the Inst enum from 112 bytes to 48 bytes.

Measured with DHAT on a regex-rs.wasm benchmark (`valgrind --tool=dhat clif-util compile --target aarch64`):

The total number of allocated bytes drops by around 170 MB.
Heap usage at t-gmax (the global heap maximum) drops by 3 MB.

Using `perf stat clif-util compile --target aarch64`, the instruction count dropped by 0.6%. Cache misses dropped by 6%. Cycles dropped by 2.3%.
This commit is contained in:
Joey Gouly
2020-05-14 15:45:55 +01:00
parent 5987cf5cda
commit f418b7a700
5 changed files with 41 additions and 31 deletions

View File

@@ -68,6 +68,7 @@ use crate::isa::aarch64::{self, inst::*};
use crate::machinst::*;
use crate::settings;
use alloc::boxed::Box;
use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
@@ -1275,9 +1276,9 @@ impl ABICall for AArch64ABICall {
);
match &self.dest {
&CallDest::ExtName(ref name, RelocDistance::Near) => ctx.emit(Inst::Call {
dest: name.clone(),
uses,
defs,
dest: Box::new(name.clone()),
uses: Box::new(uses),
defs: Box::new(defs),
loc: self.loc,
opcode: self.opcode,
}),
@@ -1290,16 +1291,16 @@ impl ABICall for AArch64ABICall {
});
ctx.emit(Inst::CallInd {
rn: spilltmp_reg(),
uses,
defs,
uses: Box::new(uses),
defs: Box::new(defs),
loc: self.loc,
opcode: self.opcode,
});
}
&CallDest::Reg(reg) => ctx.emit(Inst::CallInd {
rn: reg,
uses,
defs,
uses: Box::new(uses),
defs: Box::new(defs),
loc: self.loc,
opcode: self.opcode,
}),

View File

@@ -1303,7 +1303,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
};
inst.emit(sink, flags, state);
// Emit jump table (table of 32-bit offsets).
for target in targets {
for target in targets.iter() {
let off = target.as_offset_words() * 4;
let off = i32::try_from(off).unwrap();
// cast i32 to u32 (two's-complement)

View File

@@ -3,6 +3,7 @@ use crate::isa::aarch64::inst::*;
use crate::isa::test_utils;
use crate::settings;
use alloc::boxed::Box;
use alloc::vec::Vec;
#[test]
@@ -2112,9 +2113,9 @@ fn test_aarch64_binemit() {
insns.push((
Inst::Call {
dest: ExternalName::testcase("test0"),
uses: Set::empty(),
defs: Set::empty(),
dest: Box::new(ExternalName::testcase("test0")),
uses: Box::new(Set::empty()),
defs: Box::new(Set::empty()),
loc: SourceLoc::default(),
opcode: Opcode::Call,
},
@@ -2125,8 +2126,8 @@ fn test_aarch64_binemit() {
insns.push((
Inst::CallInd {
rn: xreg(10),
uses: Set::empty(),
defs: Set::empty(),
uses: Box::new(Set::empty()),
defs: Box::new(Set::empty()),
loc: SourceLoc::default(),
opcode: Opcode::CallIndirect,
},

View File

@@ -12,6 +12,7 @@ use crate::{settings, CodegenError, CodegenResult};
use regalloc::{RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
use regalloc::{RegUsageCollector, RegUsageMapper, Set};
use alloc::boxed::Box;
use alloc::vec::Vec;
use smallvec::{smallvec, SmallVec};
use std::string::{String, ToString};
@@ -616,17 +617,17 @@ pub enum Inst {
/// code should use a `LoadExtName` / `CallInd` sequence instead, allowing an arbitrary 64-bit
/// target.
Call {
dest: ExternalName,
uses: Set<Reg>,
defs: Set<Writable<Reg>>,
dest: Box<ExternalName>,
uses: Box<Set<Reg>>,
defs: Box<Set<Writable<Reg>>>,
loc: SourceLoc,
opcode: Opcode,
},
/// A machine indirect-call instruction.
CallInd {
rn: Reg,
uses: Set<Reg>,
defs: Set<Writable<Reg>>,
uses: Box<Set<Reg>>,
defs: Box<Set<Writable<Reg>>>,
loc: SourceLoc,
opcode: Opcode,
},
@@ -704,8 +705,8 @@ pub enum Inst {
/// Jump-table sequence, as one compound instruction (see note in lower.rs
/// for rationale).
JTSequence {
targets: Vec<BranchTarget>,
targets_for_term: Vec<BlockIndex>, // needed for MachTerminator.
targets: Box<[BranchTarget]>,
targets_for_term: Box<[BlockIndex]>, // needed for MachTerminator.
ridx: Reg,
rtmp1: Writable<Reg>,
rtmp2: Writable<Reg>,
@@ -760,6 +761,13 @@ fn count_zero_half_words(mut value: u64) -> usize {
count
}
#[test]
fn inst_size_test() {
    // Size regression guard: pins the in-memory size of the `Inst` enum so
    // that any change to it (in particular, accidental growth from adding a
    // large field to a variant) shows up as a test failure.
    let inst_bytes = std::mem::size_of::<Inst>();
    assert_eq!(48, inst_bytes);
}
impl Inst {
/// Create a move instruction.
pub fn mov(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
@@ -1090,8 +1098,8 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
&Inst::Call {
ref uses, ref defs, ..
} => {
collector.add_uses(uses);
collector.add_defs(defs);
collector.add_uses(&*uses);
collector.add_defs(&*defs);
}
&Inst::CallInd {
ref uses,
@@ -1099,8 +1107,8 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
rn,
..
} => {
collector.add_uses(uses);
collector.add_defs(defs);
collector.add_uses(&*uses);
collector.add_defs(&*defs);
collector.add_use(rn);
}
&Inst::CondBr { ref kind, .. }
@@ -1643,8 +1651,8 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) {
map_def(mapper, &mut r);
r
});
*uses = new_uses;
*defs = new_defs;
*uses = Box::new(new_uses);
*defs = Box::new(new_defs);
}
&mut Inst::Ret | &mut Inst::EpiloguePlaceholder => {}
&mut Inst::CallInd {
@@ -1664,8 +1672,8 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) {
map_def(mapper, &mut r);
r
});
*uses = new_uses;
*defs = new_defs;
*uses = Box::new(new_uses);
*defs = Box::new(new_defs);
map_use(mapper, rn);
}
&mut Inst::CondBr { ref mut kind, .. } => {
@@ -1895,7 +1903,7 @@ impl MachInst for Inst {
&mut Inst::JTSequence {
targets: ref mut t, ..
} => {
for target in t {
for target in t.iter_mut() {
// offset+20: jumptable is 20 bytes into compound sequence.
target.lower(targets, my_offset + 20);
}

View File

@@ -2132,8 +2132,8 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
ridx,
rtmp1,
rtmp2,
targets: jt_targets,
targets_for_term,
targets: jt_targets.into_boxed_slice(),
targets_for_term: targets_for_term.into_boxed_slice(),
});
}