Reduce arm64 Inst enum size

This reduces the size of the Inst enum from 112 bytes to 48 bytes.

Measured with DHAT on a regex-rs.wasm benchmark (`valgrind --tool=dhat clif-util compile --target aarch64`):

The total number of allocated bytes drops by around 170 MB.
The number of bytes live at t-gmax drops by 3 MB.

Using `perf stat clif-util compile --target aarch64`, the instruction count dropped by 0.6%. Cache misses dropped by 6%. Cycles dropped by 2.3%.
This commit is contained in:
Joey Gouly
2020-05-14 15:45:55 +01:00
parent 5987cf5cda
commit f418b7a700
5 changed files with 41 additions and 31 deletions

View File

@@ -68,6 +68,7 @@ use crate::isa::aarch64::{self, inst::*};
use crate::machinst::*; use crate::machinst::*;
use crate::settings; use crate::settings;
use alloc::boxed::Box;
use alloc::vec::Vec; use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable}; use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
@@ -1275,9 +1276,9 @@ impl ABICall for AArch64ABICall {
); );
match &self.dest { match &self.dest {
&CallDest::ExtName(ref name, RelocDistance::Near) => ctx.emit(Inst::Call { &CallDest::ExtName(ref name, RelocDistance::Near) => ctx.emit(Inst::Call {
dest: name.clone(), dest: Box::new(name.clone()),
uses, uses: Box::new(uses),
defs, defs: Box::new(defs),
loc: self.loc, loc: self.loc,
opcode: self.opcode, opcode: self.opcode,
}), }),
@@ -1290,16 +1291,16 @@ impl ABICall for AArch64ABICall {
}); });
ctx.emit(Inst::CallInd { ctx.emit(Inst::CallInd {
rn: spilltmp_reg(), rn: spilltmp_reg(),
uses, uses: Box::new(uses),
defs, defs: Box::new(defs),
loc: self.loc, loc: self.loc,
opcode: self.opcode, opcode: self.opcode,
}); });
} }
&CallDest::Reg(reg) => ctx.emit(Inst::CallInd { &CallDest::Reg(reg) => ctx.emit(Inst::CallInd {
rn: reg, rn: reg,
uses, uses: Box::new(uses),
defs, defs: Box::new(defs),
loc: self.loc, loc: self.loc,
opcode: self.opcode, opcode: self.opcode,
}), }),

View File

@@ -1303,7 +1303,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
}; };
inst.emit(sink, flags, state); inst.emit(sink, flags, state);
// Emit jump table (table of 32-bit offsets). // Emit jump table (table of 32-bit offsets).
for target in targets { for target in targets.iter() {
let off = target.as_offset_words() * 4; let off = target.as_offset_words() * 4;
let off = i32::try_from(off).unwrap(); let off = i32::try_from(off).unwrap();
// cast i32 to u32 (two's-complement) // cast i32 to u32 (two's-complement)

View File

@@ -3,6 +3,7 @@ use crate::isa::aarch64::inst::*;
use crate::isa::test_utils; use crate::isa::test_utils;
use crate::settings; use crate::settings;
use alloc::boxed::Box;
use alloc::vec::Vec; use alloc::vec::Vec;
#[test] #[test]
@@ -2112,9 +2113,9 @@ fn test_aarch64_binemit() {
insns.push(( insns.push((
Inst::Call { Inst::Call {
dest: ExternalName::testcase("test0"), dest: Box::new(ExternalName::testcase("test0")),
uses: Set::empty(), uses: Box::new(Set::empty()),
defs: Set::empty(), defs: Box::new(Set::empty()),
loc: SourceLoc::default(), loc: SourceLoc::default(),
opcode: Opcode::Call, opcode: Opcode::Call,
}, },
@@ -2125,8 +2126,8 @@ fn test_aarch64_binemit() {
insns.push(( insns.push((
Inst::CallInd { Inst::CallInd {
rn: xreg(10), rn: xreg(10),
uses: Set::empty(), uses: Box::new(Set::empty()),
defs: Set::empty(), defs: Box::new(Set::empty()),
loc: SourceLoc::default(), loc: SourceLoc::default(),
opcode: Opcode::CallIndirect, opcode: Opcode::CallIndirect,
}, },

View File

@@ -12,6 +12,7 @@ use crate::{settings, CodegenError, CodegenResult};
use regalloc::{RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable}; use regalloc::{RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
use regalloc::{RegUsageCollector, RegUsageMapper, Set}; use regalloc::{RegUsageCollector, RegUsageMapper, Set};
use alloc::boxed::Box;
use alloc::vec::Vec; use alloc::vec::Vec;
use smallvec::{smallvec, SmallVec}; use smallvec::{smallvec, SmallVec};
use std::string::{String, ToString}; use std::string::{String, ToString};
@@ -616,17 +617,17 @@ pub enum Inst {
/// code should use a `LoadExtName` / `CallInd` sequence instead, allowing an arbitrary 64-bit /// code should use a `LoadExtName` / `CallInd` sequence instead, allowing an arbitrary 64-bit
/// target. /// target.
Call { Call {
dest: ExternalName, dest: Box<ExternalName>,
uses: Set<Reg>, uses: Box<Set<Reg>>,
defs: Set<Writable<Reg>>, defs: Box<Set<Writable<Reg>>>,
loc: SourceLoc, loc: SourceLoc,
opcode: Opcode, opcode: Opcode,
}, },
/// A machine indirect-call instruction. /// A machine indirect-call instruction.
CallInd { CallInd {
rn: Reg, rn: Reg,
uses: Set<Reg>, uses: Box<Set<Reg>>,
defs: Set<Writable<Reg>>, defs: Box<Set<Writable<Reg>>>,
loc: SourceLoc, loc: SourceLoc,
opcode: Opcode, opcode: Opcode,
}, },
@@ -704,8 +705,8 @@ pub enum Inst {
/// Jump-table sequence, as one compound instruction (see note in lower.rs /// Jump-table sequence, as one compound instruction (see note in lower.rs
/// for rationale). /// for rationale).
JTSequence { JTSequence {
targets: Vec<BranchTarget>, targets: Box<[BranchTarget]>,
targets_for_term: Vec<BlockIndex>, // needed for MachTerminator. targets_for_term: Box<[BlockIndex]>, // needed for MachTerminator.
ridx: Reg, ridx: Reg,
rtmp1: Writable<Reg>, rtmp1: Writable<Reg>,
rtmp2: Writable<Reg>, rtmp2: Writable<Reg>,
@@ -760,6 +761,13 @@ fn count_zero_half_words(mut value: u64) -> usize {
count count
} }
/// Size guard for the `Inst` enum.
///
/// Fails whenever `size_of::<Inst>()` is anything other than 48 bytes, so an
/// unintended change in the enum's size is noticed when the tests run.
#[test]
fn inst_size_test() {
    let expected_bytes = 48;
    let actual_bytes = std::mem::size_of::<Inst>();
    assert_eq!(expected_bytes, actual_bytes);
}
impl Inst { impl Inst {
/// Create a move instruction. /// Create a move instruction.
pub fn mov(to_reg: Writable<Reg>, from_reg: Reg) -> Inst { pub fn mov(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
@@ -1090,8 +1098,8 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
&Inst::Call { &Inst::Call {
ref uses, ref defs, .. ref uses, ref defs, ..
} => { } => {
collector.add_uses(uses); collector.add_uses(&*uses);
collector.add_defs(defs); collector.add_defs(&*defs);
} }
&Inst::CallInd { &Inst::CallInd {
ref uses, ref uses,
@@ -1099,8 +1107,8 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
rn, rn,
.. ..
} => { } => {
collector.add_uses(uses); collector.add_uses(&*uses);
collector.add_defs(defs); collector.add_defs(&*defs);
collector.add_use(rn); collector.add_use(rn);
} }
&Inst::CondBr { ref kind, .. } &Inst::CondBr { ref kind, .. }
@@ -1643,8 +1651,8 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) {
map_def(mapper, &mut r); map_def(mapper, &mut r);
r r
}); });
*uses = new_uses; *uses = Box::new(new_uses);
*defs = new_defs; *defs = Box::new(new_defs);
} }
&mut Inst::Ret | &mut Inst::EpiloguePlaceholder => {} &mut Inst::Ret | &mut Inst::EpiloguePlaceholder => {}
&mut Inst::CallInd { &mut Inst::CallInd {
@@ -1664,8 +1672,8 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) {
map_def(mapper, &mut r); map_def(mapper, &mut r);
r r
}); });
*uses = new_uses; *uses = Box::new(new_uses);
*defs = new_defs; *defs = Box::new(new_defs);
map_use(mapper, rn); map_use(mapper, rn);
} }
&mut Inst::CondBr { ref mut kind, .. } => { &mut Inst::CondBr { ref mut kind, .. } => {
@@ -1895,7 +1903,7 @@ impl MachInst for Inst {
&mut Inst::JTSequence { &mut Inst::JTSequence {
targets: ref mut t, .. targets: ref mut t, ..
} => { } => {
for target in t { for target in t.iter_mut() {
// offset+20: jumptable is 20 bytes into compound sequence. // offset+20: jumptable is 20 bytes into compound sequence.
target.lower(targets, my_offset + 20); target.lower(targets, my_offset + 20);
} }

View File

@@ -2132,8 +2132,8 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
ridx, ridx,
rtmp1, rtmp1,
rtmp2, rtmp2,
targets: jt_targets, targets: jt_targets.into_boxed_slice(),
targets_for_term, targets_for_term: targets_for_term.into_boxed_slice(),
}); });
} }