Reduce arm64 Inst enum size
This reduces the size of the Inst enum from 112 bytes to 48 bytes. Using DHAT on a regex-rs.wasm benchmark (`valgrind --tool=dhat clif-util compile --target aarch64`), the total number of allocated bytes drops by around 170 MB, and the memory in use at t-gmax drops by 3 MB. Using `perf stat clif-util compile --target aarch64`, the instruction count dropped by 0.6%, cache misses dropped by 6%, and cycles dropped by 2.3%.
This commit is contained in:
@@ -68,6 +68,7 @@ use crate::isa::aarch64::{self, inst::*};
|
|||||||
use crate::machinst::*;
|
use crate::machinst::*;
|
||||||
use crate::settings;
|
use crate::settings;
|
||||||
|
|
||||||
|
use alloc::boxed::Box;
|
||||||
use alloc::vec::Vec;
|
use alloc::vec::Vec;
|
||||||
|
|
||||||
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
|
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
|
||||||
@@ -1275,9 +1276,9 @@ impl ABICall for AArch64ABICall {
|
|||||||
);
|
);
|
||||||
match &self.dest {
|
match &self.dest {
|
||||||
&CallDest::ExtName(ref name, RelocDistance::Near) => ctx.emit(Inst::Call {
|
&CallDest::ExtName(ref name, RelocDistance::Near) => ctx.emit(Inst::Call {
|
||||||
dest: name.clone(),
|
dest: Box::new(name.clone()),
|
||||||
uses,
|
uses: Box::new(uses),
|
||||||
defs,
|
defs: Box::new(defs),
|
||||||
loc: self.loc,
|
loc: self.loc,
|
||||||
opcode: self.opcode,
|
opcode: self.opcode,
|
||||||
}),
|
}),
|
||||||
@@ -1290,16 +1291,16 @@ impl ABICall for AArch64ABICall {
|
|||||||
});
|
});
|
||||||
ctx.emit(Inst::CallInd {
|
ctx.emit(Inst::CallInd {
|
||||||
rn: spilltmp_reg(),
|
rn: spilltmp_reg(),
|
||||||
uses,
|
uses: Box::new(uses),
|
||||||
defs,
|
defs: Box::new(defs),
|
||||||
loc: self.loc,
|
loc: self.loc,
|
||||||
opcode: self.opcode,
|
opcode: self.opcode,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
&CallDest::Reg(reg) => ctx.emit(Inst::CallInd {
|
&CallDest::Reg(reg) => ctx.emit(Inst::CallInd {
|
||||||
rn: reg,
|
rn: reg,
|
||||||
uses,
|
uses: Box::new(uses),
|
||||||
defs,
|
defs: Box::new(defs),
|
||||||
loc: self.loc,
|
loc: self.loc,
|
||||||
opcode: self.opcode,
|
opcode: self.opcode,
|
||||||
}),
|
}),
|
||||||
|
|||||||
@@ -1303,7 +1303,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||||||
};
|
};
|
||||||
inst.emit(sink, flags, state);
|
inst.emit(sink, flags, state);
|
||||||
// Emit jump table (table of 32-bit offsets).
|
// Emit jump table (table of 32-bit offsets).
|
||||||
for target in targets {
|
for target in targets.iter() {
|
||||||
let off = target.as_offset_words() * 4;
|
let off = target.as_offset_words() * 4;
|
||||||
let off = i32::try_from(off).unwrap();
|
let off = i32::try_from(off).unwrap();
|
||||||
// cast i32 to u32 (two's-complement)
|
// cast i32 to u32 (two's-complement)
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ use crate::isa::aarch64::inst::*;
|
|||||||
use crate::isa::test_utils;
|
use crate::isa::test_utils;
|
||||||
use crate::settings;
|
use crate::settings;
|
||||||
|
|
||||||
|
use alloc::boxed::Box;
|
||||||
use alloc::vec::Vec;
|
use alloc::vec::Vec;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -2112,9 +2113,9 @@ fn test_aarch64_binemit() {
|
|||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::Call {
|
Inst::Call {
|
||||||
dest: ExternalName::testcase("test0"),
|
dest: Box::new(ExternalName::testcase("test0")),
|
||||||
uses: Set::empty(),
|
uses: Box::new(Set::empty()),
|
||||||
defs: Set::empty(),
|
defs: Box::new(Set::empty()),
|
||||||
loc: SourceLoc::default(),
|
loc: SourceLoc::default(),
|
||||||
opcode: Opcode::Call,
|
opcode: Opcode::Call,
|
||||||
},
|
},
|
||||||
@@ -2125,8 +2126,8 @@ fn test_aarch64_binemit() {
|
|||||||
insns.push((
|
insns.push((
|
||||||
Inst::CallInd {
|
Inst::CallInd {
|
||||||
rn: xreg(10),
|
rn: xreg(10),
|
||||||
uses: Set::empty(),
|
uses: Box::new(Set::empty()),
|
||||||
defs: Set::empty(),
|
defs: Box::new(Set::empty()),
|
||||||
loc: SourceLoc::default(),
|
loc: SourceLoc::default(),
|
||||||
opcode: Opcode::CallIndirect,
|
opcode: Opcode::CallIndirect,
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ use crate::{settings, CodegenError, CodegenResult};
|
|||||||
use regalloc::{RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
|
use regalloc::{RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
|
||||||
use regalloc::{RegUsageCollector, RegUsageMapper, Set};
|
use regalloc::{RegUsageCollector, RegUsageMapper, Set};
|
||||||
|
|
||||||
|
use alloc::boxed::Box;
|
||||||
use alloc::vec::Vec;
|
use alloc::vec::Vec;
|
||||||
use smallvec::{smallvec, SmallVec};
|
use smallvec::{smallvec, SmallVec};
|
||||||
use std::string::{String, ToString};
|
use std::string::{String, ToString};
|
||||||
@@ -616,17 +617,17 @@ pub enum Inst {
|
|||||||
/// code should use a `LoadExtName` / `CallInd` sequence instead, allowing an arbitrary 64-bit
|
/// code should use a `LoadExtName` / `CallInd` sequence instead, allowing an arbitrary 64-bit
|
||||||
/// target.
|
/// target.
|
||||||
Call {
|
Call {
|
||||||
dest: ExternalName,
|
dest: Box<ExternalName>,
|
||||||
uses: Set<Reg>,
|
uses: Box<Set<Reg>>,
|
||||||
defs: Set<Writable<Reg>>,
|
defs: Box<Set<Writable<Reg>>>,
|
||||||
loc: SourceLoc,
|
loc: SourceLoc,
|
||||||
opcode: Opcode,
|
opcode: Opcode,
|
||||||
},
|
},
|
||||||
/// A machine indirect-call instruction.
|
/// A machine indirect-call instruction.
|
||||||
CallInd {
|
CallInd {
|
||||||
rn: Reg,
|
rn: Reg,
|
||||||
uses: Set<Reg>,
|
uses: Box<Set<Reg>>,
|
||||||
defs: Set<Writable<Reg>>,
|
defs: Box<Set<Writable<Reg>>>,
|
||||||
loc: SourceLoc,
|
loc: SourceLoc,
|
||||||
opcode: Opcode,
|
opcode: Opcode,
|
||||||
},
|
},
|
||||||
@@ -704,8 +705,8 @@ pub enum Inst {
|
|||||||
/// Jump-table sequence, as one compound instruction (see note in lower.rs
|
/// Jump-table sequence, as one compound instruction (see note in lower.rs
|
||||||
/// for rationale).
|
/// for rationale).
|
||||||
JTSequence {
|
JTSequence {
|
||||||
targets: Vec<BranchTarget>,
|
targets: Box<[BranchTarget]>,
|
||||||
targets_for_term: Vec<BlockIndex>, // needed for MachTerminator.
|
targets_for_term: Box<[BlockIndex]>, // needed for MachTerminator.
|
||||||
ridx: Reg,
|
ridx: Reg,
|
||||||
rtmp1: Writable<Reg>,
|
rtmp1: Writable<Reg>,
|
||||||
rtmp2: Writable<Reg>,
|
rtmp2: Writable<Reg>,
|
||||||
@@ -760,6 +761,13 @@ fn count_zero_half_words(mut value: u64) -> usize {
|
|||||||
count
|
count
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn inst_size_test() {
|
||||||
|
// This test will help with unintentionally growing the size
|
||||||
|
// of the Inst enum.
|
||||||
|
assert_eq!(48, std::mem::size_of::<Inst>());
|
||||||
|
}
|
||||||
|
|
||||||
impl Inst {
|
impl Inst {
|
||||||
/// Create a move instruction.
|
/// Create a move instruction.
|
||||||
pub fn mov(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
|
pub fn mov(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
|
||||||
@@ -1090,8 +1098,8 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
|||||||
&Inst::Call {
|
&Inst::Call {
|
||||||
ref uses, ref defs, ..
|
ref uses, ref defs, ..
|
||||||
} => {
|
} => {
|
||||||
collector.add_uses(uses);
|
collector.add_uses(&*uses);
|
||||||
collector.add_defs(defs);
|
collector.add_defs(&*defs);
|
||||||
}
|
}
|
||||||
&Inst::CallInd {
|
&Inst::CallInd {
|
||||||
ref uses,
|
ref uses,
|
||||||
@@ -1099,8 +1107,8 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
|||||||
rn,
|
rn,
|
||||||
..
|
..
|
||||||
} => {
|
} => {
|
||||||
collector.add_uses(uses);
|
collector.add_uses(&*uses);
|
||||||
collector.add_defs(defs);
|
collector.add_defs(&*defs);
|
||||||
collector.add_use(rn);
|
collector.add_use(rn);
|
||||||
}
|
}
|
||||||
&Inst::CondBr { ref kind, .. }
|
&Inst::CondBr { ref kind, .. }
|
||||||
@@ -1643,8 +1651,8 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) {
|
|||||||
map_def(mapper, &mut r);
|
map_def(mapper, &mut r);
|
||||||
r
|
r
|
||||||
});
|
});
|
||||||
*uses = new_uses;
|
*uses = Box::new(new_uses);
|
||||||
*defs = new_defs;
|
*defs = Box::new(new_defs);
|
||||||
}
|
}
|
||||||
&mut Inst::Ret | &mut Inst::EpiloguePlaceholder => {}
|
&mut Inst::Ret | &mut Inst::EpiloguePlaceholder => {}
|
||||||
&mut Inst::CallInd {
|
&mut Inst::CallInd {
|
||||||
@@ -1664,8 +1672,8 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) {
|
|||||||
map_def(mapper, &mut r);
|
map_def(mapper, &mut r);
|
||||||
r
|
r
|
||||||
});
|
});
|
||||||
*uses = new_uses;
|
*uses = Box::new(new_uses);
|
||||||
*defs = new_defs;
|
*defs = Box::new(new_defs);
|
||||||
map_use(mapper, rn);
|
map_use(mapper, rn);
|
||||||
}
|
}
|
||||||
&mut Inst::CondBr { ref mut kind, .. } => {
|
&mut Inst::CondBr { ref mut kind, .. } => {
|
||||||
@@ -1895,7 +1903,7 @@ impl MachInst for Inst {
|
|||||||
&mut Inst::JTSequence {
|
&mut Inst::JTSequence {
|
||||||
targets: ref mut t, ..
|
targets: ref mut t, ..
|
||||||
} => {
|
} => {
|
||||||
for target in t {
|
for target in t.iter_mut() {
|
||||||
// offset+20: jumptable is 20 bytes into compound sequence.
|
// offset+20: jumptable is 20 bytes into compound sequence.
|
||||||
target.lower(targets, my_offset + 20);
|
target.lower(targets, my_offset + 20);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2132,8 +2132,8 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||||||
ridx,
|
ridx,
|
||||||
rtmp1,
|
rtmp1,
|
||||||
rtmp2,
|
rtmp2,
|
||||||
targets: jt_targets,
|
targets: jt_targets.into_boxed_slice(),
|
||||||
targets_for_term,
|
targets_for_term: targets_for_term.into_boxed_slice(),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user