machinst x64: implement enough to support branch tables;

This commit is contained in:
Benjamin Bouvier
2020-06-26 18:05:51 +02:00
parent f86ecdcb86
commit 2115e70acb
7 changed files with 251 additions and 10 deletions

View File

@@ -875,8 +875,7 @@ pub enum Inst {
data: u64,
},
/// Jump-table sequence, as one compound instruction (see note in lower.rs
/// for rationale).
/// Jump-table sequence, as one compound instruction (see note in lower_inst.rs for rationale).
JTSequence {
info: Box<JTSequenceInfo>,
ridx: Reg,

View File

@@ -2380,7 +2380,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
info: Box::new(JTSequenceInfo {
targets: jt_targets,
default_target,
targets_for_term: targets_for_term,
targets_for_term,
}),
});
}

View File

@@ -27,6 +27,10 @@ pub enum Amode {
index: Reg,
shift: u8, /* 0 .. 3 only */
},
/// sign-extend-32-to-64(Immediate) + RIP (instruction pointer).
/// To wit: not supported in 32-bits mode.
RipRelative { target: BranchTarget },
}
impl Amode {
@@ -47,6 +51,10 @@ impl Amode {
}
}
/// Shorthand constructor for a RIP-relative addressing mode:
/// sign-extend-32-to-64(offset to `target`) + RIP.
/// Not available in 32-bit mode.
pub(crate) fn rip_relative(target: BranchTarget) -> Self {
Self::RipRelative { target }
}
/// Add the regs mentioned by `self` to `collector`.
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
match self {
@@ -57,6 +65,9 @@ impl Amode {
collector.add_use(*base);
collector.add_use(*index);
}
Amode::RipRelative { .. } => {
// RIP isn't involved in regalloc.
}
}
}
}
@@ -79,6 +90,7 @@ impl ShowWithRRU for Amode {
index.show_rru(mb_rru),
1 << shift
),
Amode::RipRelative { ref target } => format!("{}(%rip)", target.show_rru(mb_rru)),
}
}
}

View File

@@ -262,6 +262,36 @@ fn emit_std_enc_mem(
panic!("ImmRegRegShift");
}
}
Amode::RipRelative { ref target } => {
// First, the REX byte, with REX.B = 0.
rex.emit_two_op(sink, enc_g, 0);
// Now the opcode(s). These include any other prefixes the caller
// hands to us.
while num_opcodes > 0 {
num_opcodes -= 1;
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
}
// RIP-relative is mod=00, rm=101.
sink.put1(encode_modrm(0, enc_g & 7, 0b101));
match *target {
BranchTarget::Label(label) => {
let offset = sink.cur_offset();
sink.use_label_at_offset(offset, label, LabelUse::JmpRel32);
sink.put4(0);
}
BranchTarget::ResolvedOffset(offset) => {
assert!(
offset <= u32::max_value() as isize,
"rip-relative can't hold >= U32_MAX values"
);
sink.put4(offset as u32);
}
}
}
}
}
@@ -1185,6 +1215,62 @@ pub(crate) fn emit(
}
}
Inst::JmpTable {
idx,
tmp1,
tmp2,
ref targets,
..
} => {
// This sequence is *one* instruction in the vcode, and is expanded only here at
// emission time, because we cannot allow the regalloc to insert spills/reloads in
// the middle; we depend on hardcoded PC-rel addressing below.
// Save index in a tmp (the live range of ridx only goes to start of this
// sequence; rtmp1 or rtmp2 may overwrite it).
let inst = Inst::gen_move(*tmp2, *idx, I64);
inst.emit(sink, flags, state);
// Load base address of jump table.
let start_of_jumptable = sink.get_label();
let inst = Inst::lea(
Amode::rip_relative(BranchTarget::Label(start_of_jumptable)),
*tmp1,
);
inst.emit(sink, flags, state);
// Load value out of jump table.
let inst = Inst::movzx_rm_r(
ExtMode::LQ,
RegMem::mem(Amode::imm_reg_reg_shift(0, tmp1.to_reg(), tmp2.to_reg(), 2)),
*tmp2,
);
inst.emit(sink, flags, state);
// Add base of jump table to jump-table-sourced block offset.
let inst = Inst::alu_rmi_r(
true, /* is_64 */
AluRmiROpcode::Add,
RegMemImm::reg(tmp2.to_reg()),
*tmp1,
);
inst.emit(sink, flags, state);
// Branch to computed address.
let inst = Inst::jmp_unknown(RegMem::reg(tmp1.to_reg()));
inst.emit(sink, flags, state);
// Emit jump table (table of 32-bit offsets).
sink.bind_label(start_of_jumptable);
let jt_off = sink.cur_offset();
for &target in targets.iter() {
let word_off = sink.cur_offset();
let off_into_table = word_off - jt_off;
sink.use_label_at_offset(word_off, target.as_label().unwrap(), LabelUse::PCRel32);
sink.put4(off_into_table);
}
}
Inst::XMM_Mov_RM_R {
op,
src: src_e,

View File

@@ -1529,6 +1529,19 @@ fn test_x64_emit() {
"4F8D840AB3000000",
"lea 179(%r10,%r9,1), %r8",
));
insns.push((
Inst::lea(Amode::rip_relative(BranchTarget::ResolvedOffset(0)), w_rdi),
"488D3D00000000",
"lea (offset 0)(%rip), %rdi",
));
insns.push((
Inst::lea(
Amode::rip_relative(BranchTarget::ResolvedOffset(1337)),
w_r15,
),
"4C8D3D39050000",
"lea (offset 1337)(%rip), %r15",
));
// ========================================================
// MovSX_RM_R

View File

@@ -228,6 +228,15 @@ pub enum Inst {
/// straight-line sequences in code to be emitted.
OneWayJmpCond { cc: CC, dst: BranchTarget },
/// Jump-table sequence, as one compound instruction (see note in lower.rs for rationale).
JmpTable {
idx: Reg,
tmp1: Writable<Reg>,
tmp2: Writable<Reg>,
targets: Vec<BranchTarget>,
targets_for_term: Vec<MachLabel>,
},
/// Indirect jump: jmpq (reg mem).
JmpUnknown { target: RegMem },
@@ -726,6 +735,10 @@ impl ShowWithRRU for Inst {
ljustify2("j".to_string(), cc.to_string()),
dst.show_rru(mb_rru),
),
Inst::JmpTable { idx, .. } => {
format!("{} {}", ljustify("br_table".into()), idx.show_rru(mb_rru))
}
//
Inst::JmpUnknown { target } => format!(
"{} *{}",
ljustify("jmp".to_string()),
@@ -858,6 +871,17 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
dest.get_regs_as_uses(collector);
}
Inst::JmpTable {
ref idx,
ref tmp1,
ref tmp2,
..
} => {
collector.add_use(*idx);
collector.add_def(*tmp1);
collector.add_def(*tmp2);
}
Inst::Ret
| Inst::EpiloguePlaceholder
| Inst::JmpKnown { .. }
@@ -913,6 +937,9 @@ impl Amode {
map_use(map, base);
map_use(map, index);
}
Amode::RipRelative { .. } => {
// RIP isn't involved in regalloc.
}
}
}
}
@@ -1077,6 +1104,17 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
dest.map_uses(mapper);
}
Inst::JmpTable {
ref mut idx,
ref mut tmp1,
ref mut tmp2,
..
} => {
map_use(mapper, idx);
map_def(mapper, tmp1);
map_def(mapper, tmp2);
}
Inst::Ret
| Inst::EpiloguePlaceholder
| Inst::JmpKnown { .. }
@@ -1144,6 +1182,10 @@ impl MachInst for Inst {
taken,
not_taken,
} => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()),
&Self::JmpTable {
ref targets_for_term,
..
} => MachTerminator::Indirect(&targets_for_term[..]),
// All other cases are boring.
_ => MachTerminator::None,
}
@@ -1231,6 +1273,10 @@ pub enum LabelUse {
/// location. Used for control flow instructions which consider an offset from the start of the
/// next instruction (so the size of the payload -- 4 bytes -- is subtracted from the payload).
JmpRel32,
/// A 32-bit offset from location of relocation itself, added to the existing value at that
/// location.
PCRel32,
}
impl MachInstLabelUse for LabelUse {
@@ -1238,19 +1284,19 @@ impl MachInstLabelUse for LabelUse {
/// Maximum forward (positive) distance this label-use kind can express.
/// Both kinds patch a signed 32-bit offset, so the farthest forward
/// target is +2^31 - 1 bytes.
fn max_pos_range(self) -> CodeOffset {
    match self {
        // Defect fixed: the span contained both the old arm
        // (`JmpRel32` alone) and the new combined arm, making the second
        // pattern unreachable; keep only the combined arm.
        LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x7fff_ffff,
    }
}
/// Maximum backward (negative) distance this label-use kind can express.
/// A signed 32-bit offset reaches back -2^31 bytes.
fn max_neg_range(self) -> CodeOffset {
    match self {
        // Defect fixed: duplicate `JmpRel32` arm from the merged
        // before/after diff lines removed; only the combined arm remains.
        LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x8000_0000,
    }
}
/// Number of bytes rewritten when this label-use is patched.
/// Both kinds write a 4-byte little-endian offset.
fn patch_size(self) -> CodeOffset {
    match self {
        // Defect fixed: duplicate `JmpRel32` arm from the merged
        // before/after diff lines removed; only the combined arm remains.
        LabelUse::JmpRel32 | LabelUse::PCRel32 => 4,
    }
}
@@ -1265,24 +1311,29 @@ impl MachInstLabelUse for LabelUse {
let value = pc_rel.wrapping_add(addend).wrapping_sub(4);
buffer.copy_from_slice(&value.to_le_bytes()[..]);
}
LabelUse::PCRel32 => {
let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
let value = pc_rel.wrapping_add(addend);
buffer.copy_from_slice(&value.to_le_bytes()[..]);
}
}
}
/// Whether a veneer (island trampoline) can be emitted for this
/// label-use kind. Neither 32-bit form supports one: their +/-2 GiB
/// range already covers any text section this backend emits.
fn supports_veneer(self) -> bool {
    match self {
        // Defect fixed: duplicate `JmpRel32` arm from the merged
        // before/after diff lines removed; only the combined arm remains.
        LabelUse::JmpRel32 | LabelUse::PCRel32 => false,
    }
}
/// Size in bytes of a veneer for this label-use kind. Always zero,
/// consistent with `supports_veneer` returning false for both kinds.
fn veneer_size(self) -> CodeOffset {
    match self {
        // Defect fixed: duplicate `JmpRel32` arm from the merged
        // before/after diff lines removed; only the combined arm remains.
        LabelUse::JmpRel32 | LabelUse::PCRel32 => 0,
    }
}
fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
match self {
LabelUse::JmpRel32 => {
LabelUse::JmpRel32 | LabelUse::PCRel32 => {
panic!("Veneer not supported for JumpRel32 label-use.");
}
}

View File

@@ -5,6 +5,8 @@
use log::trace;
use regalloc::{Reg, RegClass, Writable};
use smallvec::SmallVec;
use alloc::vec::Vec;
use std::convert::TryFrom;
use crate::ir::types;
@@ -906,7 +908,85 @@ impl LowerBackend for X64Backend {
Opcode::Jump | Opcode::Fallthrough => {
ctx.emit(Inst::jmp_known(BranchTarget::Label(targets[0])));
}
_ => panic!("Unknown branch type!"),
Opcode::BrTable => {
let jt_size = targets.len() - 1;
assert!(jt_size <= u32::max_value() as usize);
let jt_size = jt_size as u32;
let idx_size = ctx.input_ty(branches[0], 0).bits();
// Zero-extend to 32-bits if needed.
// TODO consider factoring this out?
let idx = if idx_size < 32 {
let ext_mode = match idx_size {
1 | 8 => ExtMode::BL,
16 => ExtMode::WL,
_ => unreachable!(),
};
let idx = input_to_reg_mem(
ctx,
InsnInput {
insn: branches[0],
input: 0,
},
);
let tmp_idx = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::movzx_rm_r(ext_mode, idx, tmp_idx));
tmp_idx.to_reg()
} else {
input_to_reg(
ctx,
InsnInput {
insn: branches[0],
input: 0,
},
)
};
// Bounds-check (compute flags from idx - jt_size) and branch to default.
ctx.emit(Inst::cmp_rmi_r(4, RegMemImm::imm(jt_size), idx));
let default_target = BranchTarget::Label(targets[0]);
ctx.emit(Inst::OneWayJmpCond {
dst: default_target,
cc: CC::NB, // unsigned >=
});
// Emit the compound instruction that does:
//
// lea $jt, %rA
// movsbl [%rA, %rIndex, 2], %rB
// add %rB, %rA
// j *%rA
// [jt entries]
//
// This must be *one* instruction in the vcode because we cannot allow regalloc
// to insert any spills/fills in the middle of the sequence; otherwise, the
// lea PC-rel offset to the jumptable would be incorrect. (The alternative
// is to introduce a relocation pass for inlined jumptables, which is much
// worse.)
let tmp1 = ctx.alloc_tmp(RegClass::I64, I32);
let tmp2 = ctx.alloc_tmp(RegClass::I64, I32);
let jt_targets: Vec<BranchTarget> = targets
.iter()
.skip(1)
.map(|bix| BranchTarget::Label(*bix))
.collect();
let targets_for_term: Vec<MachLabel> = targets.to_vec();
ctx.emit(Inst::JmpTable {
idx,
tmp1,
tmp2,
targets: jt_targets,
targets_for_term,
});
}
_ => panic!("Unknown branch type {:?}", op),
}
}