From 2115e70acb6278452e639047fb57d3655ff78dcb Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Fri, 26 Jun 2020 18:05:51 +0200 Subject: [PATCH] machinst x64: implement enough to support branch tables; --- cranelift/codegen/src/isa/aarch64/inst/mod.rs | 3 +- .../codegen/src/isa/aarch64/lower_inst.rs | 2 +- cranelift/codegen/src/isa/x64/inst/args.rs | 12 +++ cranelift/codegen/src/isa/x64/inst/emit.rs | 86 +++++++++++++++++++ .../codegen/src/isa/x64/inst/emit_tests.rs | 13 +++ cranelift/codegen/src/isa/x64/inst/mod.rs | 63 ++++++++++++-- cranelift/codegen/src/isa/x64/lower.rs | 82 +++++++++++++++++- 7 files changed, 251 insertions(+), 10 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 5c3086efce..33f0c1604c 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -875,8 +875,7 @@ pub enum Inst { data: u64, }, - /// Jump-table sequence, as one compound instruction (see note in lower.rs - /// for rationale). + /// Jump-table sequence, as one compound instruction (see note in lower_inst.rs for rationale). JTSequence { info: Box, ridx: Reg, diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 5dc6030e13..ba8210b875 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -2380,7 +2380,7 @@ pub(crate) fn lower_branch>( info: Box::new(JTSequenceInfo { targets: jt_targets, default_target, - targets_for_term: targets_for_term, + targets_for_term, }), }); } diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 1bcc18ee4f..a9580b216d 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -27,6 +27,10 @@ pub enum Amode { index: Reg, shift: u8, /* 0 .. 
3 only */ }, + + /// sign-extend-32-to-64(Immediate) + RIP (instruction pointer). + /// To wit: not supported in 32-bits mode. + RipRelative { target: BranchTarget }, } impl Amode { @@ -47,6 +51,10 @@ impl Amode { } } + pub(crate) fn rip_relative(target: BranchTarget) -> Self { + Self::RipRelative { target } + } + /// Add the regs mentioned by `self` to `collector`. pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) { match self { @@ -57,6 +65,9 @@ impl Amode { collector.add_use(*base); collector.add_use(*index); } + Amode::RipRelative { .. } => { + // RIP isn't involved in regalloc. + } } } } @@ -79,6 +90,7 @@ impl ShowWithRRU for Amode { index.show_rru(mb_rru), 1 << shift ), + Amode::RipRelative { ref target } => format!("{}(%rip)", target.show_rru(mb_rru)), } } } diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 1eb402b1c3..e2d3271af1 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -262,6 +262,36 @@ fn emit_std_enc_mem( panic!("ImmRegRegShift"); } } + + Amode::RipRelative { ref target } => { + // First, the REX byte, with REX.B = 0. + rex.emit_two_op(sink, enc_g, 0); + + // Now the opcode(s). These include any other prefixes the caller + // hands to us. + while num_opcodes > 0 { + num_opcodes -= 1; + sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8); + } + + // RIP-relative is mod=00, rm=101, followed by a signed 32-bit displacement. + sink.put1(encode_modrm(0, enc_g & 7, 0b101)); + + match *target { + BranchTarget::Label(label) => { + let offset = sink.cur_offset(); + sink.use_label_at_offset(offset, label, LabelUse::JmpRel32); + sink.put4(0); + } + BranchTarget::ResolvedOffset(offset) => { + assert!( + offset >= i32::min_value() as isize && offset <= i32::max_value() as isize, + "rip-relative displacement must fit in a signed 32-bit immediate" + ); + sink.put4(offset as u32); + } + } + } } } @@ -1185,6 +1215,62 @@ pub(crate) fn emit( } } + Inst::JmpTable { + idx, + tmp1, + tmp2, + ref targets, + .. 
+ } => { + // This sequence is *one* instruction in the vcode, and is expanded only here at + // emission time, because we cannot allow the regalloc to insert spills/reloads in + // the middle; we depend on hardcoded PC-rel addressing below. + + // Save index in a tmp (the live range of idx only goes to start of this + // sequence; tmp1 or tmp2 may overwrite it). + let inst = Inst::gen_move(*tmp2, *idx, I64); + inst.emit(sink, flags, state); + + // Load base address of jump table. + let start_of_jumptable = sink.get_label(); + let inst = Inst::lea( + Amode::rip_relative(BranchTarget::Label(start_of_jumptable)), + *tmp1, + ); + inst.emit(sink, flags, state); + + // Load (sign-extended) offset out of jump table; targets may precede the table. + let inst = Inst::movsx_rm_r( + ExtMode::LQ, + RegMem::mem(Amode::imm_reg_reg_shift(0, tmp1.to_reg(), tmp2.to_reg(), 2)), + *tmp2, + ); + inst.emit(sink, flags, state); + + // Add base of jump table to jump-table-sourced block offset. + let inst = Inst::alu_rmi_r( + true, /* is_64 */ + AluRmiROpcode::Add, + RegMemImm::reg(tmp2.to_reg()), + *tmp1, + ); + inst.emit(sink, flags, state); + + // Branch to computed address. + let inst = Inst::jmp_unknown(RegMem::reg(tmp1.to_reg())); + inst.emit(sink, flags, state); + + // Emit jump table (table of 32-bit offsets). 
+ sink.bind_label(start_of_jumptable); + let jt_off = sink.cur_offset(); + for &target in targets.iter() { + let word_off = sink.cur_offset(); + let off_into_table = word_off - jt_off; + sink.use_label_at_offset(word_off, target.as_label().unwrap(), LabelUse::PCRel32); + sink.put4(off_into_table); + } + } + Inst::XMM_Mov_RM_R { op, src: src_e, diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 614efaa1b7..cbdb80aae2 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -1529,6 +1529,19 @@ fn test_x64_emit() { "4F8D840AB3000000", "lea 179(%r10,%r9,1), %r8", )); + insns.push(( + Inst::lea(Amode::rip_relative(BranchTarget::ResolvedOffset(0)), w_rdi), + "488D3D00000000", + "lea (offset 0)(%rip), %rdi", + )); + insns.push(( + Inst::lea( + Amode::rip_relative(BranchTarget::ResolvedOffset(1337)), + w_r15, + ), + "4C8D3D39050000", + "lea (offset 1337)(%rip), %r15", + )); // ======================================================== // MovSX_RM_R diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 3791c573e0..2cc2574540 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -228,6 +228,15 @@ pub enum Inst { /// straight-line sequences in code to be emitted. OneWayJmpCond { cc: CC, dst: BranchTarget }, + /// Jump-table sequence, as one compound instruction (see note in lower.rs for rationale). + JmpTable { + idx: Reg, + tmp1: Writable, + tmp2: Writable, + targets: Vec, + targets_for_term: Vec, + }, + /// Indirect jump: jmpq (reg mem). JmpUnknown { target: RegMem }, @@ -726,6 +735,10 @@ impl ShowWithRRU for Inst { ljustify2("j".to_string(), cc.to_string()), dst.show_rru(mb_rru), ), + Inst::JmpTable { idx, .. 
} => { + format!("{} {}", ljustify("br_table".into()), idx.show_rru(mb_rru)) + } + // Inst::JmpUnknown { target } => format!( "{} *{}", ljustify("jmp".to_string()), @@ -858,6 +871,17 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { dest.get_regs_as_uses(collector); } + Inst::JmpTable { + ref idx, + ref tmp1, + ref tmp2, + .. + } => { + collector.add_use(*idx); + collector.add_def(*tmp1); + collector.add_def(*tmp2); + } + Inst::Ret | Inst::EpiloguePlaceholder | Inst::JmpKnown { .. } @@ -913,6 +937,9 @@ impl Amode { map_use(map, base); map_use(map, index); } + Amode::RipRelative { .. } => { + // RIP isn't involved in regalloc. + } } } } @@ -1077,6 +1104,17 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { dest.map_uses(mapper); } + Inst::JmpTable { + ref mut idx, + ref mut tmp1, + ref mut tmp2, + .. + } => { + map_use(mapper, idx); + map_def(mapper, tmp1); + map_def(mapper, tmp2); + } + Inst::Ret | Inst::EpiloguePlaceholder | Inst::JmpKnown { .. } @@ -1144,6 +1182,10 @@ impl MachInst for Inst { taken, not_taken, } => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()), + &Self::JmpTable { + ref targets_for_term, + .. + } => MachTerminator::Indirect(&targets_for_term[..]), // All other cases are boring. _ => MachTerminator::None, } @@ -1231,6 +1273,10 @@ pub enum LabelUse { /// location. Used for control flow instructions which consider an offset from the start of the /// next instruction (so the size of the payload -- 4 bytes -- is subtracted from the payload). JmpRel32, + + /// A 32-bit offset from location of relocation itself, added to the existing value at that + /// location. 
+ PCRel32, } impl MachInstLabelUse for LabelUse { @@ -1238,19 +1284,19 @@ impl MachInstLabelUse for LabelUse { fn max_pos_range(self) -> CodeOffset { match self { - LabelUse::JmpRel32 => 0x7fff_ffff, + LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x7fff_ffff, } } fn max_neg_range(self) -> CodeOffset { match self { - LabelUse::JmpRel32 => 0x8000_0000, + LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x8000_0000, } } fn patch_size(self) -> CodeOffset { match self { - LabelUse::JmpRel32 => 4, + LabelUse::JmpRel32 | LabelUse::PCRel32 => 4, } } @@ -1265,24 +1311,29 @@ impl MachInstLabelUse for LabelUse { let value = pc_rel.wrapping_add(addend).wrapping_sub(4); buffer.copy_from_slice(&value.to_le_bytes()[..]); } + LabelUse::PCRel32 => { + let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]); + let value = pc_rel.wrapping_add(addend); + buffer.copy_from_slice(&value.to_le_bytes()[..]); + } } } fn supports_veneer(self) -> bool { match self { - LabelUse::JmpRel32 => false, + LabelUse::JmpRel32 | LabelUse::PCRel32 => false, } } fn veneer_size(self) -> CodeOffset { match self { - LabelUse::JmpRel32 => 0, + LabelUse::JmpRel32 | LabelUse::PCRel32 => 0, } } fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) { match self { - LabelUse::JmpRel32 => { + LabelUse::JmpRel32 | LabelUse::PCRel32 => { panic!("Veneer not supported for JumpRel32 label-use."); } } diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index c29f0ebbdd..30bd611153 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -5,6 +5,8 @@ use log::trace; use regalloc::{Reg, RegClass, Writable}; use smallvec::SmallVec; + +use alloc::vec::Vec; use std::convert::TryFrom; use crate::ir::types; @@ -906,7 +908,85 @@ impl LowerBackend for X64Backend { Opcode::Jump | Opcode::Fallthrough => { ctx.emit(Inst::jmp_known(BranchTarget::Label(targets[0]))); } - _ => panic!("Unknown branch type!"), + + 
Opcode::BrTable => { + let jt_size = targets.len() - 1; + assert!(jt_size <= u32::max_value() as usize); + let jt_size = jt_size as u32; + + let idx_size = ctx.input_ty(branches[0], 0).bits(); + + // Zero-extend to 32-bits if needed. + // TODO consider factoring this out? + let idx = if idx_size < 32 { + let ext_mode = match idx_size { + 1 | 8 => ExtMode::BL, + 16 => ExtMode::WL, + _ => unreachable!(), + }; + let idx = input_to_reg_mem( + ctx, + InsnInput { + insn: branches[0], + input: 0, + }, + ); + let tmp_idx = ctx.alloc_tmp(RegClass::I64, I32); + ctx.emit(Inst::movzx_rm_r(ext_mode, idx, tmp_idx)); + tmp_idx.to_reg() + } else { + input_to_reg( + ctx, + InsnInput { + insn: branches[0], + input: 0, + }, + ) + }; + + // Bounds-check (compute flags from idx - jt_size) and branch to default. + ctx.emit(Inst::cmp_rmi_r(4, RegMemImm::imm(jt_size), idx)); + + let default_target = BranchTarget::Label(targets[0]); + ctx.emit(Inst::OneWayJmpCond { + dst: default_target, + cc: CC::NB, // unsigned >= + }); + + // Emit the compound instruction that does: + // + // lea $jt(%rip), %rA + // load32 [%rA, %rIndex, 4], %rB (32-bit table entry, widened to 64 bits) + // add %rB, %rA + // j *%rA + // [jt entries] + // + // This must be *one* instruction in the vcode because we cannot allow regalloc + // to insert any spills/fills in the middle of the sequence; otherwise, the + // lea PC-rel offset to the jumptable would be incorrect. (The alternative + // is to introduce a relocation pass for inlined jumptables, which is much + // worse.) + + let tmp1 = ctx.alloc_tmp(RegClass::I64, I32); + let tmp2 = ctx.alloc_tmp(RegClass::I64, I32); + + let jt_targets: Vec = targets + .iter() + .skip(1) + .map(|bix| BranchTarget::Label(*bix)) + .collect(); + + let targets_for_term: Vec = targets.to_vec(); + ctx.emit(Inst::JmpTable { + idx, + tmp1, + tmp2, + targets: jt_targets, + targets_for_term, + }); + } + + _ => panic!("Unknown branch type {:?}", op), } }