From 293bb5b3348bdb58ee3a9f23fa25c7075e77d046 Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Tue, 6 Dec 2022 10:54:10 -0800 Subject: [PATCH] riscv64: Only emit jumps at the end of basic blocks (#5381) This PR fixes two bugs in the riscv64 backend, where branch instructions were emitted in the middle of a basic block: Constant emission, where the constants are inlined into the vcode and are jumped over at runtime, The BrTableCheck pseudo-instruction, which was always emitted before a BrTable instruction, and would handle jumping to the default label. The first bug was resolved by introducing two new psuedo instructions, LoadConst32 and LoadConst64. Both of these instructions serve to delay the original encoding to emission time, after regalloc2 has run. The second bug was fixed by removing the BrTableCheck instruction. As it was always emitted directly before BrTable, it was easier to remove it and merge the two into a single instruction. --- cranelift/codegen/src/isa/riscv64/inst.isle | 14 ++-- .../codegen/src/isa/riscv64/inst/emit.rs | 38 +++++---- cranelift/codegen/src/isa/riscv64/inst/mod.rs | 52 +++++++----- .../codegen/src/isa/riscv64/lower/isle.rs | 16 ++-- .../filetests/isa/riscv64/constants.clif | 83 +++++-------------- 5 files changed, 88 insertions(+), 115 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index 439775bec8..a33575d0a0 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -10,6 +10,14 @@ (rd WritableReg) (imm Imm20)) + (LoadConst32 + (rd WritableReg) + (imm u32)) + + (LoadConst64 + (rd WritableReg) + (imm u64)) + (Auipc (rd WritableReg) (imm Imm20)) @@ -209,11 +217,7 @@ (rd WritableReg) (op ReferenceCheckOP) (x Reg)) - - (BrTableCheck - (index Reg) - (targets_len i32) - (default_ BranchTarget)) + (BrTable (index Reg) (tmp1 WritableReg) diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index b27916be2e..fb7295d1be 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -635,6 +635,20 @@ impl MachInstEmit for Inst { let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.as_u32() << 12); sink.put4(x); } + &Inst::LoadConst32 { rd, imm } => { + let rd = allocs.next_writable(rd); + LoadConstant::U32(imm) + .load_constant(rd, &mut |_| rd) + .into_iter() + .for_each(|inst| inst.emit(&[], sink, emit_info, state)); + } + &Inst::LoadConst64 { rd, imm } => { + let rd = allocs.next_writable(rd); + LoadConstant::U64(imm) + .load_constant(rd, &mut |_| rd) + .into_iter() + .for_each(|inst| inst.emit(&[], sink, emit_info, state)); + } &Inst::FpuRR { frm, alu_op, @@ -1109,14 +1123,15 @@ impl MachInstEmit for Inst { } } } - &Inst::BrTableCheck { + &Inst::BrTable { index, - targets_len, - default_, + tmp1, + ref targets, } => { let index = allocs.next(index); - // load - Inst::load_constant_u32(writable_spilltmp_reg(), targets_len as u64, &mut |_| { + let tmp1 = allocs.next_writable(tmp1); + + Inst::load_constant_u32(writable_spilltmp_reg(), targets.len() as u64, &mut |_| { writable_spilltmp_reg() }) .iter() @@ -1133,20 +1148,13 @@ impl MachInstEmit for Inst { .emit(&[], sink, emit_info, state); sink.use_label_at_offset( sink.cur_offset(), - default_.as_label().unwrap(), + targets[0].as_label().unwrap(), LabelUse::PCRel32, ); Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0) .iter() .for_each(|i| i.emit(&[], sink, emit_info, state)); - } - &Inst::BrTable { - index, - tmp1, - ref targets, - } => { - let index = allocs.next(index); - let tmp1 = allocs.next_writable(tmp1); + let mut insts = SmallInstVec::new(); // get current pc. insts.push(Inst::Auipc { @@ -1175,7 +1183,7 @@ impl MachInstEmit for Inst { // here is all the jumps. let mut need_label_use = vec![]; - for t in targets { + for t in targets.iter().skip(1) { need_label_use.push((insts.len(), t.clone())); insts.extend(Inst::construct_auipc_and_jalr( None, diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index c3b53bd567..aa66278187 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -17,7 +17,7 @@ pub use crate::ir::condcodes::FloatCC; use alloc::vec::Vec; use regalloc2::{PRegSet, VReg}; -use smallvec::SmallVec; +use smallvec::{smallvec, SmallVec}; use std::boxed::Box; use std::string::{String, ToString}; @@ -235,7 +235,12 @@ impl Inst { alloc_tmp: &mut F, ) -> SmallInstVec { let insts = Inst::load_const_imm(rd, value, alloc_tmp); - insts.unwrap_or(LoadConstant::U32(value as u32).load_constant(rd, alloc_tmp)) + insts.unwrap_or_else(|| { + smallvec![Inst::LoadConst32 { + rd, + imm: value as u32 + }] + }) } pub fn load_constant_u64 Writable>( @@ -244,7 +249,7 @@ impl Inst { alloc_tmp: &mut F, ) -> SmallInstVec { let insts = Inst::load_const_imm(rd, value, alloc_tmp); - insts.unwrap_or(LoadConstant::U64(value).load_constant(rd, alloc_tmp)) + insts.unwrap_or_else(|| smallvec![Inst::LoadConst64 { rd, imm: value }]) } pub(crate) fn construct_auipc_and_jalr( @@ -337,11 +342,10 @@ fn riscv64_get_operands VReg>(inst: &Inst, collector: &mut Operan collector.reg_use(index); collector.reg_early_def(tmp1); } - &Inst::BrTableCheck { index, .. } => { - collector.reg_use(index); - } &Inst::Auipc { rd, .. } => collector.reg_def(rd), &Inst::Lui { rd, .. } => collector.reg_def(rd), + &Inst::LoadConst32 { rd, .. } => collector.reg_def(rd), + &Inst::LoadConst64 { rd, .. } => collector.reg_def(rd), &Inst::AluRRR { rd, rs1, rs2, .. } => { collector.reg_use(rs1); collector.reg_use(rs2); @@ -695,9 +699,7 @@ impl MachInst for Inst { &Inst::CondBr { .. } => MachTerminator::Cond, &Inst::Jalr { .. } => MachTerminator::Uncond, &Inst::Ret { .. } => MachTerminator::Ret, - // BrTableCheck is a check before BrTable - // can lead transfer to default_. - &Inst::BrTable { .. } | &Inst::BrTableCheck { .. } => MachTerminator::Indirect, + &Inst::BrTable { .. } => MachTerminator::Indirect, _ => MachTerminator::None, } } @@ -1202,17 +1204,6 @@ impl Inst { let dst = format_regs(&dst[..], allocs); format!("{} {},{},{}##ty={}", op.op_name(), dst, x, y, ty,) } - &Inst::BrTableCheck { - index, - targets_len, - default_, - } => { - let index = format_reg(index, allocs); - format!( - "br_table_check {}##targets_len={} default_={}", - index, targets_len, default_ - ) - } &Inst::BrTable { index, tmp1, @@ -1249,7 +1240,28 @@ impl Inst { &Inst::Lui { rd, ref imm } => { format!("{} {},{}", "lui", format_reg(rd.to_reg(), allocs), imm.bits) } + &Inst::LoadConst32 { rd, imm } => { + use std::fmt::Write; + let rd = format_reg(rd.to_reg(), allocs); + let mut buf = String::new(); + write!(&mut buf, "auipc {},0; ", rd).unwrap(); + write!(&mut buf, "ld {},12({}); ", rd, rd).unwrap(); + write!(&mut buf, "j {}; ", Inst::INSTRUCTION_SIZE + 4).unwrap(); + write!(&mut buf, ".4byte 0x{:x}", imm).unwrap(); + buf + } + &Inst::LoadConst64 { rd, imm } => { + use std::fmt::Write; + + let rd = format_reg(rd.to_reg(), allocs); + let mut buf = String::new(); + write!(&mut buf, "auipc {},0; ", rd).unwrap(); + write!(&mut buf, "ld {},12({}); ", rd, rd).unwrap(); + write!(&mut buf, "j {}; ", Inst::INSTRUCTION_SIZE + 8).unwrap(); + write!(&mut buf, ".8byte 0x{:x}", imm).unwrap(); + buf + } &Inst::AluRRR { alu_op, rd, diff --git a/cranelift/codegen/src/isa/riscv64/lower/isle.rs b/cranelift/codegen/src/isa/riscv64/lower/isle.rs index 2ee3732577..6b89815f29 100644 --- a/cranelift/codegen/src/isa/riscv64/lower/isle.rs +++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs @@ -430,21 +430,15 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> } } fn lower_br_table(&mut self, index: Reg, targets: &VecMachLabel) -> InstOutput { - let tmp = self.temp_writable_reg(I64); - let default_ = BranchTarget::Label(targets[0]); + let tmp1 = self.temp_writable_reg(I64); let targets: Vec = targets - .iter() - .skip(1) - .map(|bix| BranchTarget::Label(*bix)) + .into_iter() + .copied() + .map(BranchTarget::Label) .collect(); - self.emit(&MInst::BrTableCheck { - index, - targets_len: targets.len() as i32, - default_, - }); self.emit(&MInst::BrTable { index, - tmp1: tmp, + tmp1, targets, }); InstOutput::default() diff --git a/cranelift/filetests/filetests/isa/riscv64/constants.clif b/cranelift/filetests/filetests/isa/riscv64/constants.clif index 1b4af0bdf3..43190c6c7e 100644 --- a/cranelift/filetests/filetests/isa/riscv64/constants.clif +++ b/cranelift/filetests/filetests/isa/riscv64/constants.clif @@ -50,10 +50,7 @@ block0: } ; block0: -; auipc t1,0 -; ld a0,12(t1) -; j 12 -; .8byte 0xffff0000 +; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xffff0000 ; ret function %f() -> i64 { @@ -63,10 +60,7 @@ block0: } ; block0: -; auipc t1,0 -; ld a0,12(t1) -; j 12 -; .8byte 0xffff00000000 +; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xffff00000000 ; ret function %f() -> i64 { @@ -76,10 +70,7 @@ block0: } ; block0: -; auipc t1,0 -; ld a0,12(t1) -; j 12 -; .8byte 0xffff000000000000 +; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xffff000000000000 ; ret function %f() -> i64 { @@ -109,10 +100,7 @@ block0: } ; block0: -; auipc t1,0 -; ld a0,12(t1) -; j 12 -; .8byte 0xffffffff0000ffff +; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xffffffff0000ffff ; ret function %f() -> i64 { @@ -122,10 +110,7 @@ block0: } ; block0: -; auipc t1,0 -; ld a0,12(t1) -; j 12 -; .8byte 0xffff0000ffffffff +; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xffff0000ffffffff ; ret function %f() -> i64 { @@ -135,10 +120,7 @@ block0: } ; block0: -; auipc t1,0 -; ld a0,12(t1) -; j 12 -; .8byte 0xffffffffffff +; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xffffffffffff ; ret function %f() -> i64 { @@ -148,10 +130,7 @@ block0: } ; block0: -; auipc t1,0 -; ld a0,12(t1) -; j 12 -; .8byte 0xf34bf0a31212003a +; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xf34bf0a31212003a ; ret function %f() -> i64 { @@ -161,10 +140,7 @@ block0: } ; block0: -; auipc t1,0 -; ld a0,12(t1) -; j 12 -; .8byte 0x12e900001ef40000 +; auipc a0,0; ld a0,12(a0); j 12; .8byte 0x12e900001ef40000 ; ret function %f() -> i64 { @@ -174,10 +150,7 @@ block0: } ; block0: -; auipc t1,0 -; ld a0,12(t1) -; j 12 -; .8byte 0x12e9ffff1ef4ffff +; auipc a0,0; ld a0,12(a0); j 12; .8byte 0x12e9ffff1ef4ffff ; ret function %f() -> i32 { @@ -197,10 +170,7 @@ block0: } ; block0: -; auipc t1,0 -; ld a0,12(t1) -; j 12 -; .8byte 0xfffffff7 +; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xfffffff7 ; ret function %f() -> i64 { @@ -210,10 +180,7 @@ block0: } ; block0: -; auipc t1,0 -; ld a0,12(t1) -; j 12 -; .8byte 0xfffffff7 +; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xfffffff7 ; ret function %f() -> i64 { @@ -233,11 +200,8 @@ block0: } ; block0: -; auipc t2,0 -; ld t2,12(t2) -; j 12 -; .8byte 0x3ff0000000000000 -; fmv.d.x fa0,t2 +; auipc t1,0; ld t1,12(t1); j 12; .8byte 0x3ff0000000000000 +; fmv.d.x fa0,t1 ; ret function %f() -> f32 { @@ -258,11 +222,8 @@ block0: } ; block0: -; auipc t2,0 -; ld t2,12(t2) -; j 12 -; .8byte 0x4049000000000000 -; fmv.d.x fa0,t2 +; auipc t1,0; ld t1,12(t1); j 12; .8byte 0x4049000000000000 +; fmv.d.x fa0,t1 ; ret function %f() -> f32 { @@ -305,11 +266,8 @@ block0: } ; block0: -; auipc t2,0 -; ld t2,12(t2) -; j 12 -; .8byte 0xc030000000000000 -; fmv.d.x fa0,t2 +; auipc t1,0; ld t1,12(t1); j 12; .8byte 0xc030000000000000 +; fmv.d.x fa0,t1 ; ret function %f() -> f32 { @@ -319,10 +277,7 @@ block0: } ; block0: -; auipc t2,0 -; lwu t2,12(t2) -; j 8 -; .4byte 0xc1800000 -; fmv.w.x fa0,t2 +; auipc t1,0; ld t1,12(t1); j 8; .4byte 0xc1800000 +; fmv.w.x fa0,t1 ; ret