Merge pull request #1494 from cfallin/arm64-merge
Add new `MachInst` backend and ARM64 support.
Cargo.lock (generated, 18 lines changed)
@@ -379,6 +379,7 @@ dependencies = [
 "gimli",
 "hashbrown 0.7.1",
 "log",
 "regalloc",
 "serde",
 "smallvec",
 "target-lexicon",
@@ -432,6 +433,7 @@ dependencies = [
 "memmap",
 "num_cpus",
 "region",
 "target-lexicon",
]

[[package]]
@@ -1589,6 +1591,16 @@ dependencies = [
 "rust-argon2",
]

[[package]]
name = "regalloc"
version = "0.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89ce0cd835fa6e91bbf5d010beee19d0c2e97e4ad5e13c399a31122cfc83bdd6"
dependencies = [
 "log",
 "rustc-hash",
]

[[package]]
name = "regex"
version = "1.3.6"
@@ -1653,6 +1665,12 @@ version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c691c0e608126e00913e33f0ccf3727d5fc84573623b8d65b2df340b5201783"

[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"

[[package]]
name = "rustc_version"
version = "0.2.3"

@@ -24,6 +24,7 @@ gimli = { version = "0.20.0", default-features = false, features = ["write"], op
smallvec = { version = "1.0.0" }
thiserror = "1.0.4"
byteorder = { version = "1.3.2", default-features = false }
regalloc = "0.0.17"
# It is a goal of the cranelift-codegen crate to have minimal external dependencies.
# Please don't add any unless they are essential to the task of creating binary
# machine code. Integration tests that need external dependencies can be

@@ -54,7 +54,9 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
    let mut a64 = CpuMode::new("A64");

    // TODO refine these.
    let expand_flags = shared_defs.transform_groups.by_name("expand_flags");
    let narrow_flags = shared_defs.transform_groups.by_name("narrow_flags");
    a64.legalize_monomorphic(expand_flags);
    a64.legalize_default(narrow_flags);

    let cpu_modes = vec![a64];

@@ -54,7 +54,9 @@ pub enum Reloc {
    X86GOTPCRel4,
    /// Arm32 call target
    Arm32Call,
    /// Arm64 call target
    /// Arm64 call target. Encoded as bottom 26 bits of instruction. This
    /// value is sign-extended, multiplied by 4, and added to the PC of
    /// the call instruction to form the destination address.
    Arm64Call,
    /// RISC-V call target
    RiscvCall,

@@ -19,8 +19,10 @@ use crate::flowgraph::ControlFlowGraph;
use crate::ir::Function;
use crate::isa::TargetIsa;
use crate::legalize_function;
use crate::legalizer::simple_legalize;
use crate::licm::do_licm;
use crate::loop_analysis::LoopAnalysis;
use crate::machinst::MachCompileResult;
use crate::nan_canonicalization::do_nan_canonicalization;
use crate::postopt::do_postopt;
use crate::redundant_reload_remover::RedundantReloadRemover;

@@ -55,6 +57,12 @@ pub struct Context {

    /// Redundant-reload remover context.
    pub redundant_reload_remover: RedundantReloadRemover,

    /// Result of MachBackend compilation, if computed.
    pub mach_compile_result: Option<MachCompileResult>,

    /// Flag: do we want a disassembly with the MachCompileResult?
    pub want_disasm: bool,
}

impl Context {

@@ -78,6 +86,8 @@ impl Context {
            regalloc: regalloc::Context::new(),
            loop_analysis: LoopAnalysis::new(),
            redundant_reload_remover: RedundantReloadRemover::new(),
            mach_compile_result: None,
            want_disasm: false,
        }
    }

@@ -89,6 +99,14 @@ impl Context {
        self.regalloc.clear();
        self.loop_analysis.clear();
        self.redundant_reload_remover.clear();
        self.mach_compile_result = None;
        self.want_disasm = false;
    }

    /// Set the flag to request a disassembly when compiling with a
    /// `MachBackend` backend.
    pub fn set_disasm(&mut self, val: bool) {
        self.want_disasm = val;
    }

    /// Compile the function, and emit machine code into a `Vec<u8>`.

@@ -130,9 +148,13 @@ impl Context {
    pub fn compile(&mut self, isa: &dyn TargetIsa) -> CodegenResult<CodeInfo> {
        let _tt = timing::compile();
        self.verify_if(isa)?;
        debug!("Compiling:\n{}", self.func.display(isa));

        let opt_level = isa.flags().opt_level();
        debug!(
            "Compiling (opt level {:?}):\n{}",
            opt_level,
            self.func.display(isa)
        );

        self.compute_cfg();
        if opt_level != OptLevel::None {

@@ -141,6 +163,7 @@ impl Context {
        if isa.flags().enable_nan_canonicalization() {
            self.canonicalize_nans(isa)?;
        }

        self.legalize(isa)?;
        if opt_level != OptLevel::None {
            self.postopt(isa)?;

@@ -149,23 +172,32 @@ impl Context {
            self.licm(isa)?;
            self.simple_gvn(isa)?;
        }

        self.compute_domtree();
        self.eliminate_unreachable_code(isa)?;
        if opt_level != OptLevel::None {
            self.dce(isa)?;
        }
        self.regalloc(isa)?;
        self.prologue_epilogue(isa)?;
        if opt_level == OptLevel::Speed || opt_level == OptLevel::SpeedAndSize {
            self.redundant_reload_remover(isa)?;
        }
        if opt_level == OptLevel::SpeedAndSize {
            self.shrink_instructions(isa)?;
        }
        let result = self.relax_branches(isa);

        debug!("Compiled:\n{}", self.func.display(isa));
        result
        if let Some(backend) = isa.get_mach_backend() {
            let result = backend.compile_function(&mut self.func, self.want_disasm)?;
            let info = result.code_info();
            self.mach_compile_result = Some(result);
            Ok(info)
        } else {
            self.regalloc(isa)?;
            self.prologue_epilogue(isa)?;
            if opt_level == OptLevel::Speed || opt_level == OptLevel::SpeedAndSize {
                self.redundant_reload_remover(isa)?;
            }
            if opt_level == OptLevel::SpeedAndSize {
                self.shrink_instructions(isa)?;
            }
            let result = self.relax_branches(isa);

            debug!("Compiled:\n{}", self.func.display(isa));
            result
        }
    }

    /// Emit machine code directly into raw memory.

@@ -191,7 +223,11 @@ impl Context {
    ) -> CodeInfo {
        let _tt = timing::binemit();
        let mut sink = MemoryCodeSink::new(mem, relocs, traps, stackmaps);
        isa.emit_function_to_memory(&self.func, &mut sink);
        if let Some(ref result) = &self.mach_compile_result {
            result.sections.emit(&mut sink);
        } else {
            isa.emit_function_to_memory(&self.func, &mut sink);
        }
        sink.info
    }

@@ -279,9 +315,15 @@ impl Context {
        // TODO: Avoid doing this when legalization doesn't actually mutate the CFG.
        self.domtree.clear();
        self.loop_analysis.clear();
        legalize_function(&mut self.func, &mut self.cfg, isa);
        debug!("Legalized:\n{}", self.func.display(isa));
        self.verify_if(isa)
        if isa.get_mach_backend().is_some() {
            // Run some specific legalizations only.
            simple_legalize(&mut self.func, &mut self.cfg, isa);
            self.verify_if(isa)
        } else {
            legalize_function(&mut self.func, &mut self.cfg, isa);
            debug!("Legalized:\n{}", self.func.display(isa));
            self.verify_if(isa)
        }
    }

    /// Perform post-legalization rewrites on the function.

@@ -6,40 +6,10 @@
use crate::cursor::{Cursor, FuncCursor};
use crate::dominator_tree::DominatorTree;
use crate::entity::EntityRef;
use crate::ir::instructions::InstructionData;
use crate::ir::{DataFlowGraph, Function, Inst, Opcode};
use crate::inst_predicates::{any_inst_results_used, has_side_effect};
use crate::ir::Function;
use crate::timing;

/// Test whether the given opcode is unsafe to even consider for DCE.
fn trivially_unsafe_for_dce(opcode: Opcode) -> bool {
    opcode.is_call()
        || opcode.is_branch()
        || opcode.is_terminator()
        || opcode.is_return()
        || opcode.can_trap()
        || opcode.other_side_effects()
        || opcode.can_store()
}

/// Preserve instructions with used result values.
fn any_inst_results_used(inst: Inst, live: &[bool], dfg: &DataFlowGraph) -> bool {
    dfg.inst_results(inst).iter().any(|v| live[v.index()])
}

/// Load instructions without the `notrap` flag are defined to trap when
/// operating on inaccessible memory, so we can't DCE them even if the
/// loaded value is unused.
fn is_load_with_defined_trapping(opcode: Opcode, data: &InstructionData) -> bool {
    if !opcode.can_load() {
        return false;
    }
    match *data {
        InstructionData::StackLoad { .. } => false,
        InstructionData::Load { flags, .. } => !flags.notrap(),
        _ => true,
    }
}

/// Perform DCE on `func`.
pub fn do_dce(func: &mut Function, domtree: &mut DominatorTree) {
    let _tt = timing::dce();

@@ -50,10 +20,7 @@ pub fn do_dce(func: &mut Function, domtree: &mut DominatorTree) {
        let mut pos = FuncCursor::new(func).at_bottom(block);
        while let Some(inst) = pos.prev_inst() {
            {
                let data = &pos.func.dfg[inst];
                let opcode = data.opcode();
                if trivially_unsafe_for_dce(opcode)
                    || is_load_with_defined_trapping(opcode, &data)
                if has_side_effect(pos.func, inst)
                    || any_inst_results_used(inst, &live, &pos.func.dfg)
                {
                    for arg in pos.func.dfg.inst_args(inst) {
cranelift/codegen/src/inst_predicates.rs (new file, 42 lines)
@@ -0,0 +1,42 @@
//! Instruction predicates/properties, shared by various analyses.

use crate::ir::{DataFlowGraph, Function, Inst, InstructionData, Opcode};
use cranelift_entity::EntityRef;

/// Preserve instructions with used result values.
pub fn any_inst_results_used(inst: Inst, live: &[bool], dfg: &DataFlowGraph) -> bool {
    dfg.inst_results(inst).iter().any(|v| live[v.index()])
}

/// Test whether the given opcode is unsafe to even consider as side-effect-free.
fn trivially_has_side_effects(opcode: Opcode) -> bool {
    opcode.is_call()
        || opcode.is_branch()
        || opcode.is_terminator()
        || opcode.is_return()
        || opcode.can_trap()
        || opcode.other_side_effects()
        || opcode.can_store()
}

/// Load instructions without the `notrap` flag are defined to trap when
/// operating on inaccessible memory, so we can't treat them as side-effect-free even if the loaded
/// value is unused.
fn is_load_with_defined_trapping(opcode: Opcode, data: &InstructionData) -> bool {
    if !opcode.can_load() {
        return false;
    }
    match *data {
        InstructionData::StackLoad { .. } => false,
        InstructionData::Load { flags, .. } => !flags.notrap(),
        _ => true,
    }
}

/// Does the given instruction have any side-effect that would preclude it from being removed when
/// its value is unused?
pub fn has_side_effect(func: &Function, inst: Inst) -> bool {
    let data = &func.dfg[inst];
    let opcode = data.opcode();
    trivially_has_side_effects(opcode) || is_load_with_defined_trapping(opcode, data)
}
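For context (an illustration, not part of the diff): a pass combines these two predicates to decide removability, which is exactly the shape `do_dce` above now uses. The helper below is hypothetical and assumes it lives inside cranelift-codegen, where these items are visible:

fn is_removable(func: &Function, inst: Inst, live: &[bool]) -> bool {
    // Removable iff it has no side effect and none of its results are live.
    !has_side_effect(func, inst) && !any_inst_results_used(inst, live, &func.dfg)
}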

@@ -238,13 +238,21 @@ impl Function {

    /// Wrapper around `encode` which assigns `inst` the resulting encoding.
    pub fn update_encoding(&mut self, inst: ir::Inst, isa: &dyn TargetIsa) -> Result<(), Legalize> {
        self.encode(inst, isa).map(|e| self.encodings[inst] = e)
        if isa.get_mach_backend().is_some() {
            Ok(())
        } else {
            self.encode(inst, isa).map(|e| self.encodings[inst] = e)
        }
    }

    /// Wrapper around `TargetIsa::encode` for encoding an existing instruction
    /// in the `Function`.
    pub fn encode(&self, inst: ir::Inst, isa: &dyn TargetIsa) -> Result<Encoding, Legalize> {
        isa.encode(&self, &self.dfg[inst], self.dfg.ctrl_typevar(inst))
        if isa.get_mach_backend().is_some() {
            Ok(Encoding::new(0, 0))
        } else {
            isa.encode(&self, &self.dfg[inst], self.dfg.ctrl_typevar(inst))
        }
    }

    /// Starts collection of debug information.

@@ -57,6 +57,11 @@ impl Imm64 {
    pub fn wrapping_neg(self) -> Self {
        Self(self.0.wrapping_neg())
    }

    /// Return bits of this immediate.
    pub fn bits(&self) -> i64 {
        self.0
    }
}

impl Into<i64> for Imm64 {
cranelift/codegen/src/isa/aarch64/abi.rs (new file, 885 lines)
@@ -0,0 +1,885 @@
//! Implementation of the standard AArch64 ABI.

use crate::ir;
use crate::ir::types;
use crate::ir::types::*;
use crate::ir::StackSlot;
use crate::isa;
use crate::isa::aarch64::inst::*;
use crate::machinst::*;
use crate::settings;

use alloc::vec::Vec;

use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};

use log::debug;

/// A location for an argument or return value.
#[derive(Clone, Copy, Debug)]
enum ABIArg {
    /// In a real register.
    Reg(RealReg, ir::Type),
    /// Arguments only: on stack, at given offset from SP at entry.
    Stack(i64, ir::Type),
}

/// AArch64 ABI information shared between body (callee) and caller.
struct ABISig {
    args: Vec<ABIArg>,
    rets: Vec<ABIArg>,
    stack_arg_space: i64,
    call_conv: isa::CallConv,
}

// Spidermonkey specific ABI convention.

/// This is SpiderMonkey's `WasmTableCallSigReg`.
static BALDRDASH_SIG_REG: u8 = 10;

/// This is SpiderMonkey's `WasmTlsReg`.
static BALDRDASH_TLS_REG: u8 = 23;

// These two lists represent the registers the JIT may *not* use at any point in generated code.
//
// So these are callee-preserved from the JIT's point of view, and every register not in this list
// has to be caller-preserved by definition.
//
// Keep these lists in sync with the NonAllocatableMask set in Spidermonkey's
// Architecture-arm64.cpp.

// Indexed by physical register number.
#[rustfmt::skip]
static BALDRDASH_JIT_CALLEE_SAVED_GPR: &[bool] = &[
    /* 0 = */ false, false, false, false, false, false, false, false,
    /* 8 = */ false, false, false, false, false, false, false, false,
    /* 16 = */ true /* x16 / ip1 */, true /* x17 / ip2 */, true /* x18 / TLS */, false,
    /* 20 = */ false, false, false, false,
    /* 24 = */ false, false, false, false,
    // There should be 28, the pseudo stack pointer in this list, however the wasm stubs trash it
    // gladly right now.
    /* 28 = */ false, false, true /* x30 = FP */, true /* x31 = SP */
];

#[rustfmt::skip]
static BALDRDASH_JIT_CALLEE_SAVED_FPU: &[bool] = &[
    /* 0 = */ false, false, false, false, false, false, false, false,
    /* 8 = */ false, false, false, false, false, false, false, false,
    /* 16 = */ false, false, false, false, false, false, false, false,
    /* 24 = */ false, false, false, false, false, false, false, true /* v31 / d31 */
];

/// Try to fill a Baldrdash register, returning it if it was found.
fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Option<ABIArg> {
    if call_conv.extends_baldrdash() {
        match &param.purpose {
            &ir::ArgumentPurpose::VMContext => {
                // This is SpiderMonkey's `WasmTlsReg`.
                Some(ABIArg::Reg(
                    xreg(BALDRDASH_TLS_REG).to_real_reg(),
                    ir::types::I64,
                ))
            }
            &ir::ArgumentPurpose::SignatureId => {
                // This is SpiderMonkey's `WasmTableCallSigReg`.
                Some(ABIArg::Reg(
                    xreg(BALDRDASH_SIG_REG).to_real_reg(),
                    ir::types::I64,
                ))
            }
            _ => None,
        }
    } else {
        None
    }
}

/// Process a list of parameters or return values and allocate them to X-regs,
/// V-regs, and stack slots.
///
/// Returns the list of argument locations, and the stack-space used (rounded up
/// to a 16-byte-aligned boundary).
fn compute_arg_locs(call_conv: isa::CallConv, params: &[ir::AbiParam]) -> (Vec<ABIArg>, i64) {
    // See AArch64 ABI (https://c9x.me/compile/bib/abi-arm64.pdf), sections 5.4.
    let mut next_xreg = 0;
    let mut next_vreg = 0;
    let mut next_stack: u64 = 0;
    let mut ret = vec![];
    for param in params {
        // Validate "purpose".
        match &param.purpose {
            &ir::ArgumentPurpose::VMContext
            | &ir::ArgumentPurpose::Normal
            | &ir::ArgumentPurpose::SignatureId => {}
            _ => panic!(
                "Unsupported argument purpose {:?} in signature: {:?}",
                param.purpose, params
            ),
        }

        if in_int_reg(param.value_type) {
            if let Some(param) = try_fill_baldrdash_reg(call_conv, param) {
                ret.push(param);
            } else if next_xreg < 8 {
                ret.push(ABIArg::Reg(xreg(next_xreg).to_real_reg(), param.value_type));
                next_xreg += 1;
            } else {
                ret.push(ABIArg::Stack(next_stack as i64, param.value_type));
                next_stack += 8;
            }
        } else if in_vec_reg(param.value_type) {
            if next_vreg < 8 {
                ret.push(ABIArg::Reg(vreg(next_vreg).to_real_reg(), param.value_type));
                next_vreg += 1;
            } else {
                let size: u64 = match param.value_type {
                    F32 | F64 => 8,
                    _ => panic!("Unsupported vector-reg argument type"),
                };
                // Align.
                assert!(size.is_power_of_two());
                next_stack = (next_stack + size - 1) & !(size - 1);
                ret.push(ABIArg::Stack(next_stack as i64, param.value_type));
                next_stack += size;
            }
        }
    }

    next_stack = (next_stack + 15) & !15;

    (ret, next_stack as i64)
}
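// Worked example (illustrative, not part of this commit): under SystemV, a
// signature of ten I64 parameters maps as
//   params[0..8] -> ABIArg::Reg(x0..x7, I64)
//   params[8]    -> ABIArg::Stack(0, I64)
//   params[9]    -> ABIArg::Stack(8, I64)
// and compute_arg_locs returns stack_arg_space = 16 (already 16-aligned).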

impl ABISig {
    fn from_func_sig(sig: &ir::Signature) -> ABISig {
        // Compute args and retvals from signature.
        // TODO: pass in arg-mode or ret-mode. (Does not matter
        // for the types of arguments/return values that we support.)
        let (args, stack_arg_space) = compute_arg_locs(sig.call_conv, &sig.params);
        let (rets, _) = compute_arg_locs(sig.call_conv, &sig.returns);

        // Verify that there are no return values on the stack.
        assert!(rets.iter().all(|a| match a {
            &ABIArg::Stack(..) => false,
            _ => true,
        }));

        ABISig {
            args,
            rets,
            stack_arg_space,
            call_conv: sig.call_conv,
        }
    }
}

/// AArch64 ABI object for a function body.
pub struct AArch64ABIBody {
    /// signature: arg and retval regs
    sig: ABISig,
    /// offsets to each stackslot
    stackslots: Vec<u32>,
    /// total stack size of all stackslots
    stackslots_size: u32,
    /// clobbered registers, from regalloc.
    clobbered: Set<Writable<RealReg>>,
    /// total number of spillslots, from regalloc.
    spillslots: Option<usize>,
    /// Total frame size.
    frame_size: Option<u32>,
    /// Calling convention this function expects.
    call_conv: isa::CallConv,
}

fn in_int_reg(ty: ir::Type) -> bool {
    match ty {
        types::I8 | types::I16 | types::I32 | types::I64 => true,
        types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true,
        _ => false,
    }
}

fn in_vec_reg(ty: ir::Type) -> bool {
    match ty {
        types::F32 | types::F64 => true,
        _ => false,
    }
}

impl AArch64ABIBody {
    /// Create a new body ABI instance.
    pub fn new(f: &ir::Function) -> Self {
        debug!("AArch64 ABI: func signature {:?}", f.signature);

        let sig = ABISig::from_func_sig(&f.signature);

        let call_conv = f.signature.call_conv;
        // Only these calling conventions are supported.
        assert!(
            call_conv == isa::CallConv::SystemV
                || call_conv == isa::CallConv::Fast
                || call_conv == isa::CallConv::Cold
                || call_conv.extends_baldrdash(),
            "Unsupported calling convention: {:?}",
            call_conv
        );

        // Compute stackslot locations and total stackslot size.
        let mut stack_offset: u32 = 0;
        let mut stackslots = vec![];
        for (stackslot, data) in f.stack_slots.iter() {
            let off = stack_offset;
            stack_offset += data.size;
            stack_offset = (stack_offset + 7) & !7;
            assert_eq!(stackslot.as_u32() as usize, stackslots.len());
            stackslots.push(off);
        }

        Self {
            sig,
            stackslots,
            stackslots_size: stack_offset,
            clobbered: Set::empty(),
            spillslots: None,
            frame_size: None,
            call_conv,
        }
    }
}

fn load_stack(fp_offset: i64, into_reg: Writable<Reg>, ty: Type) -> Inst {
    let mem = MemArg::FPOffset(fp_offset);

    match ty {
        types::B1
        | types::B8
        | types::I8
        | types::B16
        | types::I16
        | types::B32
        | types::I32
        | types::B64
        | types::I64 => Inst::ULoad64 {
            rd: into_reg,
            mem,
            srcloc: None,
        },
        types::F32 => Inst::FpuLoad32 {
            rd: into_reg,
            mem,
            srcloc: None,
        },
        types::F64 => Inst::FpuLoad64 {
            rd: into_reg,
            mem,
            srcloc: None,
        },
        _ => unimplemented!("load_stack({})", ty),
    }
}

fn store_stack(fp_offset: i64, from_reg: Reg, ty: Type) -> Inst {
    let mem = MemArg::FPOffset(fp_offset);

    match ty {
        types::B1
        | types::B8
        | types::I8
        | types::B16
        | types::I16
        | types::B32
        | types::I32
        | types::B64
        | types::I64 => Inst::Store64 {
            rd: from_reg,
            mem,
            srcloc: None,
        },
        types::F32 => Inst::FpuStore32 {
            rd: from_reg,
            mem,
            srcloc: None,
        },
        types::F64 => Inst::FpuStore64 {
            rd: from_reg,
            mem,
            srcloc: None,
        },
        _ => unimplemented!("store_stack({})", ty),
    }
}

fn is_callee_save(call_conv: isa::CallConv, r: RealReg) -> bool {
    if call_conv.extends_baldrdash() {
        match r.get_class() {
            RegClass::I64 => {
                let enc = r.get_hw_encoding();
                if BALDRDASH_JIT_CALLEE_SAVED_GPR[enc] {
                    return true;
                }
                // Otherwise, fall through to preserve native ABI registers.
            }
            RegClass::V128 => {
                let enc = r.get_hw_encoding();
                if BALDRDASH_JIT_CALLEE_SAVED_FPU[enc] {
                    return true;
                }
                // Otherwise, fall through to preserve native ABI registers.
            }
            _ => unimplemented!("baldrdash callee saved on non-i64 reg classes"),
        };
    }

    match r.get_class() {
        RegClass::I64 => {
            // x19 - x28 inclusive are callee-saves.
            r.get_hw_encoding() >= 19 && r.get_hw_encoding() <= 28
        }
        RegClass::V128 => {
            // v8 - v15 inclusive are callee-saves.
            r.get_hw_encoding() >= 8 && r.get_hw_encoding() <= 15
        }
        _ => panic!("Unexpected RegClass"),
    }
}

fn get_callee_saves(
    call_conv: isa::CallConv,
    regs: Vec<Writable<RealReg>>,
) -> (Vec<Writable<RealReg>>, Vec<Writable<RealReg>>) {
    let mut int_saves = vec![];
    let mut vec_saves = vec![];
    for reg in regs.into_iter() {
        if is_callee_save(call_conv, reg.to_reg()) {
            match reg.to_reg().get_class() {
                RegClass::I64 => int_saves.push(reg),
                RegClass::V128 => vec_saves.push(reg),
                _ => panic!("Unexpected RegClass"),
            }
        }
    }
    (int_saves, vec_saves)
}

fn is_caller_save(call_conv: isa::CallConv, r: RealReg) -> bool {
    if call_conv.extends_baldrdash() {
        match r.get_class() {
            RegClass::I64 => {
                let enc = r.get_hw_encoding();
                if !BALDRDASH_JIT_CALLEE_SAVED_GPR[enc] {
                    return true;
                }
                // Otherwise, fall through to preserve native's ABI caller-saved.
            }
            RegClass::V128 => {
                let enc = r.get_hw_encoding();
                if !BALDRDASH_JIT_CALLEE_SAVED_FPU[enc] {
                    return true;
                }
                // Otherwise, fall through to preserve native's ABI caller-saved.
            }
            _ => unimplemented!("baldrdash callee saved on non-i64 reg classes"),
        };
    }

    match r.get_class() {
        RegClass::I64 => {
            // x0 - x17 inclusive are caller-saves.
            r.get_hw_encoding() <= 17
        }
        RegClass::V128 => {
            // v0 - v7 inclusive and v16 - v31 inclusive are caller-saves.
            r.get_hw_encoding() <= 7 || (r.get_hw_encoding() >= 16 && r.get_hw_encoding() <= 31)
        }
        _ => panic!("Unexpected RegClass"),
    }
}

fn get_caller_saves_set(call_conv: isa::CallConv) -> Set<Writable<Reg>> {
    let mut set = Set::empty();
    for i in 0..29 {
        let x = writable_xreg(i);
        if is_caller_save(call_conv, x.to_reg().to_real_reg()) {
            set.insert(x);
        }
    }
    for i in 0..32 {
        let v = writable_vreg(i);
        if is_caller_save(call_conv, v.to_reg().to_real_reg()) {
            set.insert(v);
        }
    }
    set
}

impl ABIBody for AArch64ABIBody {
    type I = Inst;

    fn liveins(&self) -> Set<RealReg> {
        let mut set: Set<RealReg> = Set::empty();
        for &arg in &self.sig.args {
            if let ABIArg::Reg(r, _) = arg {
                set.insert(r);
            }
        }
        set
    }

    fn liveouts(&self) -> Set<RealReg> {
        let mut set: Set<RealReg> = Set::empty();
        for &ret in &self.sig.rets {
            if let ABIArg::Reg(r, _) = ret {
                set.insert(r);
            }
        }
        set
    }

    fn num_args(&self) -> usize {
        self.sig.args.len()
    }

    fn num_retvals(&self) -> usize {
        self.sig.rets.len()
    }

    fn num_stackslots(&self) -> usize {
        self.stackslots.len()
    }

    fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Inst {
        match &self.sig.args[idx] {
            &ABIArg::Reg(r, ty) => Inst::gen_move(into_reg, r.to_reg(), ty),
            &ABIArg::Stack(off, ty) => load_stack(off + 16, into_reg, ty),
        }
    }

    fn gen_copy_reg_to_retval(&self, idx: usize, from_reg: Reg) -> Inst {
        match &self.sig.rets[idx] {
            &ABIArg::Reg(r, ty) => Inst::gen_move(Writable::from_reg(r.to_reg()), from_reg, ty),
            &ABIArg::Stack(off, ty) => store_stack(off + 16, from_reg, ty),
        }
    }

    fn gen_ret(&self) -> Inst {
        Inst::Ret {}
    }

    fn gen_epilogue_placeholder(&self) -> Inst {
        Inst::EpiloguePlaceholder {}
    }

    fn set_num_spillslots(&mut self, slots: usize) {
        self.spillslots = Some(slots);
    }

    fn set_clobbered(&mut self, clobbered: Set<Writable<RealReg>>) {
        self.clobbered = clobbered;
    }

    fn load_stackslot(
        &self,
        slot: StackSlot,
        offset: u32,
        ty: Type,
        into_reg: Writable<Reg>,
    ) -> Inst {
        // Offset from beginning of stackslot area, which is at FP - stackslots_size.
        let stack_off = self.stackslots[slot.as_u32() as usize] as i64;
        let fp_off: i64 = -(self.stackslots_size as i64) + stack_off + (offset as i64);
        load_stack(fp_off, into_reg, ty)
    }

    fn store_stackslot(&self, slot: StackSlot, offset: u32, ty: Type, from_reg: Reg) -> Inst {
        // Offset from beginning of stackslot area, which is at FP - stackslots_size.
        let stack_off = self.stackslots[slot.as_u32() as usize] as i64;
        let fp_off: i64 = -(self.stackslots_size as i64) + stack_off + (offset as i64);
        store_stack(fp_off, from_reg, ty)
    }

    // Load from a spillslot.
    fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable<Reg>) -> Inst {
        // Note that when spills/fills are generated, we don't yet know how many
        // spillslots there will be, so we allocate *downward* from the beginning
        // of the stackslot area. Hence: FP - stackslot_size - 8*spillslot -
        // sizeof(ty).
        let islot = slot.get() as i64;
        let ty_size = self.get_spillslot_size(into_reg.to_reg().get_class(), ty) * 8;
        let fp_off: i64 = -(self.stackslots_size as i64) - (8 * islot) - ty_size as i64;
        load_stack(fp_off, into_reg, ty)
    }

    // Store to a spillslot.
    fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Inst {
        let islot = slot.get() as i64;
        let ty_size = self.get_spillslot_size(from_reg.get_class(), ty) * 8;
        let fp_off: i64 = -(self.stackslots_size as i64) - (8 * islot) - ty_size as i64;
        store_stack(fp_off, from_reg, ty)
    }
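    // Worked example for the spillslot arithmetic above (illustrative, not
    // part of this commit): with stackslots_size = 32, an I64 in spillslot 2
    // has ty_size = 1 * 8 = 8, so
    //   fp_off = -32 - (8 * 2) - 8 = -56,
    // i.e. the value lives at [FP - 56].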

    fn gen_prologue(&mut self, flags: &settings::Flags) -> Vec<Inst> {
        let mut insts = vec![];
        if !self.call_conv.extends_baldrdash() {
            // stp fp (x29), lr (x30), [sp, #-16]!
            insts.push(Inst::StoreP64 {
                rt: fp_reg(),
                rt2: link_reg(),
                mem: PairMemArg::PreIndexed(
                    writable_stack_reg(),
                    SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
                ),
            });
            // mov fp (x29), sp. This uses the ADDI rd, rs, 0 form of `MOV` because
            // the usual encoding (`ORR`) does not work with SP.
            insts.push(Inst::AluRRImm12 {
                alu_op: ALUOp::Add64,
                rd: writable_fp_reg(),
                rn: stack_reg(),
                imm12: Imm12 {
                    bits: 0,
                    shift12: false,
                },
            });
        }

        let mut total_stacksize = self.stackslots_size + 8 * self.spillslots.unwrap() as u32;
        if self.call_conv.extends_baldrdash() {
            debug_assert!(
                !flags.enable_probestack(),
                "baldrdash does not expect cranelift to emit stack probes"
            );
            total_stacksize += flags.baldrdash_prologue_words() as u32 * 8;
        }
        let total_stacksize = (total_stacksize + 15) & !15; // 16-align the stack.

        if !self.call_conv.extends_baldrdash() && total_stacksize > 0 {
            // sub sp, sp, #total_stacksize
            if let Some(imm12) = Imm12::maybe_from_u64(total_stacksize as u64) {
                let sub_inst = Inst::AluRRImm12 {
                    alu_op: ALUOp::Sub64,
                    rd: writable_stack_reg(),
                    rn: stack_reg(),
                    imm12,
                };
                insts.push(sub_inst);
            } else {
                let tmp = writable_spilltmp_reg();
                let const_inst = Inst::LoadConst64 {
                    rd: tmp,
                    const_data: total_stacksize as u64,
                };
                let sub_inst = Inst::AluRRRExtend {
                    alu_op: ALUOp::Sub64,
                    rd: writable_stack_reg(),
                    rn: stack_reg(),
                    rm: tmp.to_reg(),
                    extendop: ExtendOp::UXTX,
                };
                insts.push(const_inst);
                insts.push(sub_inst);
            }
        }

        // Save clobbered registers.
        let (clobbered_int, clobbered_vec) =
            get_callee_saves(self.call_conv, self.clobbered.to_vec());
        for reg_pair in clobbered_int.chunks(2) {
            let (r1, r2) = if reg_pair.len() == 2 {
                // .to_reg().to_reg(): Writable<RealReg> --> RealReg --> Reg
                (reg_pair[0].to_reg().to_reg(), reg_pair[1].to_reg().to_reg())
            } else {
                (reg_pair[0].to_reg().to_reg(), zero_reg())
            };

            debug_assert!(r1.get_class() == RegClass::I64);
            debug_assert!(r2.get_class() == RegClass::I64);

            // stp r1, r2, [sp, #-16]!
            insts.push(Inst::StoreP64 {
                rt: r1,
                rt2: r2,
                mem: PairMemArg::PreIndexed(
                    writable_stack_reg(),
                    SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
                ),
            });
        }
        let vec_save_bytes = clobbered_vec.len() * 16;
        if vec_save_bytes != 0 {
            insts.push(Inst::AluRRImm12 {
                alu_op: ALUOp::Sub64,
                rd: writable_stack_reg(),
                rn: stack_reg(),
                imm12: Imm12::maybe_from_u64(vec_save_bytes as u64).unwrap(),
            });
        }
        for (i, reg) in clobbered_vec.iter().enumerate() {
            insts.push(Inst::FpuStore128 {
                rd: reg.to_reg().to_reg(),
                mem: MemArg::Unscaled(stack_reg(), SImm9::maybe_from_i64((i * 16) as i64).unwrap()),
                srcloc: None,
            });
        }

        self.frame_size = Some(total_stacksize);
        insts
    }
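    // Sketch of the frame this prologue builds (inferred from the code above;
    // illustrative, not an authoritative diagram). Higher addresses at the top:
    //
    //   incoming stack args     at [FP + 16] and upward
    //   saved LR (x30)          at [FP + 8]
    //   saved FP (x29)          <- FP points here
    //   stack slots             [FP - stackslots_size .. FP)
    //   spill slots             allocated downward below the stack slots
    //   clobber-save area       pushed last; SP points at its bottom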

    fn gen_epilogue(&self, _flags: &settings::Flags) -> Vec<Inst> {
        let mut insts = vec![];

        // Restore clobbered registers.
        let (clobbered_int, clobbered_vec) =
            get_callee_saves(self.call_conv, self.clobbered.to_vec());

        for (i, reg) in clobbered_vec.iter().enumerate() {
            insts.push(Inst::FpuLoad128 {
                rd: Writable::from_reg(reg.to_reg().to_reg()),
                mem: MemArg::Unscaled(stack_reg(), SImm9::maybe_from_i64((i * 16) as i64).unwrap()),
                srcloc: None,
            });
        }
        let vec_save_bytes = clobbered_vec.len() * 16;
        if vec_save_bytes != 0 {
            insts.push(Inst::AluRRImm12 {
                alu_op: ALUOp::Add64,
                rd: writable_stack_reg(),
                rn: stack_reg(),
                imm12: Imm12::maybe_from_u64(vec_save_bytes as u64).unwrap(),
            });
        }

        for reg_pair in clobbered_int.chunks(2).rev() {
            let (r1, r2) = if reg_pair.len() == 2 {
                (
                    reg_pair[0].map(|r| r.to_reg()),
                    reg_pair[1].map(|r| r.to_reg()),
                )
            } else {
                (reg_pair[0].map(|r| r.to_reg()), writable_zero_reg())
            };

            debug_assert!(r1.to_reg().get_class() == RegClass::I64);
            debug_assert!(r2.to_reg().get_class() == RegClass::I64);

            // ldp r1, r2, [sp], #16
            insts.push(Inst::LoadP64 {
                rt: r1,
                rt2: r2,
                mem: PairMemArg::PostIndexed(
                    writable_stack_reg(),
                    SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(),
                ),
            });
        }

        if !self.call_conv.extends_baldrdash() {
            // The MOV (alias of ORR) interprets x31 as XZR, so use an ADD here.
            // MOV to SP is an alias of ADD.
            insts.push(Inst::AluRRImm12 {
                alu_op: ALUOp::Add64,
                rd: writable_stack_reg(),
                rn: fp_reg(),
                imm12: Imm12 {
                    bits: 0,
                    shift12: false,
                },
            });
            insts.push(Inst::LoadP64 {
                rt: writable_fp_reg(),
                rt2: writable_link_reg(),
                mem: PairMemArg::PostIndexed(
                    writable_stack_reg(),
                    SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(),
                ),
            });
            insts.push(Inst::Ret {});
        }

        debug!("Epilogue: {:?}", insts);
        insts
    }

    fn frame_size(&self) -> u32 {
        self.frame_size
            .expect("frame size not computed before prologue generation")
    }

    fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 {
        // We allocate in terms of 8-byte slots.
        match (rc, ty) {
            (RegClass::I64, _) => 1,
            (RegClass::V128, F32) | (RegClass::V128, F64) => 1,
            (RegClass::V128, _) => 2,
            _ => panic!("Unexpected register class!"),
        }
    }

    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Type) -> Inst {
        self.store_spillslot(to_slot, ty, from_reg.to_reg())
    }

    fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot, ty: Type) -> Inst {
        self.load_spillslot(from_slot, ty, to_reg.map(|r| r.to_reg()))
    }
}

enum CallDest {
    ExtName(ir::ExternalName),
    Reg(Reg),
}

/// AArch64 ABI object for a function call.
pub struct AArch64ABICall {
    sig: ABISig,
    uses: Set<Reg>,
    defs: Set<Writable<Reg>>,
    dest: CallDest,
    loc: ir::SourceLoc,
    opcode: ir::Opcode,
}

fn abisig_to_uses_and_defs(sig: &ABISig) -> (Set<Reg>, Set<Writable<Reg>>) {
    // Compute uses: all arg regs.
    let mut uses = Set::empty();
    for arg in &sig.args {
        match arg {
            &ABIArg::Reg(reg, _) => uses.insert(reg.to_reg()),
            _ => {}
        }
    }

    // Compute defs: all retval regs, and all caller-save (clobbered) regs.
    let mut defs = get_caller_saves_set(sig.call_conv);
    for ret in &sig.rets {
        match ret {
            &ABIArg::Reg(reg, _) => defs.insert(Writable::from_reg(reg.to_reg())),
            _ => {}
        }
    }

    (uses, defs)
}

impl AArch64ABICall {
    /// Create a callsite ABI object for a call directly to the specified function.
    pub fn from_func(
        sig: &ir::Signature,
        extname: &ir::ExternalName,
        loc: ir::SourceLoc,
    ) -> AArch64ABICall {
        let sig = ABISig::from_func_sig(sig);
        let (uses, defs) = abisig_to_uses_and_defs(&sig);
        AArch64ABICall {
            sig,
            uses,
            defs,
            dest: CallDest::ExtName(extname.clone()),
            loc,
            opcode: ir::Opcode::Call,
        }
    }

    /// Create a callsite ABI object for a call to a function pointer with the
    /// given signature.
    pub fn from_ptr(
        sig: &ir::Signature,
        ptr: Reg,
        loc: ir::SourceLoc,
        opcode: ir::Opcode,
    ) -> AArch64ABICall {
        let sig = ABISig::from_func_sig(sig);
        let (uses, defs) = abisig_to_uses_and_defs(&sig);
        AArch64ABICall {
            sig,
            uses,
            defs,
            dest: CallDest::Reg(ptr),
            loc,
            opcode,
        }
    }
}

fn adjust_stack(amt: u64, is_sub: bool) -> Vec<Inst> {
    if amt > 0 {
        let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 };
        if let Some(imm12) = Imm12::maybe_from_u64(amt) {
            vec![Inst::AluRRImm12 {
                alu_op,
                rd: writable_stack_reg(),
                rn: stack_reg(),
                imm12,
            }]
        } else {
            let const_load = Inst::LoadConst64 {
                rd: writable_spilltmp_reg(),
                const_data: amt,
            };
            let adj = Inst::AluRRRExtend {
                alu_op,
                rd: writable_stack_reg(),
                rn: stack_reg(),
                rm: spilltmp_reg(),
                extendop: ExtendOp::UXTX,
            };
            vec![const_load, adj]
        }
    } else {
        vec![]
    }
}

impl ABICall for AArch64ABICall {
    type I = Inst;

    fn num_args(&self) -> usize {
        self.sig.args.len()
    }

    fn gen_stack_pre_adjust(&self) -> Vec<Inst> {
        adjust_stack(self.sig.stack_arg_space as u64, /* is_sub = */ true)
    }

    fn gen_stack_post_adjust(&self) -> Vec<Inst> {
        adjust_stack(self.sig.stack_arg_space as u64, /* is_sub = */ false)
    }

    fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> Inst {
        match &self.sig.args[idx] {
            &ABIArg::Reg(reg, ty) => Inst::gen_move(Writable::from_reg(reg.to_reg()), from_reg, ty),
            &ABIArg::Stack(off, _) => Inst::Store64 {
                rd: from_reg,
                mem: MemArg::SPOffset(off),
                srcloc: None,
            },
        }
    }

    fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Inst {
        match &self.sig.rets[idx] {
            &ABIArg::Reg(reg, ty) => Inst::gen_move(into_reg, reg.to_reg(), ty),
            _ => unimplemented!(),
        }
    }

    fn gen_call(&self) -> Vec<Inst> {
        let (uses, defs) = (self.uses.clone(), self.defs.clone());
        match &self.dest {
            &CallDest::ExtName(ref name) => vec![Inst::Call {
                dest: name.clone(),
                uses,
                defs,
                loc: self.loc,
                opcode: self.opcode,
            }],
            &CallDest::Reg(reg) => vec![Inst::CallInd {
                rn: reg,
                uses,
                defs,
                loc: self.loc,
                opcode: self.opcode,
            }],
        }
    }
}
cranelift/codegen/src/isa/aarch64/inst/args.rs (new file, 528 lines)
@@ -0,0 +1,528 @@
//! AArch64 ISA definitions: instruction arguments.

// Some variants are never constructed, but we still want them as options in the future.
#![allow(dead_code)]

use crate::binemit::CodeOffset;
use crate::ir::Type;
use crate::isa::aarch64::inst::*;

use regalloc::{RealRegUniverse, Reg, Writable};

use core::convert::{Into, TryFrom};
use std::string::String;

/// A shift operator for a register or immediate.
#[derive(Clone, Copy, Debug)]
#[repr(u8)]
pub enum ShiftOp {
    LSL = 0b00,
    LSR = 0b01,
    ASR = 0b10,
    ROR = 0b11,
}

impl ShiftOp {
    /// Get the encoding of this shift op.
    pub fn bits(self) -> u8 {
        self as u8
    }
}

/// A shift operator amount.
#[derive(Clone, Copy, Debug)]
pub struct ShiftOpShiftImm(u8);

impl ShiftOpShiftImm {
    /// Maximum shift for shifted-register operands.
    pub const MAX_SHIFT: u64 = 63;

    /// Create a new shiftop shift amount, if possible.
    pub fn maybe_from_shift(shift: u64) -> Option<ShiftOpShiftImm> {
        if shift <= Self::MAX_SHIFT {
            Some(ShiftOpShiftImm(shift as u8))
        } else {
            None
        }
    }

    /// Return the shift amount.
    pub fn value(self) -> u8 {
        self.0
    }
}

/// A shift operator with an amount, guaranteed to be within range.
#[derive(Clone, Debug)]
pub struct ShiftOpAndAmt {
    op: ShiftOp,
    shift: ShiftOpShiftImm,
}

impl ShiftOpAndAmt {
    pub fn new(op: ShiftOp, shift: ShiftOpShiftImm) -> ShiftOpAndAmt {
        ShiftOpAndAmt { op, shift }
    }

    /// Get the shift op.
    pub fn op(&self) -> ShiftOp {
        self.op
    }

    /// Get the shift amount.
    pub fn amt(&self) -> ShiftOpShiftImm {
        self.shift
    }
}

/// An extend operator for a register.
#[derive(Clone, Copy, Debug)]
#[repr(u8)]
pub enum ExtendOp {
    UXTB = 0b000,
    UXTH = 0b001,
    UXTW = 0b010,
    UXTX = 0b011,
    SXTB = 0b100,
    SXTH = 0b101,
    SXTW = 0b110,
    SXTX = 0b111,
}

impl ExtendOp {
    /// Encoding of this op.
    pub fn bits(self) -> u8 {
        self as u8
    }
}

//=============================================================================
// Instruction sub-components (memory addresses): definitions

/// A reference to some memory address.
#[derive(Clone, Debug)]
pub enum MemLabel {
    /// An address in the code, a constant pool or jumptable, with relative
    /// offset from this instruction. This form must be used at emission time;
    /// see `memlabel_finalize()` for how other forms are lowered to this one.
    PCRel(i32),
}

/// A memory argument to load/store, encapsulating the possible addressing modes.
#[derive(Clone, Debug)]
pub enum MemArg {
    Label(MemLabel),
    /// "post-indexed" mode as per AArch64 docs: postincrement reg after address computation.
    PostIndexed(Writable<Reg>, SImm9),
    /// "pre-indexed" mode as per AArch64 docs: preincrement reg before address computation.
    PreIndexed(Writable<Reg>, SImm9),

    // N.B.: RegReg, RegScaled, and RegScaledExtended all correspond to
    // what the ISA calls the "register offset" addressing mode. We split out
    // several options here for more ergonomic codegen.
    /// Register plus register offset.
    RegReg(Reg, Reg),

    /// Register plus register offset, scaled by type's size.
    RegScaled(Reg, Reg, Type),

    /// Register plus register offset, scaled by type's size, with index sign- or zero-extended
    /// first.
    RegScaledExtended(Reg, Reg, Type, ExtendOp),

    /// Unscaled signed 9-bit immediate offset from reg.
    Unscaled(Reg, SImm9),

    /// Scaled (by size of a type) unsigned 12-bit immediate offset from reg.
    UnsignedOffset(Reg, UImm12Scaled),

    /// Offset from the stack pointer. Lowered into a real amode at emission.
    SPOffset(i64),

    /// Offset from the frame pointer. Lowered into a real amode at emission.
    FPOffset(i64),
}

impl MemArg {
    /// Memory reference using an address in a register.
    pub fn reg(reg: Reg) -> MemArg {
        // Use UnsignedOffset rather than Unscaled to use ldr rather than ldur.
        // This also does not use PostIndexed / PreIndexed as they update the register.
        MemArg::UnsignedOffset(reg, UImm12Scaled::zero(I64))
    }

    /// Memory reference using an address in a register and an offset, if possible.
    pub fn reg_maybe_offset(reg: Reg, offset: i64, value_type: Type) -> Option<MemArg> {
        if let Some(simm9) = SImm9::maybe_from_i64(offset) {
            Some(MemArg::Unscaled(reg, simm9))
        } else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(offset, value_type) {
            Some(MemArg::UnsignedOffset(reg, uimm12s))
        } else {
            None
        }
    }

    /// Memory reference using the sum of two registers as an address.
    pub fn reg_plus_reg(reg1: Reg, reg2: Reg) -> MemArg {
        MemArg::RegReg(reg1, reg2)
    }

    /// Memory reference using `reg1 + sizeof(ty) * reg2` as an address.
    pub fn reg_plus_reg_scaled(reg1: Reg, reg2: Reg, ty: Type) -> MemArg {
        MemArg::RegScaled(reg1, reg2, ty)
    }

    /// Memory reference using `reg1 + sizeof(ty) * reg2` as an address, with `reg2` sign- or
    /// zero-extended as per `op`.
    pub fn reg_plus_reg_scaled_extended(reg1: Reg, reg2: Reg, ty: Type, op: ExtendOp) -> MemArg {
        MemArg::RegScaledExtended(reg1, reg2, ty, op)
    }

    /// Memory reference to a label: a global function or value, or data in the constant pool.
    pub fn label(label: MemLabel) -> MemArg {
        MemArg::Label(label)
    }
}
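// Illustrative results for reg_maybe_offset (hypothetical values; assumes
// UImm12Scaled holds an unsigned 12-bit offset scaled by the access size),
// for an I64 access:
//   reg_maybe_offset(x0, 255, I64)    => Some(MemArg::Unscaled)        (fits the signed 9-bit range)
//   reg_maybe_offset(x0, 1024, I64)   => Some(MemArg::UnsignedOffset)  (1024 / 8 = 128 <= 4095)
//   reg_maybe_offset(x0, 40_000, I64) => None  (caller must materialize the offset in a register)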

/// A memory argument to a load/store-pair.
#[derive(Clone, Debug)]
pub enum PairMemArg {
    SignedOffset(Reg, SImm7Scaled),
    PreIndexed(Writable<Reg>, SImm7Scaled),
    PostIndexed(Writable<Reg>, SImm7Scaled),
}

//=============================================================================
// Instruction sub-components (conditions, branches and branch targets):
// definitions

/// Condition for conditional branches.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum Cond {
    Eq = 0,
    Ne = 1,
    Hs = 2,
    Lo = 3,
    Mi = 4,
    Pl = 5,
    Vs = 6,
    Vc = 7,
    Hi = 8,
    Ls = 9,
    Ge = 10,
    Lt = 11,
    Gt = 12,
    Le = 13,
    Al = 14,
    Nv = 15,
}

impl Cond {
    /// Return the inverted condition.
    pub fn invert(self) -> Cond {
        match self {
            Cond::Eq => Cond::Ne,
            Cond::Ne => Cond::Eq,

            Cond::Hs => Cond::Lo,
            Cond::Lo => Cond::Hs,

            Cond::Mi => Cond::Pl,
            Cond::Pl => Cond::Mi,

            Cond::Vs => Cond::Vc,
            Cond::Vc => Cond::Vs,

            Cond::Hi => Cond::Ls,
            Cond::Ls => Cond::Hi,

            Cond::Ge => Cond::Lt,
            Cond::Lt => Cond::Ge,

            Cond::Gt => Cond::Le,
            Cond::Le => Cond::Gt,

            Cond::Al => Cond::Nv,
            Cond::Nv => Cond::Al,
        }
    }

    /// Return the machine encoding of this condition.
    pub fn bits(self) -> u32 {
        self as u32
    }
}

/// The kind of conditional branch: the common-case-optimized "reg-is-zero" /
/// "reg-is-nonzero" variants, or the generic one that tests the machine
/// condition codes.
#[derive(Clone, Copy, Debug)]
pub enum CondBrKind {
    /// Condition: given register is zero.
    Zero(Reg),
    /// Condition: given register is nonzero.
    NotZero(Reg),
    /// Condition: the given condition-code test is true.
    Cond(Cond),
}

impl CondBrKind {
    /// Return the inverted branch condition.
    pub fn invert(self) -> CondBrKind {
        match self {
            CondBrKind::Zero(reg) => CondBrKind::NotZero(reg),
            CondBrKind::NotZero(reg) => CondBrKind::Zero(reg),
            CondBrKind::Cond(c) => CondBrKind::Cond(c.invert()),
        }
    }
}

/// A branch target. Either unresolved (basic-block index) or resolved (offset
/// from end of current instruction).
#[derive(Clone, Copy, Debug)]
pub enum BranchTarget {
    /// An unresolved reference to a BlockIndex, as passed into
    /// `lower_branch_group()`.
    Block(BlockIndex),
    /// A resolved reference to another instruction, after
    /// `Inst::with_block_offsets()`.
    ResolvedOffset(isize),
}

impl BranchTarget {
    /// Lower the branch target given offsets of each block.
    pub fn lower(&mut self, targets: &[CodeOffset], my_offset: CodeOffset) {
        match self {
            &mut BranchTarget::Block(bix) => {
                let bix = usize::try_from(bix).unwrap();
                assert!(bix < targets.len());
                let block_offset_in_func = targets[bix];
                let branch_offset = (block_offset_in_func as isize) - (my_offset as isize);
                *self = BranchTarget::ResolvedOffset(branch_offset);
            }
            &mut BranchTarget::ResolvedOffset(..) => {}
        }
    }

    /// Get the block index.
    pub fn as_block_index(&self) -> Option<BlockIndex> {
        match self {
            &BranchTarget::Block(bix) => Some(bix),
            _ => None,
        }
    }

    /// Get the offset as 4-byte words. Returns `0` if not
    /// yet resolved (in that case, we're only computing
    /// size and the offset doesn't matter).
    pub fn as_offset_words(&self) -> isize {
        match self {
            &BranchTarget::ResolvedOffset(off) => off >> 2,
            _ => 0,
        }
    }

    /// Get the offset as a 26-bit offset suitable for a 26-bit jump, or `None` if overflow.
    pub fn as_off26(&self) -> Option<u32> {
        let off = self.as_offset_words();
        if (off < (1 << 25)) && (off >= -(1 << 25)) {
            Some((off as u32) & ((1 << 26) - 1))
        } else {
            None
        }
    }

    /// Get the offset as a 19-bit offset, or `None` if overflow.
    pub fn as_off19(&self) -> Option<u32> {
        let off = self.as_offset_words();
        if (off < (1 << 18)) && (off >= -(1 << 18)) {
            Some((off as u32) & ((1 << 19) - 1))
        } else {
            None
        }
    }
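    // Worked range check (illustrative, not part of this commit): a resolved
    // byte offset of 1 << 20 (1 MiB) is 1 << 18 words, so
    //   as_off26() => Some(..)  (1 << 18 is within the signed 26-bit word range)
    //   as_off19() => None      (1 << 18 is not < 1 << 18, so it overflows)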
|
||||
|
||||
/// Map the block index given a transform map.
|
||||
pub fn map(&mut self, block_index_map: &[BlockIndex]) {
|
||||
match self {
|
||||
&mut BranchTarget::Block(ref mut bix) => {
|
||||
let n = block_index_map[usize::try_from(*bix).unwrap()];
|
||||
*bix = n;
|
||||
}
|
||||
&mut BranchTarget::ResolvedOffset(_) => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for ShiftOpAndAmt {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
format!("{:?} {}", self.op(), self.amt().value())
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for ExtendOp {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
format!("{:?}", self)
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for MemLabel {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
match self {
|
||||
&MemLabel::PCRel(off) => format!("pc+{}", off),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn shift_for_type(ty: Type) -> usize {
|
||||
match ty.bytes() {
|
||||
1 => 0,
|
||||
2 => 1,
|
||||
4 => 2,
|
||||
8 => 3,
|
||||
16 => 4,
|
||||
_ => panic!("unknown type: {}", ty),
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for MemArg {
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
match self {
|
||||
&MemArg::Unscaled(reg, simm9) => {
|
||||
if simm9.value != 0 {
|
||||
format!("[{}, {}]", reg.show_rru(mb_rru), simm9.show_rru(mb_rru))
|
||||
} else {
|
||||
format!("[{}]", reg.show_rru(mb_rru))
|
||||
}
|
||||
}
|
||||
&MemArg::UnsignedOffset(reg, uimm12) => {
|
||||
if uimm12.value != 0 {
|
||||
format!("[{}, {}]", reg.show_rru(mb_rru), uimm12.show_rru(mb_rru))
|
||||
} else {
|
||||
format!("[{}]", reg.show_rru(mb_rru))
|
||||
}
|
||||
}
|
||||
&MemArg::RegReg(r1, r2) => {
|
||||
format!("[{}, {}]", r1.show_rru(mb_rru), r2.show_rru(mb_rru),)
|
||||
}
|
||||
&MemArg::RegScaled(r1, r2, ty) => {
|
||||
let shift = shift_for_type(ty);
|
||||
format!(
|
||||
"[{}, {}, LSL #{}]",
|
||||
r1.show_rru(mb_rru),
|
||||
r2.show_rru(mb_rru),
|
||||
shift,
|
||||
)
|
||||
}
|
||||
&MemArg::RegScaledExtended(r1, r2, ty, op) => {
|
||||
let shift = shift_for_type(ty);
|
||||
let size = match op {
|
||||
ExtendOp::SXTW | ExtendOp::UXTW => InstSize::Size32,
|
||||
_ => InstSize::Size64,
|
||||
};
|
||||
let op = op.show_rru(mb_rru);
|
||||
format!(
|
||||
"[{}, {}, {} #{}]",
|
||||
r1.show_rru(mb_rru),
|
||||
show_ireg_sized(r2, mb_rru, size),
|
||||
op,
|
||||
shift
|
||||
)
|
||||
}
|
||||
&MemArg::Label(ref label) => label.show_rru(mb_rru),
|
||||
&MemArg::PreIndexed(r, simm9) => format!(
|
||||
"[{}, {}]!",
|
||||
r.to_reg().show_rru(mb_rru),
|
||||
simm9.show_rru(mb_rru)
|
||||
),
|
||||
&MemArg::PostIndexed(r, simm9) => format!(
|
||||
"[{}], {}",
|
||||
r.to_reg().show_rru(mb_rru),
|
||||
simm9.show_rru(mb_rru)
|
||||
),
|
||||
// Eliminated by `mem_finalize()`.
|
||||
&MemArg::SPOffset(..) | &MemArg::FPOffset(..) => {
|
||||
panic!("Unexpected stack-offset mem-arg mode!")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for PairMemArg {
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
match self {
|
||||
&PairMemArg::SignedOffset(reg, simm7) => {
|
||||
if simm7.value != 0 {
|
||||
format!("[{}, {}]", reg.show_rru(mb_rru), simm7.show_rru(mb_rru))
|
||||
} else {
|
||||
format!("[{}]", reg.show_rru(mb_rru))
|
||||
}
|
||||
}
|
||||
&PairMemArg::PreIndexed(reg, simm7) => format!(
|
||||
"[{}, {}]!",
|
||||
reg.to_reg().show_rru(mb_rru),
|
||||
simm7.show_rru(mb_rru)
|
||||
),
|
||||
&PairMemArg::PostIndexed(reg, simm7) => format!(
|
||||
"[{}], {}",
|
||||
reg.to_reg().show_rru(mb_rru),
|
||||
simm7.show_rru(mb_rru)
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for Cond {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
let mut s = format!("{:?}", self);
|
||||
s.make_ascii_lowercase();
|
||||
s
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for BranchTarget {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
match self {
|
||||
&BranchTarget::Block(block) => format!("block{}", block),
|
||||
&BranchTarget::ResolvedOffset(off) => format!("{}", off),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Type used to communicate the operand size of a machine instruction, as AArch64 has 32- and
/// 64-bit variants of many instructions (and integer registers).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum InstSize {
    Size32,
    Size64,
}

impl InstSize {
    /// 32-bit case?
    pub fn is32(self) -> bool {
        self == InstSize::Size32
    }
    /// 64-bit case?
    pub fn is64(self) -> bool {
        self == InstSize::Size64
    }
    /// Convert from an `is32` boolean flag to an `InstSize`.
    pub fn from_is32(is32: bool) -> InstSize {
        if is32 {
            InstSize::Size32
        } else {
            InstSize::Size64
        }
    }
    /// Convert from a needed width to the smallest size that fits.
    pub fn from_bits<I: Into<usize>>(bits: I) -> InstSize {
        let bits: usize = bits.into();
        assert!(bits <= 64);
        if bits <= 32 {
            InstSize::Size32
        } else {
            InstSize::Size64
        }
    }
}
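
// A minimal sketch (hypothetical test, not part of the patch) of how
// `InstSize::from_bits` rounds a needed width up to the smallest register
// size that holds it:
#[cfg(test)]
#[test]
fn inst_size_from_bits_sketch() {
    assert_eq!(InstSize::from_bits(1usize), InstSize::Size32);
    assert_eq!(InstSize::from_bits(32usize), InstSize::Size32);
    assert_eq!(InstSize::from_bits(33usize), InstSize::Size64);
    assert_eq!(InstSize::from_bits(64usize), InstSize::Size64);
}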
4099 cranelift/codegen/src/isa/aarch64/inst/emit.rs (new file)
File diff suppressed because it is too large

752 cranelift/codegen/src/isa/aarch64/inst/imms.rs (new file)
@@ -0,0 +1,752 @@
//! AArch64 ISA definitions: immediate constants.

// Some variants are never constructed, but we still want them as options in the future.
#[allow(dead_code)]
use crate::ir::types::*;
use crate::ir::Type;
use crate::machinst::*;

use regalloc::RealRegUniverse;

use core::convert::TryFrom;
use std::string::String;

/// A signed, scaled 7-bit offset.
#[derive(Clone, Copy, Debug)]
pub struct SImm7Scaled {
    /// The value.
    pub value: i16,
    /// multiplied by the size of this type
    pub scale_ty: Type,
}

impl SImm7Scaled {
    /// Create a SImm7Scaled from a raw offset and the known scale type, if
    /// possible.
    pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option<SImm7Scaled> {
        assert!(scale_ty == I64 || scale_ty == I32);
        let scale = scale_ty.bytes();
        assert!(scale.is_power_of_two());
        let scale = i64::from(scale);
        let upper_limit = 63 * scale;
        let lower_limit = -(64 * scale);
        if value >= lower_limit && value <= upper_limit && (value & (scale - 1)) == 0 {
            Some(SImm7Scaled {
                value: i16::try_from(value).unwrap(),
                scale_ty,
            })
        } else {
            None
        }
    }

    /// Create a zero immediate of this format.
    pub fn zero(scale_ty: Type) -> SImm7Scaled {
        SImm7Scaled { value: 0, scale_ty }
    }

    /// Bits for encoding.
    pub fn bits(&self) -> u32 {
        let ty_bytes: i16 = self.scale_ty.bytes() as i16;
        let scaled: i16 = self.value / ty_bytes;
        assert!(scaled <= 63 && scaled >= -64);
        let scaled: i8 = scaled as i8;
        let encoded: u32 = scaled as u32;
        encoded & 0x7f
    }
}
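
// A minimal sketch (hypothetical test, not part of the patch) of the range
// rules above for an I64 scale: the offset must be a multiple of 8 in
// [-512, 504].
#[cfg(test)]
#[test]
fn simm7_scaled_range_sketch() {
    assert!(SImm7Scaled::maybe_from_i64(-512, I64).is_some());
    assert!(SImm7Scaled::maybe_from_i64(504, I64).is_some());
    assert!(SImm7Scaled::maybe_from_i64(505, I64).is_none()); // not a multiple of 8
    assert!(SImm7Scaled::maybe_from_i64(512, I64).is_none()); // beyond 63 * 8
}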
/// A 9-bit signed offset.
#[derive(Clone, Copy, Debug)]
pub struct SImm9 {
    /// The value.
    pub value: i16,
}

impl SImm9 {
    /// Create a signed 9-bit offset from a full-range value, if possible.
    pub fn maybe_from_i64(value: i64) -> Option<SImm9> {
        if value >= -256 && value <= 255 {
            Some(SImm9 {
                value: value as i16,
            })
        } else {
            None
        }
    }

    /// Create a zero immediate of this format.
    pub fn zero() -> SImm9 {
        SImm9 { value: 0 }
    }

    /// Bits for encoding.
    pub fn bits(&self) -> u32 {
        (self.value as u32) & 0x1ff
    }
}

/// An unsigned, scaled 12-bit offset.
#[derive(Clone, Copy, Debug)]
pub struct UImm12Scaled {
    /// The value.
    pub value: u16,
    /// multiplied by the size of this type
    pub scale_ty: Type,
}

impl UImm12Scaled {
    /// Create a UImm12Scaled from a raw offset and the known scale type, if
    /// possible.
    pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option<UImm12Scaled> {
        let scale = scale_ty.bytes();
        assert!(scale.is_power_of_two());
        let scale = scale as i64;
        let limit = 4095 * scale;
        if value >= 0 && value <= limit && (value & (scale - 1)) == 0 {
            Some(UImm12Scaled {
                value: value as u16,
                scale_ty,
            })
        } else {
            None
        }
    }

    /// Create a zero immediate of this format.
    pub fn zero(scale_ty: Type) -> UImm12Scaled {
        UImm12Scaled { value: 0, scale_ty }
    }

    /// Encoded bits.
    pub fn bits(&self) -> u32 {
        (self.value as u32 / self.scale_ty.bytes()) & 0xfff
    }
}

/// A shifted immediate value in 'imm12' format: supports 12 bits, shifted
/// left by 0 or 12 places.
#[derive(Clone, Debug)]
pub struct Imm12 {
    /// The immediate bits.
    pub bits: u16,
    /// Whether the immediate bits are shifted left by 12 or not.
    pub shift12: bool,
}

impl Imm12 {
    /// Compute an Imm12 from raw bits, if possible.
    pub fn maybe_from_u64(val: u64) -> Option<Imm12> {
        if val == 0 {
            Some(Imm12 {
                bits: 0,
                shift12: false,
            })
        } else if val < 0xfff {
            Some(Imm12 {
                bits: val as u16,
                shift12: false,
            })
        } else if val < 0xfff_000 && (val & 0xfff == 0) {
            Some(Imm12 {
                bits: (val >> 12) as u16,
                shift12: true,
            })
        } else {
            None
        }
    }

    /// Bits for the 2-bit "shift" field in e.g. AddI.
    pub fn shift_bits(&self) -> u32 {
        if self.shift12 {
            0b01
        } else {
            0b00
        }
    }

    /// Bits for the 12-bit "imm" field in e.g. AddI.
    pub fn imm_bits(&self) -> u32 {
        self.bits as u32
    }
}
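
// A minimal sketch (hypothetical test, not part of the patch): a value fits
// the 'imm12' form if its significant bits lie in the low 12 bits, either
// unshifted or shifted left by 12.
#[cfg(test)]
#[test]
fn imm12_sketch() {
    let unshifted = Imm12::maybe_from_u64(0xabc).unwrap();
    assert!(unshifted.bits == 0xabc && !unshifted.shift12);
    let shifted = Imm12::maybe_from_u64(0xabc000).unwrap();
    assert!(shifted.bits == 0xabc && shifted.shift12);
    assert!(Imm12::maybe_from_u64(0xabc001).is_none()); // bits in both halves
    assert!(Imm12::maybe_from_u64(0x1_000_000).is_none()); // too wide
}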
/// An immediate for logical instructions.
#[derive(Clone, Debug)]
#[cfg_attr(test, derive(PartialEq))]
pub struct ImmLogic {
    /// The actual value.
    value: u64,
    /// `N` flag.
    pub n: bool,
    /// `R` field: rotate amount.
    pub r: u8,
    /// `S` field: element size and element bits.
    pub s: u8,
}

impl ImmLogic {
    /// Compute an ImmLogic from raw bits, if possible.
    pub fn maybe_from_u64(value: u64, ty: Type) -> Option<ImmLogic> {
        // Note: This function is a port of VIXL's Assembler::IsImmLogical.

        if ty != I64 && ty != I32 {
            return None;
        }

        let original_value = value;

        let value = if ty == I32 {
            // To handle 32-bit logical immediates, the very easiest thing is to repeat
            // the input value twice to make a 64-bit word. The correct encoding of that
            // as a logical immediate will also be the correct encoding of the 32-bit
            // value.

            // Avoid making the assumption that the most-significant 32 bits are zero by
            // shifting the value left and duplicating it.
            let value = value << 32;
            value | value >> 32
        } else {
            value
        };

        // Logical immediates are encoded using parameters n, imm_s and imm_r using
        // the following table:
        //
        //    N   imms    immr    size        S             R
        //    1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
        //    0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
        //    0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
        //    0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
        //    0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
        //    0  11110s  xxxxxr     2    UInt(s)       UInt(r)
        //   (s bits must not be all set)
        //
        // A pattern is constructed of size bits, where the least significant S+1 bits
        // are set. The pattern is rotated right by R, and repeated across a 32 or
        // 64-bit value, depending on destination register width.
        //
        // Put another way: the basic format of a logical immediate is a single
        // contiguous stretch of 1 bits, repeated across the whole word at intervals
        // given by a power of 2. To identify them quickly, we first locate the
        // lowest stretch of 1 bits, then the next 1 bit above that; that combination
        // is different for every logical immediate, so it gives us all the
        // information we need to identify the only logical immediate that our input
        // could be, and then we simply check if that's the value we actually have.
        //
        // (The rotation parameter does give the possibility of the stretch of 1 bits
        // going 'round the end' of the word. To deal with that, we observe that in
        // any situation where that happens the bitwise NOT of the value is also a
        // valid logical immediate. So we simply invert the input whenever its low bit
        // is set, and then we know that the rotated case can't arise.)
        let (value, inverted) = if value & 1 == 1 {
            (!value, true)
        } else {
            (value, false)
        };

        if value == 0 {
            return None;
        }

        // The basic analysis idea: imagine our input word looks like this.
        //
        //    0011111000111110001111100011111000111110001111100011111000111110
        //                                                          c  b    a
        //                                                          |<--d-->|
        //
        // We find the lowest set bit (as an actual power-of-2 value, not its index)
        // and call it a. Then we add a to our original number, which wipes out the
        // bottommost stretch of set bits and replaces it with a 1 carried into the
        // next zero bit. Then we look for the new lowest set bit, which is in
        // position b, and subtract it, so now our number is just like the original
        // but with the lowest stretch of set bits completely gone. Now we find the
        // lowest set bit again, which is position c in the diagram above. Then we'll
        // measure the distance d between bit positions a and c (using CLZ), and that
        // tells us that the only valid logical immediate that could possibly be equal
        // to this number is the one in which a stretch of bits running from a to just
        // below b is replicated every d bits.
        fn lowest_set_bit(value: u64) -> u64 {
            let bit = value.trailing_zeros();
            1u64.checked_shl(bit).unwrap_or(0)
        }
        let a = lowest_set_bit(value);
        assert_ne!(0, a);
        let value_plus_a = value.wrapping_add(a);
        let b = lowest_set_bit(value_plus_a);
        let value_plus_a_minus_b = value_plus_a - b;
        let c = lowest_set_bit(value_plus_a_minus_b);

        let (d, clz_a, out_n, mask) = if c != 0 {
            // The general case, in which there is more than one stretch of set bits.
            // Compute the repeat distance d, and set up a bitmask covering the basic
            // unit of repetition (i.e. a word with the bottom d bits set). Also, in all
            // of these cases the N bit of the output will be zero.
            let clz_a = a.leading_zeros();
            let clz_c = c.leading_zeros();
            let d = clz_a - clz_c;
            let mask = (1 << d) - 1;
            (d, clz_a, 0, mask)
        } else {
            (64, a.leading_zeros(), 1, u64::max_value())
        };

        // If the repeat period d is not a power of two, it can't be encoded.
        if !d.is_power_of_two() {
            return None;
        }

        if ((b.wrapping_sub(a)) & !mask) != 0 {
            // If the bit stretch (b - a) does not fit within the mask derived from the
            // repeat period, then fail.
            return None;
        }

        // The only possible option is b - a repeated every d bits. Now we're going to
        // actually construct the valid logical immediate derived from that
        // specification, and see if it equals our original input.
        //
        // To repeat a value every d bits, we multiply it by a number of the form
        // (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can
        // be derived using a table lookup on CLZ(d).
        const MULTIPLIERS: [u64; 6] = [
            0x0000000000000001,
            0x0000000100000001,
            0x0001000100010001,
            0x0101010101010101,
            0x1111111111111111,
            0x5555555555555555,
        ];
        let multiplier = MULTIPLIERS[(u64::from(d).leading_zeros() - 57) as usize];
        let candidate = b.wrapping_sub(a) * multiplier;

        if value != candidate {
            // The candidate pattern doesn't match our input value, so fail.
            return None;
        }

        // We have a match! This is a valid logical immediate, so now we have to
        // construct the bits and pieces of the instruction encoding that generates
        // it.

        // Count the set bits in our basic stretch. The special case of clz(0) == -1
        // makes the answer come out right for stretches that reach the very top of
        // the word (e.g. numbers like 0xffffc00000000000).
        let clz_b = if b == 0 {
            u32::max_value() // -1
        } else {
            b.leading_zeros()
        };
        let s = clz_a.wrapping_sub(clz_b);

        // Decide how many bits to rotate right by, to put the low bit of that basic
        // stretch in position a.
        let (s, r) = if inverted {
            // If we inverted the input right at the start of this function, here's
            // where we compensate: the number of set bits becomes the number of clear
            // bits, and the rotation count is based on position b rather than position
            // a (since b is the location of the 'lowest' 1 bit after inversion).
            // Need wrapping for when clz_b is max_value() (for when b == 0).
            (d - s, clz_b.wrapping_add(1) & (d - 1))
        } else {
            (s, (clz_a + 1) & (d - 1))
        };

        // Now we're done, except for having to encode the S output in such a way that
        // it gives both the number of set bits and the length of the repeated
        // segment. The s field is encoded like this:
        //
        //     imms    size        S
        //    ssssss    64    UInt(ssssss)
        //    0sssss    32    UInt(sssss)
        //    10ssss    16    UInt(ssss)
        //    110sss     8    UInt(sss)
        //    1110ss     4    UInt(ss)
        //    11110s     2    UInt(s)
        //
        // So we 'or' (2 * -d) with our computed s to form imms.
        let s = ((d * 2).wrapping_neg() | (s - 1)) & 0x3f;
        debug_assert!(u8::try_from(r).is_ok());
        debug_assert!(u8::try_from(s).is_ok());
        Some(ImmLogic {
            value: original_value,
            n: out_n != 0,
            r: r as u8,
            s: s as u8,
        })
    }

    /// Create an ImmLogic from its raw encoding fields.
    pub fn from_raw(value: u64, n: bool, r: u8, s: u8) -> ImmLogic {
        ImmLogic { n, r, s, value }
    }

    /// Returns bits ready for encoding: (N:1, R:6, S:6).
    pub fn enc_bits(&self) -> u32 {
        ((self.n as u32) << 12) | ((self.r as u32) << 6) | (self.s as u32)
    }

    /// Returns the value that this immediate represents.
    pub fn value(&self) -> u64 {
        self.value
    }

    /// Return an immediate for the bitwise-inverted value.
    pub fn invert(&self) -> ImmLogic {
        // For every ImmLogic immediate, the inverse can also be encoded.
        Self::maybe_from_u64(!self.value, I64).unwrap()
    }
}
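
// Worked example (matching the `0xffc3ffc3ffc3ffc3` case in the tests at the
// bottom of this file): the encoding n=0, s=0b101011, r=10 selects the
// "10ssss" row of the table above, i.e. a 16-bit element. Its low S+1 = 12
// bits are set (0x0fff); rotating right by 10 gives 0x0fff ror 10 = 0xffc3,
// and replicating that across all four 16-bit lanes yields
// 0xffc3ffc3ffc3ffc3.
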
/// An immediate for shift instructions.
#[derive(Clone, Debug)]
pub struct ImmShift {
    /// 6-bit shift amount.
    pub imm: u8,
}

impl ImmShift {
    /// Create an ImmShift from raw bits, if possible.
    pub fn maybe_from_u64(val: u64) -> Option<ImmShift> {
        if val < 64 {
            Some(ImmShift { imm: val as u8 })
        } else {
            None
        }
    }

    /// Get the immediate value.
    pub fn value(&self) -> u8 {
        self.imm
    }
}

/// A 16-bit immediate for a MOVZ instruction, with a {0,16,32,48}-bit shift.
#[derive(Clone, Copy, Debug)]
pub struct MoveWideConst {
    /// The value.
    pub bits: u16,
    /// Result is `bits` shifted 16*shift bits to the left.
    pub shift: u8,
}

impl MoveWideConst {
    /// Construct a MoveWideConst from an arbitrary 64-bit constant, if possible.
    pub fn maybe_from_u64(value: u64) -> Option<MoveWideConst> {
        let mask0 = 0x0000_0000_0000_ffffu64;
        let mask1 = 0x0000_0000_ffff_0000u64;
        let mask2 = 0x0000_ffff_0000_0000u64;
        let mask3 = 0xffff_0000_0000_0000u64;

        if value == (value & mask0) {
            return Some(MoveWideConst {
                bits: (value & mask0) as u16,
                shift: 0,
            });
        }
        if value == (value & mask1) {
            return Some(MoveWideConst {
                bits: ((value >> 16) & mask0) as u16,
                shift: 1,
            });
        }
        if value == (value & mask2) {
            return Some(MoveWideConst {
                bits: ((value >> 32) & mask0) as u16,
                shift: 2,
            });
        }
        if value == (value & mask3) {
            return Some(MoveWideConst {
                bits: ((value >> 48) & mask0) as u16,
                shift: 3,
            });
        }
        None
    }

    /// Construct a MoveWideConst from a 16-bit immediate and a left-shift
    /// amount given in bits (a multiple of 16), if possible.
    pub fn maybe_with_shift(imm: u16, shift: u8) -> Option<MoveWideConst> {
        let shift_enc = shift / 16;
        if shift_enc > 3 {
            None
        } else {
            Some(MoveWideConst {
                bits: imm,
                shift: shift_enc,
            })
        }
    }

    /// Returns the value that this constant represents.
    pub fn value(&self) -> u64 {
        (self.bits as u64) << (16 * self.shift)
    }
}
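
// A minimal sketch (hypothetical test, not part of the patch): a constant is
// MOVZ-encodable when all of its set bits fall within one aligned 16-bit
// chunk.
#[cfg(test)]
#[test]
fn move_wide_const_sketch() {
    let lo = MoveWideConst::maybe_from_u64(0x1234).unwrap();
    assert!(lo.bits == 0x1234 && lo.shift == 0);
    let hi = MoveWideConst::maybe_from_u64(0x5678_0000).unwrap();
    assert!(hi.bits == 0x5678 && hi.shift == 1);
    assert!(MoveWideConst::maybe_from_u64(0x1_0001).is_none()); // straddles two chunks
}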
impl ShowWithRRU for Imm12 {
    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
        let shift = if self.shift12 { 12 } else { 0 };
        let value = u32::from(self.bits) << shift;
        format!("#{}", value)
    }
}

impl ShowWithRRU for SImm7Scaled {
    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
        format!("#{}", self.value)
    }
}

impl ShowWithRRU for SImm9 {
    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
        format!("#{}", self.value)
    }
}

impl ShowWithRRU for UImm12Scaled {
    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
        format!("#{}", self.value)
    }
}

impl ShowWithRRU for ImmLogic {
    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
        format!("#{}", self.value())
    }
}

impl ShowWithRRU for ImmShift {
    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
        format!("#{}", self.imm)
    }
}

impl ShowWithRRU for MoveWideConst {
    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
        if self.shift == 0 {
            format!("#{}", self.bits)
        } else {
            format!("#{}, LSL #{}", self.bits, self.shift * 16)
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn imm_logical_test() {
        assert_eq!(None, ImmLogic::maybe_from_u64(0, I64));
        assert_eq!(None, ImmLogic::maybe_from_u64(u64::max_value(), I64));

        assert_eq!(
            Some(ImmLogic {
                value: 1,
                n: true,
                r: 0,
                s: 0
            }),
            ImmLogic::maybe_from_u64(1, I64)
        );

        assert_eq!(
            Some(ImmLogic {
                value: 2,
                n: true,
                r: 63,
                s: 0
            }),
            ImmLogic::maybe_from_u64(2, I64)
        );

        assert_eq!(None, ImmLogic::maybe_from_u64(5, I64));

        assert_eq!(None, ImmLogic::maybe_from_u64(11, I64));

        assert_eq!(
            Some(ImmLogic {
                value: 248,
                n: true,
                r: 61,
                s: 4
            }),
            ImmLogic::maybe_from_u64(248, I64)
        );

        assert_eq!(None, ImmLogic::maybe_from_u64(249, I64));

        assert_eq!(
            Some(ImmLogic {
                value: 1920,
                n: true,
                r: 57,
                s: 3
            }),
            ImmLogic::maybe_from_u64(1920, I64)
        );

        assert_eq!(
            Some(ImmLogic {
                value: 0x7ffe,
                n: true,
                r: 63,
                s: 13
            }),
            ImmLogic::maybe_from_u64(0x7ffe, I64)
        );

        assert_eq!(
            Some(ImmLogic {
                value: 0x30000,
                n: true,
                r: 48,
                s: 1
            }),
            ImmLogic::maybe_from_u64(0x30000, I64)
        );

        assert_eq!(
            Some(ImmLogic {
                value: 0x100000,
                n: true,
                r: 44,
                s: 0
            }),
            ImmLogic::maybe_from_u64(0x100000, I64)
        );

        assert_eq!(
            Some(ImmLogic {
                value: u64::max_value() - 1,
                n: true,
                r: 63,
                s: 62
            }),
            ImmLogic::maybe_from_u64(u64::max_value() - 1, I64)
        );

        assert_eq!(
            Some(ImmLogic {
                value: 0xaaaaaaaaaaaaaaaa,
                n: false,
                r: 1,
                s: 60
            }),
            ImmLogic::maybe_from_u64(0xaaaaaaaaaaaaaaaa, I64)
        );

        assert_eq!(
            Some(ImmLogic {
                value: 0x8181818181818181,
                n: false,
                r: 1,
                s: 49
            }),
            ImmLogic::maybe_from_u64(0x8181818181818181, I64)
        );

        assert_eq!(
            Some(ImmLogic {
                value: 0xffc3ffc3ffc3ffc3,
                n: false,
                r: 10,
                s: 43
            }),
            ImmLogic::maybe_from_u64(0xffc3ffc3ffc3ffc3, I64)
        );

        assert_eq!(
            Some(ImmLogic {
                value: 0x100000001,
                n: false,
                r: 0,
                s: 0
            }),
            ImmLogic::maybe_from_u64(0x100000001, I64)
        );

        assert_eq!(
            Some(ImmLogic {
                value: 0x1111111111111111,
                n: false,
                r: 0,
                s: 56
            }),
            ImmLogic::maybe_from_u64(0x1111111111111111, I64)
        );

        for n in 0..2 {
            let types = if n == 0 { vec![I64, I32] } else { vec![I64] };
            for s in 0..64 {
                for r in 0..64 {
                    let imm = get_logical_imm(n, s, r);
                    for &ty in &types {
                        match ImmLogic::maybe_from_u64(imm, ty) {
                            Some(ImmLogic { value, .. }) => {
                                assert_eq!(imm, value);
                                ImmLogic::maybe_from_u64(!value, ty).unwrap();
                            }
                            None => assert_eq!(0, imm),
                        };
                    }
                }
            }
        }
    }

    // Repeat a value that has `width` bits, across a 64-bit value.
    fn repeat(value: u64, width: u64) -> u64 {
        let mut result = value & ((1 << width) - 1);
        let mut i = width;
        while i < 64 {
            result |= result << i;
            i *= 2;
        }
        result
    }

    // Get the logical immediate, given the encoding's N/S/R bits.
    fn get_logical_imm(n: u32, s: u32, r: u32) -> u64 {
        // An integer is constructed from the n, imm_s and imm_r bits according to
        // the following table:
        //
        //    N   imms    immr    size        S             R
        //    1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
        //    0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
        //    0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
        //    0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
        //    0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
        //    0  11110s  xxxxxr     2    UInt(s)       UInt(r)
        //   (s bits must not be all set)
        //
        // A pattern is constructed of size bits, where the least significant S+1
        // bits are set. The pattern is rotated right by R, and repeated across a
        // 64-bit value.

        if n == 1 {
            if s == 0x3f {
                return 0;
            }
            let bits = (1u64 << (s + 1)) - 1;
            bits.rotate_right(r)
        } else {
            if (s >> 1) == 0x1f {
                return 0;
            }
            let mut width = 0x20;
            while width >= 0x2 {
                if (s & width) == 0 {
                    let mask = width - 1;
                    if (s & mask) == mask {
                        return 0;
                    }
                    let bits = (1u64 << ((s & mask) + 1)) - 1;
                    return repeat(bits.rotate_right(r & mask), width.into());
                }
                width >>= 1;
            }
            unreachable!();
        }
    }
}
2541 cranelift/codegen/src/isa/aarch64/inst/mod.rs (new file)
File diff suppressed because it is too large

270 cranelift/codegen/src/isa/aarch64/inst/regs.rs (new file)
@@ -0,0 +1,270 @@
//! AArch64 ISA definitions: registers.

use crate::isa::aarch64::inst::InstSize;
use crate::machinst::*;

use regalloc::{RealRegUniverse, Reg, RegClass, RegClassInfo, Writable, NUM_REG_CLASSES};

use std::string::{String, ToString};

//=============================================================================
// Registers, the Universe thereof, and printing

#[rustfmt::skip]
const XREG_INDICES: [u8; 31] = [
    // X0 - X7
    32, 33, 34, 35, 36, 37, 38, 39,
    // X8 - X14
    40, 41, 42, 43, 44, 45, 46,
    // X15
    59,
    // X16, X17
    47, 48,
    // X18
    60,
    // X19 - X28
    49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
    // X29
    61,
    // X30
    62,
];

const ZERO_REG_INDEX: u8 = 63;

const SP_REG_INDEX: u8 = 64;

/// Get a reference to an X-register (integer register).
pub fn xreg(num: u8) -> Reg {
    assert!(num < 31);
    Reg::new_real(
        RegClass::I64,
        /* enc = */ num,
        /* index = */ XREG_INDICES[num as usize],
    )
}

/// Get a writable reference to an X-register.
pub fn writable_xreg(num: u8) -> Writable<Reg> {
    Writable::from_reg(xreg(num))
}

/// Get a reference to a V-register (vector/FP register).
pub fn vreg(num: u8) -> Reg {
    assert!(num < 32);
    Reg::new_real(RegClass::V128, /* enc = */ num, /* index = */ num)
}

/// Get a writable reference to a V-register.
pub fn writable_vreg(num: u8) -> Writable<Reg> {
    Writable::from_reg(vreg(num))
}

/// Get a reference to the zero-register.
pub fn zero_reg() -> Reg {
    // This should be the same as what xreg(31) returns, except that
    // we use the special index into the register index space.
    Reg::new_real(
        RegClass::I64,
        /* enc = */ 31,
        /* index = */ ZERO_REG_INDEX,
    )
}

/// Get a writable reference to the zero-register (this discards a result).
pub fn writable_zero_reg() -> Writable<Reg> {
    Writable::from_reg(zero_reg())
}

/// Get a reference to the stack-pointer register.
pub fn stack_reg() -> Reg {
    // XSP (stack) and XZR (zero) are logically different registers which have
    // the same hardware encoding, and whose meaning, in real aarch64
    // instructions, is context-dependent. For convenience of
    // universe-construction and for correct printing, we make them be two
    // different real registers.
    Reg::new_real(
        RegClass::I64,
        /* enc = */ 31,
        /* index = */ SP_REG_INDEX,
    )
}

/// Get a writable reference to the stack-pointer register.
pub fn writable_stack_reg() -> Writable<Reg> {
    Writable::from_reg(stack_reg())
}

/// Get a reference to the link register (x30).
pub fn link_reg() -> Reg {
    xreg(30)
}

/// Get a writable reference to the link register.
pub fn writable_link_reg() -> Writable<Reg> {
    Writable::from_reg(link_reg())
}

/// Get a reference to the frame pointer (x29).
pub fn fp_reg() -> Reg {
    xreg(29)
}

/// Get a writable reference to the frame pointer.
pub fn writable_fp_reg() -> Writable<Reg> {
    Writable::from_reg(fp_reg())
}

/// Get a reference to the "spill temp" register. This register is used to
/// compute the address of a spill slot when a direct offset addressing mode from
/// FP is not sufficient (+/- 2^11 words). We exclude this register from regalloc
/// and reserve it for this purpose for simplicity; otherwise we need a
/// multi-stage analysis where we first determine how many spill slots we have,
/// then perhaps remove the reg from the pool and recompute regalloc.
pub fn spilltmp_reg() -> Reg {
    xreg(15)
}

/// Get a writable reference to the spilltmp reg.
pub fn writable_spilltmp_reg() -> Writable<Reg> {
    Writable::from_reg(spilltmp_reg())
}

/// Create the register universe for AArch64.
pub fn create_reg_universe() -> RealRegUniverse {
    let mut regs = vec![];
    let mut allocable_by_class = [None; NUM_REG_CLASSES];

    // Numbering Scheme: we put V-regs first, then X-regs. The X-regs
    // exclude several registers: x15 (the spill temporary), x18 (globally
    // reserved for platform-specific purposes), x29 (frame pointer), x30
    // (link register), x31 (stack pointer or zero register, depending on
    // context).

    let v_reg_base = 0u8; // in contiguous real-register index space
    let v_reg_count = 32;
    for i in 0u8..v_reg_count {
        let reg = Reg::new_real(
            RegClass::V128,
            /* enc = */ i,
            /* index = */ v_reg_base + i,
        )
        .to_real_reg();
        let name = format!("v{}", i);
        regs.push((reg, name));
    }
    let v_reg_last = v_reg_base + v_reg_count - 1;

    // Add the X registers. N.B.: the order here must match the order implied
    // by XREG_INDICES, ZERO_REG_INDEX, and SP_REG_INDEX above.

    let x_reg_base = 32u8; // in contiguous real-register index space
    let mut x_reg_count = 0;
    for i in 0u8..32u8 {
        // See above for excluded registers.
        if i == 15 || i == 18 || i == 29 || i == 30 || i == 31 {
            continue;
        }
        let reg = Reg::new_real(
            RegClass::I64,
            /* enc = */ i,
            /* index = */ x_reg_base + x_reg_count,
        )
        .to_real_reg();
        let name = format!("x{}", i);
        regs.push((reg, name));
        x_reg_count += 1;
    }
    let x_reg_last = x_reg_base + x_reg_count - 1;

    allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
        first: x_reg_base as usize,
        last: x_reg_last as usize,
        suggested_scratch: Some(XREG_INDICES[13] as usize),
    });
    allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
        first: v_reg_base as usize,
        last: v_reg_last as usize,
        suggested_scratch: Some(/* V31: */ 31),
    });

    // Other regs, not available to the allocator.
    let allocable = regs.len();
    regs.push((xreg(15).to_real_reg(), "x15".to_string()));
    regs.push((xreg(18).to_real_reg(), "x18".to_string()));
    regs.push((fp_reg().to_real_reg(), "fp".to_string()));
    regs.push((link_reg().to_real_reg(), "lr".to_string()));
    regs.push((zero_reg().to_real_reg(), "xzr".to_string()));
    regs.push((stack_reg().to_real_reg(), "sp".to_string()));
    // FIXME JRS 2020Feb06: unfortunately this pushes the number of real regs
    // to 65, which is potentially inconvenient from a compiler performance
    // standpoint. We could possibly drop back to 64 by "losing" a vector
    // register in future.

    // Assert sanity: the indices in the register structs must match their
    // actual indices in the array.
    for (i, reg) in regs.iter().enumerate() {
        assert_eq!(i, reg.0.get_index());
    }

    RealRegUniverse {
        regs,
        allocable,
        allocable_by_class,
    }
}
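
// A minimal sketch (hypothetical test, not part of the patch) of the index
// layout this numbering scheme produces: v0..v31 occupy indices 0..31, the
// allocatable X-registers start at 32, and the pinned registers (x15, x18,
// fp, lr, xzr, sp) sit at the end of the universe.
#[cfg(test)]
#[test]
fn universe_index_sketch() {
    assert_eq!(vreg(0).to_real_reg().get_index(), 0);
    assert_eq!(vreg(31).to_real_reg().get_index(), 31);
    assert_eq!(xreg(0).to_real_reg().get_index(), 32);
    assert_eq!(xreg(15).to_real_reg().get_index(), 59); // spilltmp, not allocatable
    assert_eq!(zero_reg().to_real_reg().get_index(), 63);
    assert_eq!(stack_reg().to_real_reg().get_index(), 64);
}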
/// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show
/// its name at the 32-bit size.
pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSize) -> String {
    let mut s = reg.show_rru(mb_rru);
    if reg.get_class() != RegClass::I64 || !size.is32() {
        // We can't do any better.
        return s;
    }

    if reg.is_real() {
        // Change (eg) "x42" into "w42" as appropriate.
        if reg.get_class() == RegClass::I64 && size.is32() && s.starts_with("x") {
            s = "w".to_string() + &s[1..];
        }
    } else {
        // Add a "w" suffix to RegClass::I64 vregs used in a 32-bit role.
        if reg.get_class() == RegClass::I64 && size.is32() {
            s.push('w');
        }
    }
    s
}

/// Show a vector register when its use as a 32-bit or 64-bit float is known.
pub fn show_freg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSize) -> String {
    let mut s = reg.show_rru(mb_rru);
    if reg.get_class() != RegClass::V128 {
        return s;
    }
    let prefix = if size.is32() { "s" } else { "d" };
    s.replace_range(0..1, prefix);
    s
}

/// Show a vector register used in a scalar context.
pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>) -> String {
    let mut s = reg.show_rru(mb_rru);
    if reg.get_class() != RegClass::V128 {
        // We can't do any better.
        return s;
    }

    if reg.is_real() {
        // Change (eg) "v0" into "d0".
        if reg.get_class() == RegClass::V128 && s.starts_with("v") {
            s.replace_range(0..1, "d");
        }
    } else {
        // Add a "d" suffix to RegClass::V128 vregs.
        if reg.get_class() == RegClass::V128 {
            s.push('d');
        }
    }
    s
}
2768 cranelift/codegen/src/isa/aarch64/lower.rs (new file)
File diff suppressed because it is too large

220 cranelift/codegen/src/isa/aarch64/mod.rs (new file)
@@ -0,0 +1,220 @@
//! ARM 64-bit Instruction Set Architecture.

use crate::ir::Function;
use crate::isa::Builder as IsaBuilder;
use crate::machinst::{
    compile, MachBackend, MachCompileResult, ShowWithRRU, TargetIsaAdapter, VCode,
};
use crate::result::CodegenResult;
use crate::settings;

use alloc::boxed::Box;

use regalloc::RealRegUniverse;
use target_lexicon::{Aarch64Architecture, Architecture, Triple};

// New backend:
mod abi;
mod inst;
mod lower;

use inst::create_reg_universe;

/// An AArch64 backend.
pub struct AArch64Backend {
    triple: Triple,
    flags: settings::Flags,
}

impl AArch64Backend {
    /// Create a new AArch64 backend with the given (shared) flags.
    pub fn new_with_flags(triple: Triple, flags: settings::Flags) -> AArch64Backend {
        AArch64Backend { triple, flags }
    }

    fn compile_vcode(&self, func: &Function, flags: &settings::Flags) -> VCode<inst::Inst> {
        // This performs lowering to VCode, register-allocates the code, computes
        // block layout and finalizes branches. The result is ready for binary emission.
        let abi = Box::new(abi::AArch64ABIBody::new(func));
        compile::compile::<AArch64Backend>(func, self, abi, flags)
    }
}

impl MachBackend for AArch64Backend {
    fn compile_function(
        &self,
        func: &Function,
        want_disasm: bool,
    ) -> CodegenResult<MachCompileResult> {
        let flags = self.flags();
        let vcode = self.compile_vcode(func, flags);
        let sections = vcode.emit();
        let frame_size = vcode.frame_size();

        let disasm = if want_disasm {
            Some(vcode.show_rru(Some(&create_reg_universe())))
        } else {
            None
        };

        Ok(MachCompileResult {
            sections,
            frame_size,
            disasm,
        })
    }

    fn name(&self) -> &'static str {
        "aarch64"
    }

    fn triple(&self) -> Triple {
        self.triple.clone()
    }

    fn flags(&self) -> &settings::Flags {
        &self.flags
    }

    fn reg_universe(&self) -> RealRegUniverse {
        create_reg_universe()
    }
}

/// Create a new `isa::Builder`.
pub fn isa_builder(triple: Triple) -> IsaBuilder {
    assert!(triple.architecture == Architecture::Aarch64(Aarch64Architecture::Aarch64));
    IsaBuilder {
        triple,
        setup: settings::builder(),
        constructor: |triple, shared_flags, _| {
            let backend = AArch64Backend::new_with_flags(triple, shared_flags);
            Box::new(TargetIsaAdapter::new(backend))
        },
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::cursor::{Cursor, FuncCursor};
    use crate::ir::types::*;
    use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
    use crate::isa::CallConv;
    use crate::settings;
    use crate::settings::Configurable;
    use core::str::FromStr;
    use target_lexicon::Triple;

    #[test]
    fn test_compile_function() {
        let name = ExternalName::testcase("test0");
        let mut sig = Signature::new(CallConv::SystemV);
        sig.params.push(AbiParam::new(I32));
        sig.returns.push(AbiParam::new(I32));
        let mut func = Function::with_name_signature(name, sig);

        let bb0 = func.dfg.make_block();
        let arg0 = func.dfg.append_block_param(bb0, I32);

        let mut pos = FuncCursor::new(&mut func);
        pos.insert_block(bb0);
        let v0 = pos.ins().iconst(I32, 0x1234);
        let v1 = pos.ins().iadd(arg0, v0);
        pos.ins().return_(&[v1]);

        let mut shared_flags = settings::builder();
        shared_flags.set("opt_level", "none").unwrap();
        let backend = AArch64Backend::new_with_flags(
            Triple::from_str("aarch64").unwrap(),
            settings::Flags::new(shared_flags),
        );
        let sections = backend.compile_function(&mut func, false).unwrap().sections;
        let code = &sections.sections[0].data;

        // stp x29, x30, [sp, #-16]!
        // mov x29, sp
        // mov x1, #0x1234
        // add w0, w0, w1
        // mov sp, x29
        // ldp x29, x30, [sp], #16
        // ret
        let golden = vec![
            0xfd, 0x7b, 0xbf, 0xa9, 0xfd, 0x03, 0x00, 0x91, 0x81, 0x46, 0x82, 0xd2, 0x00, 0x00,
            0x01, 0x0b, 0xbf, 0x03, 0x00, 0x91, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6,
        ];

        assert_eq!(code, &golden);
    }

    #[test]
    fn test_branch_lowering() {
        let name = ExternalName::testcase("test0");
        let mut sig = Signature::new(CallConv::SystemV);
        sig.params.push(AbiParam::new(I32));
        sig.returns.push(AbiParam::new(I32));
        let mut func = Function::with_name_signature(name, sig);

        let bb0 = func.dfg.make_block();
        let arg0 = func.dfg.append_block_param(bb0, I32);
        let bb1 = func.dfg.make_block();
        let bb2 = func.dfg.make_block();
        let bb3 = func.dfg.make_block();

        let mut pos = FuncCursor::new(&mut func);
        pos.insert_block(bb0);
        let v0 = pos.ins().iconst(I32, 0x1234);
        let v1 = pos.ins().iadd(arg0, v0);
        pos.ins().brnz(v1, bb1, &[]);
        pos.ins().jump(bb2, &[]);
        pos.insert_block(bb1);
        pos.ins().brnz(v1, bb2, &[]);
        pos.ins().jump(bb3, &[]);
        pos.insert_block(bb2);
        let v2 = pos.ins().iadd(v1, v0);
        pos.ins().brnz(v2, bb2, &[]);
        pos.ins().jump(bb1, &[]);
        pos.insert_block(bb3);
        let v3 = pos.ins().isub(v1, v0);
        pos.ins().return_(&[v3]);

        let mut shared_flags = settings::builder();
        shared_flags.set("opt_level", "none").unwrap();
        let backend = AArch64Backend::new_with_flags(
            Triple::from_str("aarch64").unwrap(),
            settings::Flags::new(shared_flags),
        );
        let result = backend
            .compile_function(&mut func, /* want_disasm = */ false)
            .unwrap();
        let code = &result.sections.sections[0].data;

        // stp x29, x30, [sp, #-16]!
        // mov x29, sp
        // mov x1, x0
        // mov x0, #0x1234
        // add w1, w1, w0
        // mov w2, w1
        // cbz x2, ...
        // mov w2, w1
        // cbz x2, ...
        // sub w0, w1, w0
        // mov sp, x29
        // ldp x29, x30, [sp], #16
        // ret
        // add w2, w1, w0
        // mov w2, w2
        // cbnz x2, ... <---- compound branch (cond / uncond)
        // b ...        <----
        let golden = vec![
            0xfd, 0x7b, 0xbf, 0xa9, 0xfd, 0x03, 0x00, 0x91, 0xe1, 0x03, 0x00, 0xaa, 0x80, 0x46,
            0x82, 0xd2, 0x21, 0x00, 0x00, 0x0b, 0xe2, 0x03, 0x01, 0x2a, 0xe2, 0x00, 0x00, 0xb4,
            0xe2, 0x03, 0x01, 0x2a, 0xa2, 0x00, 0x00, 0xb5, 0x20, 0x00, 0x00, 0x4b, 0xbf, 0x03,
            0x00, 0x91, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6, 0x22, 0x00, 0x00, 0x0b,
            0xe2, 0x03, 0x02, 0x2a, 0xc2, 0xff, 0xff, 0xb5, 0xf7, 0xff, 0xff, 0x17,
        ];

        assert_eq!(code, &golden);
    }
}
@@ -1,31 +0,0 @@
//! ARM 64 ABI implementation.

use super::registers::{FPR, GPR};
use crate::ir;
use crate::isa::RegClass;
use crate::regalloc::RegisterSet;
use crate::settings as shared_settings;
use alloc::borrow::Cow;

/// Legalize `sig`.
pub fn legalize_signature(
    _sig: &mut Cow<ir::Signature>,
    _flags: &shared_settings::Flags,
    _current: bool,
) {
    unimplemented!()
}

/// Get register class for a type appearing in a legalized signature.
pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
    if ty.is_int() {
        GPR
    } else {
        FPR
    }
}

/// Get the set of allocatable registers for `func`.
pub fn allocatable_registers(_func: &ir::Function) -> RegisterSet {
    unimplemented!()
}
@@ -1,8 +0,0 @@
//! Emitting binary ARM64 machine code.

use crate::binemit::{bad_encoding, CodeSink};
use crate::ir::{Function, Inst};
use crate::isa::TargetIsa;
use crate::regalloc::RegDiversions;

include!(concat!(env!("OUT_DIR"), "/binemit-arm64.rs"));
@@ -1,10 +0,0 @@
//! Encoding tables for ARM64 ISA.

use crate::ir;
use crate::isa;
use crate::isa::constraints::*;
use crate::isa::enc_tables::*;
use crate::isa::encoding::RecipeSizing;

include!(concat!(env!("OUT_DIR"), "/encoding-arm64.rs"));
include!(concat!(env!("OUT_DIR"), "/legalize-arm64.rs"));
@@ -1,132 +0,0 @@
//! ARM 64-bit Instruction Set Architecture.

mod abi;
mod binemit;
mod enc_tables;
mod registers;
pub mod settings;

use super::super::settings as shared_settings;
#[cfg(feature = "testing_hooks")]
use crate::binemit::CodeSink;
use crate::binemit::{emit_function, MemoryCodeSink};
use crate::ir;
use crate::isa::enc_tables::{lookup_enclist, Encodings};
use crate::isa::Builder as IsaBuilder;
use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
use crate::regalloc;
use alloc::borrow::Cow;
use alloc::boxed::Box;
use core::fmt;
use target_lexicon::Triple;

#[allow(dead_code)]
struct Isa {
    triple: Triple,
    shared_flags: shared_settings::Flags,
    isa_flags: settings::Flags,
}

/// Get an ISA builder for creating ARM64 targets.
pub fn isa_builder(triple: Triple) -> IsaBuilder {
    IsaBuilder {
        triple,
        setup: settings::builder(),
        constructor: isa_constructor,
    }
}

fn isa_constructor(
    triple: Triple,
    shared_flags: shared_settings::Flags,
    builder: shared_settings::Builder,
) -> Box<dyn TargetIsa> {
    Box::new(Isa {
        triple,
        isa_flags: settings::Flags::new(&shared_flags, builder),
        shared_flags,
    })
}

impl TargetIsa for Isa {
    fn name(&self) -> &'static str {
        "arm64"
    }

    fn triple(&self) -> &Triple {
        &self.triple
    }

    fn flags(&self) -> &shared_settings::Flags {
        &self.shared_flags
    }

    fn register_info(&self) -> RegInfo {
        registers::INFO.clone()
    }

    fn encoding_info(&self) -> EncInfo {
        enc_tables::INFO.clone()
    }

    fn legal_encodings<'a>(
        &'a self,
        func: &'a ir::Function,
        inst: &'a ir::InstructionData,
        ctrl_typevar: ir::Type,
    ) -> Encodings<'a> {
        lookup_enclist(
            ctrl_typevar,
            inst,
            func,
            &enc_tables::LEVEL1_A64[..],
            &enc_tables::LEVEL2[..],
            &enc_tables::ENCLISTS[..],
            &enc_tables::LEGALIZE_ACTIONS[..],
            &enc_tables::RECIPE_PREDICATES[..],
            &enc_tables::INST_PREDICATES[..],
            self.isa_flags.predicate_view(),
        )
    }

    fn legalize_signature(&self, sig: &mut Cow<ir::Signature>, current: bool) {
        abi::legalize_signature(sig, &self.shared_flags, current)
    }

    fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
        abi::regclass_for_abi_type(ty)
    }

    fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
        abi::allocatable_registers(func)
    }

    #[cfg(feature = "testing_hooks")]
    fn emit_inst(
        &self,
        func: &ir::Function,
        inst: ir::Inst,
        divert: &mut regalloc::RegDiversions,
        sink: &mut dyn CodeSink,
    ) {
        binemit::emit_inst(func, inst, divert, sink, self)
    }

    fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
        emit_function(func, binemit::emit_inst, sink, self)
    }

    fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC {
        ir::condcodes::IntCC::UnsignedLessThan
    }

    fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
        ir::condcodes::IntCC::UnsignedGreaterThanOrEqual
    }
}

impl fmt::Display for Isa {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
    }
}
@@ -1,39 +0,0 @@
//! ARM64 register descriptions.

use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};

include!(concat!(env!("OUT_DIR"), "/registers-arm64.rs"));

#[cfg(test)]
mod tests {
    use super::INFO;
    use crate::isa::RegUnit;
    use alloc::string::{String, ToString};

    #[test]
    fn unit_encodings() {
        assert_eq!(INFO.parse_regunit("x0"), Some(0));
        assert_eq!(INFO.parse_regunit("x31"), Some(31));
        assert_eq!(INFO.parse_regunit("v0"), Some(32));
        assert_eq!(INFO.parse_regunit("v31"), Some(63));

        assert_eq!(INFO.parse_regunit("x32"), None);
        assert_eq!(INFO.parse_regunit("v32"), None);
    }

    #[test]
    fn unit_names() {
        fn uname(ru: RegUnit) -> String {
            INFO.display_regunit(ru).to_string()
        }

        assert_eq!(uname(0), "%x0");
        assert_eq!(uname(1), "%x1");
        assert_eq!(uname(31), "%x31");
        assert_eq!(uname(32), "%v0");
        assert_eq!(uname(33), "%v1");
        assert_eq!(uname(63), "%v31");
        assert_eq!(uname(64), "%nzcv");
        assert_eq!(uname(65), "%INVALID65");
    }
}
@@ -1,9 +0,0 @@
//! ARM64 Settings.

use crate::settings::{self, detail, Builder};
use core::fmt;

// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs`. This file contains a
// public `Flags` struct with an impl for all of the settings defined in
// `cranelift-codegen/meta/src/isa/arm64/mod.rs`.
include!(concat!(env!("OUT_DIR"), "/settings-arm64.rs"));
@@ -48,6 +48,7 @@ pub use crate::isa::call_conv::CallConv;
pub use crate::isa::constraints::{
    BranchRange, ConstraintKind, OperandConstraint, RecipeConstraints,
};
pub use crate::isa::enc_tables::Encodings;
pub use crate::isa::encoding::{base_size, EncInfo, Encoding};
pub use crate::isa::registers::{regs_overlap, RegClass, RegClassIndex, RegInfo, RegUnit};
pub use crate::isa::stack::{StackBase, StackBaseMask, StackRef};
@@ -55,9 +56,9 @@ pub use crate::isa::stack::{StackBase, StackBaseMask, StackRef};
use crate::binemit;
use crate::flowgraph;
use crate::ir;
use crate::isa::enc_tables::Encodings;
#[cfg(feature = "unwind")]
use crate::isa::fde::RegisterMappingError;
#[cfg(feature = "unwind")]
use crate::machinst::MachBackend;
use crate::regalloc;
use crate::result::CodegenResult;
use crate::settings;
@@ -83,7 +84,7 @@ pub mod fde;
mod arm32;

#[cfg(feature = "arm64")]
mod arm64;
mod aarch64;

mod call_conv;
mod constraints;
@@ -92,6 +93,9 @@ mod encoding;
pub mod registers;
mod stack;

#[cfg(test)]
mod test_utils;

/// Returns a builder that can create a corresponding `TargetIsa`
/// or `Err(LookupError::SupportDisabled)` if not enabled.
macro_rules! isa_builder {
@@ -116,7 +120,7 @@ pub fn lookup(triple: Triple) -> Result<Builder, LookupError> {
            isa_builder!(x86, "x86", triple)
        }
        Architecture::Arm { .. } => isa_builder!(arm32, "arm32", triple),
        Architecture::Aarch64 { .. } => isa_builder!(arm64, "arm64", triple),
        Architecture::Aarch64 { .. } => isa_builder!(aarch64, "arm64", triple),
        _ => Err(LookupError::Unsupported),
    }
}
@@ -402,6 +406,11 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
        // No-op by default
        Ok(())
    }

    /// Get the new-style MachBackend, if this is an adapter around one.
    fn get_mach_backend(&self) -> Option<&dyn MachBackend> {
        None
    }
}

impl Debug for &dyn TargetIsa {
88 cranelift/codegen/src/isa/test_utils.rs (new file)
@@ -0,0 +1,88 @@
// This is unused when no platforms with the new backend are enabled.
#![allow(dead_code)]

use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc};
use crate::ir::Value;
use crate::ir::{ConstantOffset, ExternalName, Function, JumpTable, Opcode, SourceLoc, TrapCode};
use crate::isa::TargetIsa;

use alloc::vec::Vec;
use std::string::String;

pub struct TestCodeSink {
    bytes: Vec<u8>,
}

impl TestCodeSink {
    /// Create a new TestCodeSink.
    pub fn new() -> TestCodeSink {
        TestCodeSink { bytes: vec![] }
    }

    /// Return the code emitted to this sink as a hex string.
    pub fn stringify(&self) -> String {
        // This is pretty lame, but whatever ..
        use std::fmt::Write;
        let mut s = String::with_capacity(self.bytes.len() * 2);
        for b in &self.bytes {
            write!(&mut s, "{:02X}", b).unwrap();
        }
        s
    }
}

impl CodeSink for TestCodeSink {
    fn offset(&self) -> CodeOffset {
        self.bytes.len() as CodeOffset
    }

    fn put1(&mut self, x: u8) {
        self.bytes.push(x);
    }

    fn put2(&mut self, x: u16) {
        self.bytes.push((x >> 0) as u8);
        self.bytes.push((x >> 8) as u8);
    }

    fn put4(&mut self, mut x: u32) {
        for _ in 0..4 {
            self.bytes.push(x as u8);
            x >>= 8;
        }
    }

    fn put8(&mut self, mut x: u64) {
        for _ in 0..8 {
            self.bytes.push(x as u8);
            x >>= 8;
        }
    }

    fn reloc_block(&mut self, _rel: Reloc, _block_offset: CodeOffset) {}

    fn reloc_external(
        &mut self,
        _srcloc: SourceLoc,
        _rel: Reloc,
        _name: &ExternalName,
        _addend: Addend,
    ) {
    }

    fn reloc_constant(&mut self, _rel: Reloc, _constant_offset: ConstantOffset) {}

    fn reloc_jt(&mut self, _rel: Reloc, _jt: JumpTable) {}

    fn trap(&mut self, _code: TrapCode, _srcloc: SourceLoc) {}

    fn begin_jumptables(&mut self) {}

    fn begin_rodata(&mut self) {}

    fn end_codegen(&mut self) {}

    fn add_stackmap(&mut self, _val_list: &[Value], _func: &Function, _isa: &dyn TargetIsa) {}

    fn add_call_site(&mut self, _opcode: Opcode, _srcloc: SourceLoc) {}
}
@@ -196,6 +196,55 @@ pub fn legalize_function(func: &mut ir::Function, cfg: &mut ControlFlowGraph, is
    }
}

/// Perform a simple legalization by expansion of the function, without
/// platform-specific transforms.
pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa) {
    let mut pos = FuncCursor::new(func);
    let func_begin = pos.position();
    pos.set_position(func_begin);
    while let Some(_block) = pos.next_block() {
        let mut prev_pos = pos.position();
        while let Some(inst) = pos.next_inst() {
            let expanded = match pos.func.dfg[inst].opcode() {
                ir::Opcode::BrIcmp
                | ir::Opcode::GlobalValue
                | ir::Opcode::HeapAddr
                | ir::Opcode::StackLoad
                | ir::Opcode::StackStore
                | ir::Opcode::TableAddr
                | ir::Opcode::Trapnz
                | ir::Opcode::Trapz
                | ir::Opcode::BandImm
                | ir::Opcode::BorImm
                | ir::Opcode::BxorImm
                | ir::Opcode::IaddImm
                | ir::Opcode::IfcmpImm
                | ir::Opcode::ImulImm
                | ir::Opcode::IrsubImm
                | ir::Opcode::IshlImm
                | ir::Opcode::RotlImm
                | ir::Opcode::RotrImm
                | ir::Opcode::SdivImm
                | ir::Opcode::SremImm
                | ir::Opcode::SshrImm
                | ir::Opcode::UdivImm
                | ir::Opcode::UremImm
                | ir::Opcode::UshrImm
                | ir::Opcode::IcmpImm => expand(inst, &mut pos.func, cfg, isa),
                _ => false,
            };

            if expanded {
                // Legalization implementations require a fixpoint loop
                // here. TODO: fix this.
                pos.set_position(prev_pos);
            } else {
                prev_pos = pos.position();
            }
        }
    }
}
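
// As a concrete (illustrative) example of what `expand` does for one of the
// opcodes listed above: an `iadd_imm`-style instruction is rewritten into a
// materialized constant plus a plain two-register op, roughly
//
//   v2 = iadd_imm v1, 5
//
// becoming
//
//   v3 = iconst.i32 5
//   v2 = iadd v1, v3
//
// so that only opcodes the new backends lower directly remain.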

// Include legalization patterns that were generated by `gen_legalizer.rs` from the
// `TransformGroup` in `cranelift-codegen/meta/shared/legalize.rs`.
//
@@ -71,6 +71,7 @@ pub mod flowgraph;
pub mod ir;
pub mod isa;
pub mod loop_analysis;
pub mod machinst;
pub mod print_errors;
pub mod settings;
pub mod timing;
@@ -86,10 +87,12 @@ mod context;
mod dce;
mod divconst_magic_numbers;
mod fx;
mod inst_predicates;
mod iterators;
mod legalizer;
mod licm;
mod nan_canonicalization;
mod num_uses;
mod partition_slice;
mod postopt;
mod predicates;
149 cranelift/codegen/src/machinst/abi.rs (new file)
@@ -0,0 +1,149 @@
//! ABI definitions.

use crate::ir::StackSlot;
use crate::machinst::*;
use crate::settings;

use regalloc::{Reg, Set, SpillSlot, Writable};

/// Trait implemented by an object that tracks ABI-related state (e.g., stack
/// layout) and can generate code while emitting the *body* of a function.
pub trait ABIBody {
    /// The instruction type for the ISA associated with this ABI.
    type I: VCodeInst;

    /// Get the liveins of the function.
    fn liveins(&self) -> Set<RealReg>;

    /// Get the liveouts of the function.
    fn liveouts(&self) -> Set<RealReg>;

    /// Number of arguments.
    fn num_args(&self) -> usize;

    /// Number of return values.
    fn num_retvals(&self) -> usize;

    /// Number of stack slots (not spill slots).
    fn num_stackslots(&self) -> usize;

    /// Generate an instruction which copies an argument to a destination
    /// register.
    fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Self::I;

    /// Generate an instruction which copies a source register to a return
    /// value slot.
    fn gen_copy_reg_to_retval(&self, idx: usize, from_reg: Reg) -> Self::I;

    /// Generate a return instruction.
    fn gen_ret(&self) -> Self::I;

    /// Generate an epilogue placeholder. The returned instruction should return `true` from
    /// `is_epilogue_placeholder()`; this is used to indicate to the lowering driver when
    /// the epilogue should be inserted.
    fn gen_epilogue_placeholder(&self) -> Self::I;

    // -----------------------------------------------------------------
    // Every function above this line may only be called pre-regalloc.
    // Every function below this line may only be called post-regalloc.
    // `spillslots()` must be called before any other post-regalloc
    // function.
    // ----------------------------------------------------------------

    /// Update with the number of spillslots, post-regalloc.
    fn set_num_spillslots(&mut self, slots: usize);

    /// Update with the clobbered registers, post-regalloc.
    fn set_clobbered(&mut self, clobbered: Set<Writable<RealReg>>);

    /// Load from a stackslot.
    fn load_stackslot(
        &self,
        slot: StackSlot,
        offset: u32,
        ty: Type,
        into_reg: Writable<Reg>,
    ) -> Self::I;

    /// Store to a stackslot.
    fn store_stackslot(&self, slot: StackSlot, offset: u32, ty: Type, from_reg: Reg) -> Self::I;

    /// Load from a spillslot.
    fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable<Reg>) -> Self::I;

    /// Store to a spillslot.
    fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Self::I;

    /// Generate a prologue, post-regalloc. This should include any stack
    /// frame or other setup necessary to use the other methods (`load_arg`,
    /// `store_retval`, and spillslot accesses.) `self` is mutable so that we
    /// can store information in it which will be useful when creating the
    /// epilogue.
    fn gen_prologue(&mut self, flags: &settings::Flags) -> Vec<Self::I>;

    /// Generate an epilogue, post-regalloc. Note that this must generate the
    /// actual return instruction (rather than emitting this in the lowering
    /// logic), because the epilogue code comes before the return and the two are
    /// likely closely related.
    fn gen_epilogue(&self, flags: &settings::Flags) -> Vec<Self::I>;

    /// Returns the full frame size for the given function, after prologue emission has run. This
    /// comprises the spill space, incoming argument space, alignment padding, etc.
    fn frame_size(&self) -> u32;

    /// Get the spill-slot size.
    fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32;

    /// Generate a spill.
    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Type) -> Self::I;

    /// Generate a reload (fill).
    fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot, ty: Type) -> Self::I;
}
/// Trait implemented by an object that tracks ABI-related state and can
|
||||
/// generate code while emitting a *call* to a function.
|
||||
///
|
||||
/// An instance of this trait returns information for a *particular*
|
||||
/// callsite. It will usually be computed from the called function's
|
||||
/// signature.
|
||||
///
|
||||
/// Unlike `ABIBody` above, methods on this trait are not invoked directly
|
||||
/// by the machine-independent code. Rather, the machine-specific lowering
|
||||
/// code will typically create an `ABICall` when creating machine instructions
|
||||
/// for an IR call instruction inside `lower()`, directly emit the arg and
|
||||
/// and retval copies, and attach the register use/def info to the call.
|
||||
///
|
||||
/// This trait is thus provided for convenience to the backends.
|
||||
pub trait ABICall {
|
||||
/// The instruction type for the ISA associated with this ABI.
|
||||
type I: VCodeInst;
|
||||
|
||||
/// Get the number of arguments expected.
|
||||
fn num_args(&self) -> usize;
|
||||
|
||||
/// Save the clobbered registers.
|
||||
/// Copy an argument value from a source register, prior to the call.
|
||||
fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> Self::I;
|
||||
|
||||
/// Copy a return value into a destination register, after the call returns.
|
||||
fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Self::I;
|
||||
|
||||
/// Pre-adjust the stack, prior to argument copies and call.
|
||||
fn gen_stack_pre_adjust(&self) -> Vec<Self::I>;
|
||||
|
||||
/// Post-adjust the satck, after call return and return-value copies.
|
||||
fn gen_stack_post_adjust(&self) -> Vec<Self::I>;
|
||||
|
||||
/// Generate the call itself.
|
||||
///
|
||||
/// The returned instruction should have proper use- and def-sets according
|
||||
/// to the argument registers, return-value registers, and clobbered
|
||||
/// registers for this function signature in this ABI.
|
||||
///
|
||||
/// (Arg registers are uses, and retval registers are defs. Clobbered
|
||||
/// registers are also logically defs, but should never be read; their
|
||||
/// values are "defined" (to the regalloc) but "undefined" in every other
|
||||
/// sense.)
|
||||
fn gen_call(&self) -> Vec<Self::I>;
|
||||
}
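
To sketch the intended call-lowering sequence described in the `ABICall` doc comment above: a backend strings these methods together roughly as follows. This is a minimal sketch; `lower_call` and its surrounding glue are hypothetical, and only the `ABICall`/`LowerCtx` methods themselves come from this patch.

```rust
// A minimal sketch of lowering an IR call via an `ABICall` implementation.
fn lower_call<I, C, A>(ctx: &mut C, abi: &A, ir_inst: Inst)
where
    I: VCodeInst,
    C: LowerCtx<I = I>,
    A: ABICall<I = I>,
{
    // Make room on the stack for outgoing arguments, if the ABI needs it.
    for insn in abi.gen_stack_pre_adjust() {
        ctx.emit(insn);
    }
    // Copy each argument value into its ABI-designated location.
    for i in 0..abi.num_args() {
        ctx.emit(abi.gen_copy_reg_to_arg(i, ctx.input(ir_inst, i)));
    }
    // The call instruction(s) carry the use/def/clobber sets for regalloc.
    for insn in abi.gen_call() {
        ctx.emit(insn);
    }
    // Copy return values out of their ABI-designated registers.
    for i in 0..ctx.num_outputs(ir_inst) {
        ctx.emit(abi.gen_copy_retval_to_reg(i, ctx.output(ir_inst, i)));
    }
    // Undo the stack adjustment.
    for insn in abi.gen_stack_post_adjust() {
        ctx.emit(insn);
    }
}
```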

cranelift/codegen/src/machinst/adapter.rs (new file, 130 lines)
@@ -0,0 +1,130 @@
//! Adapter for a `MachBackend` to implement the `TargetIsa` trait.

use crate::binemit;
use crate::ir;
use crate::isa::{EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa};
use crate::machinst::*;
use crate::regalloc::RegisterSet;
use crate::settings::Flags;

#[cfg(feature = "testing_hooks")]
use crate::regalloc::RegDiversions;

use std::borrow::Cow;
use std::fmt;
use target_lexicon::Triple;

/// A wrapper around a `MachBackend` that provides a `TargetIsa` impl.
pub struct TargetIsaAdapter {
    backend: Box<dyn MachBackend + Send + Sync + 'static>,
    triple: Triple,
}

impl TargetIsaAdapter {
    /// Create a new `TargetIsa` wrapper around a `MachBackend`.
    pub fn new<B: MachBackend + Send + Sync + 'static>(backend: B) -> TargetIsaAdapter {
        let triple = backend.triple();
        TargetIsaAdapter {
            backend: Box::new(backend),
            triple,
        }
    }
}

impl fmt::Display for TargetIsaAdapter {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("MachBackend")
            .field("name", &self.backend.name())
            .field("triple", &self.backend.triple())
            .field("flags", &format!("{}", self.backend.flags()))
            .finish()
    }
}

impl TargetIsa for TargetIsaAdapter {
    fn name(&self) -> &'static str {
        self.backend.name()
    }

    fn triple(&self) -> &Triple {
        &self.triple
    }

    fn flags(&self) -> &Flags {
        self.backend.flags()
    }

    fn register_info(&self) -> RegInfo {
        // Called from function's Display impl, so we need a stub here.
        RegInfo {
            banks: &[],
            classes: &[],
        }
    }

    fn legal_encodings<'a>(
        &'a self,
        _func: &'a ir::Function,
        _inst: &'a ir::InstructionData,
        _ctrl_typevar: ir::Type,
    ) -> Encodings<'a> {
        panic!("Should not be called when new-style backend is available!")
    }

    fn encode(
        &self,
        _func: &ir::Function,
        _inst: &ir::InstructionData,
        _ctrl_typevar: ir::Type,
    ) -> Result<Encoding, Legalize> {
        panic!("Should not be called when new-style backend is available!")
    }

    fn encoding_info(&self) -> EncInfo {
        panic!("Should not be called when new-style backend is available!")
    }

    fn legalize_signature(&self, _sig: &mut Cow<ir::Signature>, _current: bool) {
        panic!("Should not be called when new-style backend is available!")
    }

    fn regclass_for_abi_type(&self, _ty: ir::Type) -> RegClass {
        panic!("Should not be called when new-style backend is available!")
    }

    fn allocatable_registers(&self, _func: &ir::Function) -> RegisterSet {
        panic!("Should not be called when new-style backend is available!")
    }

    fn prologue_epilogue(&self, _func: &mut ir::Function) -> CodegenResult<()> {
        panic!("Should not be called when new-style backend is available!")
    }

    #[cfg(feature = "testing_hooks")]
    fn emit_inst(
        &self,
        _func: &ir::Function,
        _inst: ir::Inst,
        _divert: &mut RegDiversions,
        _sink: &mut dyn binemit::CodeSink,
    ) {
        panic!("Should not be called when new-style backend is available!")
    }

    /// Emit a whole function into memory.
    fn emit_function_to_memory(&self, _func: &ir::Function, _sink: &mut binemit::MemoryCodeSink) {
        panic!("Should not be called when new-style backend is available!")
    }

    fn get_mach_backend(&self) -> Option<&dyn MachBackend> {
        Some(&*self.backend)
    }

    fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC {
        self.backend.unsigned_add_overflow_condition()
    }

    fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
        self.backend.unsigned_sub_overflow_condition()
    }
}
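
As a usage sketch: any new-style backend can be handed to code that still expects a `TargetIsa`, and the new pipeline is reached back through `get_mach_backend()`. The helper below is illustrative glue, not part of the patch.

```rust
// Hypothetical helper showing the adapter round-trip.
fn wrap_and_compile<B>(
    backend: B,
    func: &ir::Function,
) -> CodegenResult<MachCompileResult>
where
    B: MachBackend + Send + Sync + 'static,
{
    // Old-style TargetIsa consumers can hold this adapter...
    let isa = TargetIsaAdapter::new(backend);
    // ...while new-style callers unwrap the backend and use the new pipeline.
    let mach = isa
        .get_mach_backend()
        .expect("the adapter always exposes its backend");
    mach.compile_function(func, /* want_disasm = */ false)
}
```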

cranelift/codegen/src/machinst/blockorder.rs (new file, 59 lines)
@@ -0,0 +1,59 @@
//! Computation of basic block order in emitted code.

use crate::machinst::*;
use regalloc::{BlockIx, Function};

/// Simple reverse postorder-based block order emission.
///
/// TODO: use a proper algorithm, such as the bottom-up straight-line-section
/// construction algorithm.
struct BlockRPO {
    visited: Vec<bool>,
    postorder: Vec<BlockIndex>,
    deferred_last: Option<BlockIndex>,
}

impl BlockRPO {
    fn new<I: VCodeInst>(vcode: &VCode<I>) -> BlockRPO {
        BlockRPO {
            visited: vec![false; vcode.num_blocks()],
            postorder: vec![],
            deferred_last: None,
        }
    }

    fn visit<I: VCodeInst>(&mut self, vcode: &VCode<I>, block: BlockIndex) {
        self.visited[block as usize] = true;
        for succ in vcode.succs(block) {
            if !self.visited[*succ as usize] {
                self.visit(vcode, *succ);
            }
        }

        for i in vcode.block_insns(BlockIx::new(block)) {
            if vcode.get_insn(i).is_epilogue_placeholder() {
                debug_assert!(self.deferred_last.is_none());
                self.deferred_last = Some(block);
                return;
            }
        }

        self.postorder.push(block);
    }

    fn rpo(self) -> Vec<BlockIndex> {
        let mut rpo = self.postorder;
        rpo.reverse();
        if let Some(block) = self.deferred_last {
            rpo.push(block);
        }
        rpo
    }
}

/// Compute the final block order.
pub fn compute_final_block_order<I: VCodeInst>(vcode: &VCode<I>) -> Vec<BlockIndex> {
    let mut rpo = BlockRPO::new(vcode);
    rpo.visit(vcode, vcode.entry());
    rpo.rpo()
}

cranelift/codegen/src/machinst/compile.rs (new file, 63 lines)
@@ -0,0 +1,63 @@
//! Compilation backend pipeline: optimized IR to VCode / binemit.

use crate::ir::Function;
use crate::machinst::*;
use crate::settings;
use crate::timing;

use log::debug;
use regalloc::{allocate_registers, RegAllocAlgorithm};

/// Compile the given function down to VCode with allocated registers, ready
/// for binary emission.
pub fn compile<B: LowerBackend>(
    f: &Function,
    b: &B,
    abi: Box<dyn ABIBody<I = B::MInst>>,
    flags: &settings::Flags,
) -> VCode<B::MInst>
where
    B::MInst: ShowWithRRU,
{
    // This lowers the CL IR.
    let mut vcode = Lower::new(f, abi).lower(b);

    let universe = &B::MInst::reg_universe();

    debug!("vcode from lowering: \n{}", vcode.show_rru(Some(universe)));

    // Perform register allocation.
    // TODO: select register allocation algorithm from flags.
    let algorithm = RegAllocAlgorithm::Backtracking;
    let result = {
        let _tt = timing::regalloc();
        allocate_registers(
            &mut vcode, algorithm, universe, /*request_block_annotations=*/ false,
        )
        .map_err(|err| {
            debug!(
                "Register allocation error for vcode\n{}\nError: {:?}",
                vcode.show_rru(Some(universe)),
                err
            );
            err
        })
        .expect("register allocation")
    };

    // Reorder vcode into final order and copy out final instruction sequence
    // all at once. This also inserts prologues/epilogues.
    vcode.replace_insns_from_regalloc(result, flags);

    vcode.remove_redundant_branches();

    // Do final passes over code to finalize branches.
    vcode.finalize_branches();

    debug!(
        "vcode after regalloc: final version:\n{}",
        vcode.show_rru(Some(universe))
    );

    vcode
}
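
For orientation, a backend's `MachBackend::compile_function` would drive this entry point roughly as below. `Arm64Backend` and `Arm64ABIBody` are stand-ins for a concrete backend's types (including the assumed constructor); only `compile()` itself is defined here.

```rust
// Hypothetical glue inside a backend's compile_function implementation.
fn compile_to_vcode(
    func: &Function,
    backend: &Arm64Backend, // assumed: implements LowerBackend
    flags: &settings::Flags,
) -> VCode<<Arm64Backend as LowerBackend>::MInst> {
    // The ABI object describes the frame layout and arg/retval conventions.
    let abi = Box::new(Arm64ABIBody::new(func)); // assumed constructor
    compile(func, backend, abi, flags)
}
```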

cranelift/codegen/src/machinst/lower.rs (new file, 720 lines)
@@ -0,0 +1,720 @@
//! This module implements lowering (instruction selection) from Cranelift IR
//! to machine instructions with virtual registers. This is *almost* the final
//! machine code, except for register allocation.

use crate::entity::SecondaryMap;
use crate::inst_predicates::has_side_effect;
use crate::ir::instructions::BranchInfo;
use crate::ir::{
    Block, ExternalName, Function, GlobalValueData, Inst, InstructionData, MemFlags, Opcode,
    Signature, SourceLoc, Type, Value, ValueDef,
};
use crate::machinst::{ABIBody, BlockIndex, VCode, VCodeBuilder, VCodeInst};
use crate::num_uses::NumUses;

use regalloc::{Reg, RegClass, Set, VirtualReg, Writable};

use alloc::boxed::Box;
use alloc::vec::Vec;
use log::debug;
use smallvec::SmallVec;
use std::collections::VecDeque;

/// A context that machine-specific lowering code can use to emit lowered instructions. This is the
/// view of the machine-independent per-function lowering context that is seen by the machine
/// backend.
pub trait LowerCtx {
    /// The instruction type for which this lowering framework is instantiated.
    type I;

    /// Get the instdata for a given IR instruction.
    fn data(&self, ir_inst: Inst) -> &InstructionData;
    /// Get the controlling type for a polymorphic IR instruction.
    fn ty(&self, ir_inst: Inst) -> Type;
    /// Emit a machine instruction.
    fn emit(&mut self, mach_inst: Self::I);
    /// Indicate that an IR instruction has been merged, and so one of its
    /// uses is gone (replaced by uses of the instruction's inputs). This
    /// helps the lowering algorithm to perform on-the-fly DCE, skipping over
    /// unused instructions (such as immediates incorporated directly).
    fn merged(&mut self, from_inst: Inst);
    /// Get the producing instruction, if any, and output number, for the `idx`th input to the
    /// given IR instruction.
    fn input_inst(&self, ir_inst: Inst, idx: usize) -> Option<(Inst, usize)>;
    /// Map a Value to its associated writable (probably virtual) Reg.
    fn value_to_writable_reg(&self, val: Value) -> Writable<Reg>;
    /// Map a Value to its associated (probably virtual) Reg.
    fn value_to_reg(&self, val: Value) -> Reg;
    /// Get the `idx`th input to the given IR instruction as a virtual register.
    fn input(&self, ir_inst: Inst, idx: usize) -> Reg;
    /// Get the `idx`th output of the given IR instruction as a virtual register.
    fn output(&self, ir_inst: Inst, idx: usize) -> Writable<Reg>;
    /// Get the number of inputs to the given IR instruction.
    fn num_inputs(&self, ir_inst: Inst) -> usize;
    /// Get the number of outputs to the given IR instruction.
    fn num_outputs(&self, ir_inst: Inst) -> usize;
    /// Get the type for an instruction's input.
    fn input_ty(&self, ir_inst: Inst, idx: usize) -> Type;
    /// Get the type for an instruction's output.
    fn output_ty(&self, ir_inst: Inst, idx: usize) -> Type;
    /// Get a new temp.
    fn tmp(&mut self, rc: RegClass, ty: Type) -> Writable<Reg>;
    /// Get the number of block params.
    fn num_bb_params(&self, bb: Block) -> usize;
    /// Get the register for a block param.
    fn bb_param(&self, bb: Block, idx: usize) -> Reg;
    /// Get the register for a return value.
    fn retval(&self, idx: usize) -> Writable<Reg>;
    /// Get the target for a call instruction, as an `ExternalName`.
    fn call_target<'b>(&'b self, ir_inst: Inst) -> Option<&'b ExternalName>;
    /// Get the signature for a call or call-indirect instruction.
    fn call_sig<'b>(&'b self, ir_inst: Inst) -> Option<&'b Signature>;
    /// Get the symbol name and offset for a symbol_value instruction.
    fn symbol_value<'b>(&'b self, ir_inst: Inst) -> Option<(&'b ExternalName, i64)>;
    /// Returns the memory flags of a given memory access.
    fn memflags(&self, ir_inst: Inst) -> Option<MemFlags>;
    /// Get the source location for a given instruction.
    fn srcloc(&self, ir_inst: Inst) -> SourceLoc;
}
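
As an example of the `input_inst`/`merged` protocol: a backend that folds a constant operand into an immediate form reports the merge so the driver's use-count DCE can skip the now-unused `iconst`. This is a sketch; the `MInst` enum with `Add`/`AddImm` variants is hypothetical, not part of this patch.

```rust
// Sketch: lower `iadd x, (iconst k)` as a single add-immediate when possible.
fn lower_iadd<C: LowerCtx<I = MInst>>(ctx: &mut C, ir_inst: Inst) {
    let rd = ctx.output(ir_inst, 0);
    let rn = ctx.input(ir_inst, 0);
    if let Some((src_inst, _)) = ctx.input_inst(ir_inst, 1) {
        // Is input 1 produced by an `iconst`?
        if let &InstructionData::UnaryImm {
            opcode: Opcode::Iconst,
            imm,
        } = ctx.data(src_inst)
        {
            ctx.emit(MInst::AddImm { rd, rn, imm });
            // One use of the iconst is gone; if that was its last use, the
            // driver will skip lowering it entirely.
            ctx.merged(src_inst);
            return;
        }
    }
    let rm = ctx.input(ir_inst, 1);
    ctx.emit(MInst::Add { rd, rn, rm });
}
```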

/// A machine backend.
pub trait LowerBackend {
    /// The machine instruction type.
    type MInst: VCodeInst;

    /// Lower a single instruction. Instructions are lowered in reverse order.
    /// This function need not handle branches; those are always passed to
    /// `lower_branch_group` below.
    fn lower<C: LowerCtx<I = Self::MInst>>(&self, ctx: &mut C, inst: Inst);

    /// Lower a block-terminating group of branches (which together can be seen as one
    /// N-way branch), given a vcode BlockIndex for each target.
    fn lower_branch_group<C: LowerCtx<I = Self::MInst>>(
        &self,
        ctx: &mut C,
        insts: &[Inst],
        targets: &[BlockIndex],
        fallthrough: Option<BlockIndex>,
    );
}

/// Machine-independent lowering driver / machine-instruction container. Maintains a correspondence
/// from original Inst to MachInsts.
pub struct Lower<'a, I: VCodeInst> {
    /// The function to lower.
    f: &'a Function,

    /// Lowered machine instructions.
    vcode: VCodeBuilder<I>,

    /// Number of active uses (minus `dec_use()` calls by backend) of each instruction.
    num_uses: SecondaryMap<Inst, u32>,

    /// Mapping from `Value` (SSA value in IR) to virtual register.
    value_regs: SecondaryMap<Value, Reg>,

    /// Return-value vregs.
    retval_regs: Vec<Reg>,

    /// Next virtual register number to allocate.
    next_vreg: u32,
}

fn alloc_vreg(
    value_regs: &mut SecondaryMap<Value, Reg>,
    regclass: RegClass,
    value: Value,
    next_vreg: &mut u32,
) -> VirtualReg {
    if value_regs[value].get_index() == 0 {
        // Default value in map.
        let v = *next_vreg;
        *next_vreg += 1;
        value_regs[value] = Reg::new_virtual(regclass, v);
    }
    value_regs[value].as_virtual_reg().unwrap()
}

enum GenerateReturn {
    Yes,
    No,
}

impl<'a, I: VCodeInst> Lower<'a, I> {
    /// Prepare a new lowering context for the given IR function.
    pub fn new(f: &'a Function, abi: Box<dyn ABIBody<I = I>>) -> Lower<'a, I> {
        let mut vcode = VCodeBuilder::new(abi);

        let num_uses = NumUses::compute(f).take_uses();

        let mut next_vreg: u32 = 1;

        // The default register should never be seen, but the `value_regs` map needs a default
        // and we don't want to push `Option` everywhere. All values will be assigned registers
        // by the loops over block parameters and instruction results below.
        //
        // We do not use vreg 0 so that we can detect any unassigned register that leaks through.
        let default_register = Reg::new_virtual(RegClass::I32, 0);
        let mut value_regs = SecondaryMap::with_default(default_register);

        // Assign a vreg to each value.
        for bb in f.layout.blocks() {
            for param in f.dfg.block_params(bb) {
                let vreg = alloc_vreg(
                    &mut value_regs,
                    I::rc_for_type(f.dfg.value_type(*param)),
                    *param,
                    &mut next_vreg,
                );
                vcode.set_vreg_type(vreg, f.dfg.value_type(*param));
            }
            for inst in f.layout.block_insts(bb) {
                for result in f.dfg.inst_results(inst) {
                    let vreg = alloc_vreg(
                        &mut value_regs,
                        I::rc_for_type(f.dfg.value_type(*result)),
                        *result,
                        &mut next_vreg,
                    );
                    vcode.set_vreg_type(vreg, f.dfg.value_type(*result));
                }
            }
        }

        // Assign a vreg to each return value.
        let mut retval_regs = vec![];
        for ret in &f.signature.returns {
            let v = next_vreg;
            next_vreg += 1;
            let regclass = I::rc_for_type(ret.value_type);
            let vreg = Reg::new_virtual(regclass, v);
            retval_regs.push(vreg);
            vcode.set_vreg_type(vreg.as_virtual_reg().unwrap(), ret.value_type);
        }

        Lower {
            f,
            vcode,
            num_uses,
            value_regs,
            retval_regs,
            next_vreg,
        }
    }

    fn gen_arg_setup(&mut self) {
        if let Some(entry_bb) = self.f.layout.entry_block() {
            debug!(
                "gen_arg_setup: entry BB {} args are:\n{:?}",
                entry_bb,
                self.f.dfg.block_params(entry_bb)
            );
            for (i, param) in self.f.dfg.block_params(entry_bb).iter().enumerate() {
                let reg = Writable::from_reg(self.value_regs[*param]);
                let insn = self.vcode.abi().gen_copy_arg_to_reg(i, reg);
                self.vcode.push(insn);
            }
        }
    }

    fn gen_retval_setup(&mut self, gen_ret_inst: GenerateReturn) {
        for (i, reg) in self.retval_regs.iter().enumerate() {
            let insn = self.vcode.abi().gen_copy_reg_to_retval(i, *reg);
            self.vcode.push(insn);
        }
        let inst = match gen_ret_inst {
            GenerateReturn::Yes => self.vcode.abi().gen_ret(),
            GenerateReturn::No => self.vcode.abi().gen_epilogue_placeholder(),
        };
        self.vcode.push(inst);
    }

    fn find_reachable_bbs(&self) -> SmallVec<[Block; 16]> {
        if let Some(entry) = self.f.layout.entry_block() {
            let mut ret = SmallVec::new();
            let mut queue = VecDeque::new();
            let mut visited = SecondaryMap::with_default(false);
            queue.push_back(entry);
            visited[entry] = true;
            while !queue.is_empty() {
                let b = queue.pop_front().unwrap();
                ret.push(b);
                let mut succs: SmallVec<[Block; 16]> = SmallVec::new();
                for inst in self.f.layout.block_insts(b) {
                    if self.f.dfg[inst].opcode().is_branch() {
                        visit_branch_targets(self.f, b, inst, |succ| {
                            succs.push(succ);
                        });
                    }
                }
                for succ in succs.into_iter() {
                    if !visited[succ] {
                        queue.push_back(succ);
                        visited[succ] = true;
                    }
                }
            }

            ret
        } else {
            SmallVec::new()
        }
    }

    /// Lower the function.
    pub fn lower<B: LowerBackend<MInst = I>>(mut self, backend: &B) -> VCode<I> {
        // Find all reachable blocks.
        let bbs = self.find_reachable_bbs();

        // This records a Block-to-BlockIndex map so that branch targets can be resolved.
        let mut next_bindex = self.vcode.init_bb_map(&bbs[..]);

        // Allocate a separate BlockIndex for each control-flow instruction so that we can create
        // the edge blocks later. Each entry for a control-flow inst is the edge block; the list
        // has (control flow inst, edge block, orig block) tuples.
        let mut edge_blocks_by_inst: SecondaryMap<Inst, Vec<BlockIndex>> =
            SecondaryMap::with_default(vec![]);
        let mut edge_blocks: Vec<(Inst, BlockIndex, Block)> = vec![];

        debug!("about to lower function: {:?}", self.f);
        debug!("bb map: {:?}", self.vcode.blocks_by_bb());

        // Work backward (reverse block order, reverse through each block), skipping insns with
        // zero uses.
        for bb in bbs.iter().rev() {
            for inst in self.f.layout.block_insts(*bb) {
                let op = self.f.dfg[inst].opcode();
                if op.is_branch() {
                    // Find the original target.
                    let mut add_succ = |next_bb| {
                        let edge_block = next_bindex;
                        next_bindex += 1;
                        edge_blocks_by_inst[inst].push(edge_block);
                        edge_blocks.push((inst, edge_block, next_bb));
                    };
                    visit_branch_targets(self.f, *bb, inst, |succ| {
                        add_succ(succ);
                    });
                }
            }
        }

        for bb in bbs.iter() {
            debug!("lowering bb: {}", bb);

            // If this is a return block, produce the return value setup. N.B.: this comes
            // *before* the below because it must occur *after* any other instructions, and
            // instructions are lowered in reverse order.
            let last_insn = self.f.layout.block_insts(*bb).last().unwrap();
            let last_insn_opcode = self.f.dfg[last_insn].opcode();
            if last_insn_opcode.is_return() {
                let gen_ret = if last_insn_opcode == Opcode::Return {
                    GenerateReturn::Yes
                } else {
                    debug_assert!(last_insn_opcode == Opcode::FallthroughReturn);
                    GenerateReturn::No
                };
                self.gen_retval_setup(gen_ret);
                self.vcode.end_ir_inst();
            }

            // Find the branches at the end first, and process those, if any.
            let mut branches: SmallVec<[Inst; 2]> = SmallVec::new();
            let mut targets: SmallVec<[BlockIndex; 2]> = SmallVec::new();

            for inst in self.f.layout.block_insts(*bb).rev() {
                debug!("lower: inst {}", inst);
                if edge_blocks_by_inst[inst].len() > 0 {
                    branches.push(inst);
                    for target in edge_blocks_by_inst[inst].iter().rev().cloned() {
                        targets.push(target);
                    }
                } else {
                    // We've reached the end of the branches -- process all as a group, first.
                    if branches.len() > 0 {
                        let fallthrough = self.f.layout.next_block(*bb);
                        let fallthrough = fallthrough.map(|bb| self.vcode.bb_to_bindex(bb));
                        branches.reverse();
                        targets.reverse();
                        debug!(
                            "lower_branch_group: targets = {:?} branches = {:?}",
                            targets, branches
                        );
                        backend.lower_branch_group(
                            &mut self,
                            &branches[..],
                            &targets[..],
                            fallthrough,
                        );
                        self.vcode.end_ir_inst();
                        branches.clear();
                        targets.clear();
                    }

                    // Only codegen an instruction if it either has a side
                    // effect, or has at least one use of one of its results.
                    let num_uses = self.num_uses[inst];
                    let side_effect = has_side_effect(self.f, inst);
                    if side_effect || num_uses > 0 {
                        backend.lower(&mut self, inst);
                        self.vcode.end_ir_inst();
                    } else {
                        // If we're skipping the instruction, we need to dec-ref
                        // its arguments.
                        for arg in self.f.dfg.inst_args(inst) {
                            let val = self.f.dfg.resolve_aliases(*arg);
                            match self.f.dfg.value_def(val) {
                                ValueDef::Result(src_inst, _) => {
                                    self.dec_use(src_inst);
                                }
                                _ => {}
                            }
                        }
                    }
                }
            }

            // There are possibly some branches left if the block contained only branches.
            if branches.len() > 0 {
                let fallthrough = self.f.layout.next_block(*bb);
                let fallthrough = fallthrough.map(|bb| self.vcode.bb_to_bindex(bb));
                branches.reverse();
                targets.reverse();
                debug!(
                    "lower_branch_group: targets = {:?} branches = {:?}",
                    targets, branches
                );
                backend.lower_branch_group(&mut self, &branches[..], &targets[..], fallthrough);
                self.vcode.end_ir_inst();
                branches.clear();
                targets.clear();
            }

            // If this is the entry block, produce the argument setup.
            if Some(*bb) == self.f.layout.entry_block() {
                self.gen_arg_setup();
                self.vcode.end_ir_inst();
            }

            let vcode_bb = self.vcode.end_bb();
            debug!("finished building bb: BlockIndex {}", vcode_bb);
            debug!("bb_to_bindex map says: {}", self.vcode.bb_to_bindex(*bb));
            assert!(vcode_bb == self.vcode.bb_to_bindex(*bb));
            if Some(*bb) == self.f.layout.entry_block() {
                self.vcode.set_entry(vcode_bb);
            }
        }

        // Now create the edge blocks, with phi lowering (block parameter copies).
        for (inst, edge_block, orig_block) in edge_blocks.into_iter() {
            debug!(
                "creating edge block: inst {}, edge_block {}, orig_block {}",
                inst, edge_block, orig_block
            );

            // Create a temporary for each block parameter.
            let phi_classes: Vec<(Type, RegClass)> = self
                .f
                .dfg
                .block_params(orig_block)
                .iter()
                .map(|p| self.f.dfg.value_type(*p))
                .map(|ty| (ty, I::rc_for_type(ty)))
                .collect();

            // FIXME sewardj 2020Feb29: use SmallVec
            let mut src_regs = vec![];
            let mut dst_regs = vec![];

            // Create all of the phi uses (reads) from jump args to temps.

            // Round up all the source and destination regs.
            for (i, arg) in self.f.dfg.inst_variable_args(inst).iter().enumerate() {
                let arg = self.f.dfg.resolve_aliases(*arg);
                debug!("jump arg {} is {}", i, arg);
                src_regs.push(self.value_regs[arg]);
            }
            for (i, param) in self.f.dfg.block_params(orig_block).iter().enumerate() {
                debug!("bb arg {} is {}", i, param);
                dst_regs.push(Writable::from_reg(self.value_regs[*param]));
            }
            debug_assert!(src_regs.len() == dst_regs.len());
            debug_assert!(phi_classes.len() == dst_regs.len());

            // If, as is mostly the case, the source and destination register
            // sets are non-overlapping, then we can copy directly, so as to
            // save the register allocator work.
            if !Set::<Reg>::from_vec(src_regs.clone()).intersects(&Set::<Reg>::from_vec(
                dst_regs.iter().map(|r| r.to_reg()).collect(),
            )) {
                for (dst_reg, (src_reg, (ty, _))) in
                    dst_regs.iter().zip(src_regs.iter().zip(phi_classes))
                {
                    self.vcode.push(I::gen_move(*dst_reg, *src_reg, ty));
                }
            } else {
                // There's some overlap, so play safe and copy via temps.

                let tmp_regs: Vec<Writable<Reg>> = phi_classes
                    .iter()
                    .map(|&(ty, rc)| self.tmp(rc, ty)) // borrows `self` mutably.
                    .collect();

                debug!("phi_temps = {:?}", tmp_regs);
                debug_assert!(tmp_regs.len() == src_regs.len());

                for (tmp_reg, (src_reg, &(ty, _))) in
                    tmp_regs.iter().zip(src_regs.iter().zip(phi_classes.iter()))
                {
                    self.vcode.push(I::gen_move(*tmp_reg, *src_reg, ty));
                }
                for (dst_reg, (tmp_reg, &(ty, _))) in
                    dst_regs.iter().zip(tmp_regs.iter().zip(phi_classes.iter()))
                {
                    self.vcode.push(I::gen_move(*dst_reg, tmp_reg.to_reg(), ty));
                }
            }

            // Create the unconditional jump to the original target block.
            self.vcode
                .push(I::gen_jump(self.vcode.bb_to_bindex(orig_block)));

            // End the IR inst and block. (We lower this as if it were one IR instruction so that
            // we can emit machine instructions in forward order.)
            self.vcode.end_ir_inst();
            let blocknum = self.vcode.end_bb();
            assert!(blocknum == edge_block);
        }

        // Now that we've emitted all instructions into the VCodeBuilder, let's build the VCode.
        self.vcode.build()
    }

    /// Reduce the use-count of an IR instruction. Use this when, e.g., isel incorporates the
    /// computation of an input instruction directly, so that input instruction has one
    /// fewer use.
    fn dec_use(&mut self, ir_inst: Inst) {
        assert!(self.num_uses[ir_inst] > 0);
        self.num_uses[ir_inst] -= 1;
        debug!(
            "dec_use: ir_inst {} now has {} uses",
            ir_inst, self.num_uses[ir_inst]
        );
    }

    /// Increase the use-count of an IR instruction. Use this when, e.g., isel incorporates
    /// the computation of an input instruction directly, so that input instruction's
    /// inputs are now used directly by the merged instruction.
    fn inc_use(&mut self, ir_inst: Inst) {
        self.num_uses[ir_inst] += 1;
        debug!(
            "inc_use: ir_inst {} now has {} uses",
            ir_inst, self.num_uses[ir_inst]
        );
    }
}
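
Why the overlap check in the edge-block code above matters: block-parameter copies have parallel semantics, and a direct in-order copy goes wrong as soon as a destination register is also a later source (the classic case is a swap). A tiny self-contained illustration, with plain integers standing in for registers:

```rust
// Parallel copy (dst0, dst1) <- (src0, src1) where the sets overlap:
// dst0 is src1 and dst1 is src0, i.e. a swap.
fn main() {
    // Direct, in-order moves (the non-overlapping fast path) clobber r0
    // before it is read as a source:
    let (mut r0, mut r1) = (1, 2);
    r0 = r1; // r0 = 2
    r1 = r0; // r1 = 2 -- wrong, the old r0 is gone
    assert_eq!((r0, r1), (2, 2));

    // Copy-via-temps (the overlap path): all reads happen before any write.
    let (mut a, mut b) = (1, 2);
    let (t0, t1) = (b, a);
    a = t0;
    b = t1;
    assert_eq!((a, b), (2, 1)); // correct parallel-copy semantics
}
```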

impl<'a, I: VCodeInst> LowerCtx for Lower<'a, I> {
    type I = I;

    /// Get the instdata for a given IR instruction.
    fn data(&self, ir_inst: Inst) -> &InstructionData {
        &self.f.dfg[ir_inst]
    }

    /// Get the controlling type for a polymorphic IR instruction.
    fn ty(&self, ir_inst: Inst) -> Type {
        self.f.dfg.ctrl_typevar(ir_inst)
    }

    /// Emit a machine instruction.
    fn emit(&mut self, mach_inst: I) {
        self.vcode.push(mach_inst);
    }

    /// Indicate that a merge has occurred.
    fn merged(&mut self, from_inst: Inst) {
        debug!("merged: inst {}", from_inst);
        // First, inc-ref all inputs of `from_inst`, because they are now used
        // directly by the instruction that `from_inst` was merged into.
        for arg in self.f.dfg.inst_args(from_inst) {
            let arg = self.f.dfg.resolve_aliases(*arg);
            match self.f.dfg.value_def(arg) {
                ValueDef::Result(src_inst, _) => {
                    debug!(" -> inc-reffing src inst {}", src_inst);
                    self.inc_use(src_inst);
                }
                _ => {}
            }
        }
        // Then, dec-ref the merged instruction itself. It still retains references
        // to its arguments (inc-ref'd above). If its refcount has reached zero,
        // it will be skipped during emission and its args will be dec-ref'd at that
        // time.
        self.dec_use(from_inst);
    }

    /// Get the producing instruction, if any, and output number, for the `idx`th input to the
    /// given IR instruction.
    fn input_inst(&self, ir_inst: Inst, idx: usize) -> Option<(Inst, usize)> {
        let val = self.f.dfg.inst_args(ir_inst)[idx];
        let val = self.f.dfg.resolve_aliases(val);
        match self.f.dfg.value_def(val) {
            ValueDef::Result(src_inst, result_idx) => Some((src_inst, result_idx)),
            _ => None,
        }
    }

    /// Map a Value to its associated writable (probably virtual) Reg.
    fn value_to_writable_reg(&self, val: Value) -> Writable<Reg> {
        let val = self.f.dfg.resolve_aliases(val);
        Writable::from_reg(self.value_regs[val])
    }

    /// Map a Value to its associated (probably virtual) Reg.
    fn value_to_reg(&self, val: Value) -> Reg {
        let val = self.f.dfg.resolve_aliases(val);
        self.value_regs[val]
    }

    /// Get the `idx`th input to the given IR instruction as a virtual register.
    fn input(&self, ir_inst: Inst, idx: usize) -> Reg {
        let val = self.f.dfg.inst_args(ir_inst)[idx];
        let val = self.f.dfg.resolve_aliases(val);
        self.value_to_reg(val)
    }

    /// Get the `idx`th output of the given IR instruction as a virtual register.
    fn output(&self, ir_inst: Inst, idx: usize) -> Writable<Reg> {
        let val = self.f.dfg.inst_results(ir_inst)[idx];
        self.value_to_writable_reg(val)
    }

    /// Get a new temp.
    fn tmp(&mut self, rc: RegClass, ty: Type) -> Writable<Reg> {
        let v = self.next_vreg;
        self.next_vreg += 1;
        let vreg = Reg::new_virtual(rc, v);
        self.vcode.set_vreg_type(vreg.as_virtual_reg().unwrap(), ty);
        Writable::from_reg(vreg)
    }

    /// Get the number of inputs for the given IR instruction.
    fn num_inputs(&self, ir_inst: Inst) -> usize {
        self.f.dfg.inst_args(ir_inst).len()
    }

    /// Get the number of outputs for the given IR instruction.
    fn num_outputs(&self, ir_inst: Inst) -> usize {
        self.f.dfg.inst_results(ir_inst).len()
    }

    /// Get the type for an instruction's input.
    fn input_ty(&self, ir_inst: Inst, idx: usize) -> Type {
        let val = self.f.dfg.inst_args(ir_inst)[idx];
        let val = self.f.dfg.resolve_aliases(val);
        self.f.dfg.value_type(val)
    }

    /// Get the type for an instruction's output.
    fn output_ty(&self, ir_inst: Inst, idx: usize) -> Type {
        self.f.dfg.value_type(self.f.dfg.inst_results(ir_inst)[idx])
    }

    /// Get the number of block params.
    fn num_bb_params(&self, bb: Block) -> usize {
        self.f.dfg.block_params(bb).len()
    }

    /// Get the register for a block param.
    fn bb_param(&self, bb: Block, idx: usize) -> Reg {
        let val = self.f.dfg.block_params(bb)[idx];
        self.value_regs[val]
    }

    /// Get the register for a return value.
    fn retval(&self, idx: usize) -> Writable<Reg> {
        Writable::from_reg(self.retval_regs[idx])
    }

    /// Get the target for a call instruction, as an `ExternalName`.
    fn call_target<'b>(&'b self, ir_inst: Inst) -> Option<&'b ExternalName> {
        match &self.f.dfg[ir_inst] {
            &InstructionData::Call { func_ref, .. }
            | &InstructionData::FuncAddr { func_ref, .. } => {
                let funcdata = &self.f.dfg.ext_funcs[func_ref];
                Some(&funcdata.name)
            }
            _ => None,
        }
    }

    /// Get the signature for a call or call-indirect instruction.
    fn call_sig<'b>(&'b self, ir_inst: Inst) -> Option<&'b Signature> {
        match &self.f.dfg[ir_inst] {
            &InstructionData::Call { func_ref, .. } => {
                let funcdata = &self.f.dfg.ext_funcs[func_ref];
                Some(&self.f.dfg.signatures[funcdata.signature])
            }
            &InstructionData::CallIndirect { sig_ref, .. } => Some(&self.f.dfg.signatures[sig_ref]),
            _ => None,
        }
    }

    /// Get the symbol name and offset for a symbol_value instruction.
    fn symbol_value<'b>(&'b self, ir_inst: Inst) -> Option<(&'b ExternalName, i64)> {
        match &self.f.dfg[ir_inst] {
            &InstructionData::UnaryGlobalValue { global_value, .. } => {
                let gvdata = &self.f.global_values[global_value];
                match gvdata {
                    &GlobalValueData::Symbol {
                        ref name,
                        ref offset,
                        ..
                    } => {
                        let offset = offset.bits();
                        Some((name, offset))
                    }
                    _ => None,
                }
            }
            _ => None,
        }
    }

    /// Returns the memory flags of a given memory access.
    fn memflags(&self, ir_inst: Inst) -> Option<MemFlags> {
        match &self.f.dfg[ir_inst] {
            &InstructionData::Load { flags, .. }
            | &InstructionData::LoadComplex { flags, .. }
            | &InstructionData::Store { flags, .. }
            | &InstructionData::StoreComplex { flags, .. } => Some(flags),
            _ => None,
        }
    }

    /// Get the source location for a given instruction.
    fn srcloc(&self, ir_inst: Inst) -> SourceLoc {
        self.f.srclocs[ir_inst]
    }
}

fn visit_branch_targets<F: FnMut(Block)>(f: &Function, block: Block, inst: Inst, mut visit: F) {
    if f.dfg[inst].opcode() == Opcode::Fallthrough {
        visit(f.layout.next_block(block).unwrap());
    } else {
        match f.dfg[inst].analyze_branch(&f.dfg.value_lists) {
            BranchInfo::NotABranch => {}
            BranchInfo::SingleDest(dest, _) => {
                visit(dest);
            }
            BranchInfo::Table(table, maybe_dest) => {
                if let Some(dest) = maybe_dest {
                    visit(dest);
                }
                for &dest in f.jump_tables[table].as_slice() {
                    visit(dest);
                }
            }
        }
    }
}

cranelift/codegen/src/machinst/mod.rs (new file, 280 lines)
@@ -0,0 +1,280 @@
//! This module exposes the machine-specific backend definition pieces.
//!
//! The MachInst infrastructure is the compiler backend, from CLIF
//! (ir::Function) to machine code. The purpose of this infrastructure is, at a
//! high level, to do instruction selection/lowering (to machine instructions),
//! register allocation, and then perform all the fixups to branches, constant
//! data references, etc., needed to actually generate machine code.
//!
//! The container for machine instructions, at various stages of construction,
//! is the `VCode` struct. We refer to a sequence of machine instructions organized
//! into basic blocks as "vcode". This is short for "virtual-register code", though
//! it's a bit of a misnomer because near the end of the pipeline, vcode has all
//! real registers. Nevertheless, the name is catchy and we like it.
//!
//! The compilation pipeline, from an `ir::Function` (already optimized as much as
//! you like by machine-independent optimization passes) onward, is as follows.
//! (N.B.: though we show the VCode separately at each stage, the passes
//! mutate the VCode in place; these are not separate copies of the code.)
//!
//! ```plain
//!
//!     ir::Function                (SSA IR, machine-independent opcodes)
//!         |
//!         | [lower]
//!         |
//!     VCode<arch_backend::Inst>   (machine instructions:
//!         |                        - mostly virtual registers.
//!         |                        - cond branches in two-target form.
//!         |                        - branch targets are block indices.
//!         |                        - in-memory constants held by insns,
//!         |                          with unknown offsets.
//!         |                        - critical edges (actually all edges)
//!         |                          are split.)
//!         | [regalloc]
//!         |
//!     VCode<arch_backend::Inst>   (machine instructions:
//!         |                        - all real registers.
//!         |                        - new instruction sequence returned
//!         |                          out-of-band in RegAllocResult.
//!         |                        - instruction sequence has spills,
//!         |                          reloads, and moves inserted.
//!         |                        - other invariants same as above.)
//!         |
//!         | [preamble/postamble]
//!         |
//!     VCode<arch_backend::Inst>   (machine instructions:
//!         |                        - stack-frame size known.
//!         |                        - out-of-band instruction sequence
//!         |                          has preamble prepended to entry
//!         |                          block, and postamble injected before
//!         |                          every return instruction.
//!         |                        - all symbolic stack references to
//!         |                          stackslots and spillslots are resolved
//!         |                          to concrete FP-offset mem addresses.)
//!         | [block/insn ordering]
//!         |
//!     VCode<arch_backend::Inst>   (machine instructions:
//!         |                        - vcode.final_block_order is filled in.
//!         |                        - new insn sequence from regalloc is
//!         |                          placed back into vcode and block
//!         |                          boundaries are updated.)
//!         | [redundant branch/block
//!         |  removal]
//!         |
//!     VCode<arch_backend::Inst>   (machine instructions:
//!         |                        - all blocks that were just an
//!         |                          unconditional branch are removed.)
//!         |
//!         | [branch finalization
//!         |  (fallthroughs)]
//!         |
//!     VCode<arch_backend::Inst>   (machine instructions:
//!         |                        - all branches are in lowered one-
//!         |                          target form, but targets are still
//!         |                          block indices.)
//!         |
//!         | [branch finalization
//!         |  (offsets)]
//!         |
//!     VCode<arch_backend::Inst>   (machine instructions:
//!         |                        - all branch offsets from start of
//!         |                          function are known, and all branches
//!         |                          have resolved-offset targets.)
//!         |
//!         | [MemArg finalization]
//!         |
//!     VCode<arch_backend::Inst>   (machine instructions:
//!         |                        - all MemArg references to the constant
//!         |                          pool are replaced with offsets.
//!         |                        - all constant-pool data is collected
//!         |                          in the VCode.)
//!         |
//!         | [binary emission]
//!         |
//!     Vec<u8>                     (machine code!)
//!
//! ```

use crate::binemit::{CodeInfo, CodeOffset};
use crate::entity::SecondaryMap;
use crate::ir::condcodes::IntCC;
use crate::ir::{Function, Type};
use crate::result::CodegenResult;
use crate::settings::Flags;
use alloc::boxed::Box;
use alloc::vec::Vec;
use core::fmt::Debug;
use regalloc::Map as RegallocMap;
use regalloc::RegUsageCollector;
use regalloc::{RealReg, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
use std::string::String;
use target_lexicon::Triple;

pub mod lower;
pub use lower::*;
pub mod vcode;
pub use vcode::*;
pub mod compile;
pub use compile::*;
pub mod blockorder;
pub use blockorder::*;
pub mod abi;
pub use abi::*;
pub mod pretty_print;
pub use pretty_print::*;
pub mod sections;
pub use sections::*;
pub mod adapter;
pub use adapter::*;

/// A machine instruction.
pub trait MachInst: Clone + Debug {
    /// Return the registers referenced by this machine instruction along with
    /// the modes of reference (use, def, modify).
    fn get_regs(&self, collector: &mut RegUsageCollector);

    /// Map virtual registers to physical registers using the given virt->phys
    /// maps corresponding to the program points prior to, and after, this instruction.
    fn map_regs(
        &mut self,
        pre_map: &RegallocMap<VirtualReg, RealReg>,
        post_map: &RegallocMap<VirtualReg, RealReg>,
    );

    /// If this is a simple move, return the (source, destination) tuple of registers.
    fn is_move(&self) -> Option<(Writable<Reg>, Reg)>;

    /// Is this a terminator (branch or ret)? If so, return its type
    /// (ret/uncond/cond) and target if applicable.
    fn is_term<'a>(&'a self) -> MachTerminator<'a>;

    /// Returns true if the instruction is an epilogue placeholder.
    fn is_epilogue_placeholder(&self) -> bool;

    /// Generate a move.
    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self;

    /// Generate a zero-length no-op.
    fn gen_zero_len_nop() -> Self;

    /// Possibly operate on a value directly in a spill-slot rather than a
    /// register. Useful if the machine has register-memory instruction forms
    /// (e.g., add directly from or directly to memory), like x86.
    fn maybe_direct_reload(&self, reg: VirtualReg, slot: SpillSlot) -> Option<Self>;

    /// Determine a register class to store the given Cranelift type.
    fn rc_for_type(ty: Type) -> RegClass;

    /// Generate a jump to another target. Used during lowering of
    /// control flow.
    fn gen_jump(target: BlockIndex) -> Self;

    /// Generate a NOP. The `preferred_size` parameter allows the caller to
    /// request a NOP of that size, or as close to it as possible. The machine
    /// backend may return a NOP whose binary encoding is smaller than the
    /// preferred size, but must not return a NOP that is larger. However,
    /// the instruction must have a nonzero size.
    fn gen_nop(preferred_size: usize) -> Self;

    /// Rewrite block targets using the block-target map.
    fn with_block_rewrites(&mut self, block_target_map: &[BlockIndex]);

    /// Finalize branches once the block order (fallthrough) is known.
    fn with_fallthrough_block(&mut self, fallthrough_block: Option<BlockIndex>);

    /// Update the instruction once block offsets are known. These offsets are
    /// relative to the beginning of the function. `targets` is indexed by
    /// BlockIndex.
    fn with_block_offsets(&mut self, my_offset: CodeOffset, targets: &[CodeOffset]);

    /// Get the register universe for this backend.
    fn reg_universe() -> RealRegUniverse;

    /// Align a basic block offset (from start of function). By default, no
    /// alignment occurs.
    fn align_basic_block(offset: CodeOffset) -> CodeOffset {
        offset
    }
}

/// Describes a block terminator (not a call) in the vcode, when its branches
/// have not yet been finalized (so a branch may have two targets).
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum MachTerminator<'a> {
    /// Not a terminator.
    None,
    /// A return instruction.
    Ret,
    /// An unconditional branch to another block.
    Uncond(BlockIndex),
    /// A conditional branch to one of two other blocks.
    Cond(BlockIndex, BlockIndex),
    /// An indirect branch with known possible targets.
    Indirect(&'a [BlockIndex]),
}
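
To make the `is_term` contract concrete, here is how a backend might map its instruction forms onto `MachTerminator`. This is a sketch: the `ToyInst` enum is hypothetical, and it assumes `BlockIndex` is a `Copy` index type.

```rust
// Hypothetical machine-instruction fragment and its terminator query.
enum ToyInst {
    Ret,
    Jump { dest: BlockIndex },
    CondBr { taken: BlockIndex, not_taken: BlockIndex },
    Add { /* registers elided */ },
}

impl ToyInst {
    fn is_term<'a>(&'a self) -> MachTerminator<'a> {
        match self {
            &ToyInst::Ret => MachTerminator::Ret,
            &ToyInst::Jump { dest } => MachTerminator::Uncond(dest),
            &ToyInst::CondBr { taken, not_taken } => MachTerminator::Cond(taken, not_taken),
            // Non-control-flow instructions are not terminators.
            _ => MachTerminator::None,
        }
    }
}
```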

/// A trait describing the ability to encode a MachInst into binary machine code.
pub trait MachInstEmit<O: MachSectionOutput> {
    /// Emit the instruction.
    fn emit(&self, code: &mut O);
}

/// The result of a `MachBackend::compile_function()` call. Contains machine
/// code (as bytes) and a disassembly, if requested.
pub struct MachCompileResult {
    /// Machine code.
    pub sections: MachSections,
    /// Size of stack frame, in bytes.
    pub frame_size: u32,
    /// Disassembly, if requested.
    pub disasm: Option<String>,
}

impl MachCompileResult {
    /// Get a `CodeInfo` describing section sizes from this compilation result.
    pub fn code_info(&self) -> CodeInfo {
        let code_size = self.sections.total_size();
        CodeInfo {
            code_size,
            jumptables_size: 0,
            rodata_size: 0,
            total_size: code_size,
        }
    }
}

/// Top-level machine backend trait, which wraps all monomorphized code and
/// allows a virtual call from the machine-independent `Function::compile()`.
pub trait MachBackend {
    /// Compile the given function.
    fn compile_function(
        &self,
        func: &Function,
        want_disasm: bool,
    ) -> CodegenResult<MachCompileResult>;

    /// Return the flags for this backend.
    fn flags(&self) -> &Flags;

    /// Return the triple for this backend.
    fn triple(&self) -> Triple;

    /// Return the name for this backend.
    fn name(&self) -> &'static str;

    /// Return the register universe for this backend.
    fn reg_universe(&self) -> RealRegUniverse;

    /// Machine-specific condcode info needed by TargetIsa.
    fn unsigned_add_overflow_condition(&self) -> IntCC {
        // TODO: this is what x86 specifies. Is this right for arm64?
        IntCC::UnsignedLessThan
    }

    /// Machine-specific condcode info needed by TargetIsa.
    fn unsigned_sub_overflow_condition(&self) -> IntCC {
        // TODO: this is what x86 specifies. Is this right for arm64?
        IntCC::UnsignedLessThan
    }
}

cranelift/codegen/src/machinst/pretty_print.rs (new file, 66 lines)
@@ -0,0 +1,66 @@
//! Pretty-printing for machine code (virtual-registerized or final).

use regalloc::{RealRegUniverse, Reg, Writable};

use std::fmt::Debug;
use std::hash::Hash;
use std::string::{String, ToString};

// FIXME: Should this go into regalloc.rs instead?

/// A trait for printing instruction bits and pieces, with the ability to
/// take a contextualising RealRegUniverse that is used to give proper names to
/// registers.
pub trait ShowWithRRU {
    /// Return a string that shows the implementing object in context of the
    /// given `RealRegUniverse`, if provided.
    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String;

    /// The same as |show_rru|, but with an optional hint giving a size in
    /// bytes. Its interpretation is object-dependent, and it is intended to
    /// pass around enough information to facilitate printing sub-parts of
    /// real registers correctly. Objects may ignore size hints that are
    /// irrelevant to them.
    fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, _size: u8) -> String {
        // Default implementation is to ignore the hint.
        self.show_rru(mb_rru)
    }
}

impl ShowWithRRU for Reg {
    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
        if self.is_real() {
            if let Some(rru) = mb_rru {
                let reg_ix = self.get_index();
                if reg_ix < rru.regs.len() {
                    return rru.regs[reg_ix].1.to_string();
                } else {
                    // We have a real reg which isn't listed in the universe.
                    // Per the regalloc.rs interface requirements, this is
                    // Totally Not Allowed. Print it generically anyway, so
                    // we have something to debug.
                    return format!("!!{:?}!!", self);
                }
            }
        }
        // The reg is virtual, or we have no universe. Be generic.
        format!("%{:?}", self)
    }

    fn show_rru_sized(&self, _mb_rru: Option<&RealRegUniverse>, _size: u8) -> String {
        // For the specific case of Reg, we demand not to have a size hint,
        // since interpretation of the size is target-specific, but this code
        // is used by all targets.
        panic!("Reg::show_rru_sized: impossible to implement");
    }
}

impl<R: ShowWithRRU + Copy + Ord + Hash + Eq + Debug> ShowWithRRU for Writable<R> {
    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
        self.to_reg().show_rru(mb_rru)
    }

    fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
        self.to_reg().show_rru_sized(mb_rru, size)
    }
}
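
In practice this is what makes vcode dumps readable: a virtual register always prints generically, while a real register picks up its architectural name from the universe. A small sketch grounded in the impl above:

```rust
fn demo_show_rru() {
    // A virtual register prints generically, universe or not:
    let vreg = Reg::new_virtual(RegClass::I32, 42);
    assert_eq!(vreg.show_rru(None), format!("%{:?}", vreg));
    // A real register, given a universe, resolves to its entry's name
    // (rru.regs[index].1) -- e.g. "x0" on an AArch64-style universe.
}
```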

cranelift/codegen/src/machinst/sections.rs (new file, 354 lines)
@@ -0,0 +1,354 @@
//! In-memory representation of compiled machine code, in multiple sections
//! (text, constant pool / rodata, etc). Emission occurs into multiple sections
//! simultaneously, so we buffer the result in memory and hand off to the
//! caller at the end of compilation.

use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc};
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode};

use alloc::vec::Vec;

/// A collection of sections with defined start-offsets.
pub struct MachSections {
    /// Sections, in offset order.
    pub sections: Vec<MachSection>,
}

impl MachSections {
    /// New, empty set of sections.
    pub fn new() -> MachSections {
        MachSections { sections: vec![] }
    }

    /// Add a section with a known offset and size. Returns the index.
    pub fn add_section(&mut self, start: CodeOffset, length: CodeOffset) -> usize {
        let idx = self.sections.len();
        self.sections.push(MachSection::new(start, length));
        idx
    }

    /// Mutably borrow the given section by index.
    pub fn get_section<'a>(&'a mut self, idx: usize) -> &'a mut MachSection {
        &mut self.sections[idx]
    }

    /// Get mutable borrows of two sections simultaneously. Used during
    /// instruction emission to provide references to the .text and .rodata
    /// (constant pool) sections.
    pub fn two_sections<'a>(
        &'a mut self,
        idx1: usize,
        idx2: usize,
    ) -> (&'a mut MachSection, &'a mut MachSection) {
        assert!(idx1 < idx2);
        assert!(idx1 < self.sections.len());
        assert!(idx2 < self.sections.len());
        let (first, rest) = self.sections.split_at_mut(idx2);
        (&mut first[idx1], &mut rest[0])
    }

    /// Emit this set of sections to a set of sinks for the code,
    /// relocations, traps, and stackmap.
    pub fn emit<CS: CodeSink>(&self, sink: &mut CS) {
        // N.B.: we emit every section into the .text section as far as
        // the `CodeSink` is concerned; we do not bother to segregate
        // the contents into the actual program text, the jumptable and the
        // rodata (constant pool). This allows us to generate code assuming
        // that these will not be relocated relative to each other, and avoids
        // having to designate each section as belonging in one of the three
        // fixed categories defined by `CodeSink`. If this becomes a problem
        // later (e.g. because of memory permissions or similar), we can
        // add this designation and segregate the output; take care, however,
        // to add the appropriate relocations in this case.

        for section in &self.sections {
            if section.data.len() > 0 {
                while sink.offset() < section.start_offset {
                    sink.put1(0);
                }
                section.emit(sink);
            }
        }
        sink.begin_jumptables();
        sink.begin_rodata();
        sink.end_codegen();
    }

    /// Get the total required size for these sections.
    pub fn total_size(&self) -> CodeOffset {
        if self.sections.len() == 0 {
            0
        } else {
            // Find the last non-empty section.
            self.sections
                .iter()
                .rev()
                .find(|s| s.data.len() > 0)
                .map(|s| s.cur_offset_from_start())
                .unwrap_or(0)
        }
    }
}
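
A minimal usage sketch of how these pieces compose (an editorial illustration, not part of this change; the offsets and payload values are assumptions):

fn sections_usage_sketch() {
    let mut sections = MachSections::new();
    let text = sections.add_section(0, 64); // .text at offset 0, up to 64 bytes.
    let rodata = sections.add_section(64, 16); // constant pool directly after.
    let (text_sec, rodata_sec) = sections.two_sections(text, rodata);
    text_sec.put4(0xd503201f); // e.g. an AArch64 NOP encoding, emitted little-endian.
    rodata_sec.put8(0x1234_5678_9abc_def0); // an 8-byte constant.
    // total_size() reports the end of the last non-empty section: 64 + 8.
    assert_eq!(sections.total_size(), 72);
}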

/// An abstraction over MachSection and MachSectionSize: some
/// receiver of section data.
pub trait MachSectionOutput {
    /// Get the current offset from the start of all sections.
    fn cur_offset_from_start(&self) -> CodeOffset;

    /// Get the start offset of this section.
    fn start_offset(&self) -> CodeOffset;

    /// Add 1 byte to the section.
    fn put1(&mut self, _: u8);

    /// Add 2 bytes to the section.
    fn put2(&mut self, value: u16) {
        let [b0, b1] = value.to_le_bytes();
        self.put1(b0);
        self.put1(b1);
    }

    /// Add 4 bytes to the section.
    fn put4(&mut self, value: u32) {
        let [b0, b1, b2, b3] = value.to_le_bytes();
        self.put1(b0);
        self.put1(b1);
        self.put1(b2);
        self.put1(b3);
    }

    /// Add 8 bytes to the section.
    fn put8(&mut self, value: u64) {
        let [b0, b1, b2, b3, b4, b5, b6, b7] = value.to_le_bytes();
        self.put1(b0);
        self.put1(b1);
        self.put1(b2);
        self.put1(b3);
        self.put1(b4);
        self.put1(b5);
        self.put1(b6);
        self.put1(b7);
    }

    /// Add a slice of bytes to the section.
    fn put_data(&mut self, data: &[u8]);

    /// Add a relocation at the current offset.
    fn add_reloc(&mut self, loc: SourceLoc, kind: Reloc, name: &ExternalName, addend: Addend);

    /// Add a trap record at the current offset.
    fn add_trap(&mut self, loc: SourceLoc, code: TrapCode);

    /// Add a call return address record at the current offset.
    fn add_call_site(&mut self, loc: SourceLoc, opcode: Opcode);

    /// Align up to the given alignment.
    fn align_to(&mut self, align_to: CodeOffset) {
        assert!(align_to.is_power_of_two());
        while self.cur_offset_from_start() & (align_to - 1) != 0 {
            self.put1(0);
        }
    }
}
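
The padding loop in `align_to` relies on the power-of-two mask identity: `offset & (align_to - 1)` is `offset % align_to`. The equivalent closed form, as a standalone sketch for clarity (not code from this change):

fn align_up(offset: u32, align: u32) -> u32 {
    // Round `offset` up to the next multiple of the power-of-two `align`.
    assert!(align.is_power_of_two());
    (offset + align - 1) & !(align - 1)
}
// align_up(13, 4) == 16; align_up(16, 4) == 16.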

/// A section of output to be emitted to a CodeSink / RelocSink in bulk.
/// Multiple sections may be created with known start offsets in advance; the
/// usual use-case is to create the .text (code) and .rodata (constant pool) at
/// once, after computing the length of the code, so that constant references
/// can use known offsets as instructions are emitted.
pub struct MachSection {
    /// The starting offset of this section.
    pub start_offset: CodeOffset,
    /// The limit of this section, defined by the start of the next section.
    pub length_limit: CodeOffset,
    /// The section contents, as raw bytes.
    pub data: Vec<u8>,
    /// Any relocations referring to this section.
    pub relocs: Vec<MachReloc>,
    /// Any trap records referring to this section.
    pub traps: Vec<MachTrap>,
    /// Any call site records referring to this section.
    pub call_sites: Vec<MachCallSite>,
}

impl MachSection {
    /// Create a new section, known to start at `start_offset` and with a size limited to `length_limit`.
    pub fn new(start_offset: CodeOffset, length_limit: CodeOffset) -> MachSection {
        MachSection {
            start_offset,
            length_limit,
            data: vec![],
            relocs: vec![],
            traps: vec![],
            call_sites: vec![],
        }
    }

    /// Emit this section to the CodeSink and other associated sinks. The
    /// current offset of the CodeSink must match the starting offset of this
    /// section.
    pub fn emit<CS: CodeSink>(&self, sink: &mut CS) {
        assert!(sink.offset() == self.start_offset);

        let mut next_reloc = 0;
        let mut next_trap = 0;
        let mut next_call_site = 0;
        for (idx, byte) in self.data.iter().enumerate() {
            if next_reloc < self.relocs.len() {
                let reloc = &self.relocs[next_reloc];
                if reloc.offset == idx as CodeOffset {
                    sink.reloc_external(reloc.srcloc, reloc.kind, &reloc.name, reloc.addend);
                    next_reloc += 1;
                }
            }
            if next_trap < self.traps.len() {
                let trap = &self.traps[next_trap];
                if trap.offset == idx as CodeOffset {
                    sink.trap(trap.code, trap.srcloc);
                    next_trap += 1;
                }
            }
            if next_call_site < self.call_sites.len() {
                let call_site = &self.call_sites[next_call_site];
                if call_site.ret_addr == idx as CodeOffset {
                    sink.add_call_site(call_site.opcode, call_site.srcloc);
                    next_call_site += 1;
                }
            }
            sink.put1(*byte);
        }
    }
}

impl MachSectionOutput for MachSection {
    fn cur_offset_from_start(&self) -> CodeOffset {
        self.start_offset + self.data.len() as CodeOffset
    }

    fn start_offset(&self) -> CodeOffset {
        self.start_offset
    }

    fn put1(&mut self, value: u8) {
        assert!(((self.data.len() + 1) as CodeOffset) <= self.length_limit);
        self.data.push(value);
    }

    fn put_data(&mut self, data: &[u8]) {
        assert!(((self.data.len() + data.len()) as CodeOffset) <= self.length_limit);
        self.data.extend_from_slice(data);
    }

    fn add_reloc(&mut self, srcloc: SourceLoc, kind: Reloc, name: &ExternalName, addend: Addend) {
        let name = name.clone();
        self.relocs.push(MachReloc {
            offset: self.data.len() as CodeOffset,
            srcloc,
            kind,
            name,
            addend,
        });
    }

    fn add_trap(&mut self, srcloc: SourceLoc, code: TrapCode) {
        self.traps.push(MachTrap {
            offset: self.data.len() as CodeOffset,
            srcloc,
            code,
        });
    }

    fn add_call_site(&mut self, srcloc: SourceLoc, opcode: Opcode) {
        self.call_sites.push(MachCallSite {
            ret_addr: self.data.len() as CodeOffset,
            srcloc,
            opcode,
        });
    }
}

/// A MachSectionOutput implementation that records only size.
pub struct MachSectionSize {
    /// The starting offset of this section.
    pub start_offset: CodeOffset,
    /// The current offset of this section.
    pub offset: CodeOffset,
}

impl MachSectionSize {
    /// Create a new size-counting dummy section.
    pub fn new(start_offset: CodeOffset) -> MachSectionSize {
        MachSectionSize {
            start_offset,
            offset: start_offset,
        }
    }

    /// Return the size this section would take if emitted with a real sink.
    pub fn size(&self) -> CodeOffset {
        self.offset - self.start_offset
    }
}

impl MachSectionOutput for MachSectionSize {
    fn cur_offset_from_start(&self) -> CodeOffset {
        // All size-counting sections conceptually start at offset 0; this
        // doesn't matter when counting code size.
        self.offset
    }

    fn start_offset(&self) -> CodeOffset {
        self.start_offset
    }

    fn put1(&mut self, _: u8) {
        self.offset += 1;
    }

    fn put_data(&mut self, data: &[u8]) {
        self.offset += data.len() as CodeOffset;
    }

    fn add_reloc(&mut self, _: SourceLoc, _: Reloc, _: &ExternalName, _: Addend) {}

    fn add_trap(&mut self, _: SourceLoc, _: TrapCode) {}

    fn add_call_site(&mut self, _: SourceLoc, _: Opcode) {}
}

/// A relocation resulting from a compilation.
pub struct MachReloc {
    /// The offset at which the relocation applies, *relative to the
    /// containing section*.
    pub offset: CodeOffset,
    /// The original source location.
    pub srcloc: SourceLoc,
    /// The kind of relocation.
    pub kind: Reloc,
    /// The external symbol / name to which this relocation refers.
    pub name: ExternalName,
    /// The addend to add to the symbol value.
    pub addend: i64,
}

/// A trap record resulting from a compilation.
pub struct MachTrap {
    /// The offset at which the trap instruction occurs, *relative to the
    /// containing section*.
    pub offset: CodeOffset,
    /// The original source location.
    pub srcloc: SourceLoc,
    /// The trap code.
    pub code: TrapCode,
}

/// A call site record resulting from a compilation.
pub struct MachCallSite {
    /// The offset of the call's return address, *relative to the containing section*.
    pub ret_addr: CodeOffset,
    /// The original source location.
    pub srcloc: SourceLoc,
    /// The call's opcode.
    pub opcode: Opcode,
}

730
cranelift/codegen/src/machinst/vcode.rs
Normal file
@@ -0,0 +1,730 @@
//! This implements the VCode container: a CFG of Insts that have been lowered.
//!
//! VCode is virtual-register code. An instruction in VCode is almost a machine
//! instruction; however, its register slots can refer to virtual registers in
//! addition to real machine registers.
//!
//! VCode is structured with traditional basic blocks, and
//! each block must be terminated by an unconditional branch (one target), a
//! conditional branch (two targets), or a return (no targets). Note that this
//! slightly differs from the machine code of most ISAs: in most ISAs, a
//! conditional branch has one target (and the not-taken case falls through).
//! However, we expect that machine backends will elide branches to the following
//! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond /
//! branch-uncond pair if *both* targets are not fallthrough. This allows us to
//! play with layout prior to final binary emission, as well, if we want.
//!
//! See the main module comment in `mod.rs` for more details on the VCode-based
//! backend pipeline.

use crate::ir;
use crate::machinst::*;
use crate::settings;

use regalloc::Function as RegallocFunction;
use regalloc::Set as RegallocSet;
use regalloc::{BlockIx, InstIx, Range, RegAllocResult, RegClass, RegUsageCollector};

use alloc::boxed::Box;
use alloc::vec::Vec;
use log::debug;
use smallvec::SmallVec;
use std::fmt;
use std::iter;
use std::string::String;

/// Index referring to an instruction in VCode.
pub type InsnIndex = u32;
/// Index referring to a basic block in VCode.
pub type BlockIndex = u32;

/// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
/// a `MachInst` and it must be able to emit itself at least to a
/// `MachSectionSize` (the size-counting sink).
pub trait VCodeInst: MachInst + MachInstEmit<MachSection> + MachInstEmit<MachSectionSize> {}
impl<I: MachInst + MachInstEmit<MachSection> + MachInstEmit<MachSectionSize>> VCodeInst for I {}

/// A function in "VCode" (virtualized-register code) form, after lowering.
/// This is essentially a standard CFG of basic blocks, where each basic block
/// consists of lowered instructions produced by the machine-specific backend.
pub struct VCode<I: VCodeInst> {
    /// Function liveins.
    liveins: RegallocSet<RealReg>,

    /// Function liveouts.
    liveouts: RegallocSet<RealReg>,

    /// VReg IR-level types.
    vreg_types: Vec<Type>,

    /// Lowered machine instructions in order corresponding to the original IR.
    insts: Vec<I>,

    /// Entry block.
    entry: BlockIndex,

    /// Block instruction indices.
    block_ranges: Vec<(InsnIndex, InsnIndex)>,

    /// Block successors: index range in the successor-list below.
    block_succ_range: Vec<(usize, usize)>,

    /// Block successor lists, concatenated into one Vec. The `block_succ_range`
    /// list of tuples above gives (start, end) ranges within this list that
    /// correspond to each basic block's successors.
    block_succs: Vec<BlockIndex>,

    /// Block indices by IR block.
    block_by_bb: SecondaryMap<ir::Block, BlockIndex>,

    /// IR block for each VCode Block. The length of this Vec will likely be
    /// less than the total number of Blocks, because new Blocks (for edge
    /// splits, for example) are appended during lowering.
    bb_by_block: Vec<ir::Block>,

    /// Order of block IDs in final generated code.
    final_block_order: Vec<BlockIndex>,

    /// Final block offsets. Computed during branch finalization and used
    /// during emission.
    final_block_offsets: Vec<CodeOffset>,

    /// Size of code, accounting for block layout / alignment.
    code_size: CodeOffset,

    /// ABI object.
    abi: Box<dyn ABIBody<I = I>>,
}

/// A builder for a VCode function body. This builder is designed for the
/// lowering approach that we take: we traverse basic blocks in forward
/// (original IR) order, but within each basic block, we generate code from
/// bottom to top; and within each IR instruction that we visit in this reverse
/// order, we emit machine instructions in *forward* order again.
///
/// Hence, to produce the final instructions in proper order, we perform two
/// swaps. First, the machine instructions (`I` instances) are produced in
/// forward order for an individual IR instruction. Then these are *reversed*
/// and concatenated to `bb_insns` at the end of the IR instruction lowering.
/// The `bb_insns` vec will thus contain all machine instructions for a basic
/// block, in reverse order. Finally, when we're done with a basic block, we
/// reverse the whole block's vec of instructions again, and concatenate onto
/// the VCode's insts. (A sketch of this ordering dance follows the struct
/// definition below.)
pub struct VCodeBuilder<I: VCodeInst> {
    /// In-progress VCode.
    vcode: VCode<I>,

    /// Current basic block instructions, in reverse order (because blocks are
    /// built bottom-to-top).
    bb_insns: SmallVec<[I; 32]>,

    /// Current IR-inst instructions, in forward order.
    ir_inst_insns: SmallVec<[I; 4]>,

    /// Start of succs for the current block in the concatenated succs list.
    succ_start: usize,
}
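
A standalone sketch of the ordering dance described above, using integers in place of machine instructions (illustrative only, not part of this change):

fn reversal_order_sketch() {
    // Two IR insts visited bottom-to-top: inst B lowers to [3, 4], then inst A to [1, 2].
    let mut bb_insns: Vec<u32> = vec![];
    for lowered in [vec![3, 4], vec![1, 2]] {
        let mut ir_inst_insns = lowered;
        // end_ir_inst(): pop (reversing) onto the block's reverse-order list.
        while let Some(i) = ir_inst_insns.pop() {
            bb_insns.push(i);
        }
    }
    // end_bb(): pop (reversing again) into final forward order.
    let mut final_insns = vec![];
    while let Some(i) = bb_insns.pop() {
        final_insns.push(i);
    }
    assert_eq!(final_insns, vec![1, 2, 3, 4]);
}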

impl<I: VCodeInst> VCodeBuilder<I> {
    /// Create a new VCodeBuilder.
    pub fn new(abi: Box<dyn ABIBody<I = I>>) -> VCodeBuilder<I> {
        let vcode = VCode::new(abi);
        VCodeBuilder {
            vcode,
            bb_insns: SmallVec::new(),
            ir_inst_insns: SmallVec::new(),
            succ_start: 0,
        }
    }

    /// Access the ABI object.
    pub fn abi(&mut self) -> &mut dyn ABIBody<I = I> {
        &mut *self.vcode.abi
    }

    /// Set the type of a VReg.
    pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) {
        while self.vcode.vreg_types.len() <= vreg.get_index() {
            self.vcode.vreg_types.push(ir::types::I8); // Default type.
        }
        self.vcode.vreg_types[vreg.get_index()] = ty;
    }

    /// Return the underlying bb-to-BlockIndex map.
    pub fn blocks_by_bb(&self) -> &SecondaryMap<ir::Block, BlockIndex> {
        &self.vcode.block_by_bb
    }

    /// Initialize the bb-to-BlockIndex map. Returns the first free
    /// BlockIndex.
    pub fn init_bb_map(&mut self, blocks: &[ir::Block]) -> BlockIndex {
        let mut bindex: BlockIndex = 0;
        for bb in blocks.iter() {
            self.vcode.block_by_bb[*bb] = bindex;
            self.vcode.bb_by_block.push(*bb);
            bindex += 1;
        }
        bindex
    }

    /// Get the BlockIndex for an IR block.
    pub fn bb_to_bindex(&self, bb: ir::Block) -> BlockIndex {
        self.vcode.block_by_bb[bb]
    }

    /// Set the current block as the entry block.
    pub fn set_entry(&mut self, block: BlockIndex) {
        self.vcode.entry = block;
    }

    /// End the current IR instruction. Must be called after pushing any
    /// instructions and prior to ending the basic block.
    pub fn end_ir_inst(&mut self) {
        while let Some(i) = self.ir_inst_insns.pop() {
            self.bb_insns.push(i);
        }
    }

    /// End the current basic block. Must be called after emitting vcode insts
    /// for IR insts and prior to ending the function (building the VCode).
    pub fn end_bb(&mut self) -> BlockIndex {
        assert!(self.ir_inst_insns.is_empty());
        let block_num = self.vcode.block_ranges.len() as BlockIndex;
        // Push the instructions.
        let start_idx = self.vcode.insts.len() as InsnIndex;
        while let Some(i) = self.bb_insns.pop() {
            self.vcode.insts.push(i);
        }
        let end_idx = self.vcode.insts.len() as InsnIndex;
        // Add the instruction index range to the list of blocks.
        self.vcode.block_ranges.push((start_idx, end_idx));
        // End the successors list.
        let succ_end = self.vcode.block_succs.len();
        self.vcode
            .block_succ_range
            .push((self.succ_start, succ_end));
        self.succ_start = succ_end;

        block_num
    }

    /// Push an instruction for the current BB and current IR inst within the BB.
    pub fn push(&mut self, insn: I) {
        match insn.is_term() {
            MachTerminator::None | MachTerminator::Ret => {}
            MachTerminator::Uncond(target) => {
                self.vcode.block_succs.push(target);
            }
            MachTerminator::Cond(true_branch, false_branch) => {
                self.vcode.block_succs.push(true_branch);
                self.vcode.block_succs.push(false_branch);
            }
            MachTerminator::Indirect(targets) => {
                for target in targets {
                    self.vcode.block_succs.push(*target);
                }
            }
        }
        self.ir_inst_insns.push(insn);
    }

    /// Build the final VCode.
    pub fn build(self) -> VCode<I> {
        assert!(self.ir_inst_insns.is_empty());
        assert!(self.bb_insns.is_empty());
        self.vcode
    }
}

fn block_ranges(indices: &[InstIx], len: usize) -> Vec<(usize, usize)> {
    let v = indices
        .iter()
        .map(|iix| iix.get() as usize)
        .chain(iter::once(len))
        .collect::<Vec<usize>>();
    v.windows(2).map(|p| (p[0], p[1])).collect()
}
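
// Worked example (editorial): block starts at instructions 0, 3, and 5 in a
// 7-instruction body yield one half-open range per block:
//
//   block_ranges(&[InstIx::new(0), InstIx::new(3), InstIx::new(5)], 7)
//       == vec![(0, 3), (3, 5), (5, 7)]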

fn is_redundant_move<I: VCodeInst>(insn: &I) -> bool {
    if let Some((to, from)) = insn.is_move() {
        to.to_reg() == from
    } else {
        false
    }
}

fn is_trivial_jump_block<I: VCodeInst>(vcode: &VCode<I>, block: BlockIndex) -> Option<BlockIndex> {
    let range = vcode.block_insns(BlockIx::new(block));

    debug!(
        "is_trivial_jump_block: block {} has len {}",
        block,
        range.len()
    );

    if range.len() != 1 {
        return None;
    }
    let insn = range.first();

    debug!(
        " -> only insn is: {:?} with terminator {:?}",
        vcode.get_insn(insn),
        vcode.get_insn(insn).is_term()
    );

    match vcode.get_insn(insn).is_term() {
        MachTerminator::Uncond(target) => Some(target),
        _ => None,
    }
}

impl<I: VCodeInst> VCode<I> {
    /// New empty VCode.
    fn new(abi: Box<dyn ABIBody<I = I>>) -> VCode<I> {
        VCode {
            liveins: abi.liveins(),
            liveouts: abi.liveouts(),
            vreg_types: vec![],
            insts: vec![],
            entry: 0,
            block_ranges: vec![],
            block_succ_range: vec![],
            block_succs: vec![],
            block_by_bb: SecondaryMap::with_default(0),
            bb_by_block: vec![],
            final_block_order: vec![],
            final_block_offsets: vec![],
            code_size: 0,
            abi,
        }
    }

    /// Get the IR-level type of a VReg.
    pub fn vreg_type(&self, vreg: VirtualReg) -> Type {
        self.vreg_types[vreg.get_index()]
    }

    /// Get the entry block.
    pub fn entry(&self) -> BlockIndex {
        self.entry
    }

    /// Get the number of blocks. Block indices will be in the range `0 ..
    /// (self.num_blocks() - 1)`.
    pub fn num_blocks(&self) -> usize {
        self.block_ranges.len()
    }

    /// Stack frame size for the full function's body.
    pub fn frame_size(&self) -> u32 {
        self.abi.frame_size()
    }

    /// Get the successors for a block.
    pub fn succs(&self, block: BlockIndex) -> &[BlockIndex] {
        let (start, end) = self.block_succ_range[block as usize];
        &self.block_succs[start..end]
    }

    /// Take the results of register allocation, with a sequence of
    /// instructions including spliced fill/reload/move instructions, and replace
    /// the VCode with them.
    pub fn replace_insns_from_regalloc(
        &mut self,
        result: RegAllocResult<Self>,
        flags: &settings::Flags,
    ) {
        self.final_block_order = compute_final_block_order(self);

        // Record the spillslot count and clobbered registers for the ABI/stack
        // setup code.
        self.abi.set_num_spillslots(result.num_spill_slots as usize);
        self.abi
            .set_clobbered(result.clobbered_registers.map(|r| Writable::from_reg(*r)));

        // We want to move instructions over in final block order, using the new
        // block-start map given by the regalloc.
        let block_ranges: Vec<(usize, usize)> =
            block_ranges(result.target_map.elems(), result.insns.len());
        let mut final_insns = vec![];
        let mut final_block_ranges = vec![(0, 0); self.num_blocks()];

        for block in &self.final_block_order {
            let (start, end) = block_ranges[*block as usize];
            let final_start = final_insns.len() as InsnIndex;

            if *block == self.entry {
                // Start with the prologue.
                final_insns.extend(self.abi.gen_prologue(flags).into_iter());
            }

            for i in start..end {
                let insn = &result.insns[i];

                // Elide redundant moves at this point (we only know what is
                // redundant once registers are allocated).
                if is_redundant_move(insn) {
                    continue;
                }

                // Whenever encountering a return instruction, replace it
                // with the epilogue.
                let is_ret = insn.is_term() == MachTerminator::Ret;
                if is_ret {
                    final_insns.extend(self.abi.gen_epilogue(flags).into_iter());
                } else {
                    final_insns.push(insn.clone());
                }
            }

            let final_end = final_insns.len() as InsnIndex;
            final_block_ranges[*block as usize] = (final_start, final_end);
        }

        self.insts = final_insns;
        self.block_ranges = final_block_ranges;
    }

    /// Removes redundant branches, rewriting targets to point directly to the
    /// ultimate block at the end of a chain of trivial one-target jumps.
    pub fn remove_redundant_branches(&mut self) {
        // For each block, compute the actual target block, looking through up to one
        // block with single-target jumps (this will remove empty edge blocks inserted
        // by phi-lowering).
        let block_rewrites: Vec<BlockIndex> = (0..self.num_blocks() as u32)
            .map(|bix| is_trivial_jump_block(self, bix).unwrap_or(bix))
            .collect();
        let mut refcounts: Vec<usize> = vec![0; self.num_blocks()];

        debug!(
            "remove_redundant_branches: block_rewrites = {:?}",
            block_rewrites
        );

        refcounts[self.entry as usize] = 1;

        for block in 0..self.num_blocks() as u32 {
            for insn in self.block_insns(BlockIx::new(block)) {
                self.get_insn_mut(insn)
                    .with_block_rewrites(&block_rewrites[..]);
                match self.get_insn(insn).is_term() {
                    MachTerminator::Uncond(bix) => {
                        refcounts[bix as usize] += 1;
                    }
                    MachTerminator::Cond(bix1, bix2) => {
                        refcounts[bix1 as usize] += 1;
                        refcounts[bix2 as usize] += 1;
                    }
                    MachTerminator::Indirect(blocks) => {
                        for block in blocks {
                            refcounts[*block as usize] += 1;
                        }
                    }
                    _ => {}
                }
            }
        }

        let deleted: Vec<bool> = refcounts.iter().map(|r| *r == 0).collect();

        let block_order = std::mem::replace(&mut self.final_block_order, vec![]);
        self.final_block_order = block_order
            .into_iter()
            .filter(|b| !deleted[*b as usize])
            .collect();

        // Rewrite successor information based on the block-rewrite map.
        for succ in &mut self.block_succs {
            let new_succ = block_rewrites[*succ as usize];
            *succ = new_succ;
        }
    }

    /// Mutate branch instructions to (i) lower two-way condbrs to one-way,
    /// depending on fallthrough; and (ii) use concrete offsets.
    pub fn finalize_branches(&mut self)
    where
        I: MachInstEmit<MachSectionSize>,
    {
        // Compute fallthrough block, indexed by block.
        let num_final_blocks = self.final_block_order.len();
        let mut block_fallthrough: Vec<Option<BlockIndex>> = vec![None; self.num_blocks()];
        for i in 0..(num_final_blocks - 1) {
            let from = self.final_block_order[i];
            let to = self.final_block_order[i + 1];
            block_fallthrough[from as usize] = Some(to);
        }

        // Pass over VCode instructions and finalize two-way branches into
        // one-way branches with fallthrough.
        for block in 0..self.num_blocks() {
            let next_block = block_fallthrough[block];
            let (start, end) = self.block_ranges[block];

            for iix in start..end {
                let insn = &mut self.insts[iix as usize];
                insn.with_fallthrough_block(next_block);
            }
        }

        // Compute block offsets.
        let mut code_section = MachSectionSize::new(0);
        let mut block_offsets = vec![0; self.num_blocks()];
        for &block in &self.final_block_order {
            code_section.offset = I::align_basic_block(code_section.offset);
            block_offsets[block as usize] = code_section.offset;
            let (start, end) = self.block_ranges[block as usize];
            for iix in start..end {
                self.insts[iix as usize].emit(&mut code_section);
            }
        }

        // We now have the section layout.
        self.final_block_offsets = block_offsets;
        self.code_size = code_section.size();

        // Update branches with known block offsets. This looks like the
        // traversal above, but (i) does not update block_offsets, rather uses
        // it (so forward references are now possible), and (ii) mutates the
        // instructions.
        let mut code_section = MachSectionSize::new(0);
        for &block in &self.final_block_order {
            code_section.offset = I::align_basic_block(code_section.offset);
            let (start, end) = self.block_ranges[block as usize];
            for iix in start..end {
                self.insts[iix as usize]
                    .with_block_offsets(code_section.offset, &self.final_block_offsets[..]);
                self.insts[iix as usize].emit(&mut code_section);
            }
        }
    }

    /// Emit the instructions to a list of sections.
    pub fn emit(&self) -> MachSections
    where
        I: MachInstEmit<MachSection>,
    {
        let mut sections = MachSections::new();
        let code_idx = sections.add_section(0, self.code_size);
        let code_section = sections.get_section(code_idx);

        for &block in &self.final_block_order {
            let new_offset = I::align_basic_block(code_section.cur_offset_from_start());
            while new_offset > code_section.cur_offset_from_start() {
                // Pad with NOPs up to the aligned block offset.
                let nop = I::gen_nop((new_offset - code_section.cur_offset_from_start()) as usize);
                nop.emit(code_section);
            }
            assert_eq!(code_section.cur_offset_from_start(), new_offset);

            let (start, end) = self.block_ranges[block as usize];
            for iix in start..end {
                self.insts[iix as usize].emit(code_section);
            }
        }

        sections
    }

    /// Get the IR block for a BlockIndex, if one exists.
    pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
        if (block as usize) < self.bb_by_block.len() {
            Some(self.bb_by_block[block as usize])
        } else {
            None
        }
    }
}

impl<I: VCodeInst> RegallocFunction for VCode<I> {
    type Inst = I;

    fn insns(&self) -> &[I] {
        &self.insts[..]
    }

    fn insns_mut(&mut self) -> &mut [I] {
        &mut self.insts[..]
    }

    fn get_insn(&self, insn: InstIx) -> &I {
        &self.insts[insn.get() as usize]
    }

    fn get_insn_mut(&mut self, insn: InstIx) -> &mut I {
        &mut self.insts[insn.get() as usize]
    }

    fn blocks(&self) -> Range<BlockIx> {
        Range::new(BlockIx::new(0), self.block_ranges.len())
    }

    fn entry_block(&self) -> BlockIx {
        BlockIx::new(self.entry)
    }

    fn block_insns(&self, block: BlockIx) -> Range<InstIx> {
        let (start, end) = self.block_ranges[block.get() as usize];
        Range::new(InstIx::new(start), (end - start) as usize)
    }

    fn block_succs(&self, block: BlockIx) -> Vec<BlockIx> {
        let (start, end) = self.block_succ_range[block.get() as usize];
        self.block_succs[start..end]
            .iter()
            .cloned()
            .map(BlockIx::new)
            .collect()
    }

    fn is_ret(&self, insn: InstIx) -> bool {
        match self.insts[insn.get() as usize].is_term() {
            MachTerminator::Ret => true,
            _ => false,
        }
    }

    fn get_regs(insn: &I, collector: &mut RegUsageCollector) {
        insn.get_regs(collector)
    }

    fn map_regs(
        insn: &mut I,
        pre_map: &RegallocMap<VirtualReg, RealReg>,
        post_map: &RegallocMap<VirtualReg, RealReg>,
    ) {
        insn.map_regs(pre_map, post_map);
    }

    fn is_move(&self, insn: &I) -> Option<(Writable<Reg>, Reg)> {
        insn.is_move()
    }

    fn get_spillslot_size(&self, regclass: RegClass, vreg: VirtualReg) -> u32 {
        let ty = self.vreg_type(vreg);
        self.abi.get_spillslot_size(regclass, ty)
    }

    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, vreg: VirtualReg) -> I {
        let ty = self.vreg_type(vreg);
        self.abi.gen_spill(to_slot, from_reg, ty)
    }

    fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot, vreg: VirtualReg) -> I {
        let ty = self.vreg_type(vreg);
        self.abi.gen_reload(to_reg, from_slot, ty)
    }

    fn gen_move(&self, to_reg: Writable<RealReg>, from_reg: RealReg, vreg: VirtualReg) -> I {
        let ty = self.vreg_type(vreg);
        I::gen_move(to_reg.map(|r| r.to_reg()), from_reg.to_reg(), ty)
    }

    fn gen_zero_len_nop(&self) -> I {
        I::gen_zero_len_nop()
    }

    fn maybe_direct_reload(&self, insn: &I, reg: VirtualReg, slot: SpillSlot) -> Option<I> {
        insn.maybe_direct_reload(reg, slot)
    }

    fn func_liveins(&self) -> RegallocSet<RealReg> {
        self.liveins.clone()
    }

    fn func_liveouts(&self) -> RegallocSet<RealReg> {
        self.liveouts.clone()
    }
}

impl<I: VCodeInst> fmt::Debug for VCode<I> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(f, "VCode_Debug {{")?;
        writeln!(f, "  Entry block: {}", self.entry)?;
        writeln!(f, "  Final block order: {:?}", self.final_block_order)?;

        for block in 0..self.num_blocks() {
            writeln!(f, "Block {}:", block)?;
            for succ in self.succs(block as BlockIndex) {
                writeln!(f, "  (successor: Block {})", succ)?;
            }
            let (start, end) = self.block_ranges[block];
            writeln!(f, "  (instruction range: {} .. {})", start, end)?;
            for inst in start..end {
                writeln!(f, "  Inst {}: {:?}", inst, self.insts[inst as usize])?;
            }
        }

        writeln!(f, "}}")?;
        Ok(())
    }
}

/// Pretty-printing with `RealRegUniverse` context.
impl<I: VCodeInst + ShowWithRRU> ShowWithRRU for VCode<I> {
    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
        use std::fmt::Write;

        // Calculate an order in which to display the blocks. This is the same
        // as final_block_order, but also includes blocks which are in the
        // representation but not in final_block_order.
        let mut display_order = Vec::<usize>::new();
        // First display blocks in `final_block_order`.
        for bix in &self.final_block_order {
            assert!((*bix as usize) < self.num_blocks());
            display_order.push(*bix as usize);
        }
        // Now also take care of those not listed in `final_block_order`.
        // This is quadratic, but it's also debug-only code.
        for bix in 0..self.num_blocks() {
            if display_order.contains(&bix) {
                continue;
            }
            display_order.push(bix);
        }

        let mut s = String::new();
        write!(&mut s, "VCode_ShowWithRRU {{{{\n").unwrap();
        write!(&mut s, "  Entry block: {}\n", self.entry).unwrap();
        write!(
            &mut s,
            "  Final block order: {:?}\n",
            self.final_block_order
        )
        .unwrap();

        for i in 0..self.num_blocks() {
            let block = display_order[i];

            let omitted = if !self.final_block_order.is_empty() && i >= self.final_block_order.len()
            {
                "** OMITTED **"
            } else {
                ""
            };

            write!(&mut s, "Block {}: {}\n", block, omitted).unwrap();
            if let Some(bb) = self.bindex_to_bb(block as BlockIndex) {
                write!(&mut s, "  (original IR block: {})\n", bb).unwrap();
            }
            for succ in self.succs(block as BlockIndex) {
                write!(&mut s, "  (successor: Block {})\n", succ).unwrap();
            }
            let (start, end) = self.block_ranges[block];
            write!(&mut s, "  (instruction range: {} .. {})\n", start, end).unwrap();
            for inst in start..end {
                write!(
                    &mut s,
                    "  Inst {}: {}\n",
                    inst,
                    self.insts[inst as usize].show_rru(mb_rru)
                )
                .unwrap();
            }
        }

        write!(&mut s, "}}}}\n").unwrap();

        s
    }
}

52
cranelift/codegen/src/num_uses.rs
Normal file
@@ -0,0 +1,52 @@
//! A pass that computes the number of uses of any given instruction.

use crate::entity::SecondaryMap;
use crate::ir::dfg::ValueDef;
use crate::ir::Value;
use crate::ir::{DataFlowGraph, Function, Inst};

/// Auxiliary data structure that counts the number of uses of any given
/// instruction in a Function. This is used during instruction selection
/// to essentially do incremental DCE: when an instruction is no longer
/// needed because its computation has been isel'd into another machine
/// instruction at every use site, we can skip it.
#[derive(Clone, Debug)]
pub struct NumUses {
    uses: SecondaryMap<Inst, u32>,
}

impl NumUses {
    fn new() -> NumUses {
        NumUses {
            uses: SecondaryMap::with_default(0),
        }
    }

    /// Compute the NumUses analysis result for a function.
    pub fn compute(func: &Function) -> NumUses {
        let mut uses = NumUses::new();
        for bb in func.layout.blocks() {
            for inst in func.layout.block_insts(bb) {
                for arg in func.dfg.inst_args(inst) {
                    let v = func.dfg.resolve_aliases(*arg);
                    uses.add_value(&func.dfg, v);
                }
            }
        }
        uses
    }

    fn add_value(&mut self, dfg: &DataFlowGraph, v: Value) {
        match dfg.value_def(v) {
            ValueDef::Result(inst, _) => {
                self.uses[inst] += 1;
            }
            _ => {}
        }
    }

    /// Take the complete uses map, consuming this analysis result.
    pub fn take_uses(self) -> SecondaryMap<Inst, u32> {
        self.uses
    }
}
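
Illustrative use during lowering (the surrounding names here are hypothetical; only `NumUses` itself comes from this change):

// let uses = NumUses::compute(&func).take_uses();
// ...
// if uses[inst] == 0 {
//     // Every use was merged into another machine inst during isel: skip it.
// }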

@@ -360,10 +360,11 @@ fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &dyn TargetIsa
pub fn do_postopt(func: &mut Function, isa: &dyn TargetIsa) {
    let _tt = timing::postopt();
    let mut pos = EncCursor::new(func, isa);
    let is_mach_backend = isa.get_mach_backend().is_some();
    while let Some(_block) = pos.next_block() {
        let mut last_flags_clobber = None;
        while let Some(inst) = pos.next_inst() {
            if isa.uses_cpu_flags() {
            if !is_mach_backend && isa.uses_cpu_flags() {
                // Optimize instructions to make use of flags.
                optimize_cpu_flags(&mut pos, inst, last_flags_clobber, isa);
@@ -28,10 +28,15 @@ pub fn verify_flags(
    errors: &mut VerifierErrors,
) -> VerifierStepResult<()> {
    let _tt = timing::verify_flags();
    let encinfo = if isa.is_none() || isa.unwrap().get_mach_backend().is_some() {
        None
    } else {
        Some(isa.unwrap().encoding_info())
    };
    let mut verifier = FlagsVerifier {
        func,
        cfg,
        encinfo: isa.map(|isa| isa.encoding_info()),
        encinfo,
        livein: SecondaryMap::new(),
    };
    verifier.check(errors)

@@ -22,3 +22,4 @@ memmap = "0.7.0"
num_cpus = "1.8.0"
region = "2.1.2"
byteorder = { version = "1.3.2", default-features = false }
target-lexicon = "0.10"

243
cranelift/filetests/filetests/vcode/aarch64/arithmetic.clif
Normal file
@@ -0,0 +1,243 @@
test vcode
target aarch64

function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = iadd.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add x0, x0, x1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = isub.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub x0, x0, x1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = imul.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: madd x0, x0, x1, xzr
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = umulhi.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: umulh x0, x0, x1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = smulhi.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: smulh x0, x0, x1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = sdiv.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sdiv x0, x0, x1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f(i64) -> i64 {
block0(v0: i64):
    v1 = iconst.i64 2
    v2 = sdiv.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x1, #2
; nextln: sdiv x0, x0, x1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = udiv.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: udiv x0, x0, x1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f(i64) -> i64 {
block0(v0: i64):
    v1 = iconst.i64 2
    v2 = udiv.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x1, #2
; nextln: udiv x0, x0, x1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = srem.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sdiv x2, x0, x1
; nextln: msub x0, x2, x1, x0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = urem.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: udiv x2, x0, x1
; nextln: msub x0, x2, x1, x0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = band.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: and x0, x0, x1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = bor.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: orr x0, x0, x1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = bxor.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: eor x0, x0, x1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = band_not.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: bic x0, x0, x1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = bor_not.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: orn x0, x0, x1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = bxor_not.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: eon x0, x0, x1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = bnot.i64 v0
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: orn x0, xzr, x0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
14
cranelift/filetests/filetests/vcode/aarch64/basic1.clif
Normal file
@@ -0,0 +1,14 @@
test vcode
target aarch64

function %f(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
; check: stp fp, lr, [sp, #-16]!
; check: mov fp, sp
    v2 = iadd v0, v1
; check: add w0, w0, w1
    return v2
; check: mov sp, fp
; check: ldp fp, lr, [sp], #16
; check: ret
}
158
cranelift/filetests/filetests/vcode/aarch64/bitops.clif
Normal file
@@ -0,0 +1,158 @@
test vcode
target aarch64

function %a(i32) -> i32 {
block0(v0: i32):
    v1 = bitrev v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: rbit w0, w0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %a(i64) -> i64 {
block0(v0: i64):
    v1 = bitrev v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: rbit x0, x0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %b(i32) -> i32 {
block0(v0: i32):
    v1 = clz v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: clz w0, w0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %b(i64) -> i64 {
block0(v0: i64):
    v1 = clz v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: clz x0, x0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %c(i32) -> i32 {
block0(v0: i32):
    v1 = cls v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: cls w0, w0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %c(i64) -> i64 {
block0(v0: i64):
    v1 = cls v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: cls x0, x0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %d(i32) -> i32 {
block0(v0: i32):
    v1 = ctz v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: rbit w0, w0
; nextln: clz w0, w0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %d(i64) -> i64 {
block0(v0: i64):
    v1 = ctz v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: rbit x0, x0
; nextln: clz x0, x0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %d(i64) -> i64 {
block0(v0: i64):
    v1 = popcnt v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: lsr x1, x0, #1
; nextln: and x1, x1, #6148914691236517205
; nextln: sub x1, x0, x1
; nextln: and x0, x1, #3689348814741910323
; nextln: lsr x1, x1, #2
; nextln: and x1, x1, #3689348814741910323
; nextln: add x0, x1, x0
; nextln: add x0, x0, x0, LSR 4
; nextln: and x0, x0, #1085102592571150095
; nextln: add x0, x0, x0, LSL 8
; nextln: add x0, x0, x0, LSL 16
; nextln: add x0, x0, x0, LSL 32
; nextln: lsr x0, x0, #56
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %d(i32) -> i32 {
block0(v0: i32):
    v1 = popcnt v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: lsr w1, w0, #1
; nextln: and x1, x1, #6148914691236517205
; nextln: sub x1, x0, x1
; nextln: and x0, x1, #3689348814741910323
; nextln: lsr x1, x1, #2
; nextln: and x1, x1, #3689348814741910323
; nextln: add x0, x1, x0
; nextln: add x0, x0, x0, LSR 4
; nextln: and x0, x0, #1085102592571150095
; nextln: add x0, x0, x0, LSL 8
; nextln: add x0, x0, x0, LSL 16
; nextln: add x0, x0, x0, LSL 32
; nextln: lsr x0, x0, #56
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
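
The popcnt expectations above encode a branch-free SWAR bit count; the decimal masks in the checks are 0x5555…, 0x3333…, and 0x0f0f… repeated across the word. A Rust transcription of the same sequence, for illustration only:

fn popcount64_swar(x: u64) -> u64 {
    // Pairwise 2-bit sums: subtract the high bit of each 2-bit group.
    let x = x - ((x >> 1) & 0x5555_5555_5555_5555);
    // 4-bit sums.
    let x = (x & 0x3333_3333_3333_3333) + ((x >> 2) & 0x3333_3333_3333_3333);
    // 8-bit sums.
    let x = (x + (x >> 4)) & 0x0f0f_0f0f_0f0f_0f0f;
    // Horizontal add of the byte sums via shifted adds (wrapping mirrors the
    // machine adds); the total lands in the top byte.
    let x = x.wrapping_add(x << 8);
    let x = x.wrapping_add(x << 16);
    let x = x.wrapping_add(x << 32);
    x >> 56
}
// popcount64_swar(0b1011) == 3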
@@ -0,0 +1,16 @@
test vcode
target aarch64

function %f(i64, i64) -> i64 {
    sig0 = (i64) -> i64
block0(v0: i64, v1: i64):
    v2 = call_indirect.i64 sig0, v1(v0)
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: blr x1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
17
cranelift/filetests/filetests/vcode/aarch64/call.clif
Normal file
@@ -0,0 +1,17 @@
test vcode
target aarch64

function %f(i64) -> i64 {
    fn0 = %g(i64) -> i64

block0(v0: i64):
    v1 = call fn0(v0)
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: bl 0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
66
cranelift/filetests/filetests/vcode/aarch64/condbr.clif
Normal file
@@ -0,0 +1,66 @@
test vcode
target aarch64

function %f(i64, i64) -> b1 {
block0(v0: i64, v1: i64):
    v2 = icmp eq v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x1
; nextln: cset x0, eq
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = ifcmp v0, v1
    brif eq v2, block1
    jump block2

block1:
    v4 = iconst.i64 1
    return v4

block2:
    v5 = iconst.i64 2
    return v5
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x1
; nextln: b.eq 20
; check: Block 2:
; check: movz x0, #2
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
; check: Block 1:
; check: movz x0, #1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = ifcmp v0, v1
    brif eq v2, block1
    jump block1

block1:
    v4 = iconst.i64 1
    return v4
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: subs xzr, x0, x1
; check: Block 1:
; check: movz x0, #1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
43
cranelift/filetests/filetests/vcode/aarch64/condops.clif
Normal file
@@ -0,0 +1,43 @@
test vcode
target aarch64

function %f(i8, i64, i64) -> i64 {
block0(v0: i8, v1: i64, v2: i64):
    v3 = iconst.i8 42
    v4 = ifcmp v0, v3
    v5 = selectif.i64 eq v4, v1, v2
    return v5
}

; check: subs wzr
; check: csel x0, $(=x[0-9]+, x[0-9]+), eq

function %g(i8) -> b1 {
block0(v0: i8):
    v3 = iconst.i8 42
    v4 = ifcmp v0, v3
    v5 = trueif eq v4
    return v5
}

; check: subs wzr
; check: cset x0, eq

function %h(i8, i8, i8) -> i8 {
block0(v0: i8, v1: i8, v2: i8):
    v3 = bitselect.i8 v0, v1, v2
    return v3
}

; check: and
; nextln: bic
; nextln: orr

function %i(b1, i8, i8) -> i8 {
block0(v0: b1, v1: i8, v2: i8):
    v3 = select.i8 v0, v1, v2
    return v3
}

; check: subs wzr
; nextln: csel
176
cranelift/filetests/filetests/vcode/aarch64/constants.clif
Normal file
@@ -0,0 +1,176 @@
test vcode
target aarch64

function %f() -> i64 {
block0:
    v0 = iconst.i64 0
    return v0
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f() -> i64 {
block0:
    v0 = iconst.i64 0xffff
    return v0
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #65535
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f() -> i64 {
block0:
    v0 = iconst.i64 0xffff0000
    return v0
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #65535, LSL #16
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f() -> i64 {
block0:
    v0 = iconst.i64 0xffff00000000
    return v0
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #65535, LSL #32
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f() -> i64 {
block0:
    v0 = iconst.i64 0xffff000000000000
    return v0
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #65535, LSL #48
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f() -> i64 {
block0:
    v0 = iconst.i64 0xffffffffffffffff
    return v0
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movn x0, #0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f() -> i64 {
block0:
    v0 = iconst.i64 0xffffffffffff0000
    return v0
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movn x0, #65535
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f() -> i64 {
block0:
    v0 = iconst.i64 0xffffffff0000ffff
    return v0
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movn x0, #65535, LSL #16
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f() -> i64 {
block0:
    v0 = iconst.i64 0xffff0000ffffffff
    return v0
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movn x0, #65535, LSL #32
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f() -> i64 {
block0:
    v0 = iconst.i64 0x0000ffffffffffff
    return v0
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movn x0, #65535, LSL #48
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f() -> i64 {
block0:
    v0 = iconst.i64 0xf34bf0a31212003a ; random digits
    return v0
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #58
; nextln: movk x0, #4626, LSL #16
; nextln: movk x0, #61603, LSL #32
; nextln: movk x0, #62283, LSL #48
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f() -> i64 {
block0:
    v0 = iconst.i64 0x12e900001ef40000 ; random digits with 2 clear half words
    return v0
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x0, #7924, LSL #16
; nextln: movk x0, #4841, LSL #48
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f() -> i64 {
block0:
    v0 = iconst.i64 0x12e9ffff1ef4ffff ; random digits with 2 full half words
    return v0
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movn x0, #57611, LSL #16
; nextln: movk x0, #4841, LSL #48
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
|
||||
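The constant tests above exercise the materialization heuristic: a 64-bit immediate is split into four 16-bit half-words, the lowering starts from `movz` (all-zero base) or `movn` (all-ones base) depending on which base matches more half-words, and the remaining half-words are patched in with `movk`. A rough sketch of the instruction-count logic, using a hypothetical helper name (not the backend's actual function):

    fn movz_movn_cost(value: u64) -> (bool, usize) {
        let hw = |i: u32| (value >> (16 * i)) & 0xffff;
        let zeros = (0..4u32).filter(|&i| hw(i) == 0x0000).count();
        let ones = (0..4u32).filter(|&i| hw(i) == 0xffff).count();
        // Prefer movn when more half-words are already all-ones.
        let use_movn = ones > zeros;
        let matching = if use_movn { ones } else { zeros };
        // One movz/movn, plus one movk per non-matching half-word.
        (use_movn, core::cmp::max(1, 4 - matching))
    }

This reproduces the expectations above: 0x12e900001ef40000 has two zero half-words, so it costs a `movz` plus one `movk`; 0x12e9ffff1ef4ffff has two all-ones half-words, so it starts from `movn` instead.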
18
cranelift/filetests/filetests/vcode/aarch64/extend-op.clif
Normal file
@@ -0,0 +1,18 @@
test vcode
target aarch64

function %f(i8) -> i64 {
block0(v0: i8):
    v1 = sextend.i64 v0
    v2 = iconst.i64 42
    v3 = iadd.i64 v2, v1
    return v3
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x1, #42
; nextln: add x0, x1, x0, SXTB
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
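The point of this test is that the `sextend` is folded into the add's second operand (the `SXTB` extended-register form) rather than emitted as a standalone instruction. In plain Rust terms, the fused `add x0, x1, x0, SXTB` computes (a sketch, not backend code):

    fn add_extended(acc: i64, b: u8) -> i64 {
        // Sign-extend the low byte of the second operand as part of
        // the add itself; no separate sxtb instruction is needed.
        acc.wrapping_add((b as i8) as i64)
    }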
44
cranelift/filetests/filetests/vcode/aarch64/jumptable.clif
Normal file
@@ -0,0 +1,44 @@
test vcode
target aarch64

function %f(i64) -> i64 {
    jt0 = jump_table [block1, block2, block3]

block0(v0: i64):
    br_table v0, block4, jt0

block1:
    v1 = iconst.i64 1
    jump block5(v1)

block2:
    v2 = iconst.i64 2
    jump block5(v2)

block3:
    v3 = iconst.i64 3
    jump block5(v3)

block4:
    v4 = iconst.i64 4
    jump block5(v4)

block5(v5: i64):
    v6 = iadd.i64 v0, v5
    return v6
}

; check: subs wzr, w0, #3
; nextln: b.hs
; nextln: adr x2, pc+16 ; ldrsw x1, [x2, x0, LSL 2] ; add x2, x2, x1 ; br x2 ; jt_entries

; check: movz x1, #3
; nextln: b

; check: movz x1, #2
; nextln: b

; check: movz x1, #1

; check: add x0, x0, x1
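The `br_table` sequence checked above first bounds-checks the index (`subs`/`b.hs` branches to the default block), then loads a sign-extended 32-bit table entry relative to the table base and branches to base plus entry. What the emitted sequence computes, as a plain-Rust sketch with hypothetical names (not backend code):

    fn jump_table_target(table_base: u64, entries: &[i32], index: u64, default: u64) -> u64 {
        if index >= entries.len() as u64 {
            return default; // the `subs wzr, w0, #3` / `b.hs` guard
        }
        // `ldrsw` sign-extends the 32-bit entry; `add`/`br` form and take the target.
        table_base.wrapping_add(entries[index as usize] as i64 as u64)
    }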
@@ -0,0 +1,69 @@
test vcode
target aarch64

function %add8(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
    v2 = iadd.i8 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add w0, w0, w1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %add16(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
    v2 = iadd.i16 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add w0, w0, w1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %add32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
    v2 = iadd.i32 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add w0, w0, w1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %add32_8(i32, i8) -> i32 {
block0(v0: i32, v1: i8):
    v2 = sextend.i32 v1
    v3 = iadd.i32 v0, v2
    return v3
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add w0, w0, w1, SXTB
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %add64_32(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
    v2 = sextend.i64 v1
    v3 = iadd.i64 v0, v2
    return v3
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add x0, x0, x1, SXTW
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@@ -0,0 +1,36 @@
test vcode
target aarch64

function %uaddsat64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = uadd_sat.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov v0.d[0], x0
; nextln: mov v1.d[0], x1
; nextln: uqadd d0, d0, d1
; nextln: mov x0, v0.d[0]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %uaddsat8(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
    v2 = uadd_sat.i8 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxtb x0, w0
; nextln: uxtb x1, w1
; nextln: mov v0.d[0], x0
; nextln: mov v1.d[0], x1
; nextln: uqadd d0, d0, d1
; nextln: mov x0, v0.d[0]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
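The saturating add round-trips through the vector unit because the scalar SIMD `uqadd` is the saturating add AArch64 provides; the GPR values are moved into the low lane, added, and moved back. For the 64-bit case the reference semantics are simply (a sketch, not code from this patch):

    fn uadd_sat64(a: u64, b: u64) -> u64 {
        // Unsigned add clamped at the maximum, which is what the scalar
        // `uqadd d0, d0, d1` computes on 64-bit operands.
        a.saturating_add(b)
    }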
17
cranelift/filetests/filetests/vcode/aarch64/shift-op.clif
Normal file
@@ -0,0 +1,17 @@
test vcode
target aarch64

function %f(i64) -> i64 {
block0(v0: i64):
    v1 = iconst.i64 3
    v2 = ishl.i64 v0, v1
    v3 = iadd.i64 v0, v2
    return v3
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: add x0, x0, x0, LSL 3
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
440
cranelift/filetests/filetests/vcode/aarch64/shift-rotate.clif
Normal file
@@ -0,0 +1,440 @@
test vcode
target aarch64

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ROR, variable
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

function %f0(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = rotr.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ror x0, x0, x1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f1(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
    v2 = rotr.i32 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ror w0, w0, w1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f2(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
    v2 = rotr.i16 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxth w0, w0
; nextln: sub w2, w1, #16
; nextln: sub w2, wzr, w2
; nextln: lsr w1, w0, w1
; nextln: lsl w0, w0, w2
; nextln: orr w0, w0, w1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f3(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
    v2 = rotr.i8 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxtb w0, w0
; nextln: sub w2, w1, #8
; nextln: sub w2, wzr, w2
; nextln: lsr w1, w0, w1
; nextln: lsl w0, w0, w2
; nextln: orr w0, w0, w1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ROL, variable
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

function %f4(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = rotl.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub w2, w1, #64
; nextln: sub w2, wzr, w2
; nextln: lsl x1, x0, x1
; nextln: lsr x0, x0, x2
; nextln: orr x0, x0, x1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f5(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
    v2 = rotl.i32 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub w2, w1, #32
; nextln: sub w2, wzr, w2
; nextln: lsl w1, w0, w1
; nextln: lsr w0, w0, w2
; nextln: orr w0, w0, w1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f6(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
    v2 = rotl.i16 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxth w0, w0
; nextln: sub w2, w1, #16
; nextln: sub w2, wzr, w2
; nextln: lsl w1, w0, w1
; nextln: lsr w0, w0, w2
; nextln: orr w0, w0, w1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f7(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
    v2 = rotl.i8 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxtb w0, w0
; nextln: sub w2, w1, #8
; nextln: sub w2, wzr, w2
; nextln: lsl w1, w0, w1
; nextln: lsr w0, w0, w2
; nextln: orr w0, w0, w1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; LSR, variable
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

function %f8(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = ushr.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: lsr x0, x0, x1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f9(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
    v2 = ushr.i32 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: lsr w0, w0, w1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f10(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
    v2 = ushr.i16 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxth w0, w0
; nextln: lsr w0, w0, w1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f11(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
    v2 = ushr.i8 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxtb w0, w0
; nextln: lsr w0, w0, w1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; LSL, variable
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

function %f12(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = ishl.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: lsl x0, x0, x1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f13(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
    v2 = ishl.i32 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: lsl w0, w0, w1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f14(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
    v2 = ishl.i16 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: lsl w0, w0, w1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f15(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
    v2 = ishl.i8 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: lsl w0, w0, w1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ASR, variable
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

function %f16(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = sshr.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: asr x0, x0, x1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f17(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
    v2 = sshr.i32 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: asr w0, w0, w1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f18(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
    v2 = sshr.i16 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxth w0, w0
; nextln: asr w0, w0, w1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f19(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
    v2 = sshr.i8 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxtb w0, w0
; nextln: asr w0, w0, w1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; immediate forms
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

function %f20(i64) -> i64 {
block0(v0: i64):
    v1 = iconst.i32 17
    v2 = rotr.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ror x0, x0, #17
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f21(i64) -> i64 {
block0(v0: i64):
    v1 = iconst.i32 17
    v2 = rotl.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: lsl x1, x0, #17
; nextln: lsr x0, x0, #47
; nextln: orr x0, x0, x1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f22(i32) -> i32 {
block0(v0: i32):
    v1 = iconst.i32 17
    v2 = rotl.i32 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: lsl w1, w0, #17
; nextln: lsr w0, w0, #15
; nextln: orr w0, w0, w1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f23(i16) -> i16 {
block0(v0: i16):
    v1 = iconst.i32 10
    v2 = rotl.i16 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxth w0, w0
; nextln: lsl w1, w0, #10
; nextln: lsr w0, w0, #6
; nextln: orr w0, w0, w1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f24(i8) -> i8 {
block0(v0: i8):
    v1 = iconst.i32 3
    v2 = rotl.i8 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxtb w0, w0
; nextln: lsl w1, w0, #3
; nextln: lsr w0, w0, #5
; nextln: orr w0, w0, w1
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f25(i64) -> i64 {
block0(v0: i64):
    v1 = iconst.i32 17
    v2 = ushr.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: lsr x0, x0, #17
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f26(i64) -> i64 {
block0(v0: i64):
    v1 = iconst.i32 17
    v2 = sshr.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: asr x0, x0, #17
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f27(i64) -> i64 {
block0(v0: i64):
    v1 = iconst.i32 17
    v2 = ishl.i64 v0, v1
    return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: lsl x0, x0, #17
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
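AArch64 has `ror` but no rotate-left instruction, which is why every `rotl` above expands to a shift pair plus `orr` (with narrow types zero-extended first so the rotated-in bits are well defined). The identity the expansion relies on, sketched in Rust (illustration only, not backend code):

    fn rotl64(x: u64, n: u32) -> u64 {
        let n = n & 63; // rotate amounts wrap at the register width
        if n == 0 { x } else { (x << n) | (x >> (64 - n)) }
    }

For the immediate forms this collapses to constant shifts: in %f21 the amounts are 17 and 64 - 17 = 47, matching the emitted `lsl #17` / `lsr #47` pair.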
@@ -0,0 +1,17 @@
test vcode
target aarch64

function %f() -> i64 {
    gv0 = symbol %my_global

block0:
    v0 = symbol_value.i64 gv0
    return v0
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
29
cranelift/filetests/filetests/vcode/aarch64/traps.clif
Normal file
@@ -0,0 +1,29 @@
test vcode
target aarch64

function %f() {
block0:
    trap user0
}

; check: udf

function %g(i64) {
block0(v0: i64):
    v1 = iconst.i64 42
    v2 = ifcmp v0, v1
    trapif eq v2, user0
    return
}

; check: subs xzr, x0, #42
; nextln: b.ne 8
; nextln: udf

function %h() {
block0:
    debugtrap
    return
}

; check: brk #0
158
cranelift/filetests/filetests/vcode/aarch64/uextend-sextend.clif
Normal file
@@ -0,0 +1,158 @@
test vcode
target aarch64

function %f_u_8_64(i8) -> i64 {
block0(v0: i8):
    v1 = uextend.i64 v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxtb x0, w0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f_u_8_32(i8) -> i32 {
block0(v0: i8):
    v1 = uextend.i32 v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxtb w0, w0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f_u_8_16(i8) -> i16 {
block0(v0: i8):
    v1 = uextend.i16 v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxtb w0, w0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f_s_8_64(i8) -> i64 {
block0(v0: i8):
    v1 = sextend.i64 v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxtb x0, w0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f_s_8_32(i8) -> i32 {
block0(v0: i8):
    v1 = sextend.i32 v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxtb w0, w0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f_s_8_16(i8) -> i16 {
block0(v0: i8):
    v1 = sextend.i16 v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxtb w0, w0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f_u_16_64(i16) -> i64 {
block0(v0: i16):
    v1 = uextend.i64 v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxth x0, w0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f_u_16_32(i16) -> i32 {
block0(v0: i16):
    v1 = uextend.i32 v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxth w0, w0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f_s_16_64(i16) -> i64 {
block0(v0: i16):
    v1 = sextend.i64 v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxth x0, w0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f_s_16_32(i16) -> i32 {
block0(v0: i16):
    v1 = sextend.i32 v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxth w0, w0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f_u_32_64(i32) -> i64 {
block0(v0: i32):
    v1 = uextend.i64 v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov w0, w0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f_s_32_64(i32) -> i64 {
block0(v0: i32):
    v1 = sextend.i64 v0
    return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxtw x0, w0
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
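One detail worth calling out above: `uextend.i64` of an i32 is just `mov w0, w0`, because any write to a W register clears the upper 32 bits of the corresponding X register, so a 32-bit move is itself a zero-extend. A sketch of the equivalent Rust:

    fn uextend_32_to_64(x: u32) -> u64 {
        // A single 32-bit register move suffices on AArch64; the
        // zero-extension falls out of the W-register write semantics.
        x as u64
    }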
@@ -56,6 +56,7 @@ mod test_shrink;
mod test_simple_gvn;
mod test_simple_preopt;
mod test_unwind;
mod test_vcode;
mod test_verifier;

/// The result of running the test in a file.

@@ -134,6 +135,7 @@ fn new_subtest(parsed: &TestCommand) -> subtest::SubtestResult<Box<dyn subtest::
        "run" => test_run::subtest(parsed),
        "shrink" => test_shrink::subtest(parsed),
        "simple-gvn" => test_simple_gvn::subtest(parsed),
        "vcode" => test_vcode::subtest(parsed),
        "verifier" => test_verifier::subtest(parsed),
        "preopt" => test_preopt::subtest(parsed),
        "safepoint" => test_safepoint::subtest(parsed),
67
cranelift/filetests/src/test_vcode.rs
Normal file
@@ -0,0 +1,67 @@
use crate::subtest::{run_filecheck, Context, SubTest, SubtestResult};
use cranelift_codegen::ir::Function;
use cranelift_codegen::isa::lookup;
use cranelift_codegen::settings;
use cranelift_codegen::Context as CodegenContext;
use cranelift_reader::{TestCommand, TestOption};

use log::info;
use std::borrow::Cow;
use std::string::String;

struct TestVCode {
    arch: String,
}

pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
    assert_eq!(parsed.command, "vcode");

    let mut arch = "arm64".to_string();
    for option in &parsed.options {
        match option {
            TestOption::Value(k, v) if k == &"arch" => {
                arch = v.to_string();
            }
            _ => {}
        }
    }

    Ok(Box::new(TestVCode { arch }))
}

impl SubTest for TestVCode {
    fn name(&self) -> &'static str {
        "vcode"
    }

    fn is_mutating(&self) -> bool {
        true
    }

    fn needs_isa(&self) -> bool {
        true
    }

    fn run(&self, func: Cow<Function>, context: &Context) -> SubtestResult<()> {
        let triple = context.isa.unwrap().triple().clone();
        let func = func.into_owned();

        let mut isa = lookup(triple)
            .map_err(|_| format!("Could not look up backend for arch '{}'", self.arch))?
            .finish(settings::Flags::new(settings::builder()));

        let mut codectx = CodegenContext::for_function(func);
        codectx.set_disasm(true);

        codectx
            .compile(&mut *isa)
            .map_err(|e| format!("Could not compile with arch '{}': {:?}", self.arch, e))?;

        let result = codectx.mach_compile_result.take().unwrap();
        let text = result.disasm.unwrap();

        info!("text input to filecheck is:\n{}\n", text);

        run_filecheck(&text, context)
    }
}
@@ -49,42 +49,42 @@ fn handle_module(

// If we have an isa from the command-line, use that. Otherwise if the
// file contains a unique isa, use that.
let isa = if let Some(isa) = fisa.isa {
    isa
} else if let Some(isa) = test_file.isa_spec.unique_isa() {
    isa
} else {
let isa = fisa.isa.or(test_file.isa_spec.unique_isa());

if isa.is_none() {
    return Err(String::from("compilation requires a target isa"));
};

for (func, _) in test_file.functions {
    let mut context = Context::new();
    context.func = func;

    let mut relocs = PrintRelocs::new(flag_print);
    let mut traps = PrintTraps::new(flag_print);
    let mut stackmaps = PrintStackmaps::new(flag_print);
    let mut mem = vec![];

    // Compile and encode the result to machine code.
    let code_info = context
        .compile_and_emit(isa, &mut mem, &mut relocs, &mut traps, &mut stackmaps)
        .map_err(|err| pretty_error(&context.func, Some(isa), err))?;
    if let Some(isa) = isa {
        let mut context = Context::new();
        context.func = func;
        let mut mem = vec![];

        if flag_print {
            println!("{}", context.func.display(isa));
        }
        // Compile and encode the result to machine code.
        let code_info = context
            .compile_and_emit(isa, &mut mem, &mut relocs, &mut traps, &mut stackmaps)
            .map_err(|err| pretty_error(&context.func, Some(isa), err))?;

        if flag_disasm {
            print_all(
                isa,
                &mem,
                code_info.code_size,
                code_info.jumptables_size + code_info.rodata_size,
                &relocs,
                &traps,
                &stackmaps,
            )?;
        if flag_print {
            println!("{}", context.func.display(isa));
        }

        if flag_disasm {
            print_all(
                isa,
                &mem,
                code_info.code_size,
                code_info.jumptables_size + code_info.rodata_size,
                &relocs,
                &traps,
                &stackmaps,
            )?;
        }
    }
}

@@ -2,7 +2,7 @@
use crate::Compilation;
use cranelift_codegen::binemit::Reloc;
use std::ptr::write_unaligned;
use std::ptr::{read_unaligned, write_unaligned};
use wasmtime_environ::{Module, Relocation, RelocationTarget};
use wasmtime_runtime::libcalls;
use wasmtime_runtime::VMFunctionBody;
@@ -101,6 +101,23 @@ fn apply_reloc(
Reloc::X86PCRelRodata4 => {
    // ignore
}
Reloc::Arm64Call => unsafe {
    let reloc_address = body.add(r.offset as usize) as usize;
    let reloc_addend = r.addend as isize;
    let reloc_delta = (target_func_address as u64).wrapping_sub(reloc_address as u64);
    // TODO: come up with a PLT-like solution for longer calls. We can't extend the
    // code segment at this point, but we could conservatively allocate space at the
    // end of the function during codegen, a fixed amount per call, to allow for
    // potential branch islands.
    assert!((reloc_delta as i64) < (1 << 27));
    assert!((reloc_delta as i64) >= -(1 << 27));
    let reloc_delta = reloc_delta as u32;
    let reloc_delta = reloc_delta.wrapping_add(reloc_addend as u32);
    let delta_bits = reloc_delta >> 2;
    let insn = read_unaligned(reloc_address as *const u32);
    let new_insn = (insn & 0xfc00_0000) | (delta_bits & 0x03ff_ffff);
    write_unaligned(reloc_address as *mut u32, new_insn);
},
_ => panic!("unsupported reloc kind"),
}
}
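The `Reloc::Arm64Call` arm patches the 26-bit immediate of a `bl` instruction; the immediate is in units of 4-byte instructions, so the reachable range is +/-128 MiB, which is exactly what the `1 << 27` asserts enforce. A condensed sketch of the same patch (hypothetical helper name, and ignoring the addend handling shown above):

    fn patch_bl(insn: u32, pc: u64, target: u64) -> u32 {
        let delta = target.wrapping_sub(pc) as i64;
        // 26-bit signed field, scaled by 4: +/-2^27 bytes of reach.
        assert!(delta >= -(1 << 27) && delta < (1 << 27), "bl target out of range");
        (insn & 0xfc00_0000) | (((delta as u32) >> 2) & 0x03ff_ffff)
    }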
@@ -108,14 +125,11 @@ fn apply_reloc(
// A declaration for the stack probe function in Rust's standard library, for
// catching callstack overflow.
cfg_if::cfg_if! {
    if #[cfg(any(
        target_arch="aarch64",
        all(
    if #[cfg(all(
        target_os = "windows",
        target_env = "msvc",
        target_pointer_width = "64"
        )
    ))] {
    ))] {
        extern "C" {
            pub fn __chkstk();
        }
@@ -128,6 +142,13 @@ cfg_if::cfg_if! {
            pub fn ___chkstk();
        }
        const PROBESTACK: unsafe extern "C" fn() = ___chkstk;
    } else if #[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))] {
        // As per
        // https://github.com/rust-lang/compiler-builtins/blob/cae3e6ea23739166504f9f9fb50ec070097979d4/src/probestack.rs#L39,
        // LLVM only has stack-probe support on x86-64 and x86. Thus, on any other CPU
        // architecture, we simply use an empty stack-probe function.
        extern "C" fn empty_probestack() {}
        const PROBESTACK: unsafe extern "C" fn() = empty_probestack;
    } else {
        extern "C" {
            pub fn __rust_probestack();

@@ -26,3 +26,12 @@ void* GetPcFromUContext(ucontext_t *cx) {
    return (void*) cx->uc_mcontext->__ss.__rip;
}
#endif

#if defined(__linux__) && defined(__aarch64__)
#include <sys/ucontext.h>

void* GetPcFromUContext(ucontext_t *cx) {
    return (void*) cx->uc_mcontext.pc;
}

#endif // __linux__ && __aarch64__
@@ -156,6 +156,12 @@ cfg_if::cfg_if! {
if #[cfg(all(target_os = "linux", target_arch = "x86_64"))] {
    let cx = &*(cx as *const libc::ucontext_t);
    cx.uc_mcontext.gregs[libc::REG_RIP as usize] as *const u8
} else if #[cfg(all(target_os = "linux", target_arch = "aarch64"))] {
    // libc doesn't seem to support Linux/aarch64 at the moment?
    extern "C" {
        fn GetPcFromUContext(cx: *mut libc::c_void) -> *const u8;
    }
    GetPcFromUContext(cx)
} else if #[cfg(target_os = "macos")] {
    // FIXME(rust-lang/libc#1702) - once that lands and is
    // released we should inline the definition here