Merge pull request #1494 from cfallin/arm64-merge

Add new `MachInst` backend and ARM64 support.
Chris Fallin
2020-04-16 10:02:02 -07:00
committed by GitHub
63 changed files with 16668 additions and 322 deletions


@@ -0,0 +1,885 @@
//! Implementation of the standard AArch64 ABI.
use crate::ir;
use crate::ir::types;
use crate::ir::types::*;
use crate::ir::StackSlot;
use crate::isa;
use crate::isa::aarch64::inst::*;
use crate::machinst::*;
use crate::settings;
use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
use log::debug;
/// A location for an argument or return value.
#[derive(Clone, Copy, Debug)]
enum ABIArg {
/// In a real register.
Reg(RealReg, ir::Type),
/// Arguments only: on stack, at given offset from SP at entry.
Stack(i64, ir::Type),
}
/// AArch64 ABI information shared between body (callee) and caller.
struct ABISig {
args: Vec<ABIArg>,
rets: Vec<ABIArg>,
stack_arg_space: i64,
call_conv: isa::CallConv,
}
// SpiderMonkey-specific ABI convention.
/// This is SpiderMonkey's `WasmTableCallSigReg`.
static BALDRDASH_SIG_REG: u8 = 10;
/// This is SpiderMonkey's `WasmTlsReg`.
static BALDRDASH_TLS_REG: u8 = 23;
// These two lists represent the registers the JIT may *not* use at any point in generated code.
//
// So these are callee-preserved from the JIT's point of view, and every
// register not marked true below has to be treated as caller-saved by definition.
//
// Keep these lists in sync with the NonAllocatableMask set in SpiderMonkey's
// Architecture-arm64.cpp.
// Indexed by physical register number.
#[rustfmt::skip]
static BALDRDASH_JIT_CALLEE_SAVED_GPR: &[bool] = &[
/* 0 = */ false, false, false, false, false, false, false, false,
/* 8 = */ false, false, false, false, false, false, false, false,
/* 16 = */ true /* x16 / ip0 */, true /* x17 / ip1 */, true /* x18 / TLS */, false,
/* 20 = */ false, false, false, false,
/* 24 = */ false, false, false, false,
// x28, the pseudo stack pointer, should also be in this list; however, the
// wasm stubs currently clobber it freely.
/* 28 = */ false, false, true /* x30 = FP */, true /* x31 = SP */
];
#[rustfmt::skip]
static BALDRDASH_JIT_CALLEE_SAVED_FPU: &[bool] = &[
/* 0 = */ false, false, false, false, false, false, false, false,
/* 8 = */ false, false, false, false, false, false, false, false,
/* 16 = */ false, false, false, false, false, false, false, false,
/* 24 = */ false, false, false, false, false, false, false, true /* v31 / d31 */
];
/// Try to fill a Baldrdash register, returning it if it was found.
fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Option<ABIArg> {
if call_conv.extends_baldrdash() {
match &param.purpose {
&ir::ArgumentPurpose::VMContext => {
// This is SpiderMonkey's `WasmTlsReg`.
Some(ABIArg::Reg(
xreg(BALDRDASH_TLS_REG).to_real_reg(),
ir::types::I64,
))
}
&ir::ArgumentPurpose::SignatureId => {
// This is SpiderMonkey's `WasmTableCallSigReg`.
Some(ABIArg::Reg(
xreg(BALDRDASH_SIG_REG).to_real_reg(),
ir::types::I64,
))
}
_ => None,
}
} else {
None
}
}
/// Process a list of parameters or return values and allocate them to X-regs,
/// V-regs, and stack slots.
///
/// Returns the list of argument locations, and the stack-space used (rounded up
/// to a 16-byte-aligned boundary).
fn compute_arg_locs(call_conv: isa::CallConv, params: &[ir::AbiParam]) -> (Vec<ABIArg>, i64) {
// See the AArch64 ABI (https://c9x.me/compile/bib/abi-arm64.pdf), section 5.4.
let mut next_xreg = 0;
let mut next_vreg = 0;
let mut next_stack: u64 = 0;
let mut ret = vec![];
for param in params {
// Validate "purpose".
match &param.purpose {
&ir::ArgumentPurpose::VMContext
| &ir::ArgumentPurpose::Normal
| &ir::ArgumentPurpose::SignatureId => {}
_ => panic!(
"Unsupported argument purpose {:?} in signature: {:?}",
param.purpose, params
),
}
if in_int_reg(param.value_type) {
if let Some(param) = try_fill_baldrdash_reg(call_conv, param) {
ret.push(param);
} else if next_xreg < 8 {
ret.push(ABIArg::Reg(xreg(next_xreg).to_real_reg(), param.value_type));
next_xreg += 1;
} else {
ret.push(ABIArg::Stack(next_stack as i64, param.value_type));
next_stack += 8;
}
} else if in_vec_reg(param.value_type) {
if next_vreg < 8 {
ret.push(ABIArg::Reg(vreg(next_vreg).to_real_reg(), param.value_type));
next_vreg += 1;
} else {
let size: u64 = match param.value_type {
F32 | F64 => 8,
_ => panic!("Unsupported vector-reg argument type"),
};
// Align.
assert!(size.is_power_of_two());
next_stack = (next_stack + size - 1) & !(size - 1);
ret.push(ABIArg::Stack(next_stack as i64, param.value_type));
next_stack += size;
}
}
}
next_stack = (next_stack + 15) & !15;
(ret, next_stack as i64)
}
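// A small sketch test (not part of the original change) illustrating the
// allocation rules above: the ninth integer argument overflows the eight
// argument x-regs onto the stack, and the stack-arg space is rounded up to a
// 16-byte boundary.
#[cfg(test)]
mod arg_loc_test {
use super::*;
#[test]
fn ninth_int_arg_goes_to_stack() {
let params = vec![ir::AbiParam::new(types::I64); 9];
let (locs, space) = compute_arg_locs(isa::CallConv::SystemV, &params);
assert_eq!(locs.len(), 9);
match locs[8] {
ABIArg::Stack(off, ty) => {
assert_eq!(off, 0);
assert_eq!(ty, types::I64);
}
_ => panic!("expected a stack argument"),
}
assert_eq!(space, 16);
}
}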
impl ABISig {
fn from_func_sig(sig: &ir::Signature) -> ABISig {
// Compute args and retvals from signature.
// TODO: pass in arg-mode or ret-mode. (Does not matter
// for the types of arguments/return values that we support.)
let (args, stack_arg_space) = compute_arg_locs(sig.call_conv, &sig.params);
let (rets, _) = compute_arg_locs(sig.call_conv, &sig.returns);
// Verify that there are no return values on the stack.
assert!(rets.iter().all(|a| match a {
&ABIArg::Stack(..) => false,
_ => true,
}));
ABISig {
args,
rets,
stack_arg_space,
call_conv: sig.call_conv,
}
}
}
/// AArch64 ABI object for a function body.
pub struct AArch64ABIBody {
/// Signature: arg and retval regs.
sig: ABISig,
/// Offsets to each stackslot.
stackslots: Vec<u32>,
/// Total stack size of all stackslots.
stackslots_size: u32,
/// Clobbered registers, from regalloc.
clobbered: Set<Writable<RealReg>>,
/// Total number of spillslots, from regalloc.
spillslots: Option<usize>,
/// Total frame size.
frame_size: Option<u32>,
/// Calling convention this function expects.
call_conv: isa::CallConv,
}
fn in_int_reg(ty: ir::Type) -> bool {
match ty {
types::I8 | types::I16 | types::I32 | types::I64 => true,
types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true,
_ => false,
}
}
fn in_vec_reg(ty: ir::Type) -> bool {
match ty {
types::F32 | types::F64 => true,
_ => false,
}
}
impl AArch64ABIBody {
/// Create a new body ABI instance.
pub fn new(f: &ir::Function) -> Self {
debug!("AArch64 ABI: func signature {:?}", f.signature);
let sig = ABISig::from_func_sig(&f.signature);
let call_conv = f.signature.call_conv;
// Only these calling conventions are supported.
assert!(
call_conv == isa::CallConv::SystemV
|| call_conv == isa::CallConv::Fast
|| call_conv == isa::CallConv::Cold
|| call_conv.extends_baldrdash(),
"Unsupported calling convention: {:?}",
call_conv
);
// Compute stackslot locations and total stackslot size.
let mut stack_offset: u32 = 0;
let mut stackslots = vec![];
for (stackslot, data) in f.stack_slots.iter() {
let off = stack_offset;
stack_offset += data.size;
stack_offset = (stack_offset + 7) & !7;
assert_eq!(stackslot.as_u32() as usize, stackslots.len());
stackslots.push(off);
}
Self {
sig,
stackslots,
stackslots_size: stack_offset,
clobbered: Set::empty(),
spillslots: None,
frame_size: None,
call_conv,
}
}
}
fn load_stack(fp_offset: i64, into_reg: Writable<Reg>, ty: Type) -> Inst {
let mem = MemArg::FPOffset(fp_offset);
match ty {
types::B1
| types::B8
| types::I8
| types::B16
| types::I16
| types::B32
| types::I32
| types::B64
| types::I64 => Inst::ULoad64 {
rd: into_reg,
mem,
srcloc: None,
},
types::F32 => Inst::FpuLoad32 {
rd: into_reg,
mem,
srcloc: None,
},
types::F64 => Inst::FpuLoad64 {
rd: into_reg,
mem,
srcloc: None,
},
_ => unimplemented!("load_stack({})", ty),
}
}
fn store_stack(fp_offset: i64, from_reg: Reg, ty: Type) -> Inst {
let mem = MemArg::FPOffset(fp_offset);
match ty {
types::B1
| types::B8
| types::I8
| types::B16
| types::I16
| types::B32
| types::I32
| types::B64
| types::I64 => Inst::Store64 {
rd: from_reg,
mem,
srcloc: None,
},
types::F32 => Inst::FpuStore32 {
rd: from_reg,
mem,
srcloc: None,
},
types::F64 => Inst::FpuStore64 {
rd: from_reg,
mem,
srcloc: None,
},
_ => unimplemented!("store_stack({})", ty),
}
}
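// Note (illustrative): store_stack(-16, rn, types::I32) emits Inst::Store64 at
// [FP - 16]; all integer and boolean types are spilled and reloaded as full
// 64-bit accesses in this scheme, which is safe because every slot is padded
// out to 8 bytes.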
fn is_callee_save(call_conv: isa::CallConv, r: RealReg) -> bool {
if call_conv.extends_baldrdash() {
match r.get_class() {
RegClass::I64 => {
let enc = r.get_hw_encoding();
if BALDRDASH_JIT_CALLEE_SAVED_GPR[enc] {
return true;
}
// Otherwise, fall through to preserve native ABI registers.
}
RegClass::V128 => {
let enc = r.get_hw_encoding();
if BALDRDASH_JIT_CALLEE_SAVED_FPU[enc] {
return true;
}
// Otherwise, fall through to preserve native ABI registers.
}
_ => unimplemented!("baldrdash callee saved on non-i64 reg classes"),
};
}
match r.get_class() {
RegClass::I64 => {
// x19 - x28 inclusive are callee-saves.
r.get_hw_encoding() >= 19 && r.get_hw_encoding() <= 28
}
RegClass::V128 => {
// v8 - v15 inclusive are callee-saves.
r.get_hw_encoding() >= 8 && r.get_hw_encoding() <= 15
}
_ => panic!("Unexpected RegClass"),
}
}
fn get_callee_saves(
call_conv: isa::CallConv,
regs: Vec<Writable<RealReg>>,
) -> (Vec<Writable<RealReg>>, Vec<Writable<RealReg>>) {
let mut int_saves = vec![];
let mut vec_saves = vec![];
for reg in regs.into_iter() {
if is_callee_save(call_conv, reg.to_reg()) {
match reg.to_reg().get_class() {
RegClass::I64 => int_saves.push(reg),
RegClass::V128 => vec_saves.push(reg),
_ => panic!("Unexpected RegClass"),
}
}
}
(int_saves, vec_saves)
}
fn is_caller_save(call_conv: isa::CallConv, r: RealReg) -> bool {
if call_conv.extends_baldrdash() {
match r.get_class() {
RegClass::I64 => {
let enc = r.get_hw_encoding();
if !BALDRDASH_JIT_CALLEE_SAVED_GPR[enc] {
return true;
}
// Otherwise, fall through to preserve native's ABI caller-saved.
}
RegClass::V128 => {
let enc = r.get_hw_encoding();
if !BALDRDASH_JIT_CALLEE_SAVED_FPU[enc] {
return true;
}
// Otherwise, fall through to preserve native's ABI caller-saved.
}
_ => unimplemented!("baldrdash callee saved on non-i64 reg classes"),
};
}
match r.get_class() {
RegClass::I64 => {
// x0 - x17 inclusive are caller-saves.
r.get_hw_encoding() <= 17
}
RegClass::V128 => {
// v0 - v7 inclusive and v16 - v31 inclusive are caller-saves.
r.get_hw_encoding() <= 7 || (r.get_hw_encoding() >= 16 && r.get_hw_encoding() <= 31)
}
_ => panic!("Unexpected RegClass"),
}
}
fn get_caller_saves_set(call_conv: isa::CallConv) -> Set<Writable<Reg>> {
let mut set = Set::empty();
for i in 0..29 {
let x = writable_xreg(i);
if is_caller_save(call_conv, x.to_reg().to_real_reg()) {
set.insert(x);
}
}
for i in 0..32 {
let v = writable_vreg(i);
if is_caller_save(call_conv, v.to_reg().to_real_reg()) {
set.insert(v);
}
}
set
}
impl ABIBody for AArch64ABIBody {
type I = Inst;
fn liveins(&self) -> Set<RealReg> {
let mut set: Set<RealReg> = Set::empty();
for &arg in &self.sig.args {
if let ABIArg::Reg(r, _) = arg {
set.insert(r);
}
}
set
}
fn liveouts(&self) -> Set<RealReg> {
let mut set: Set<RealReg> = Set::empty();
for &ret in &self.sig.rets {
if let ABIArg::Reg(r, _) = ret {
set.insert(r);
}
}
set
}
fn num_args(&self) -> usize {
self.sig.args.len()
}
fn num_retvals(&self) -> usize {
self.sig.rets.len()
}
fn num_stackslots(&self) -> usize {
self.stackslots.len()
}
fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Inst {
match &self.sig.args[idx] {
&ABIArg::Reg(r, ty) => Inst::gen_move(into_reg, r.to_reg(), ty),
&ABIArg::Stack(off, ty) => load_stack(off + 16, into_reg, ty),
}
}
fn gen_copy_reg_to_retval(&self, idx: usize, from_reg: Reg) -> Inst {
match &self.sig.rets[idx] {
&ABIArg::Reg(r, ty) => Inst::gen_move(Writable::from_reg(r.to_reg()), from_reg, ty),
&ABIArg::Stack(off, ty) => store_stack(off + 16, from_reg, ty),
}
}
fn gen_ret(&self) -> Inst {
Inst::Ret {}
}
fn gen_epilogue_placeholder(&self) -> Inst {
Inst::EpiloguePlaceholder {}
}
fn set_num_spillslots(&mut self, slots: usize) {
self.spillslots = Some(slots);
}
fn set_clobbered(&mut self, clobbered: Set<Writable<RealReg>>) {
self.clobbered = clobbered;
}
fn load_stackslot(
&self,
slot: StackSlot,
offset: u32,
ty: Type,
into_reg: Writable<Reg>,
) -> Inst {
// Offset from beginning of stackslot area, which is at FP - stackslots_size.
let stack_off = self.stackslots[slot.as_u32() as usize] as i64;
let fp_off: i64 = -(self.stackslots_size as i64) + stack_off + (offset as i64);
load_stack(fp_off, into_reg, ty)
}
fn store_stackslot(&self, slot: StackSlot, offset: u32, ty: Type, from_reg: Reg) -> Inst {
// Offset from beginning of stackslot area, which is at FP - stackslots_size.
let stack_off = self.stackslots[slot.as_u32() as usize] as i64;
let fp_off: i64 = -(self.stackslots_size as i64) + stack_off + (offset as i64);
store_stack(fp_off, from_reg, ty)
}
// Load from a spillslot.
fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable<Reg>) -> Inst {
// Note that when spills/fills are generated, we don't yet know how many
// spillslots there will be, so we allocate *downward* from the beginning
// of the stackslot area. Hence: FP - stackslot_size - 8*spillslot -
// sizeof(ty).
let islot = slot.get() as i64;
let ty_size = self.get_spillslot_size(into_reg.to_reg().get_class(), ty) * 8;
let fp_off: i64 = -(self.stackslots_size as i64) - (8 * islot) - ty_size as i64;
load_stack(fp_off, into_reg, ty)
}
// Store to a spillslot.
fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Inst {
let islot = slot.get() as i64;
let ty_size = self.get_spillslot_size(from_reg.get_class(), ty) * 8;
let fp_off: i64 = -(self.stackslots_size as i64) - (8 * islot) - ty_size as i64;
store_stack(fp_off, from_reg, ty)
}
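// Worked example (illustrative): with stackslots_size == 32, an I64 spill to
// SpillSlot 2 computes ty_size = 1 * 8 = 8, so fp_off = -32 - 16 - 8 = -56 and
// the slot is addressed as [FP - 56].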
fn gen_prologue(&mut self, flags: &settings::Flags) -> Vec<Inst> {
let mut insts = vec![];
if !self.call_conv.extends_baldrdash() {
// stp fp (x29), lr (x30), [sp, #-16]!
insts.push(Inst::StoreP64 {
rt: fp_reg(),
rt2: link_reg(),
mem: PairMemArg::PreIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
),
});
// mov fp (x29), sp. This uses the `ADD rd, rn, #0` form of `MOV` because
// the usual encoding (`ORR`) does not work with SP.
insts.push(Inst::AluRRImm12 {
alu_op: ALUOp::Add64,
rd: writable_fp_reg(),
rn: stack_reg(),
imm12: Imm12 {
bits: 0,
shift12: false,
},
});
}
let mut total_stacksize = self.stackslots_size + 8 * self.spillslots.unwrap() as u32;
if self.call_conv.extends_baldrdash() {
debug_assert!(
!flags.enable_probestack(),
"baldrdash does not expect cranelift to emit stack probes"
);
total_stacksize += flags.baldrdash_prologue_words() as u32 * 8;
}
let total_stacksize = (total_stacksize + 15) & !15; // 16-align the stack.
if !self.call_conv.extends_baldrdash() && total_stacksize > 0 {
// sub sp, sp, #total_stacksize
if let Some(imm12) = Imm12::maybe_from_u64(total_stacksize as u64) {
let sub_inst = Inst::AluRRImm12 {
alu_op: ALUOp::Sub64,
rd: writable_stack_reg(),
rn: stack_reg(),
imm12,
};
insts.push(sub_inst);
} else {
let tmp = writable_spilltmp_reg();
let const_inst = Inst::LoadConst64 {
rd: tmp,
const_data: total_stacksize as u64,
};
let sub_inst = Inst::AluRRRExtend {
alu_op: ALUOp::Sub64,
rd: writable_stack_reg(),
rn: stack_reg(),
rm: tmp.to_reg(),
extendop: ExtendOp::UXTX,
};
insts.push(const_inst);
insts.push(sub_inst);
}
}
// Save clobbered registers.
let (clobbered_int, clobbered_vec) =
get_callee_saves(self.call_conv, self.clobbered.to_vec());
for reg_pair in clobbered_int.chunks(2) {
let (r1, r2) = if reg_pair.len() == 2 {
// .to_reg().to_reg(): Writable<RealReg> --> RealReg --> Reg
(reg_pair[0].to_reg().to_reg(), reg_pair[1].to_reg().to_reg())
} else {
(reg_pair[0].to_reg().to_reg(), zero_reg())
};
debug_assert!(r1.get_class() == RegClass::I64);
debug_assert!(r2.get_class() == RegClass::I64);
// stp r1, r2, [sp, #-16]!
insts.push(Inst::StoreP64 {
rt: r1,
rt2: r2,
mem: PairMemArg::PreIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
),
});
}
let vec_save_bytes = clobbered_vec.len() * 16;
if vec_save_bytes != 0 {
insts.push(Inst::AluRRImm12 {
alu_op: ALUOp::Sub64,
rd: writable_stack_reg(),
rn: stack_reg(),
imm12: Imm12::maybe_from_u64(vec_save_bytes as u64).unwrap(),
});
}
for (i, reg) in clobbered_vec.iter().enumerate() {
insts.push(Inst::FpuStore128 {
rd: reg.to_reg().to_reg(),
mem: MemArg::Unscaled(stack_reg(), SImm9::maybe_from_i64((i * 16) as i64).unwrap()),
srcloc: None,
});
}
self.frame_size = Some(total_stacksize);
insts
}
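// For reference, the emitted prologue for a non-baldrdash function with a
// small frame and two clobbered integer callee-saves looks roughly like this
// (illustrative sketch):
//
//   stp  fp, lr, [sp, #-16]!
//   mov  fp, sp                  // encoded as ADD fp, sp, #0
//   sub  sp, sp, #total_stacksize
//   stp  x19, x20, [sp, #-16]!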
fn gen_epilogue(&self, _flags: &settings::Flags) -> Vec<Inst> {
let mut insts = vec![];
// Restore clobbered registers.
let (clobbered_int, clobbered_vec) =
get_callee_saves(self.call_conv, self.clobbered.to_vec());
for (i, reg) in clobbered_vec.iter().enumerate() {
insts.push(Inst::FpuLoad128 {
rd: Writable::from_reg(reg.to_reg().to_reg()),
mem: MemArg::Unscaled(stack_reg(), SImm9::maybe_from_i64((i * 16) as i64).unwrap()),
srcloc: None,
});
}
let vec_save_bytes = clobbered_vec.len() * 16;
if vec_save_bytes != 0 {
insts.push(Inst::AluRRImm12 {
alu_op: ALUOp::Add64,
rd: writable_stack_reg(),
rn: stack_reg(),
imm12: Imm12::maybe_from_u64(vec_save_bytes as u64).unwrap(),
});
}
for reg_pair in clobbered_int.chunks(2).rev() {
let (r1, r2) = if reg_pair.len() == 2 {
(
reg_pair[0].map(|r| r.to_reg()),
reg_pair[1].map(|r| r.to_reg()),
)
} else {
(reg_pair[0].map(|r| r.to_reg()), writable_zero_reg())
};
debug_assert!(r1.to_reg().get_class() == RegClass::I64);
debug_assert!(r2.to_reg().get_class() == RegClass::I64);
// ldp r1, r2, [sp], #16
insts.push(Inst::LoadP64 {
rt: r1,
rt2: r2,
mem: PairMemArg::PostIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(),
),
});
}
if !self.call_conv.extends_baldrdash() {
// The MOV (alias of ORR) interprets x31 as XZR, so use an ADD here.
// MOV to SP is an alias of ADD.
insts.push(Inst::AluRRImm12 {
alu_op: ALUOp::Add64,
rd: writable_stack_reg(),
rn: fp_reg(),
imm12: Imm12 {
bits: 0,
shift12: false,
},
});
insts.push(Inst::LoadP64 {
rt: writable_fp_reg(),
rt2: writable_link_reg(),
mem: PairMemArg::PostIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(),
),
});
insts.push(Inst::Ret {});
}
debug!("Epilogue: {:?}", insts);
insts
}
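// The matching epilogue for the prologue sketched above (illustrative):
//
//   ldp  x19, x20, [sp], #16
//   mov  sp, fp                  // encoded as ADD sp, fp, #0
//   ldp  fp, lr, [sp], #16
//   ret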
fn frame_size(&self) -> u32 {
self.frame_size
.expect("frame size not computed before prologue generation")
}
fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 {
// We allocate in terms of 8-byte slots.
match (rc, ty) {
(RegClass::I64, _) => 1,
(RegClass::V128, F32) | (RegClass::V128, F64) => 1,
(RegClass::V128, _) => 2,
_ => panic!("Unexpected register class!"),
}
}
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Type) -> Inst {
self.store_spillslot(to_slot, ty, from_reg.to_reg())
}
fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot, ty: Type) -> Inst {
self.load_spillslot(from_slot, ty, to_reg.map(|r| r.to_reg()))
}
}
enum CallDest {
ExtName(ir::ExternalName),
Reg(Reg),
}
/// AArch64 ABI object for a function call.
pub struct AArch64ABICall {
sig: ABISig,
uses: Set<Reg>,
defs: Set<Writable<Reg>>,
dest: CallDest,
loc: ir::SourceLoc,
opcode: ir::Opcode,
}
fn abisig_to_uses_and_defs(sig: &ABISig) -> (Set<Reg>, Set<Writable<Reg>>) {
// Compute uses: all arg regs.
let mut uses = Set::empty();
for arg in &sig.args {
match arg {
&ABIArg::Reg(reg, _) => uses.insert(reg.to_reg()),
_ => {}
}
}
// Compute defs: all retval regs, and all caller-save (clobbered) regs.
let mut defs = get_caller_saves_set(sig.call_conv);
for ret in &sig.rets {
match ret {
&ABIArg::Reg(reg, _) => defs.insert(Writable::from_reg(reg.to_reg())),
_ => {}
}
}
(uses, defs)
}
impl AArch64ABICall {
/// Create a callsite ABI object for a call directly to the specified function.
pub fn from_func(
sig: &ir::Signature,
extname: &ir::ExternalName,
loc: ir::SourceLoc,
) -> AArch64ABICall {
let sig = ABISig::from_func_sig(sig);
let (uses, defs) = abisig_to_uses_and_defs(&sig);
AArch64ABICall {
sig,
uses,
defs,
dest: CallDest::ExtName(extname.clone()),
loc,
opcode: ir::Opcode::Call,
}
}
/// Create a callsite ABI object for a call to a function pointer with the
/// given signature.
pub fn from_ptr(
sig: &ir::Signature,
ptr: Reg,
loc: ir::SourceLoc,
opcode: ir::Opcode,
) -> AArch64ABICall {
let sig = ABISig::from_func_sig(sig);
let (uses, defs) = abisig_to_uses_and_defs(&sig);
AArch64ABICall {
sig,
uses,
defs,
dest: CallDest::Reg(ptr),
loc,
opcode,
}
}
}
fn adjust_stack(amt: u64, is_sub: bool) -> Vec<Inst> {
if amt > 0 {
let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 };
if let Some(imm12) = Imm12::maybe_from_u64(amt) {
vec![Inst::AluRRImm12 {
alu_op,
rd: writable_stack_reg(),
rn: stack_reg(),
imm12,
}]
} else {
let const_load = Inst::LoadConst64 {
rd: writable_spilltmp_reg(),
const_data: amt,
};
let adj = Inst::AluRRRExtend {
alu_op,
rd: writable_stack_reg(),
rn: stack_reg(),
rm: spilltmp_reg(),
extendop: ExtendOp::UXTX,
};
vec![const_load, adj]
}
} else {
vec![]
}
}
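// For example, adjust_stack(32, true) emits a single `sub sp, sp, #32`, while
// an amount such as 0x12345 does not fit the imm12 form (it has nonzero bits
// in both halves), so it is first loaded into the spill temporary and then
// subtracted with a register-register ALU op.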
impl ABICall for AArch64ABICall {
type I = Inst;
fn num_args(&self) -> usize {
self.sig.args.len()
}
fn gen_stack_pre_adjust(&self) -> Vec<Inst> {
adjust_stack(self.sig.stack_arg_space as u64, /* is_sub = */ true)
}
fn gen_stack_post_adjust(&self) -> Vec<Inst> {
adjust_stack(self.sig.stack_arg_space as u64, /* is_sub = */ false)
}
fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> Inst {
match &self.sig.args[idx] {
&ABIArg::Reg(reg, ty) => Inst::gen_move(Writable::from_reg(reg.to_reg()), from_reg, ty),
&ABIArg::Stack(off, _) => Inst::Store64 {
rd: from_reg,
mem: MemArg::SPOffset(off),
srcloc: None,
},
}
}
fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Inst {
match &self.sig.rets[idx] {
&ABIArg::Reg(reg, ty) => Inst::gen_move(into_reg, reg.to_reg(), ty),
_ => unimplemented!(),
}
}
fn gen_call(&self) -> Vec<Inst> {
let (uses, defs) = (self.uses.clone(), self.defs.clone());
match &self.dest {
&CallDest::ExtName(ref name) => vec![Inst::Call {
dest: name.clone(),
uses,
defs,
loc: self.loc,
opcode: self.opcode,
}],
&CallDest::Reg(reg) => vec![Inst::CallInd {
rn: reg,
uses,
defs,
loc: self.loc,
opcode: self.opcode,
}],
}
}
}
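// Expected callsite lowering order (a sketch inferred from the method names;
// the driver lives in the lowering code elsewhere in this PR):
// gen_stack_pre_adjust, then gen_copy_reg_to_arg for each argument, gen_call,
// gen_copy_retval_to_reg for each return value, and finally
// gen_stack_post_adjust.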


@@ -0,0 +1,528 @@
//! AArch64 ISA definitions: instruction arguments.
// Some variants are never constructed, but we still want them as options in the future.
#![allow(dead_code)]
use crate::binemit::CodeOffset;
use crate::ir::Type;
use crate::isa::aarch64::inst::*;
use regalloc::{RealRegUniverse, Reg, Writable};
use core::convert::{Into, TryFrom};
use std::string::String;
/// A shift operator for a register or immediate.
#[derive(Clone, Copy, Debug)]
#[repr(u8)]
pub enum ShiftOp {
LSL = 0b00,
LSR = 0b01,
ASR = 0b10,
ROR = 0b11,
}
impl ShiftOp {
/// Get the encoding of this shift op.
pub fn bits(self) -> u8 {
self as u8
}
}
/// A shift operator amount.
#[derive(Clone, Copy, Debug)]
pub struct ShiftOpShiftImm(u8);
impl ShiftOpShiftImm {
/// Maximum shift for shifted-register operands.
pub const MAX_SHIFT: u64 = 63;
/// Create a new shiftop shift amount, if possible.
pub fn maybe_from_shift(shift: u64) -> Option<ShiftOpShiftImm> {
if shift <= Self::MAX_SHIFT {
Some(ShiftOpShiftImm(shift as u8))
} else {
None
}
}
/// Return the shift amount.
pub fn value(self) -> u8 {
self.0
}
}
/// A shift operator with an amount, guaranteed to be within range.
#[derive(Clone, Debug)]
pub struct ShiftOpAndAmt {
op: ShiftOp,
shift: ShiftOpShiftImm,
}
impl ShiftOpAndAmt {
pub fn new(op: ShiftOp, shift: ShiftOpShiftImm) -> ShiftOpAndAmt {
ShiftOpAndAmt { op, shift }
}
/// Get the shift op.
pub fn op(&self) -> ShiftOp {
self.op
}
/// Get the shift amount.
pub fn amt(&self) -> ShiftOpShiftImm {
self.shift
}
}
/// An extend operator for a register.
#[derive(Clone, Copy, Debug)]
#[repr(u8)]
pub enum ExtendOp {
UXTB = 0b000,
UXTH = 0b001,
UXTW = 0b010,
UXTX = 0b011,
SXTB = 0b100,
SXTH = 0b101,
SXTW = 0b110,
SXTX = 0b111,
}
impl ExtendOp {
/// Encoding of this op.
pub fn bits(self) -> u8 {
self as u8
}
}
//=============================================================================
// Instruction sub-components (memory addresses): definitions
/// A reference to some memory address.
#[derive(Clone, Debug)]
pub enum MemLabel {
/// An address in the code, a constant pool or jumptable, with relative
/// offset from this instruction. This form must be used at emission time;
/// see `mem_finalize()` for how other forms are lowered to this one.
PCRel(i32),
}
/// A memory argument to load/store, encapsulating the possible addressing modes.
#[derive(Clone, Debug)]
pub enum MemArg {
Label(MemLabel),
/// "post-indexed" mode as per AArch64 docs: postincrement reg after address computation.
PostIndexed(Writable<Reg>, SImm9),
/// "pre-indexed" mode as per AArch64 docs: preincrement reg before address computation.
PreIndexed(Writable<Reg>, SImm9),
// N.B.: RegReg, RegScaled, and RegScaledExtended all correspond to
// what the ISA calls the "register offset" addressing mode. We split out
// several options here for more ergonomic codegen.
/// Register plus register offset.
RegReg(Reg, Reg),
/// Register plus register offset, scaled by type's size.
RegScaled(Reg, Reg, Type),
/// Register plus register offset, scaled by type's size, with index sign- or zero-extended
/// first.
RegScaledExtended(Reg, Reg, Type, ExtendOp),
/// Unscaled signed 9-bit immediate offset from reg.
Unscaled(Reg, SImm9),
/// Scaled (by size of a type) unsigned 12-bit immediate offset from reg.
UnsignedOffset(Reg, UImm12Scaled),
/// Offset from the stack pointer. Lowered into a real amode at emission.
SPOffset(i64),
/// Offset from the frame pointer. Lowered into a real amode at emission.
FPOffset(i64),
}
impl MemArg {
/// Memory reference using an address in a register.
pub fn reg(reg: Reg) -> MemArg {
// Use UnsignedOffset rather than Unscaled so this lowers to ldr rather than ldur.
// PostIndexed / PreIndexed are also avoided, since they update the base register.
MemArg::UnsignedOffset(reg, UImm12Scaled::zero(I64))
}
/// Memory reference using an address in a register and an offset, if possible.
pub fn reg_maybe_offset(reg: Reg, offset: i64, value_type: Type) -> Option<MemArg> {
if let Some(simm9) = SImm9::maybe_from_i64(offset) {
Some(MemArg::Unscaled(reg, simm9))
} else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(offset, value_type) {
Some(MemArg::UnsignedOffset(reg, uimm12s))
} else {
None
}
}
/// Memory reference using the sum of two registers as an address.
pub fn reg_plus_reg(reg1: Reg, reg2: Reg) -> MemArg {
MemArg::RegReg(reg1, reg2)
}
/// Memory reference using `reg1 + sizeof(ty) * reg2` as an address.
pub fn reg_plus_reg_scaled(reg1: Reg, reg2: Reg, ty: Type) -> MemArg {
MemArg::RegScaled(reg1, reg2, ty)
}
/// Memory reference using `reg1 + sizeof(ty) * reg2` as an address, with `reg2` sign- or
/// zero-extended as per `op`.
pub fn reg_plus_reg_scaled_extended(reg1: Reg, reg2: Reg, ty: Type, op: ExtendOp) -> MemArg {
MemArg::RegScaledExtended(reg1, reg2, ty, op)
}
/// Memory reference to a label: a global function or value, or data in the constant pool.
pub fn label(label: MemLabel) -> MemArg {
MemArg::Label(label)
}
}
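// Usage sketch (illustrative): `MemArg::reg_maybe_offset(base, 16, I64)` takes
// the `Unscaled` (SImm9) path, since that form is tried first; an offset of
// 32768 fails both SImm9 (-256 ..= 255) and UImm12Scaled (at most 4095 * 8 =
// 32760 for I64) and returns `None`, so the caller must materialize the
// address some other way.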
/// A memory argument to a load/store-pair.
#[derive(Clone, Debug)]
pub enum PairMemArg {
SignedOffset(Reg, SImm7Scaled),
PreIndexed(Writable<Reg>, SImm7Scaled),
PostIndexed(Writable<Reg>, SImm7Scaled),
}
//=============================================================================
// Instruction sub-components (conditions, branches and branch targets):
// definitions
/// Condition for conditional branches.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum Cond {
Eq = 0,
Ne = 1,
Hs = 2,
Lo = 3,
Mi = 4,
Pl = 5,
Vs = 6,
Vc = 7,
Hi = 8,
Ls = 9,
Ge = 10,
Lt = 11,
Gt = 12,
Le = 13,
Al = 14,
Nv = 15,
}
impl Cond {
/// Return the inverted condition.
pub fn invert(self) -> Cond {
match self {
Cond::Eq => Cond::Ne,
Cond::Ne => Cond::Eq,
Cond::Hs => Cond::Lo,
Cond::Lo => Cond::Hs,
Cond::Mi => Cond::Pl,
Cond::Pl => Cond::Mi,
Cond::Vs => Cond::Vc,
Cond::Vc => Cond::Vs,
Cond::Hi => Cond::Ls,
Cond::Ls => Cond::Hi,
Cond::Ge => Cond::Lt,
Cond::Lt => Cond::Ge,
Cond::Gt => Cond::Le,
Cond::Le => Cond::Gt,
Cond::Al => Cond::Nv,
Cond::Nv => Cond::Al,
}
}
/// Return the machine encoding of this condition.
pub fn bits(self) -> u32 {
self as u32
}
}
/// The kind of conditional branch: the common-case-optimized "reg-is-zero" /
/// "reg-is-nonzero" variants, or the generic one that tests the machine
/// condition codes.
#[derive(Clone, Copy, Debug)]
pub enum CondBrKind {
/// Condition: given register is zero.
Zero(Reg),
/// Condition: given register is nonzero.
NotZero(Reg),
/// Condition: the given condition-code test is true.
Cond(Cond),
}
impl CondBrKind {
/// Return the inverted branch condition.
pub fn invert(self) -> CondBrKind {
match self {
CondBrKind::Zero(reg) => CondBrKind::NotZero(reg),
CondBrKind::NotZero(reg) => CondBrKind::Zero(reg),
CondBrKind::Cond(c) => CondBrKind::Cond(c.invert()),
}
}
}
/// A branch target. Either unresolved (basic-block index) or resolved (offset
/// from end of current instruction).
#[derive(Clone, Copy, Debug)]
pub enum BranchTarget {
/// An unresolved reference to a BlockIndex, as passed into
/// `lower_branch_group()`.
Block(BlockIndex),
/// A resolved reference to another instruction, after
/// `Inst::with_block_offsets()`.
ResolvedOffset(isize),
}
impl BranchTarget {
/// Lower the branch target given offsets of each block.
pub fn lower(&mut self, targets: &[CodeOffset], my_offset: CodeOffset) {
match self {
&mut BranchTarget::Block(bix) => {
let bix = usize::try_from(bix).unwrap();
assert!(bix < targets.len());
let block_offset_in_func = targets[bix];
let branch_offset = (block_offset_in_func as isize) - (my_offset as isize);
*self = BranchTarget::ResolvedOffset(branch_offset);
}
&mut BranchTarget::ResolvedOffset(..) => {}
}
}
/// Get the block index.
pub fn as_block_index(&self) -> Option<BlockIndex> {
match self {
&BranchTarget::Block(bix) => Some(bix),
_ => None,
}
}
/// Get the offset as 4-byte words. Returns `0` if not
/// yet resolved (in that case, we're only computing
/// size and the offset doesn't matter).
pub fn as_offset_words(&self) -> isize {
match self {
&BranchTarget::ResolvedOffset(off) => off >> 2,
_ => 0,
}
}
/// Get the offset as a 26-bit offset suitable for a 26-bit jump, or `None` if overflow.
pub fn as_off26(&self) -> Option<u32> {
let off = self.as_offset_words();
if (off < (1 << 25)) && (off >= -(1 << 25)) {
Some((off as u32) & ((1 << 26) - 1))
} else {
None
}
}
/// Get the offset as a 19-bit offset, or `None` if overflow.
pub fn as_off19(&self) -> Option<u32> {
let off = self.as_offset_words();
if (off < (1 << 18)) && (off >= -(1 << 18)) {
Some((off as u32) & ((1 << 19) - 1))
} else {
None
}
}
/// Map the block index given a transform map.
pub fn map(&mut self, block_index_map: &[BlockIndex]) {
match self {
&mut BranchTarget::Block(ref mut bix) => {
let n = block_index_map[usize::try_from(*bix).unwrap()];
*bix = n;
}
&mut BranchTarget::ResolvedOffset(_) => {}
}
}
}
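// Range note (derived from the checks above): as_off26() accepts word offsets
// in [-2^25, 2^25), i.e. roughly +/-128 MiB of byte distance for unconditional
// branches, while as_off19() covers about +/-1 MiB for conditional branches.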
impl ShowWithRRU for ShiftOpAndAmt {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("{:?} {}", self.op(), self.amt().value())
}
}
impl ShowWithRRU for ExtendOp {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("{:?}", self)
}
}
impl ShowWithRRU for MemLabel {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
match self {
&MemLabel::PCRel(off) => format!("pc+{}", off),
}
}
}
fn shift_for_type(ty: Type) -> usize {
match ty.bytes() {
1 => 0,
2 => 1,
4 => 2,
8 => 3,
16 => 4,
_ => panic!("unknown type: {}", ty),
}
}
impl ShowWithRRU for MemArg {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
match self {
&MemArg::Unscaled(reg, simm9) => {
if simm9.value != 0 {
format!("[{}, {}]", reg.show_rru(mb_rru), simm9.show_rru(mb_rru))
} else {
format!("[{}]", reg.show_rru(mb_rru))
}
}
&MemArg::UnsignedOffset(reg, uimm12) => {
if uimm12.value != 0 {
format!("[{}, {}]", reg.show_rru(mb_rru), uimm12.show_rru(mb_rru))
} else {
format!("[{}]", reg.show_rru(mb_rru))
}
}
&MemArg::RegReg(r1, r2) => {
format!("[{}, {}]", r1.show_rru(mb_rru), r2.show_rru(mb_rru),)
}
&MemArg::RegScaled(r1, r2, ty) => {
let shift = shift_for_type(ty);
format!(
"[{}, {}, LSL #{}]",
r1.show_rru(mb_rru),
r2.show_rru(mb_rru),
shift,
)
}
&MemArg::RegScaledExtended(r1, r2, ty, op) => {
let shift = shift_for_type(ty);
let size = match op {
ExtendOp::SXTW | ExtendOp::UXTW => InstSize::Size32,
_ => InstSize::Size64,
};
let op = op.show_rru(mb_rru);
format!(
"[{}, {}, {} #{}]",
r1.show_rru(mb_rru),
show_ireg_sized(r2, mb_rru, size),
op,
shift
)
}
&MemArg::Label(ref label) => label.show_rru(mb_rru),
&MemArg::PreIndexed(r, simm9) => format!(
"[{}, {}]!",
r.to_reg().show_rru(mb_rru),
simm9.show_rru(mb_rru)
),
&MemArg::PostIndexed(r, simm9) => format!(
"[{}], {}",
r.to_reg().show_rru(mb_rru),
simm9.show_rru(mb_rru)
),
// Eliminated by `mem_finalize()`.
&MemArg::SPOffset(..) | &MemArg::FPOffset(..) => {
panic!("Unexpected stack-offset mem-arg mode!")
}
}
}
}
impl ShowWithRRU for PairMemArg {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
match self {
&PairMemArg::SignedOffset(reg, simm7) => {
if simm7.value != 0 {
format!("[{}, {}]", reg.show_rru(mb_rru), simm7.show_rru(mb_rru))
} else {
format!("[{}]", reg.show_rru(mb_rru))
}
}
&PairMemArg::PreIndexed(reg, simm7) => format!(
"[{}, {}]!",
reg.to_reg().show_rru(mb_rru),
simm7.show_rru(mb_rru)
),
&PairMemArg::PostIndexed(reg, simm7) => format!(
"[{}], {}",
reg.to_reg().show_rru(mb_rru),
simm7.show_rru(mb_rru)
),
}
}
}
impl ShowWithRRU for Cond {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
let mut s = format!("{:?}", self);
s.make_ascii_lowercase();
s
}
}
impl ShowWithRRU for BranchTarget {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
match self {
&BranchTarget::Block(block) => format!("block{}", block),
&BranchTarget::ResolvedOffset(off) => format!("{}", off),
}
}
}
/// Type used to communicate the operand size of a machine instruction, as AArch64 has 32- and
/// 64-bit variants of many instructions (and integer registers).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum InstSize {
Size32,
Size64,
}
impl InstSize {
/// 32-bit case?
pub fn is32(self) -> bool {
self == InstSize::Size32
}
/// 64-bit case?
pub fn is64(self) -> bool {
self == InstSize::Size64
}
/// Convert from an `is32` boolean flag to an `InstSize`.
pub fn from_is32(is32: bool) -> InstSize {
if is32 {
InstSize::Size32
} else {
InstSize::Size64
}
}
/// Convert from a needed width to the smallest size that fits.
pub fn from_bits<I: Into<usize>>(bits: I) -> InstSize {
let bits: usize = bits.into();
assert!(bits <= 64);
if bits <= 32 {
InstSize::Size32
} else {
InstSize::Size64
}
}
}
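// For example, InstSize::from_bits(32u8) is Size32 while from_bits(33u8) is
// Size64, and from_is32(true) gives Size32 directly.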

File diff suppressed because it is too large


@@ -0,0 +1,752 @@
//! AArch64 ISA definitions: immediate constants.
// Some variants are never constructed, but we still want them as options in the future.
#![allow(dead_code)]
use crate::ir::types::*;
use crate::ir::Type;
use crate::machinst::*;
use regalloc::RealRegUniverse;
use core::convert::TryFrom;
use std::string::String;
/// A signed, scaled 7-bit offset.
#[derive(Clone, Copy, Debug)]
pub struct SImm7Scaled {
/// The value.
pub value: i16,
/// The offset is multiplied by the size of this type.
pub scale_ty: Type,
}
impl SImm7Scaled {
/// Create a SImm7Scaled from a raw offset and the known scale type, if
/// possible.
pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option<SImm7Scaled> {
assert!(scale_ty == I64 || scale_ty == I32);
let scale = scale_ty.bytes();
assert!(scale.is_power_of_two());
let scale = i64::from(scale);
let upper_limit = 63 * scale;
let lower_limit = -(64 * scale);
if value >= lower_limit && value <= upper_limit && (value & (scale - 1)) == 0 {
Some(SImm7Scaled {
value: i16::try_from(value).unwrap(),
scale_ty,
})
} else {
None
}
}
/// Create a zero immediate of this format.
pub fn zero(scale_ty: Type) -> SImm7Scaled {
SImm7Scaled { value: 0, scale_ty }
}
/// Bits for encoding.
pub fn bits(&self) -> u32 {
let ty_bytes: i16 = self.scale_ty.bytes() as i16;
let scaled: i16 = self.value / ty_bytes;
assert!(scaled <= 63 && scaled >= -64);
let scaled: i8 = scaled as i8;
let encoded: u32 = scaled as u32;
encoded & 0x7f
}
}
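// Encoding sketch (illustrative): SImm7Scaled::maybe_from_i64(-16, I64) scales
// -16 by 8 to the field value -2, which `bits()` encodes as 0x7e; the valid
// I64 range is -512 ..= 504 in steps of 8.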
/// A signed 9-bit offset.
#[derive(Clone, Copy, Debug)]
pub struct SImm9 {
/// The value.
pub value: i16,
}
impl SImm9 {
/// Create a signed 9-bit offset from a full-range value, if possible.
pub fn maybe_from_i64(value: i64) -> Option<SImm9> {
if value >= -256 && value <= 255 {
Some(SImm9 {
value: value as i16,
})
} else {
None
}
}
/// Create a zero immediate of this format.
pub fn zero() -> SImm9 {
SImm9 { value: 0 }
}
/// Bits for encoding.
pub fn bits(&self) -> u32 {
(self.value as u32) & 0x1ff
}
}
/// An unsigned, scaled 12-bit offset.
#[derive(Clone, Copy, Debug)]
pub struct UImm12Scaled {
/// The value.
pub value: u16,
/// The offset is multiplied by the size of this type.
pub scale_ty: Type,
}
impl UImm12Scaled {
/// Create a UImm12Scaled from a raw offset and the known scale type, if
/// possible.
pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option<UImm12Scaled> {
let scale = scale_ty.bytes();
assert!(scale.is_power_of_two());
let scale = scale as i64;
let limit = 4095 * scale;
if value >= 0 && value <= limit && (value & (scale - 1)) == 0 {
Some(UImm12Scaled {
value: value as u16,
scale_ty,
})
} else {
None
}
}
/// Create a zero immediate of this format.
pub fn zero(scale_ty: Type) -> UImm12Scaled {
UImm12Scaled { value: 0, scale_ty }
}
/// Encoded bits.
pub fn bits(&self) -> u32 {
(self.value as u32 / self.scale_ty.bytes()) & 0xfff
}
}
/// A shifted immediate value in 'imm12' format: supports 12 bits, shifted
/// left by 0 or 12 places.
#[derive(Clone, Debug)]
pub struct Imm12 {
/// The immediate bits.
pub bits: u16,
/// Whether the immediate bits are shifted left by 12 or not.
pub shift12: bool,
}
impl Imm12 {
/// Compute an Imm12 from raw bits, if possible.
pub fn maybe_from_u64(val: u64) -> Option<Imm12> {
if val == 0 {
Some(Imm12 {
bits: 0,
shift12: false,
})
} else if val <= 0xfff {
Some(Imm12 {
bits: val as u16,
shift12: false,
})
} else if val <= 0xfff_000 && (val & 0xfff == 0) {
Some(Imm12 {
bits: (val >> 12) as u16,
shift12: true,
})
} else {
None
}
}
/// Bits for 2-bit "shift" field in e.g. AddI.
pub fn shift_bits(&self) -> u32 {
if self.shift12 {
0b01
} else {
0b00
}
}
/// Bits for 12-bit "imm" field in e.g. AddI.
pub fn imm_bits(&self) -> u32 {
self.bits as u32
}
}
/// An immediate for logical instructions.
#[derive(Clone, Debug)]
#[cfg_attr(test, derive(PartialEq))]
pub struct ImmLogic {
/// The actual value.
value: u64,
/// `N` flag.
pub n: bool,
/// `R` field: rotate amount.
pub r: u8,
/// `S` field: element size and element bits.
pub s: u8,
}
impl ImmLogic {
/// Compute an ImmLogic from raw bits, if possible.
pub fn maybe_from_u64(value: u64, ty: Type) -> Option<ImmLogic> {
// Note: This function is a port of VIXL's Assembler::IsImmLogical.
if ty != I64 && ty != I32 {
return None;
}
let original_value = value;
let value = if ty == I32 {
// To handle 32-bit logical immediates, the very easiest thing is to repeat
// the input value twice to make a 64-bit word. The correct encoding of that
// as a logical immediate will also be the correct encoding of the 32-bit
// value.
// Avoid making the assumption that the most-significant 32 bits are zero by
// shifting the value left and duplicating it.
let value = value << 32;
value | value >> 32
} else {
value
};
// Logical immediates are encoded using parameters n, imm_s and imm_r using
// the following table:
//
// N imms immr size S R
// 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
// 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
// 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
// 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
// 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
// 0 11110s xxxxxr 2 UInt(s) UInt(r)
// (s bits must not be all set)
//
// A pattern is constructed of size bits, where the least significant S+1 bits
// are set. The pattern is rotated right by R, and repeated across a 32 or
// 64-bit value, depending on destination register width.
//
// Put another way: the basic format of a logical immediate is a single
// contiguous stretch of 1 bits, repeated across the whole word at intervals
// given by a power of 2. To identify them quickly, we first locate the
// lowest stretch of 1 bits, then the next 1 bit above that; that combination
// is different for every logical immediate, so it gives us all the
// information we need to identify the only logical immediate that our input
// could be, and then we simply check if that's the value we actually have.
//
// (The rotation parameter does give the possibility of the stretch of 1 bits
// going 'round the end' of the word. To deal with that, we observe that in
// any situation where that happens the bitwise NOT of the value is also a
// valid logical immediate. So we simply invert the input whenever its low bit
// is set, and then we know that the rotated case can't arise.)
let (value, inverted) = if value & 1 == 1 {
(!value, true)
} else {
(value, false)
};
if value == 0 {
return None;
}
// The basic analysis idea: imagine our input word looks like this.
//
// 0011111000111110001111100011111000111110001111100011111000111110
// c b a
// |<--d-->|
//
// We find the lowest set bit (as an actual power-of-2 value, not its index)
// and call it a. Then we add a to our original number, which wipes out the
// bottommost stretch of set bits and replaces it with a 1 carried into the
// next zero bit. Then we look for the new lowest set bit, which is in
// position b, and subtract it, so now our number is just like the original
// but with the lowest stretch of set bits completely gone. Now we find the
// lowest set bit again, which is position c in the diagram above. Then we'll
// measure the distance d between bit positions a and c (using CLZ), and that
// tells us that the only valid logical immediate that could possibly be equal
// to this number is the one in which a stretch of bits running from a to just
// below b is replicated every d bits.
fn lowest_set_bit(value: u64) -> u64 {
let bit = value.trailing_zeros();
1u64.checked_shl(bit).unwrap_or(0)
}
let a = lowest_set_bit(value);
assert_ne!(0, a);
let value_plus_a = value.wrapping_add(a);
let b = lowest_set_bit(value_plus_a);
let value_plus_a_minus_b = value_plus_a - b;
let c = lowest_set_bit(value_plus_a_minus_b);
let (d, clz_a, out_n, mask) = if c != 0 {
// The general case, in which there is more than one stretch of set bits.
// Compute the repeat distance d, and set up a bitmask covering the basic
// unit of repetition (i.e. a word with the bottom d bits set). Also, in all
// of these cases the N bit of the output will be zero.
let clz_a = a.leading_zeros();
let clz_c = c.leading_zeros();
let d = clz_a - clz_c;
let mask = (1 << d) - 1;
(d, clz_a, 0, mask)
} else {
(64, a.leading_zeros(), 1, u64::max_value())
};
// If the repeat period d is not a power of two, it can't be encoded.
if !d.is_power_of_two() {
return None;
}
if ((b.wrapping_sub(a)) & !mask) != 0 {
// If the bit stretch (b - a) does not fit within the mask derived from the
// repeat period, then fail.
return None;
}
// The only possible option is b - a repeated every d bits. Now we're going to
// actually construct the valid logical immediate derived from that
// specification, and see if it equals our original input.
//
// To repeat a value every d bits, we multiply it by a number of the form
// (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can
// be derived using a table lookup on CLZ(d).
const MULTIPLIERS: [u64; 6] = [
0x0000000000000001,
0x0000000100000001,
0x0001000100010001,
0x0101010101010101,
0x1111111111111111,
0x5555555555555555,
];
let multiplier = MULTIPLIERS[(u64::from(d).leading_zeros() - 57) as usize];
let candidate = b.wrapping_sub(a) * multiplier;
if value != candidate {
// The candidate pattern doesn't match our input value, so fail.
return None;
}
// We have a match! This is a valid logical immediate, so now we have to
// construct the bits and pieces of the instruction encoding that generates
// it.
// Count the set bits in our basic stretch. The special case of clz(0) == -1
// makes the answer come out right for stretches that reach the very top of
// the word (e.g. numbers like 0xffffc00000000000).
let clz_b = if b == 0 {
u32::max_value() // -1
} else {
b.leading_zeros()
};
let s = clz_a.wrapping_sub(clz_b);
// Decide how many bits to rotate right by, to put the low bit of that basic
// stretch in position a.
let (s, r) = if inverted {
// If we inverted the input right at the start of this function, here's
// where we compensate: the number of set bits becomes the number of clear
// bits, and the rotation count is based on position b rather than position
// a (since b is the location of the 'lowest' 1 bit after inversion).
// Need wrapping for when clz_b is max_value() (for when b == 0).
(d - s, clz_b.wrapping_add(1) & (d - 1))
} else {
(s, (clz_a + 1) & (d - 1))
};
// Now we're done, except for having to encode the S output in such a way that
// it gives both the number of set bits and the length of the repeated
// segment. The s field is encoded like this:
//
// imms size S
// ssssss 64 UInt(ssssss)
// 0sssss 32 UInt(sssss)
// 10ssss 16 UInt(ssss)
// 110sss 8 UInt(sss)
// 1110ss 4 UInt(ss)
// 11110s 2 UInt(s)
//
// So we 'or' (2 * -d) with our computed s to form imms.
let s = ((d * 2).wrapping_neg() | (s - 1)) & 0x3f;
debug_assert!(u8::try_from(r).is_ok());
debug_assert!(u8::try_from(s).is_ok());
Some(ImmLogic {
value: original_value,
n: out_n != 0,
r: r as u8,
s: s as u8,
})
}
pub fn from_raw(value: u64, n: bool, r: u8, s: u8) -> ImmLogic {
ImmLogic { n, r, s, value }
}
/// Returns bits ready for encoding: (N:1, R:6, S:6)
pub fn enc_bits(&self) -> u32 {
((self.n as u32) << 12) | ((self.r as u32) << 6) | (self.s as u32)
}
/// Returns the value that this immediate represents.
pub fn value(&self) -> u64 {
self.value
}
/// Return an immediate for the bitwise-inverted value.
pub fn invert(&self) -> ImmLogic {
// For every ImmLogical immediate, the inverse can also be encoded.
Self::maybe_from_u64(!self.value, I64).unwrap()
}
}
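// Worked examples (values confirmed by the tests below): 1 encodes as
// (N=1, R=0, S=0), a single set bit in a 64-bit element, while the alternating
// pattern 0xaaaa_aaaa_aaaa_aaaa encodes as (N=0, R=1, S=60): a 2-bit element
// with one bit set, rotated by one.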
/// An immediate for shift instructions.
#[derive(Clone, Debug)]
pub struct ImmShift {
/// 6-bit shift amount.
pub imm: u8,
}
impl ImmShift {
/// Create an ImmShift from raw bits, if possible.
pub fn maybe_from_u64(val: u64) -> Option<ImmShift> {
if val < 64 {
Some(ImmShift { imm: val as u8 })
} else {
None
}
}
/// Get the immediate value.
pub fn value(&self) -> u8 {
self.imm
}
}
/// A 16-bit immediate for a MOVZ instruction, with a {0,16,32,48}-bit shift.
#[derive(Clone, Copy, Debug)]
pub struct MoveWideConst {
/// The value.
pub bits: u16,
/// Result is `bits` shifted 16*shift bits to the left.
pub shift: u8,
}
impl MoveWideConst {
/// Construct a MoveWideConst from an arbitrary 64-bit constant if possible.
pub fn maybe_from_u64(value: u64) -> Option<MoveWideConst> {
let mask0 = 0x0000_0000_0000_ffffu64;
let mask1 = 0x0000_0000_ffff_0000u64;
let mask2 = 0x0000_ffff_0000_0000u64;
let mask3 = 0xffff_0000_0000_0000u64;
if value == (value & mask0) {
return Some(MoveWideConst {
bits: (value & mask0) as u16,
shift: 0,
});
}
if value == (value & mask1) {
return Some(MoveWideConst {
bits: ((value >> 16) & mask0) as u16,
shift: 1,
});
}
if value == (value & mask2) {
return Some(MoveWideConst {
bits: ((value >> 32) & mask0) as u16,
shift: 2,
});
}
if value == (value & mask3) {
return Some(MoveWideConst {
bits: ((value >> 48) & mask0) as u16,
shift: 3,
});
}
None
}
pub fn maybe_with_shift(imm: u16, shift: u8) -> Option<MoveWideConst> {
let shift_enc = shift / 16;
if shift_enc > 3 {
None
} else {
Some(MoveWideConst {
bits: imm,
shift: shift_enc,
})
}
}
/// Returns the value that this constant represents.
pub fn value(&self) -> u64 {
(self.bits as u64) << (16 * self.shift)
}
}
impl ShowWithRRU for Imm12 {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
let shift = if self.shift12 { 12 } else { 0 };
let value = u32::from(self.bits) << shift;
format!("#{}", value)
}
}
impl ShowWithRRU for SImm7Scaled {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.value)
}
}
impl ShowWithRRU for SImm9 {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.value)
}
}
impl ShowWithRRU for UImm12Scaled {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.value)
}
}
impl ShowWithRRU for ImmLogic {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.value())
}
}
impl ShowWithRRU for ImmShift {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.imm)
}
}
impl ShowWithRRU for MoveWideConst {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
if self.shift == 0 {
format!("#{}", self.bits)
} else {
format!("#{}, LSL #{}", self.bits, self.shift * 16)
}
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn imm_logical_test() {
assert_eq!(None, ImmLogic::maybe_from_u64(0, I64));
assert_eq!(None, ImmLogic::maybe_from_u64(u64::max_value(), I64));
assert_eq!(
Some(ImmLogic {
value: 1,
n: true,
r: 0,
s: 0
}),
ImmLogic::maybe_from_u64(1, I64)
);
assert_eq!(
Some(ImmLogic {
value: 2,
n: true,
r: 63,
s: 0
}),
ImmLogic::maybe_from_u64(2, I64)
);
assert_eq!(None, ImmLogic::maybe_from_u64(5, I64));
assert_eq!(None, ImmLogic::maybe_from_u64(11, I64));
assert_eq!(
Some(ImmLogic {
value: 248,
n: true,
r: 61,
s: 4
}),
ImmLogic::maybe_from_u64(248, I64)
);
assert_eq!(None, ImmLogic::maybe_from_u64(249, I64));
assert_eq!(
Some(ImmLogic {
value: 1920,
n: true,
r: 57,
s: 3
}),
ImmLogic::maybe_from_u64(1920, I64)
);
assert_eq!(
Some(ImmLogic {
value: 0x7ffe,
n: true,
r: 63,
s: 13
}),
ImmLogic::maybe_from_u64(0x7ffe, I64)
);
assert_eq!(
Some(ImmLogic {
value: 0x30000,
n: true,
r: 48,
s: 1
}),
ImmLogic::maybe_from_u64(0x30000, I64)
);
assert_eq!(
Some(ImmLogic {
value: 0x100000,
n: true,
r: 44,
s: 0
}),
ImmLogic::maybe_from_u64(0x100000, I64)
);
assert_eq!(
Some(ImmLogic {
value: u64::max_value() - 1,
n: true,
r: 63,
s: 62
}),
ImmLogic::maybe_from_u64(u64::max_value() - 1, I64)
);
assert_eq!(
Some(ImmLogic {
value: 0xaaaaaaaaaaaaaaaa,
n: false,
r: 1,
s: 60
}),
ImmLogic::maybe_from_u64(0xaaaaaaaaaaaaaaaa, I64)
);
assert_eq!(
Some(ImmLogic {
value: 0x8181818181818181,
n: false,
r: 1,
s: 49
}),
ImmLogic::maybe_from_u64(0x8181818181818181, I64)
);
assert_eq!(
Some(ImmLogic {
value: 0xffc3ffc3ffc3ffc3,
n: false,
r: 10,
s: 43
}),
ImmLogic::maybe_from_u64(0xffc3ffc3ffc3ffc3, I64)
);
assert_eq!(
Some(ImmLogic {
value: 0x100000001,
n: false,
r: 0,
s: 0
}),
ImmLogic::maybe_from_u64(0x100000001, I64)
);
assert_eq!(
Some(ImmLogic {
value: 0x1111111111111111,
n: false,
r: 0,
s: 56
}),
ImmLogic::maybe_from_u64(0x1111111111111111, I64)
);
for n in 0..2 {
let types = if n == 0 { vec![I64, I32] } else { vec![I64] };
for s in 0..64 {
for r in 0..64 {
let imm = get_logical_imm(n, s, r);
for &ty in &types {
match ImmLogic::maybe_from_u64(imm, ty) {
Some(ImmLogic { value, .. }) => {
assert_eq!(imm, value);
ImmLogic::maybe_from_u64(!value, ty).unwrap();
}
None => assert_eq!(0, imm),
};
}
}
}
}
}
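// Extra sketch tests (not part of the original commit) covering the shifted
// immediate formats defined above.
#[test]
fn imm12_and_movewide() {
// 0x345 fits directly; 0x345_000 needs the 12-bit left shift.
assert_eq!(Imm12::maybe_from_u64(0x345).unwrap().shift_bits(), 0b00);
assert_eq!(Imm12::maybe_from_u64(0x345_000).unwrap().shift_bits(), 0b01);
// 0x12345 has nonzero bits in both halves, so it is not encodable.
assert!(Imm12::maybe_from_u64(0x12345).is_none());
// A MOVZ immediate: 0x1_0000 is halfword 1 shifted left by 16 bits.
let mw = MoveWideConst::maybe_from_u64(0x1_0000).unwrap();
assert_eq!((mw.bits, mw.shift), (1, 1));
assert_eq!(mw.value(), 0x1_0000);
}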
// Repeat a value that has `width` bits across a 64-bit value.
fn repeat(value: u64, width: u64) -> u64 {
let mut result = value & ((1 << width) - 1);
let mut i = width;
while i < 64 {
result |= result << i;
i *= 2;
}
result
}
// Get the logical immediate, from the encoding N/R/S bits.
fn get_logical_imm(n: u32, s: u32, r: u32) -> u64 {
// An integer is constructed from the n, imm_s and imm_r bits according to
// the following table:
//
// N imms immr size S R
// 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
// 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
// 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
// 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
// 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
// 0 11110s xxxxxr 2 UInt(s) UInt(r)
// (s bits must not be all set)
//
// A pattern is constructed of size bits, where the least significant S+1
// bits are set. The pattern is rotated right by R, and repeated across a
// 64-bit value.
if n == 1 {
if s == 0x3f {
return 0;
}
let bits = (1u64 << (s + 1)) - 1;
bits.rotate_right(r)
} else {
if (s >> 1) == 0x1f {
return 0;
}
let mut width = 0x20;
while width >= 0x2 {
if (s & width) == 0 {
let mask = width - 1;
if (s & mask) == mask {
return 0;
}
let bits = (1u64 << ((s & mask) + 1)) - 1;
return repeat(bits.rotate_right(r & mask), width.into());
}
width >>= 1;
}
unreachable!();
}
}
}

File diff suppressed because it is too large


@@ -0,0 +1,270 @@
//! AArch64 ISA definitions: registers.
use crate::isa::aarch64::inst::InstSize;
use crate::machinst::*;
use regalloc::{RealRegUniverse, Reg, RegClass, RegClassInfo, Writable, NUM_REG_CLASSES};
use std::string::{String, ToString};
//=============================================================================
// Registers, the Universe thereof, and printing
#[rustfmt::skip]
const XREG_INDICES: [u8; 31] = [
// X0 - X7
32, 33, 34, 35, 36, 37, 38, 39,
// X8 - X14
40, 41, 42, 43, 44, 45, 46,
// X15
59,
// X16, X17
47, 48,
// X18
60,
// X19 - X28
49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
// X29
61,
// X30
62,
];
const ZERO_REG_INDEX: u8 = 63;
const SP_REG_INDEX: u8 = 64;
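// Example mapping: xreg(0) has universe index 32 (v0..v31 occupy 0..31), while
// x15 and x18 land at 59 and 60 in the non-allocatable tail, alongside fp (61),
// lr (62), xzr (63) and sp (64). See create_reg_universe() below.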
/// Get a reference to an X-register (integer register).
pub fn xreg(num: u8) -> Reg {
assert!(num < 31);
Reg::new_real(
RegClass::I64,
/* enc = */ num,
/* index = */ XREG_INDICES[num as usize],
)
}
/// Get a writable reference to an X-register.
pub fn writable_xreg(num: u8) -> Writable<Reg> {
Writable::from_reg(xreg(num))
}
/// Get a reference to a V-register (vector/FP register).
pub fn vreg(num: u8) -> Reg {
assert!(num < 32);
Reg::new_real(RegClass::V128, /* enc = */ num, /* index = */ num)
}
/// Get a writable reference to a V-register.
pub fn writable_vreg(num: u8) -> Writable<Reg> {
Writable::from_reg(vreg(num))
}
/// Get a reference to the zero-register.
pub fn zero_reg() -> Reg {
// This should be the same as what xreg(31) returns, except that
// we use the special index into the register index space.
Reg::new_real(
RegClass::I64,
/* enc = */ 31,
/* index = */ ZERO_REG_INDEX,
)
}
/// Get a writable reference to the zero-register (this discards a result).
pub fn writable_zero_reg() -> Writable<Reg> {
Writable::from_reg(zero_reg())
}
/// Get a reference to the stack-pointer register.
pub fn stack_reg() -> Reg {
// XSP (stack) and XZR (zero) are logically different registers which have
// the same hardware encoding, and whose meaning, in real aarch64
// instructions, is context-dependent. For convenience of
// universe-construction and for correct printing, we make them be two
// different real registers.
Reg::new_real(
RegClass::I64,
/* enc = */ 31,
/* index = */ SP_REG_INDEX,
)
}
/// Get a writable reference to the stack-pointer register.
pub fn writable_stack_reg() -> Writable<Reg> {
Writable::from_reg(stack_reg())
}
/// Get a reference to the link register (x30).
pub fn link_reg() -> Reg {
xreg(30)
}
/// Get a writable reference to the link register.
pub fn writable_link_reg() -> Writable<Reg> {
Writable::from_reg(link_reg())
}
/// Get a reference to the frame pointer (x29).
pub fn fp_reg() -> Reg {
xreg(29)
}
/// Get a writable reference to the frame pointer.
pub fn writable_fp_reg() -> Writable<Reg> {
Writable::from_reg(fp_reg())
}
/// Get a reference to the "spill temp" register. This register is used to
/// compute the address of a spill slot when a direct offset addressing mode from
/// FP is not sufficient (+/- 2^11 words). We exclude this register from regalloc
/// and reserve it for this purpose for simplicity; otherwise we need a
/// multi-stage analysis where we first determine how many spill slots we have,
/// then perhaps remove the reg from the pool and recompute regalloc.
pub fn spilltmp_reg() -> Reg {
xreg(15)
}
/// Get a writable reference to the spilltmp reg.
pub fn writable_spilltmp_reg() -> Writable<Reg> {
Writable::from_reg(spilltmp_reg())
}
/// Create the register universe for AArch64.
pub fn create_reg_universe() -> RealRegUniverse {
let mut regs = vec![];
let mut allocable_by_class = [None; NUM_REG_CLASSES];
// Numbering scheme: we put V-regs first, then X-regs. The X-regs exclude
// several registers: x15 (the spill temporary), x18 (globally reserved for
// platform-specific purposes), x29 (frame pointer), x30 (link register), and
// x31 (stack pointer or zero register, depending on context).
let v_reg_base = 0u8; // in contiguous real-register index space
let v_reg_count = 32;
for i in 0u8..v_reg_count {
let reg = Reg::new_real(
RegClass::V128,
/* enc = */ i,
/* index = */ v_reg_base + i,
)
.to_real_reg();
let name = format!("v{}", i);
regs.push((reg, name));
}
let v_reg_last = v_reg_base + v_reg_count - 1;
// Add the X registers. N.B.: the order here must match the order implied
// by XREG_INDICES, ZERO_REG_INDEX, and SP_REG_INDEX above.
let x_reg_base = 32u8; // in contiguous real-register index space
let mut x_reg_count = 0;
for i in 0u8..32u8 {
// See above for excluded registers.
if i == 15 || i == 18 || i == 29 || i == 30 || i == 31 {
continue;
}
let reg = Reg::new_real(
RegClass::I64,
/* enc = */ i,
/* index = */ x_reg_base + x_reg_count,
)
.to_real_reg();
let name = format!("x{}", i);
regs.push((reg, name));
x_reg_count += 1;
}
let x_reg_last = x_reg_base + x_reg_count - 1;
allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
first: x_reg_base as usize,
last: x_reg_last as usize,
suggested_scratch: Some(XREG_INDICES[13] as usize),
});
allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
first: v_reg_base as usize,
last: v_reg_last as usize,
suggested_scratch: Some(/* V31: */ 31),
});
// Other regs, not available to the allocator.
let allocable = regs.len();
regs.push((xreg(15).to_real_reg(), "x15".to_string()));
regs.push((xreg(18).to_real_reg(), "x18".to_string()));
regs.push((fp_reg().to_real_reg(), "fp".to_string()));
regs.push((link_reg().to_real_reg(), "lr".to_string()));
regs.push((zero_reg().to_real_reg(), "xzr".to_string()));
regs.push((stack_reg().to_real_reg(), "sp".to_string()));
// FIXME JRS 2020Feb06: unfortunately this pushes the number of real regs
// to 65, which is potentially inconvenient from a compiler performance
// standpoint. We could possibly drop back to 64 by "losing" a vector
// register in future.
// Assert sanity: the indices in the register structs must match their
// actual indices in the array.
for (i, reg) in regs.iter().enumerate() {
assert_eq!(i, reg.0.get_index());
}
RealRegUniverse {
regs,
allocable,
allocable_by_class,
}
}
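// Illustrative check, not part of this change: the universe holds 32 vector
// registers plus 27 allocable integer registers (x15, x18, x29, x30 and x31
// are withheld), then the six reserved entries, 65 real registers in total
// as the FIXME above notes.
#[cfg(test)]
#[test]
fn universe_shape() {
let universe = create_reg_universe();
assert_eq!(universe.allocable, 32 + 27);
assert_eq!(universe.regs.len(), 65);
}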
/// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show
/// its name at the 32-bit size.
pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSize) -> String {
let mut s = reg.show_rru(mb_rru);
if reg.get_class() != RegClass::I64 || !size.is32() {
// We can't do any better.
return s;
}
if reg.is_real() {
// Change (eg) "x42" into "w42"; the early return above already
// guarantees an I64-classed reg in a 32-bit role.
if s.starts_with("x") {
s = "w".to_string() + &s[1..];
}
} else {
// Virtual regs have no prefix-structured name; add a "w" suffix instead.
s.push('w');
}
s
}
/// Show a vector register when its use as a 32-bit or 64-bit float is known.
pub fn show_freg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSize) -> String {
let mut s = reg.show_rru(mb_rru);
if reg.get_class() != RegClass::V128 {
return s;
}
let prefix = if size.is32() { "s" } else { "d" };
// Only real registers carry a "v"-prefixed name; guard so a virtual
// register's name is not mangled.
if s.starts_with("v") {
s.replace_range(0..1, prefix);
}
s
}
/// Show a vector register used in a scalar context.
pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>) -> String {
let mut s = reg.show_rru(mb_rru);
if reg.get_class() != RegClass::V128 {
// We can't do any better.
return s;
}
if reg.is_real() {
// Change (eg) "v0" into "d0"; the early return above already
// guarantees a V128-classed reg.
if s.starts_with("v") {
s.replace_range(0..1, "d");
}
} else {
// Virtual regs have no prefix-structured name; add a "d" suffix instead.
s.push('d');
}
s
}
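// Usage sketch, not part of this change (assumes the `InstSize::Size32`
// variant defined elsewhere in this backend): real registers are renamed by
// prefix, so an integer register prints as "w5" in a 32-bit role and a
// vector register as "s3"/"d3" in scalar float roles.
#[cfg(test)]
#[test]
fn sized_register_names() {
let rru = create_reg_universe();
assert_eq!(show_ireg_sized(xreg(5), Some(&rru), InstSize::Size32), "w5");
assert_eq!(show_freg_sized(vreg(3), Some(&rru), InstSize::Size32), "s3");
assert_eq!(show_vreg_scalar(vreg(3), Some(&rru)), "d3");
}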

File diff suppressed because it is too large


@@ -0,0 +1,220 @@
//! ARM 64-bit Instruction Set Architecture.
use crate::ir::Function;
use crate::isa::Builder as IsaBuilder;
use crate::machinst::{
compile, MachBackend, MachCompileResult, ShowWithRRU, TargetIsaAdapter, VCode,
};
use crate::result::CodegenResult;
use crate::settings;
use alloc::boxed::Box;
use regalloc::RealRegUniverse;
use target_lexicon::{Aarch64Architecture, Architecture, Triple};
// New backend:
mod abi;
mod inst;
mod lower;
use inst::create_reg_universe;
/// An AArch64 backend.
pub struct AArch64Backend {
triple: Triple,
flags: settings::Flags,
}
impl AArch64Backend {
/// Create a new AArch64 backend with the given (shared) flags.
pub fn new_with_flags(triple: Triple, flags: settings::Flags) -> AArch64Backend {
AArch64Backend { triple, flags }
}
fn compile_vcode(&self, func: &Function, flags: &settings::Flags) -> VCode<inst::Inst> {
// This performs lowering to VCode, register-allocates the code, computes
// block layout and finalizes branches. The result is ready for binary emission.
let abi = Box::new(abi::AArch64ABIBody::new(func));
compile::compile::<AArch64Backend>(func, self, abi, flags)
}
}
impl MachBackend for AArch64Backend {
fn compile_function(
&self,
func: &Function,
want_disasm: bool,
) -> CodegenResult<MachCompileResult> {
let flags = self.flags();
let vcode = self.compile_vcode(func, flags);
let sections = vcode.emit();
let frame_size = vcode.frame_size();
let disasm = if want_disasm {
Some(vcode.show_rru(Some(&create_reg_universe())))
} else {
None
};
Ok(MachCompileResult {
sections,
frame_size,
disasm,
})
}
fn name(&self) -> &'static str {
"aarch64"
}
fn triple(&self) -> Triple {
self.triple.clone()
}
fn flags(&self) -> &settings::Flags {
&self.flags
}
fn reg_universe(&self) -> RealRegUniverse {
create_reg_universe()
}
}
/// Create a new `isa::Builder`.
pub fn isa_builder(triple: Triple) -> IsaBuilder {
assert!(triple.architecture == Architecture::Aarch64(Aarch64Architecture::Aarch64));
IsaBuilder {
triple,
setup: settings::builder(),
constructor: |triple, shared_flags, _| {
let backend = AArch64Backend::new_with_flags(triple, shared_flags);
Box::new(TargetIsaAdapter::new(backend))
},
}
}
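// Usage sketch, not part of this change (assumes the existing
// `isa::Builder::finish(shared_flags)` entry point): looking up an aarch64
// triple yields this builder, and finishing it produces a `TargetIsa`
// adapter around the new backend.
#[cfg(test)]
#[test]
fn builder_constructs_backend() {
use core::str::FromStr;
let builder = isa_builder(Triple::from_str("aarch64").unwrap());
let _isa = builder.finish(settings::Flags::new(settings::builder()));
}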
#[cfg(test)]
mod test {
use super::*;
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::types::*;
use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
use crate::isa::CallConv;
use crate::settings;
use crate::settings::Configurable;
use core::str::FromStr;
use target_lexicon::Triple;
#[test]
fn test_compile_function() {
let name = ExternalName::testcase("test0");
let mut sig = Signature::new(CallConv::SystemV);
sig.params.push(AbiParam::new(I32));
sig.returns.push(AbiParam::new(I32));
let mut func = Function::with_name_signature(name, sig);
let bb0 = func.dfg.make_block();
let arg0 = func.dfg.append_block_param(bb0, I32);
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(bb0);
let v0 = pos.ins().iconst(I32, 0x1234);
let v1 = pos.ins().iadd(arg0, v0);
pos.ins().return_(&[v1]);
let mut shared_flags = settings::builder();
shared_flags.set("opt_level", "none").unwrap();
let backend = AArch64Backend::new_with_flags(
Triple::from_str("aarch64").unwrap(),
settings::Flags::new(shared_flags),
);
let sections = backend.compile_function(&mut func, false).unwrap().sections;
let code = &sections.sections[0].data;
// stp x29, x30, [sp, #-16]!
// mov x29, sp
// mov x1, #0x1234
// add w0, w0, w1
// mov sp, x29
// ldp x29, x30, [sp], #16
// ret
let golden = vec![
0xfd, 0x7b, 0xbf, 0xa9, 0xfd, 0x03, 0x00, 0x91, 0x81, 0x46, 0x82, 0xd2, 0x00, 0x00,
0x01, 0x0b, 0xbf, 0x03, 0x00, 0x91, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6,
];
assert_eq!(code, &golden);
}
#[test]
fn test_branch_lowering() {
let name = ExternalName::testcase("test0");
let mut sig = Signature::new(CallConv::SystemV);
sig.params.push(AbiParam::new(I32));
sig.returns.push(AbiParam::new(I32));
let mut func = Function::with_name_signature(name, sig);
let bb0 = func.dfg.make_block();
let arg0 = func.dfg.append_block_param(bb0, I32);
let bb1 = func.dfg.make_block();
let bb2 = func.dfg.make_block();
let bb3 = func.dfg.make_block();
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(bb0);
let v0 = pos.ins().iconst(I32, 0x1234);
let v1 = pos.ins().iadd(arg0, v0);
pos.ins().brnz(v1, bb1, &[]);
pos.ins().jump(bb2, &[]);
pos.insert_block(bb1);
pos.ins().brnz(v1, bb2, &[]);
pos.ins().jump(bb3, &[]);
pos.insert_block(bb2);
let v2 = pos.ins().iadd(v1, v0);
pos.ins().brnz(v2, bb2, &[]);
pos.ins().jump(bb1, &[]);
pos.insert_block(bb3);
let v3 = pos.ins().isub(v1, v0);
pos.ins().return_(&[v3]);
let mut shared_flags = settings::builder();
shared_flags.set("opt_level", "none").unwrap();
let backend = AArch64Backend::new_with_flags(
Triple::from_str("aarch64").unwrap(),
settings::Flags::new(shared_flags),
);
let result = backend
.compile_function(&mut func, /* want_disasm = */ false)
.unwrap();
let code = &result.sections.sections[0].data;
// stp x29, x30, [sp, #-16]!
// mov x29, sp
// mov x1, x0
// mov x0, #0x1234
// add w1, w1, w0
// mov w2, w1
// cbz x2, ...
// mov w2, w1
// cbz x2, ...
// sub w0, w1, w0
// mov sp, x29
// ldp x29, x30, [sp], #16
// ret
// add w2, w1, w0
// mov w2, w2
// cbnz x2, ... <---- compound branch (cond / uncond)
// b ... <----
let golden = vec![
0xfd, 0x7b, 0xbf, 0xa9, 0xfd, 0x03, 0x00, 0x91, 0xe1, 0x03, 0x00, 0xaa, 0x80, 0x46,
0x82, 0xd2, 0x21, 0x00, 0x00, 0x0b, 0xe2, 0x03, 0x01, 0x2a, 0xe2, 0x00, 0x00, 0xb4,
0xe2, 0x03, 0x01, 0x2a, 0xa2, 0x00, 0x00, 0xb5, 0x20, 0x00, 0x00, 0x4b, 0xbf, 0x03,
0x00, 0x91, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6, 0x22, 0x00, 0x00, 0x0b,
0xe2, 0x03, 0x02, 0x2a, 0xc2, 0xff, 0xff, 0xb5, 0xf7, 0xff, 0xff, 0x17,
];
assert_eq!(code, &golden);
}
}


@@ -1,31 +0,0 @@
//! ARM 64 ABI implementation.
use super::registers::{FPR, GPR};
use crate::ir;
use crate::isa::RegClass;
use crate::regalloc::RegisterSet;
use crate::settings as shared_settings;
use alloc::borrow::Cow;
/// Legalize `sig`.
pub fn legalize_signature(
_sig: &mut Cow<ir::Signature>,
_flags: &shared_settings::Flags,
_current: bool,
) {
unimplemented!()
}
/// Get register class for a type appearing in a legalized signature.
pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
if ty.is_int() {
GPR
} else {
FPR
}
}
/// Get the set of allocatable registers for `func`.
pub fn allocatable_registers(_func: &ir::Function) -> RegisterSet {
unimplemented!()
}


@@ -1,8 +0,0 @@
//! Emitting binary ARM64 machine code.
use crate::binemit::{bad_encoding, CodeSink};
use crate::ir::{Function, Inst};
use crate::isa::TargetIsa;
use crate::regalloc::RegDiversions;
include!(concat!(env!("OUT_DIR"), "/binemit-arm64.rs"));


@@ -1,10 +0,0 @@
//! Encoding tables for ARM64 ISA.
use crate::ir;
use crate::isa;
use crate::isa::constraints::*;
use crate::isa::enc_tables::*;
use crate::isa::encoding::RecipeSizing;
include!(concat!(env!("OUT_DIR"), "/encoding-arm64.rs"));
include!(concat!(env!("OUT_DIR"), "/legalize-arm64.rs"));


@@ -1,132 +0,0 @@
//! ARM 64-bit Instruction Set Architecture.
mod abi;
mod binemit;
mod enc_tables;
mod registers;
pub mod settings;
use super::super::settings as shared_settings;
#[cfg(feature = "testing_hooks")]
use crate::binemit::CodeSink;
use crate::binemit::{emit_function, MemoryCodeSink};
use crate::ir;
use crate::isa::enc_tables::{lookup_enclist, Encodings};
use crate::isa::Builder as IsaBuilder;
use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
use crate::regalloc;
use alloc::borrow::Cow;
use alloc::boxed::Box;
use core::fmt;
use target_lexicon::Triple;
#[allow(dead_code)]
struct Isa {
triple: Triple,
shared_flags: shared_settings::Flags,
isa_flags: settings::Flags,
}
/// Get an ISA builder for creating ARM64 targets.
pub fn isa_builder(triple: Triple) -> IsaBuilder {
IsaBuilder {
triple,
setup: settings::builder(),
constructor: isa_constructor,
}
}
fn isa_constructor(
triple: Triple,
shared_flags: shared_settings::Flags,
builder: shared_settings::Builder,
) -> Box<dyn TargetIsa> {
Box::new(Isa {
triple,
isa_flags: settings::Flags::new(&shared_flags, builder),
shared_flags,
})
}
impl TargetIsa for Isa {
fn name(&self) -> &'static str {
"arm64"
}
fn triple(&self) -> &Triple {
&self.triple
}
fn flags(&self) -> &shared_settings::Flags {
&self.shared_flags
}
fn register_info(&self) -> RegInfo {
registers::INFO.clone()
}
fn encoding_info(&self) -> EncInfo {
enc_tables::INFO.clone()
}
fn legal_encodings<'a>(
&'a self,
func: &'a ir::Function,
inst: &'a ir::InstructionData,
ctrl_typevar: ir::Type,
) -> Encodings<'a> {
lookup_enclist(
ctrl_typevar,
inst,
func,
&enc_tables::LEVEL1_A64[..],
&enc_tables::LEVEL2[..],
&enc_tables::ENCLISTS[..],
&enc_tables::LEGALIZE_ACTIONS[..],
&enc_tables::RECIPE_PREDICATES[..],
&enc_tables::INST_PREDICATES[..],
self.isa_flags.predicate_view(),
)
}
fn legalize_signature(&self, sig: &mut Cow<ir::Signature>, current: bool) {
abi::legalize_signature(sig, &self.shared_flags, current)
}
fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
abi::regclass_for_abi_type(ty)
}
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
abi::allocatable_registers(func)
}
#[cfg(feature = "testing_hooks")]
fn emit_inst(
&self,
func: &ir::Function,
inst: ir::Inst,
divert: &mut regalloc::RegDiversions,
sink: &mut dyn CodeSink,
) {
binemit::emit_inst(func, inst, divert, sink, self)
}
fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
emit_function(func, binemit::emit_inst, sink, self)
}
fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC {
ir::condcodes::IntCC::UnsignedLessThan
}
fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
ir::condcodes::IntCC::UnsignedGreaterThanOrEqual
}
}
impl fmt::Display for Isa {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
}
}


@@ -1,39 +0,0 @@
//! ARM64 register descriptions.
use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
include!(concat!(env!("OUT_DIR"), "/registers-arm64.rs"));
#[cfg(test)]
mod tests {
use super::INFO;
use crate::isa::RegUnit;
use alloc::string::{String, ToString};
#[test]
fn unit_encodings() {
assert_eq!(INFO.parse_regunit("x0"), Some(0));
assert_eq!(INFO.parse_regunit("x31"), Some(31));
assert_eq!(INFO.parse_regunit("v0"), Some(32));
assert_eq!(INFO.parse_regunit("v31"), Some(63));
assert_eq!(INFO.parse_regunit("x32"), None);
assert_eq!(INFO.parse_regunit("v32"), None);
}
#[test]
fn unit_names() {
fn uname(ru: RegUnit) -> String {
INFO.display_regunit(ru).to_string()
}
assert_eq!(uname(0), "%x0");
assert_eq!(uname(1), "%x1");
assert_eq!(uname(31), "%x31");
assert_eq!(uname(32), "%v0");
assert_eq!(uname(33), "%v1");
assert_eq!(uname(63), "%v31");
assert_eq!(uname(64), "%nzcv");
assert_eq!(uname(65), "%INVALID65");
}
}


@@ -1,9 +0,0 @@
//! ARM64 Settings.
use crate::settings::{self, detail, Builder};
use core::fmt;
// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs`. This file contains a
// public `Flags` struct with an impl for all of the settings defined in
// `cranelift-codegen/meta/src/isa/arm64/mod.rs`.
include!(concat!(env!("OUT_DIR"), "/settings-arm64.rs"));


@@ -48,6 +48,7 @@ pub use crate::isa::call_conv::CallConv;
pub use crate::isa::constraints::{
BranchRange, ConstraintKind, OperandConstraint, RecipeConstraints,
};
pub use crate::isa::enc_tables::Encodings;
pub use crate::isa::encoding::{base_size, EncInfo, Encoding};
pub use crate::isa::registers::{regs_overlap, RegClass, RegClassIndex, RegInfo, RegUnit};
pub use crate::isa::stack::{StackBase, StackBaseMask, StackRef};
@@ -55,9 +56,9 @@ pub use crate::isa::stack::{StackBase, StackBaseMask, StackRef};
use crate::binemit;
use crate::flowgraph;
use crate::ir;
use crate::isa::enc_tables::Encodings;
#[cfg(feature = "unwind")]
use crate::isa::fde::RegisterMappingError;
#[cfg(feature = "unwind")]
use crate::machinst::MachBackend;
use crate::regalloc;
use crate::result::CodegenResult;
use crate::settings;
@@ -83,7 +84,7 @@ pub mod fde;
mod arm32;
#[cfg(feature = "arm64")]
-mod arm64;
+mod aarch64;
mod call_conv;
mod constraints;
@@ -92,6 +93,9 @@ mod encoding;
pub mod registers;
mod stack;
#[cfg(test)]
mod test_utils;
/// Returns a builder that can create a corresponding `TargetIsa`
/// or `Err(LookupError::SupportDisabled)` if not enabled.
macro_rules! isa_builder {
@@ -116,7 +120,7 @@ pub fn lookup(triple: Triple) -> Result<Builder, LookupError> {
isa_builder!(x86, "x86", triple)
}
Architecture::Arm { .. } => isa_builder!(arm32, "arm32", triple),
-Architecture::Aarch64 { .. } => isa_builder!(arm64, "arm64", triple),
+Architecture::Aarch64 { .. } => isa_builder!(aarch64, "arm64", triple),
_ => Err(LookupError::Unsupported),
}
}
@@ -402,6 +406,11 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
// No-op by default
Ok(())
}
/// Get the new-style MachBackend, if this is an adapter around one.
fn get_mach_backend(&self) -> Option<&dyn MachBackend> {
None
}
}
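// Usage sketch, not part of this change (the free function is hypothetical,
// for illustration): callers probe for the new-style backend and fall back
// to the legacy TargetIsa emission path when it is absent.
fn try_mach_compile(
isa: &dyn TargetIsa,
func: &ir::Function,
want_disasm: bool,
) -> Option<CodegenResult<crate::machinst::MachCompileResult>> {
isa.get_mach_backend()
.map(|backend| backend.compile_function(func, want_disasm))
}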
impl Debug for &dyn TargetIsa {


@@ -0,0 +1,88 @@
// This is unused when no platforms with the new backend are enabled.
#![allow(dead_code)]
use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc};
use crate::ir::Value;
use crate::ir::{ConstantOffset, ExternalName, Function, JumpTable, Opcode, SourceLoc, TrapCode};
use crate::isa::TargetIsa;
use alloc::vec::Vec;
use std::string::String;
pub struct TestCodeSink {
bytes: Vec<u8>,
}
impl TestCodeSink {
/// Create a new TestCodeSink.
pub fn new() -> TestCodeSink {
TestCodeSink { bytes: vec![] }
}
/// Return the code emitted to this sink as a hex string.
pub fn stringify(&self) -> String {
// This is pretty lame, but whatever ..
use std::fmt::Write;
let mut s = String::with_capacity(self.bytes.len() * 2);
for b in &self.bytes {
write!(&mut s, "{:02X}", b).unwrap();
}
s
}
}
impl CodeSink for TestCodeSink {
fn offset(&self) -> CodeOffset {
self.bytes.len() as CodeOffset
}
fn put1(&mut self, x: u8) {
self.bytes.push(x);
}
fn put2(&mut self, x: u16) {
self.bytes.push((x >> 0) as u8);
self.bytes.push((x >> 8) as u8);
}
fn put4(&mut self, mut x: u32) {
for _ in 0..4 {
self.bytes.push(x as u8);
x >>= 8;
}
}
fn put8(&mut self, mut x: u64) {
for _ in 0..8 {
self.bytes.push(x as u8);
x >>= 8;
}
}
fn reloc_block(&mut self, _rel: Reloc, _block_offset: CodeOffset) {}
fn reloc_external(
&mut self,
_srcloc: SourceLoc,
_rel: Reloc,
_name: &ExternalName,
_addend: Addend,
) {
}
fn reloc_constant(&mut self, _rel: Reloc, _constant_offset: ConstantOffset) {}
fn reloc_jt(&mut self, _rel: Reloc, _jt: JumpTable) {}
fn trap(&mut self, _code: TrapCode, _srcloc: SourceLoc) {}
fn begin_jumptables(&mut self) {}
fn begin_rodata(&mut self) {}
fn end_codegen(&mut self) {}
fn add_stackmap(&mut self, _val_list: &[Value], _func: &Function, _isa: &dyn TargetIsa) {}
fn add_call_site(&mut self, _opcode: Opcode, _srcloc: SourceLoc) {}
}